{
  "id": "huggingface/accelerate-multi-gpu-device-map-conflict",
  "signature": "RuntimeError: You cannot set both `device_map` and `device` when using the `accelerate` launcher with multiple GPUs.",
  "signature_zh": "RuntimeError: 在使用 `accelerate` 启动器且多 GPU 时，不能同时设置 `device_map` 和 `device`。",
  "regex": "You cannot set both.*device_map.*device.*when using the.*accelerate.*launcher with multiple GPUs",
  "domain": "huggingface",
  "category": "config_error",
  "subcategory": null,
  "root_cause": "When using accelerate launcher with multiple GPUs, the device is automatically managed; providing both device_map and device arguments causes a conflict in device placement.",
  "root_cause_type": "generic",
  "root_cause_zh": "在使用 accelerate 启动器且多 GPU 时，设备是自动管理的；同时提供 device_map 和 device 参数会导致设备放置冲突。",
  "versions": [
    {
      "version": "accelerate>=0.20.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "transformers>=4.30.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "The error is raised explicitly; both arguments are passed and cause a conflict in the model loading logic.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "With device=0, the model is placed only on GPU 0, wasting other GPUs and potentially causing OOM on GPU 0.",
      "fail_rate": 0.5,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "This bypasses accelerate's device management entirely, causing the model to be on a single GPU and not utilizing multi-GPU parallelism.",
      "fail_rate": 0.4,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Remove the `device` argument and only use `device_map='auto'` (or a custom dict) when loading the model. Example: `model = AutoModelForCausalLM.from_pretrained('model-name', device_map='auto')`. The accelerate launcher will handle multi-GPU placement automatically.",
      "success_rate": 0.95,
      "how": "Remove the `device` argument and only use `device_map='auto'` (or a custom dict) when loading the model. Example: `model = AutoModelForCausalLM.from_pretrained('model-name', device_map='auto')`. The accelerate launcher will handle multi-GPU placement automatically.",
      "condition": "",
      "sources": []
    },
    {
      "action": "If you must set a specific device, do not use the accelerate launcher; instead, use `with torch.device('cuda:0'): model = ...` and manually wrap with DataParallel or DistributedDataParallel.",
      "success_rate": 0.8,
      "how": "If you must set a specific device, do not use the accelerate launcher; instead, use `with torch.device('cuda:0'): model = ...` and manually wrap with DataParallel or DistributedDataParallel.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use `accelerate launch` without any device_map or device argument in the script; let accelerate handle device placement via its config file (e.g., `--num_processes=4`).",
      "success_rate": 0.9,
      "how": "Use `accelerate launch` without any device_map or device argument in the script; let accelerate handle device placement via its config file (e.g., `--num_processes=4`).",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Remove the `device` argument and only use `device_map='auto'` (or a custom dict) when loading the model. Example: `model = AutoModelForCausalLM.from_pretrained('model-name', device_map='auto')`. The accelerate launcher will handle multi-GPU placement automatically.",
    "If you must set a specific device, do not use the accelerate launcher; instead, use `with torch.device('cuda:0'): model = ...` and manually wrap with DataParallel or DistributedDataParallel.",
    "Use `accelerate launch` without any device_map or device argument in the script; let accelerate handle device placement via its config file (e.g., `--num_processes=4`)."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://huggingface.co/docs/accelerate/en/package_reference/launcher",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.86,
  "fix_success_rate": 0.9,
  "resolvable": "true",
  "first_seen": "2023-05-10",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}