{
  "id": "pytorch/dataloader-pin-memory-cuda",
  "signature": "RuntimeError: DataLoader worker (pid 12345) pin_memory(): CUDA error: invalid device context",
  "signature_zh": "RuntimeError：DataLoader工作进程（pid 12345）pin_memory()：CUDA错误：无效的设备上下文",
  "regex": "DataLoader worker.*pin_memory.*CUDA error: invalid device context",
  "domain": "pytorch",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "With pin_memory=True and num_workers>0, the DataLoader starts worker processes via fork; if the parent process has already initialized CUDA, the forked children attempt to use CUDA with an invalid device context, because CUDA does not support fork after initialization.",
  "root_cause_type": "generic",
  "root_cause_zh": "使用pin_memory=True的DataLoader会生成工作进程，这些进程尝试从fork的子进程中使用CUDA，导致设备上下文无效，因为CUDA在初始化后不支持fork。",
  "versions": [
    {
      "version": "torch>=1.6.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "CUDA>=11.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
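      "action": "Hide CUDA devices from the DataLoader workers (e.g. by setting CUDA_VISIBLE_DEVICES to an empty string)",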
      "why_fails": "Workers need CUDA context for pin_memory; hiding devices breaks the purpose.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
      "action": "Increase num_workers in the DataLoader",
      "why_fails": "More workers increase chance of CUDA context issues.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Set multiprocessing start method to 'spawn': torch.multiprocessing.set_start_method('spawn', force=True) before creating DataLoader",
      "success_rate": 0.95,
      "how": "Set multiprocessing start method to 'spawn': torch.multiprocessing.set_start_method('spawn', force=True) before creating DataLoader",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use pin_memory=False in DataLoader and manually move tensors to GPU after loading",
      "success_rate": 0.85,
      "how": "Use pin_memory=False in DataLoader and manually move tensors to GPU after loading",
      "condition": "",
      "sources": []
    },
    {
      "action": "Move CUDA initialization after DataLoader creation or use single-process loading with num_workers=0",
      "success_rate": 0.9,
      "how": "Move CUDA initialization after DataLoader creation or use single-process loading with num_workers=0",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "将多进程启动方式设置为 'spawn'：在创建DataLoader之前调用 torch.multiprocessing.set_start_method('spawn', force=True)",
    "在DataLoader中使用 pin_memory=False，并在加载后手动将张量移动到GPU",
    "将CUDA初始化移到DataLoader创建之后，或使用 num_workers=0 进行单进程加载"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://pytorch.org/docs/stable/data.html#multi-process-data-loading",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.86,
  "fix_success_rate": 0.88,
  "resolvable": "true",
  "first_seen": "2023-02-10",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}