{
  "id": "cuda/illegal-memory-access-after-free",
  "signature": "RuntimeError: CUDA error: an illegal memory access was encountered after a cudaFree call on a tensor still in use",
  "signature_zh": "运行时错误：CUDA错误：在释放仍在使用的张量后遇到非法内存访问",
  "regex": "illegal memory access was encountered after a cudaFree",
  "domain": "cuda",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "A tensor or buffer was freed via cudaFree or torch.cuda.empty_cache while a kernel or asynchronous operation still holds a reference, leading to a use-after-free on the GPU.",
  "root_cause_type": "generic",
  "root_cause_zh": "张量或缓冲区通过cudaFree或torch.cuda.empty_cache被释放，而内核或异步操作仍持有引用，导致GPU上的释放后使用。",
  "versions": [
    {
      "version": "CUDA 12.2",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "PyTorch 2.2.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "NVIDIA Driver 550.54.14",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "Synchronization may hide the bug but does not fix the root cause; the free still happens before all uses complete.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Memory size is unrelated; the error is about lifetime management, not capacity.",
      "fail_rate": 0.95,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Ensure all CUDA streams are synchronized before freeing tensors. Example: torch.cuda.synchronize() before calling del tensor or torch.cuda.empty_cache(). For custom kernels, use cudaStreamSynchronize on the relevant stream.",
      "success_rate": 0.85,
      "how": "Ensure all CUDA streams are synchronized before freeing tensors. Example: torch.cuda.synchronize() before calling del tensor or torch.cuda.empty_cache(). For custom kernels, use cudaStreamSynchronize on the relevant stream.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use reference counting or weak references to track tensor lifetimes. In PyTorch, keep a strong reference to the tensor until the kernel completes, e.g., by storing it in a list until the next iteration.",
      "success_rate": 0.82,
      "how": "Use reference counting or weak references to track tensor lifetimes. In PyTorch, keep a strong reference to the tensor until the kernel completes, e.g., by storing it in a list until the next iteration.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Ensure all CUDA streams are synchronized before freeing tensors. Example: torch.cuda.synchronize() before calling del tensor or torch.cuda.empty_cache(). For custom kernels, use cudaStreamSynchronize on the relevant stream.",
    "Use reference counting or weak references to track tensor lifetimes. In PyTorch, keep a strong reference to the tensor until the kernel completes, e.g., by storing it in a list until the next iteration."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://docs.nvidia.com/cuda/cuda-runtime-api/api-sync-behavior.html",
  "official_doc_section": null,
  "error_code": "cudaErrorIllegalAddress",
  "verification_tier": "ai_generated",
  "confidence": 0.82,
  "fix_success_rate": 0.79,
  "resolvable": "true",
  "first_seen": "2025-01-20",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}