{ "id": "cuda/illegal-memory-access-after-free", "signature": "RuntimeError: CUDA error: an illegal memory access was encountered after a cudaFree call on a tensor still in use", "signature_zh": "运行时错误：CUDA错误：在释放仍在使用的张量后遇到非法内存访问", "regex": "illegal memory access was encountered after a cudaFree", "domain": "cuda", "category": "runtime_error", "subcategory": null, "root_cause": "A tensor or buffer was freed via cudaFree or torch.cuda.empty_cache while a kernel or asynchronous operation still holds a reference, leading to a use-after-free on the GPU.", "root_cause_type": "generic", "root_cause_zh": "张量或缓冲区通过cudaFree或torch.cuda.empty_cache被释放，而内核或异步操作仍持有引用，导致GPU上的释放后使用。", "versions": [ { "version": "CUDA 12.2", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "PyTorch 2.2.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "NVIDIA Driver 550.54.14", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" } ], "os_specific": {}, "dead_ends": [ { "action": "Add synchronization calls (e.g., torch.cuda.synchronize()) after the failing operation without changing when the tensor is freed.", "why_fails": "Synchronization may hide the bug but does not fix the root cause; the free still happens before all uses complete.", "fail_rate": 0.7, "condition": "", "sources": [] }, { "action": "Increase available GPU memory (e.g., reduce the batch size or switch to a GPU with more memory).", "why_fails": "Memory size is unrelated; the error is about lifetime management, not capacity.", "fail_rate": 0.95, "condition": "", "sources": [] } ], "workarounds": [ { "action": "Ensure all CUDA streams are synchronized before freeing tensors. Example: torch.cuda.synchronize() before calling del tensor or torch.cuda.empty_cache(). For custom kernels, use cudaStreamSynchronize on the relevant stream.", "success_rate": 0.85, "how": "Ensure all CUDA streams are synchronized before freeing tensors. Example: torch.cuda.synchronize() before calling del tensor or torch.cuda.empty_cache(). For custom kernels, use cudaStreamSynchronize on the relevant stream.", "condition": "", "sources": [] }, { "action": "Use reference counting or weak references to track tensor lifetimes. In PyTorch, keep a strong reference to the tensor until the kernel completes, e.g., by storing it in a list until the next iteration.", "success_rate": 0.82, "how": "Use reference counting or weak references to track tensor lifetimes. In PyTorch, keep a strong reference to the tensor until the kernel completes, e.g., by storing it in a list until the next iteration.", "condition": "", "sources": [] } ], "workarounds_zh": [ "确保在释放张量之前同步所有CUDA流。例如：在调用 del tensor 或 torch.cuda.empty_cache() 之前先调用 torch.cuda.synchronize()。对于自定义内核，请在相关流上使用 cudaStreamSynchronize。", "使用引用计数或弱引用来跟踪张量的生命周期。在PyTorch中，在内核完成之前保持对张量的强引用，例如将其保存在列表中直到下一次迭代。" ], "transition_graph": { "leads_to": [], "preceded_by": [], "frequently_confused_with": [] }, "official_doc_url": "https://docs.nvidia.com/cuda/cuda-runtime-api/api-sync-behavior.html", "official_doc_section": null, "error_code": "cudaErrorIllegalAddress", "verification_tier": "ai_generated", "confidence": 0.82, "fix_success_rate": 0.79, "resolvable": "true", "first_seen": "2025-01-20", "last_confirmed": "2024-06-01", "last_updated": "2024-06-01", "evidence_count": 1, "tags": [], "locale": "en", "aliases": [] }