{ "id": "cuda/cublas-alloc-failed-cublaslt", "signature": "RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling cublasLtMatmulAlgoGetHeuristic", "signature_zh": "运行时错误：CUDA 错误：调用 cublasLtMatmulAlgoGetHeuristic 时返回 CUBLAS_STATUS_ALLOC_FAILED", "regex": "CUBLAS_STATUS_ALLOC_FAILED when calling cublasLtMatmulAlgoGetHeuristic", "domain": "cuda", "category": "resource_error", "subcategory": null, "root_cause": "cuBLASLt heuristic search for matrix multiplication algorithms fails due to insufficient GPU memory, often caused by memory fragmentation or large workspace requirements.", "root_cause_type": "generic", "root_cause_zh": "cuBLASLt 的矩阵乘法算法启发式搜索失败，原因是 GPU 内存不足，通常由内存碎片化或工作区需求过大引起。", "versions": [ { "version": "CUDA 12.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "CUDA 12.3", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "cuBLASLt 0.8", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "PyTorch 2.2", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" } ], "os_specific": {}, "dead_ends": [ { "action": "Increase the batch size to improve GPU utilization", "why_fails": "Larger batch sizes increase memory usage, exacerbating the allocation failure. The error occurs due to insufficient memory for workspace, not underutilization.", "fail_rate": 0.95, "condition": "", "sources": [] }, { "action": "Set the CUBLAS_WORKSPACE_CONFIG environment variable to change the cuBLAS workspace size", "why_fails": "The workspace config controls the internal buffer size but doesn't directly fix allocation failures during heuristic search; it may even increase memory pressure.", "fail_rate": 0.8, "condition": "", "sources": [] }, { "action": "Disable cuBLASLt so that matrix multiplications fall back to legacy cuBLAS", "why_fails": "cuBLASLt is often the default for certain operations; disabling it may fall back to cuBLAS but can cause performance degradation or different errors.", "fail_rate": 0.5, "condition": "", "sources": [] } ], "workarounds": [ { "action": "Reduce memory usage by lowering batch size or using gradient checkpointing. For example, in PyTorch: output = torch.utils.checkpoint.checkpoint(model, *inputs, use_reentrant=False). This frees memory for the heuristic allocation.", "success_rate": 0.8, "how": "Reduce memory usage by lowering batch size or using gradient checkpointing. For example, in PyTorch: output = torch.utils.checkpoint.checkpoint(model, *inputs, use_reentrant=False). This frees memory for the heuristic allocation.", "condition": "", "sources": [] }, { "action": "Clear GPU cache before the operation: torch.cuda.empty_cache(). This can defragment memory and free up contiguous blocks needed for cuBLASLt workspace.", "success_rate": 0.7, "how": "Clear GPU cache before the operation: torch.cuda.empty_cache(). This can defragment memory and free up contiguous blocks needed for cuBLASLt workspace.", "condition": "", "sources": [] }, { "action": "Restrict the number of algorithms searched by setting the environment variable: CUBLASLT_HEURISTIC_MODE=1. This reduces workspace allocation size during the heuristic search.", "success_rate": 0.75, "how": "Restrict the number of algorithms searched by setting the environment variable: CUBLASLT_HEURISTIC_MODE=1. This reduces workspace allocation size during the heuristic search.", "condition": "", "sources": [] } ], "workarounds_zh": [ "通过降低批量大小或使用梯度检查点来减少内存使用。例如，在 PyTorch 中：output = torch.utils.checkpoint.checkpoint(model, *inputs, use_reentrant=False)。这会释放内存供启发式分配使用。", "在操作前清除 GPU 缓存：torch.cuda.empty_cache()。这可以整理内存碎片，释放 cuBLASLt 工作区所需的连续块。", "通过设置环境变量 CUBLASLT_HEURISTIC_MODE=1 限制搜索的算法数量。这会减少启发式搜索期间的工作区分配大小。" ], "transition_graph": { "leads_to": [], "preceded_by": [], "frequently_confused_with": [] }, "official_doc_url": "https://docs.nvidia.com/cuda/cublaslt/index.html#cublasltmatmulalgogetheuristic", "official_doc_section": null, "error_code": "CUBLAS_STATUS_ALLOC_FAILED", "verification_tier": "ai_generated", "confidence": 0.86, "fix_success_rate": 0.78, "resolvable": "true", "first_seen": "2024-01-15", "last_confirmed": "2024-06-01", "last_updated": "2024-06-01", "evidence_count": 1, "tags": [], "locale": "en", "aliases": [] }