{
  "id": "cuda/cublas-api-error-on-shutdown",
  "signature": "CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling cublasCreate_v2",
  "signature_zh": "CUDA 错误：调用 cublasCreate_v2 时 CUBLAS_STATUS_ALLOC_FAILED",
  "regex": "CUBLAS_STATUS_ALLOC_FAILED when calling cublasCreate_v2",
  "domain": "cuda",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "cuBLAS handle allocation fails due to insufficient GPU memory or driver state corruption, often triggered during rapid context creation/destruction or after a previous CUDA error left the device in an inconsistent state.",
  "root_cause_type": "generic",
  "root_cause_zh": "cuBLAS 句柄分配失败，通常是由于 GPU 内存不足或驱动程序状态损坏，在快速创建/销毁上下文或在之前的 CUDA 错误使设备处于不一致状态后触发。",
  "versions": [
    {
      "version": "CUDA 11.8",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "CUDA 12.1",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "cuBLAS 11.11",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "cuBLAS 12.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "The previous CUDA context may still be alive, and residual allocations prevent new handle creation; a full GPU reset or process kill is needed.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "The error is not about insufficient memory for tensors but about handle allocation; larger batch sizes exacerbate memory pressure.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "The issue is often runtime state corruption, not a missing library; driver version mismatch can cause other errors, but this specific error persists.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Reset the CUDA device by calling `torch.cuda.reset_peak_memory_stats()` and `torch.cuda.empty_cache()` before creating new cuBLAS handles. Then reinitialize the model in a fresh context.",
      "success_rate": 0.7,
      "how": "Reset the CUDA device by calling `torch.cuda.reset_peak_memory_stats()` and `torch.cuda.empty_cache()` before creating new cuBLAS handles. Then reinitialize the model in a fresh context.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Kill all processes using the GPU with `nvidia-smi` and restart the application. For persistent issues, reboot the machine to fully reset the GPU driver state.",
      "success_rate": 0.9,
      "how": "Kill all processes using the GPU with `nvidia-smi` and restart the application. For persistent issues, reboot the machine to fully reset the GPU driver state.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Reset the CUDA device by calling `torch.cuda.reset_peak_memory_stats()` and `torch.cuda.empty_cache()` before creating new cuBLAS handles. Then reinitialize the model in a fresh context.",
    "Kill all processes using the GPU with `nvidia-smi` and restart the application. For persistent issues, reboot the machine to fully reset the GPU driver state."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://docs.nvidia.com/cuda/cublas/index.html#cublascreate",
  "official_doc_section": null,
  "error_code": "CUBLAS_STATUS_ALLOC_FAILED",
  "verification_tier": "ai_generated",
  "confidence": 0.85,
  "fix_success_rate": 0.75,
  "resolvable": "partial",
  "first_seen": "2023-03-15",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}