{
  "id": "cuda/cudagraph-memory-pool-mismatch",
  "signature": "RuntimeError: CUDA error: the graph update was not performed because it included changes which violated constraints specific to instantiated CUDA graphs (cudaErrorGraphUpdateViolation) - memory pool mismatch",
  "signature_zh": "运行时错误：CUDA 错误：未执行图更新，因为它包含违反实例化 CUDA 图特定约束的更改 (cudaErrorGraphUpdateViolation) - 内存池不匹配",
  "regex": "cudaErrorGraphUpdateViolation.*memory pool",
  "domain": "cuda",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "When an instantiated CUDA graph that was created by `cudaGraphInstantiate` with `cudaGraphInstantiateFlagAutoFreeOnLaunch` is updated in place (for example via `cudaGraphExecUpdate`), the new graph nodes reference a different memory pool than the original instantiation, which is not allowed because the graph's memory pool is fixed after instantiation.",
  "root_cause_type": "generic",
  "root_cause_zh": "当对通过 `cudaGraphInstantiate` 和 `cudaGraphInstantiateFlagAutoFreeOnLaunch` 创建的已实例化 CUDA 图进行原地更新（例如通过 `cudaGraphExecUpdate`）时，新图节点引用了与原始实例化不同的内存池，这是不允许的，因为图的内存池在实例化后是固定的。",
  "versions": [
    {
      "version": "CUDA 12.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "CUDA 12.3",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "PyTorch 2.3.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "NVIDIA Driver 545.23",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Calling `torch.cuda.empty_cache()` before graph capture to free memory",
      "why_fails": "Emptying the cache does not change the memory pool assignment; the graph will still capture from the default pool, and the update will still fail if the pool changes.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
      "action": "Using `cudaGraphInstantiate` without the `AutoFreeOnLaunch` flag",
      "why_fails": "While this avoids the pool mismatch error, it disables automatic memory management and may lead to memory leaks or performance degradation.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    },
    {
      "action": "Rebuilding the entire graph from scratch instead of updating",
      "why_fails": "Rebuilding works but is inefficient; the error is about update constraints, not about graph creation itself.",
      "fail_rate": 0.6,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Ensure that all tensors used in the graph are allocated from the same graph memory pool: obtain a pool token with `torch.cuda.graph_pool_handle()` (or `graph.pool()` from an existing capture) and pass it as `pool=` to `torch.cuda.graph(...)` or `graph.capture_begin(...)` for every capture. Alternatively, avoid using `cudaGraphInstantiateFlagAutoFreeOnLaunch` and manage memory manually.",
      "success_rate": 0.85,
      "how": "Ensure that all tensors used in the graph are allocated from the same graph memory pool: obtain a pool token with `torch.cuda.graph_pool_handle()` (or `graph.pool()` from an existing capture) and pass it as `pool=` to `torch.cuda.graph(...)` or `graph.capture_begin(...)` for every capture. Alternatively, avoid using `cudaGraphInstantiateFlagAutoFreeOnLaunch` and manage memory manually.",
      "condition": "",
      "sources": []
    },
    {
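      "action": "Instead of updating the graph, capture a new graph each time the memory pool changes. Use `torch.cuda.CUDAGraph` and call `graph.replay()` only if the input shapes and memory pools are unchanged. If they change, create a new `torch.cuda.CUDAGraph()` instance and recapture into it (for example with `with torch.cuda.graph(new_graph):`), because a `CUDAGraph` object that already holds a capture cannot begin a second one.",
      "success_rate": 0.9,
      "how": "Instead of updating the graph, capture a new graph each time the memory pool changes. Use `torch.cuda.CUDAGraph` and call `graph.replay()` only if the input shapes and memory pools are unchanged. If they change, create a new `torch.cuda.CUDAGraph()` instance and recapture into it (for example with `with torch.cuda.graph(new_graph):`), because a `CUDAGraph` object that already holds a capture cannot begin a second one.",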
      "condition": "",
      "sources": []
    },
    {
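      "action": "Enable graph debugging to locate the offending node: call `graph.enable_debug_mode()` on the `torch.cuda.CUDAGraph` before capture and `graph.debug_dump(path)` after capture (CUDA's `cudaGraphDebugDotPrint` underneath) to write the captured graph as a DOT file, then inspect its memory allocation nodes to identify which node causes the mismatch.",
      "success_rate": 0.75,
      "how": "Enable graph debugging to locate the offending node: call `graph.enable_debug_mode()` on the `torch.cuda.CUDAGraph` before capture and `graph.debug_dump(path)` after capture (CUDA's `cudaGraphDebugDotPrint` underneath) to write the captured graph as a DOT file, then inspect its memory allocation nodes to identify which node causes the mismatch.",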
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "确保图中使用的所有张量来自同一个图内存池：通过 `torch.cuda.graph_pool_handle()`（或已有捕获的 `graph.pool()`）获取内存池令牌，并在每次捕获时将其作为 `pool=` 传给 `torch.cuda.graph(...)` 或 `graph.capture_begin(...)`。或者，避免使用 `cudaGraphInstantiateFlagAutoFreeOnLaunch` 并手动管理内存。",
    "不要更新图，而是在内存池更改时每次捕获一个新图。使用 `torch.cuda.CUDAGraph`，并且仅在输入形状和内存池未更改时调用 `graph.replay()`。如果它们更改，请创建新的 `torch.cuda.CUDAGraph()` 实例并在其中重新捕获（例如使用 `with torch.cuda.graph(new_graph):`），因为已持有捕获结果的 `CUDAGraph` 对象无法再次开始捕获。",
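    "启用图调试以定位出问题的节点：在捕获前对 `torch.cuda.CUDAGraph` 调用 `graph.enable_debug_mode()`，捕获后调用 `graph.debug_dump(path)`（底层为 CUDA 的 `cudaGraphDebugDotPrint`）将捕获的图写为 DOT 文件，然后检查其中的内存分配节点，找出导致不匹配的节点。"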
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html",
  "official_doc_section": null,
  "error_code": "cudaErrorGraphUpdateViolation",
  "verification_tier": "ai_generated",
  "confidence": 0.86,
  "fix_success_rate": 0.8,
  "resolvable": "true",
  "first_seen": "2024-03-10",
  "last_confirmed": "2024-06-01",
  "last_updated": "2025-04-20",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}