{ "id": "cuda/cudnn-rnn-double-backward", "signature": "RuntimeError: cuDNN error: CUDNN_STATUS_NOT_SUPPORTED when calling cudnnRNNBackwardData_v8 with training mode enabled and double backward", "signature_zh": "RuntimeError: 在训练模式下调用 cudnnRNNBackwardData_v8 并启用双重反向传播时出现 cuDNN 错误：CUDNN_STATUS_NOT_SUPPORTED", "regex": "CUDNN_STATUS_NOT_SUPPORTED.*cudnnRNNBackwardData", "domain": "cuda", "category": "runtime_error", "subcategory": null, "root_cause": "cuDNN RNN backward operations (especially backward data with double backward) are not supported for certain RNN modes (e.g., LSTM with projection) or when the input tensor requires grad and the graph is retained; cuDNN v8 restricts double backward support to specific configurations.", "root_cause_type": "generic", "root_cause_zh": "cuDNN RNN 反向传播操作（特别是反向数据与双重反向传播）在特定 RNN 模式（如带投影的 LSTM）下不受支持，或者当输入张量需要梯度且计算图被保留时；cuDNN v8 将双重反向传播支持限制为特定配置。", "versions": [ { "version": "cuDNN 8.9.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "cuDNN 8.9.5", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "PyTorch 2.1.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "PyTorch 2.2.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" } ], "os_specific": {}, "dead_ends": [ { "action": "Upgrade to a newer cuDNN 8.x release expecting double backward support to be added.", "why_fails": "Increasing cuDNN version does not add double backward support for all RNN modes; the limitation is architectural in cuDNN v8.", "fail_rate": 0.8, "condition": "", "sources": [] }, { "action": "Set torch.backends.cudnn.enabled=False to fall back to the non-cuDNN RNN implementation.", "why_fails": "Setting torch.backends.cudnn.enabled=False forces a fallback to non-cuDNN RNN but may cause performance regression or different numerical behavior; double backward still fails if the custom RNN does not support it.", "fail_rate": 0.7, "condition": "", "sources": [] }, { 
"action": "Call backward with retain_graph=True without detaching intermediate activations.", "why_fails": "Using retain_graph=True without detaching intermediate activations does not prevent the error; the double backward path still triggers the unsupported cuDNN routine.", "fail_rate": 0.9, "condition": "", "sources": [] } ], "workarounds": [ { "action": "Switch to a non-projected LSTM (e.g., remove projection layer) or use GRU instead, which has broader double backward support. Example: change nn.LSTM(input_size, hidden_size, proj_size=hidden_size) to nn.LSTM(input_size, hidden_size).", "success_rate": 0.85, "how": "Switch to a non-projected LSTM (e.g., remove projection layer) or use GRU instead, which has broader double backward support. Example: change nn.LSTM(input_size, hidden_size, proj_size=hidden_size) to nn.LSTM(input_size, hidden_size).", "condition": "", "sources": [] }, { "action": "Use torch.autograd.grad with create_graph=False for the backward pass, and manually implement double backward using torch.autograd.Function with a custom backward that does not rely on cuDNN RNN backward data.", "success_rate": 0.75, "how": "Use torch.autograd.grad with create_graph=False for the backward pass, and manually implement double backward using torch.autograd.Function with a custom backward that does not rely on cuDNN RNN backward data.", "condition": "", "sources": [] } ], "workarounds_zh": [ "改用不带投影的 LSTM（例如移除投影层），或改用 GRU，后者对双重反向传播的支持更广。示例：将 nn.LSTM(input_size, hidden_size, proj_size=hidden_size) 改为 nn.LSTM(input_size, hidden_size)。", "在反向传播时使用 torch.autograd.grad 并设置 create_graph=False，然后通过 torch.autograd.Function 手动实现双重反向传播，其自定义 backward 不依赖 cuDNN RNN 的反向数据计算。" 
], "transition_graph": { "leads_to": [], "preceded_by": [], "frequently_confused_with": [] }, "official_doc_url": "https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnRNNBackwardData", "official_doc_section": null, "error_code": "CUDNN_STATUS_NOT_SUPPORTED (5)", "verification_tier": "ai_generated", "confidence": 0.82, "fix_success_rate": 0.78, "resolvable": "partial", "first_seen": "2023-10-25", "last_confirmed": "2024-06-01", "last_updated": "2024-06-01", "evidence_count": 1, "tags": [], "locale": "en", "aliases": [] }