{ "id": "cuda/tensor-core-unsupported-arch", "signature": "RuntimeError: Tensor Cores are not supported on the current device architecture (compute capability < 7.0)", "signature_zh": "运行时错误：当前设备架构不支持张量核心（计算能力 < 7.0）", "regex": "Tensor Cores are not supported on the current device architecture", "domain": "cuda", "category": "type_error", "subcategory": null, "root_cause": "The GPU compute capability is below 7.0 (Volta), which is required for Tensor Core operations like mixed-precision training with float16 or bfloat16.", "root_cause_type": "generic", "root_cause_zh": "GPU 计算能力低于 7.0（Volta），这是张量核心操作（如使用 float16 或 bfloat16 的混合精度训练）所必需的。", "versions": [ { "version": "CUDA 11.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "CUDA 12.1", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "CUDA 12.4", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" } ], "os_specific": {}, "dead_ends": [ { "action": "Upgrade the CUDA toolkit to a newer version", "why_fails": "Upgrading the CUDA toolkit does not add Tensor Core support to older GPU architectures.", "fail_rate": 0.9, "condition": "", "sources": [] }, { "action": "Set the environment variable CUDA_LAUNCH_BLOCKING=1", "why_fails": "Setting environment variable CUDA_LAUNCH_BLOCKING=1 does not enable Tensor Cores; it only serializes kernel launches.", "fail_rate": 0.8, "condition": "", "sources": [] } ], "workarounds": [ { "action": "Disable Tensor Core usage by setting torch.backends.cuda.matmul.allow_tf32 = False and torch.backends.cudnn.allow_tf32 = False, and use float32 precision instead of float16. For example: model.half() should be replaced with model.float(); and in training, use torch.amp.autocast(device_type='cuda', enabled=False).", "success_rate": 0.9, "how": "Disable Tensor Core usage by setting torch.backends.cuda.matmul.allow_tf32 = False and torch.backends.cudnn.allow_tf32 = False, and use float32 precision instead of float16. For example: model.half() should be replaced with model.float(); and in training, use torch.amp.autocast(device_type='cuda', enabled=False).", "condition": "", "sources": [] }, { "action": "If Tensor Cores are essential, migrate to a GPU with compute capability >= 7.0 (e.g., Tesla V100, RTX 20 series, or newer). Check your GPU's compute capability at https://developer.nvidia.com/cuda-gpus.", "success_rate": 0.95, "how": "If Tensor Cores are essential, migrate to a GPU with compute capability >= 7.0 (e.g., Tesla V100, RTX 20 series, or newer). Check your GPU's compute capability at https://developer.nvidia.com/cuda-gpus.", "condition": "", "sources": [] } ], "workarounds_zh": [ "通过设置 torch.backends.cuda.matmul.allow_tf32 = False 和 torch.backends.cudnn.allow_tf32 = False 来禁用张量核心，并使用 float32 精度代替 float16。例如：将 model.half() 替换为 model.float()；训练时使用 torch.amp.autocast(device_type='cuda', enabled=False)。", "如果必须使用张量核心，请迁移到计算能力 >= 7.0 的 GPU（例如 Tesla V100、RTX 20 系列或更新型号）。可在 https://developer.nvidia.com/cuda-gpus 查询 GPU 的计算能力。"
], "transition_graph": { "leads_to": [], "preceded_by": [], "frequently_confused_with": [] }, "official_doc_url": "https://docs.nvidia.com/deeplearning/cudnn/api/index.html#cudnnSetTensorNdDescriptor", "official_doc_section": null, "error_code": "CUDNN_STATUS_ARCH_MISMATCH", "verification_tier": "ai_generated", "confidence": 0.86, "fix_success_rate": 0.9, "resolvable": "true", "first_seen": "2024-01-20", "last_confirmed": "2024-06-01", "last_updated": "2024-06-01", "evidence_count": 1, "tags": [], "locale": "en", "aliases": [] }