{
  "id": "huggingface/gradient-checkpointing-disable-error",
  "signature": "RuntimeError: gradient_checkpointing requires use_cache=False",
  "signature_zh": "运行时错误：梯度检查点要求use_cache=False",
  "regex": "gradient_checkpointing requires use_cache=False",
  "domain": "huggingface",
  "category": "config_error",
  "subcategory": null,
  "root_cause": "Gradient checkpointing is incompatible with the key-value cache used during generation; use_cache must be disabled to enable gradient checkpointing.",
  "root_cause_type": "generic",
  "root_cause_zh": "梯度检查点与生成过程中使用的键值缓存不兼容；必须禁用use_cache才能启用梯度检查点。",
  "versions": [
    {
      "version": "transformers>=4.25.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Keep use_cache=True while enabling gradient checkpointing for training.",
      "why_fails": "The two features are mutually exclusive during training; use_cache is only for inference.",
      "fail_rate": 0.98,
      "condition": "",
      "sources": []
    },
    {
      "action": "Manually delete the use_cache attribute from the model configuration to bypass the check.",
      "why_fails": "The model configuration is checked at runtime; manual deletion does not bypass the check.",
      "fail_rate": 0.95,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Disable use_cache before enabling gradient checkpointing.",
      "success_rate": 0.95,
      "how": "Set model.config.use_cache = False before calling model.gradient_checkpointing_enable().",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use the Trainer with gradient_checkpointing=True argument, which handles this automatically.",
      "success_rate": 0.9,
      "how": "Set gradient_checkpointing=True in the TrainingArguments passed to Trainer instead of enabling gradient checkpointing manually on the model.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "在启用梯度检查点之前先禁用use_cache。",
    "使用Trainer并设置gradient_checkpointing=True参数，它会自动处理此问题。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://huggingface.co/docs/transformers/en/perf_train_gpu_one#gradient-checkpointing",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.9,
  "fix_success_rate": 0.93,
  "resolvable": "true",
  "first_seen": "2023-03-05",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}