{
  "id": "tensorflow/tf-data-cache-corruption",
  "signature": "InternalError: TF_DATA cache file '/tmp/tf_data_cache_abc123' is corrupted: expected header size 1024 but got 512",
  "signature_zh": "InternalError：TF_DATA缓存文件'/tmp/tf_data_cache_abc123'已损坏：期望头部大小1024，但实际为512",
  "regex": "InternalError: TF_DATA cache file '.*' is corrupted: expected header size \\d+ but got \\d+",
  "domain": "tensorflow",
  "category": "data_error",
  "subcategory": null,
  "root_cause": "The tf.data service cache file was partially written due to an abrupt process termination or disk full condition, causing a mismatch in the expected header size.",
  "root_cause_type": "generic",
  "root_cause_zh": "tf.data服务缓存文件因进程意外终止或磁盘空间不足而部分写入，导致预期头部大小与实际不符。",
  "versions": [
    {
      "version": "tensorflow>=2.15.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "python>=3.10",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Increasing the size of the cache by setting tf.data.experimental.service.CACHE_MAX_SIZE.",
      "why_fails": "The error is about file corruption, not capacity; a larger cache does not fix a corrupted file header.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
      "action": "Reinstalling TensorFlow to fix the cache mechanism.",
      "why_fails": "The corruption is specific to the cache file on disk, not the TensorFlow installation; reinstalling does not remove the corrupted file.",
      "fail_rate": 0.95,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Delete the corrupted cache file manually: `rm /tmp/tf_data_cache_abc123` (or the path in the error), then re-run the pipeline. The cache will be regenerated.",
      "success_rate": 0.95,
      "how": "Delete the corrupted cache file manually: `rm /tmp/tf_data_cache_abc123` (or the path in the error), then re-run the pipeline. The cache will be regenerated.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Disable caching for the dataset by removing the `.cache()` call or setting `cache=''` in the dataset creation, and rely on in-memory caching instead.",
      "success_rate": 0.9,
      "how": "Disable caching for the dataset by removing the `.cache()` call or setting `cache=''` in the dataset creation, and rely on in-memory caching instead.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Delete the corrupted cache file manually: `rm /tmp/tf_data_cache_abc123` (or the path in the error), then re-run the pipeline. The cache will be regenerated.",
    "Disable caching for the dataset by removing the `.cache()` call or setting `cache=''` in the dataset creation, and rely on in-memory caching instead."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://www.tensorflow.org/api_docs/python/tf/data/Dataset#cache",
  "official_doc_section": null,
  "error_code": "TDC",
  "verification_tier": "ai_generated",
  "confidence": 0.83,
  "fix_success_rate": 0.95,
  "resolvable": "true",
  "first_seen": "2024-05-20",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}