{
  "id": "elasticsearch/translog-corruption-during-recovery",
  "signature": "TranslogCorruptedException: translog corruption detected at position 67890 while recovering index [my_index] shard [0]",
  "signature_zh": "TranslogCorruptedException：在恢复索引 [my_index] 分片 [0] 时在位置 67890 检测到事务日志损坏",
  "regex": "TranslogCorruptedException.*translog corruption detected at position.*while recovering.*shard",
  "domain": "elasticsearch",
  "category": "system_error",
  "subcategory": null,
  "root_cause": "The transaction log file for a shard is corrupted, often due to abrupt node shutdown, disk errors, or filesystem issues, preventing shard recovery.",
  "root_cause_type": "generic",
  "root_cause_zh": "分片的事务日志文件损坏，通常由于节点突然关闭、磁盘错误或文件系统问题导致，阻止分片恢复。",
  "versions": [
    {
      "version": "Elasticsearch 7.16.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Elasticsearch 8.8.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Elasticsearch 8.15.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "This may cause data loss and prevent the shard from recovering at all because Elasticsearch expects a valid translog; the shard may become permanently unassigned.",
      "fail_rate": 0.85,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "If the corrupt shard is the primary, the cluster cannot allocate it, and reindexing from a snapshot may not include recent data not in the snapshot.",
      "fail_rate": 0.6,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Use the Elasticsearch CLI tool `elasticsearch-shard` to truncate the translog. Run: `bin/elasticsearch-shard remove-corrupted-data --index my_index --shard 0`. This removes corrupted translog entries and allows the shard to recover with potential data loss of recent operations.",
      "success_rate": 0.8,
      "how": "Use the Elasticsearch CLI tool `elasticsearch-shard` to truncate the translog. Run: `bin/elasticsearch-shard remove-corrupted-data --index my_index --shard 0`. This removes corrupted translog entries and allows the shard to recover with potential data loss of recent operations.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Restore the shard from a snapshot. If a snapshot exists, delete the corrupt index and restore: `POST /_snapshot/my_repo/my_snapshot/_restore {\"indices\": \"my_index\", \"rename_pattern\": \"my_index\", \"rename_replacement\": \"my_index\"}`. Ensure the snapshot is recent enough.",
      "success_rate": 0.72,
      "how": "Restore the shard from a snapshot. If a snapshot exists, delete the corrupt index and restore: `POST /_snapshot/my_repo/my_snapshot/_restore {\"indices\": \"my_index\", \"rename_pattern\": \"my_index\", \"rename_replacement\": \"my_index\"}`. Ensure the snapshot is recent enough.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Use the Elasticsearch CLI tool `elasticsearch-shard` to truncate the translog. Run: `bin/elasticsearch-shard remove-corrupted-data --index my_index --shard 0`. This removes corrupted translog entries and allows the shard to recover with potential data loss of recent operations.",
    "Restore the shard from a snapshot. If a snapshot exists, delete the corrupt index and restore: `POST /_snapshot/my_repo/my_snapshot/_restore {\"indices\": \"my_index\", \"rename_pattern\": \"my_index\", \"rename_replacement\": \"my_index\"}`. Ensure the snapshot is recent enough."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/translog.html#translog-corruption",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.86,
  "fix_success_rate": 0.7,
  "resolvable": "partial",
  "first_seen": "2023-09-05",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}