{
  "id": "kubernetes/job-backoff-limit-exceeded",
  "signature": "Job has reached the specified backoff limit",
  "signature_zh": "Job 已达到指定的回退限制",
  "regex": "Job has reached the specified backoff limit",
  "domain": "kubernetes",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "A Kubernetes Job's pod has failed more times than the backoffLimit allows, causing the Job to stop retrying.",
  "root_cause_type": "generic",
  "root_cause_zh": "Kubernetes Job 的 Pod 失败次数超过了 backoffLimit 允许的值，导致 Job 停止重试。",
  "versions": [
    {
      "version": "Kubernetes v1.26.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Kubernetes v1.28.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Kubernetes v1.30.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Increasing backoffLimit to a very high number without fixing the underlying pod failure",
      "why_fails": "The Job will still fail after exhausting the new limit; the root cause in the container remains.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    },
    {
      "action": "Deleting and recreating the Job with the same spec",
      "why_fails": "The same pod failures will repeat because the container image or command is still broken.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Check the logs of the failed pods: `kubectl logs -l job-name=my-job --tail=100` to see the failed attempts' errors.",
      "success_rate": 0.9,
      "how": "Check the logs of the failed pods: `kubectl logs -l job-name=my-job --tail=100` to see the failed attempts' errors.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Fix the container command or image, then delete and recreate the Job: `kubectl delete job my-job && kubectl create job my-job --image=correct-image -- /correct-command`.",
      "success_rate": 0.85,
      "how": "Fix the container command or image, then delete and recreate the Job: `kubectl delete job my-job && kubectl create job my-job --image=correct-image -- /correct-command`.",
      "condition": "",
      "sources": []
    },
    {
      "action": "If the failure is transient, increase backoffLimit and set the pod restartPolicy: set `spec.backoffLimit: 10` and `spec.template.spec.restartPolicy: OnFailure` in the Job manifest.",
      "success_rate": 0.7,
      "how": "If the failure is transient, increase backoffLimit and set the pod restartPolicy: set `spec.backoffLimit: 10` and `spec.template.spec.restartPolicy: OnFailure` in the Job manifest.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "检查失败 Pod 的日志：`kubectl logs -l job-name=my-job --tail=100` 查看失败尝试的错误。",
    "修复容器命令或镜像，然后删除并重新创建 Job：`kubectl delete job my-job && kubectl create job my-job --image=correct-image -- /correct-command`。",
    "如果失败是瞬时的，增加 backoffLimit 并设置 Pod 的 restartPolicy：在 Job 清单中设置 `spec.backoffLimit: 10` 和 `spec.template.spec.restartPolicy: OnFailure`。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://kubernetes.io/docs/concepts/workloads/controllers/job/#pod-backoff-failure-policy",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.8,
  "fix_success_rate": 0.85,
  "resolvable": "true",
  "first_seen": "2023-10-01",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}