{
  "id": "kubernetes/etcd-leader-election-timeout",
  "signature": "Error from server: etcdserver: request timed out, possible leader election",
  "signature_zh": "来自服务器的错误：etcdserver：请求超时，可能正在进行领导者选举",
  "regex": "Error from server: etcdserver: request timed out, possible leader election",
  "domain": "kubernetes",
  "category": "system_error",
  "subcategory": null,
  "root_cause": "The etcd cluster is experiencing a leader election or network partition, causing API server requests to time out.",
  "root_cause_type": "generic",
  "root_cause_zh": "etcd 集群正在进行领导者选举或遇到网络分区，导致 API 服务器请求超时。",
  "versions": [
    {
      "version": "etcd 3.5",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "kubernetes 1.27",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "kubernetes 1.28",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "The API server is not the root cause; restarting it won't fix etcd instability.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Longer timeouts may mask the issue but don't address the underlying etcd cluster problem.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "If the cluster is in a leader election, rebooting nodes can worsen the situation and cause data loss.",
      "fail_rate": 0.6,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Run `etcdctl endpoint health --cluster` and `etcdctl endpoint status --cluster -w table` to identify unhealthy members. If a leader is missing, ensure a majority of etcd nodes are reachable.",
      "success_rate": 0.8,
      "how": "Run `etcdctl endpoint health --cluster` and `etcdctl endpoint status --cluster -w table` to identify unhealthy members. If a leader is missing, ensure a majority of etcd nodes are reachable.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use `ETCDCTL_API=3 etcdctl snapshot restore /path/to/backup.db --data-dir /var/lib/etcd` on a new etcd instance, then restart the API server pointing to the restored etcd.",
      "success_rate": 0.7,
      "how": "Use `ETCDCTL_API=3 etcdctl snapshot restore /path/to/backup.db --data-dir /var/lib/etcd` on a new etcd instance, then restart the API server pointing to the restored etcd.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "运行 `etcdctl endpoint health --cluster` 和 `etcdctl endpoint status --cluster -w table` 来识别不健康的成员。如果缺少领导者，确保大多数 etcd 节点可达。",
    "使用 `ETCDCTL_API=3 etcdctl snapshot restore /path/to/backup.db --data-dir /var/lib/etcd` 在新的 etcd 实例上，然后重启指向恢复后 etcd 的 API 服务器。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://etcd.io/docs/v3.5/faq/#what-does-etcd-request-timed-out-mean",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.85,
  "fix_success_rate": 0.75,
  "resolvable": "partial",
  "first_seen": "2023-06-20",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}