{
  "id": "cloud/gcp-gke-node-pool-upgrade-failed",
  "signature": "Node pool upgrade failed: Resource exhausted: insufficient CPU available in zone us-central1-a",
  "signature_zh": "节点池升级失败：资源耗尽：区域 us-central1-a 中 CPU 不足",
  "regex": "Node pool upgrade failed: Resource exhausted: insufficient .* in zone",
  "domain": "cloud",
  "category": "resource_error",
  "subcategory": null,
  "root_cause": "GKE cannot allocate new nodes during upgrade because the specified zone has insufficient CPU quota or capacity to host the additional temporary nodes required for the rolling update.",
  "root_cause_type": "generic",
  "root_cause_zh": "GKE 在升级期间无法分配新节点，因为指定区域的 CPU 配额或容量不足，无法容纳滚动更新所需的额外临时节点。",
  "versions": [
    {
      "version": "GKE: 1.28.5-gke.1500",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Kubernetes: 1.28",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Compute Engine: API v1",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "More nodes consume more quota, worsening the exhaustion; the upgrade needs additional quota for temporary nodes, not larger pool.",
      "fail_rate": 0.85,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Deletion frees quota but the new pool creation may still fail if zone capacity is insufficient at that time.",
      "fail_rate": 0.6,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Smaller instances may not meet workload requirements; also, the zone may still lack capacity for any instance type.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Request a quota increase in the GCP Console for Compute Engine CPUs in the affected region: IAM & Admin > Quotas > 'CPUs' > Edit Quota.",
      "success_rate": 0.9,
      "how": "Request a quota increase in the GCP Console for Compute Engine CPUs in the affected region: IAM & Admin > Quotas > 'CPUs' > Edit Quota.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use a surge upgrade with a different zone by adding a node pool in a zone with available capacity, then migrate workloads.",
      "success_rate": 0.85,
      "how": "Use a surge upgrade with a different zone by adding a node pool in a zone with available capacity, then migrate workloads.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Temporarily reduce the number of replicas in the cluster to free up quota, then perform the upgrade.",
      "success_rate": 0.8,
      "how": "Temporarily reduce the number of replicas in the cluster to free up quota, then perform the upgrade.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "在 GCP 控制台中为受影响区域的 Compute Engine CPU 请求增加配额：IAM 与管理 > 配额 > 'CPU' > 编辑配额。",
    "使用不同区域的激增升级，在可用容量充足的区域添加节点池，然后迁移工作负载。",
    "临时减少集群中的副本数以释放配额，然后执行升级。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://cloud.google.com/kubernetes-engine/docs/how-to/upgrading-a-cluster",
  "official_doc_section": null,
  "error_code": "RESOURCE_EXHAUSTED",
  "verification_tier": "ai_generated",
  "confidence": 0.86,
  "fix_success_rate": 0.82,
  "resolvable": "true",
  "first_seen": "2024-09-05",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}