{
  "id": "api/http-504-gateway-timeout-upstream-connection-pool-exhausted",
  "signature": "504 Gateway Timeout: upstream connection pool exhausted",
  "signature_zh": "504 网关超时：上游连接池耗尽",
  "regex": "504\\s*(?:Gateway\\s*Timeout)?[\\s\\S]*connection\\s*pool\\s*(?:exhausted|full|max)",
  "domain": "api",
  "category": "resource_error",
  "subcategory": null,
  "root_cause": "The API gateway's connection pool to the upstream service is fully utilized, causing new requests to queue and eventually time out, often due to slow upstream responses or insufficient pool size.",
  "root_cause_type": "generic",
  "root_cause_zh": "API 网关到上游服务的连接池已完全占用，导致新请求排队并最终超时，通常是由于上游响应缓慢或连接池大小不足。",
  "versions": [
    {
      "version": "Nginx 1.26+",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Envoy 1.30+",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "AWS ALB (2024)",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "The pool will quickly exhaust again if the upstream is slow or the pool size is too small; it's a temporary fix.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Timeouts only delay the error; the pool remains exhausted and requests still queue.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "The error is about connection pool capacity, not network connectivity.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Increase the upstream connection pool size. Example Nginx: `upstream backend { server 10.0.1.5:8080; keepalive 100; }` (increase from default 32).",
      "success_rate": 0.9,
      "how": "Increase the upstream connection pool size. Example Nginx: `upstream backend { server 10.0.1.5:8080; keepalive 100; }` (increase from default 32).",
      "condition": "",
      "sources": []
    },
    {
      "action": "Optimize upstream response time by adding caching, reducing database queries, or scaling upstream instances. Monitor upstream latency with tools like `nginx_upstream_check_module`.",
      "success_rate": 0.85,
      "how": "Optimize upstream response time by adding caching, reducing database queries, or scaling upstream instances. Monitor upstream latency with tools like `nginx_upstream_check_module`.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Implement connection pooling limits per client IP or rate limiting at the gateway to prevent abuse. Example Nginx: `limit_conn_zone $binary_remote_addr zone=addr:10m; limit_conn addr 10;`.",
      "success_rate": 0.8,
      "how": "Implement connection pooling limits per client IP or rate limiting at the gateway to prevent abuse. Example Nginx: `limit_conn_zone $binary_remote_addr zone=addr:10m; limit_conn addr 10;`.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "增加上游连接池大小。Nginx 示例：`upstream backend { server 10.0.1.5:8080; keepalive 100; }`（从默认的 32 增加）。",
    "通过添加缓存、减少数据库查询或扩展上游实例来优化上游响应时间。使用 `nginx_upstream_check_module` 等工具监控上游延迟。",
    "在网关处实现按客户端 IP 的连接池限制或速率限制以防止滥用。Nginx 示例：`limit_conn_zone $binary_remote_addr zone=addr:10m; limit_conn addr 10;`。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://nginx.org/en/docs/http/ngx_http_upstream_module.html#keepalive",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.87,
  "fix_success_rate": 0.85,
  "resolvable": "true",
  "first_seen": "2024-06-20",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}