{
  "id": "cloud/gcp-cloud-run-cold-start-http-503",
  "signature": "HTTP 503 Service Unavailable: The request failed because the service is scaling up. Try again later.",
  "signature_zh": "HTTP 503 服务不可用：请求失败，因为服务正在扩展。请稍后重试。",
  "regex": "HTTP 503.*Service Unavailable.*scaling up",
  "domain": "cloud",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "Cloud Run's cold start latency (due to container image pull and startup) exceeds the request timeout, causing the load balancer to return 503 before the container is ready.",
  "root_cause_type": "generic",
  "root_cause_zh": "Cloud Run 的冷启动延迟（因容器镜像拉取和启动）超过请求超时时间，导致负载均衡器在容器就绪前返回 503。",
  "versions": [
    {
      "version": "Cloud Run (fully managed) gen2",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Cloud Run for Anthos 1.28",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "Simply retrying the request without addressing cold start may succeed eventually but adds latency and costs.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Increasing max instances doesn't reduce cold start frequency; it only limits concurrency.",
      "fail_rate": 0.95,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Setting min instances to 1 reduces cold start for the first instance but doesn't help if all instances are busy.",
      "fail_rate": 0.6,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Set min instances to at least 1 to keep a warm instance always ready: gcloud run deploy SERVICE --min-instances 1. For production, use 2-3 to handle traffic spikes.",
      "success_rate": 0.85,
      "how": "Set min instances to at least 1 to keep a warm instance always ready: gcloud run deploy SERVICE --min-instances 1. For production, use 2-3 to handle traffic spikes.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Optimize container startup: use distroless base images, reduce image size, and move initialization to a background thread. Example Dockerfile: FROM gcr.io/distroless/java17-debian11, then use Spring Boot's lazy initialization.",
      "success_rate": 0.8,
      "how": "Optimize container startup: use distroless base images, reduce image size, and move initialization to a background thread. Example Dockerfile: FROM gcr.io/distroless/java17-debian11, then use Spring Boot's lazy initialization.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Enable 'startup CPU boost' to allocate additional CPU during container startup: gcloud run deploy SERVICE --cpu-boost",
      "success_rate": 0.75,
      "how": "Enable 'startup CPU boost' to allocate additional CPU during container startup: gcloud run deploy SERVICE --cpu-boost",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "将最小实例数设置为至少 1 以保持一个常驻实例：gcloud run deploy SERVICE --min-instances 1。生产环境建议设为 2-3 以应对流量峰值。",
    "优化容器启动：使用 distroless 基础镜像、减小镜像体积、将初始化移至后台线程。示例 Dockerfile：FROM gcr.io/distroless/java17-debian11，然后使用 Spring Boot 的懒加载。",
    "启用 'startup CPU boost' 在容器启动期间分配额外 CPU：gcloud run deploy SERVICE --cpu-boost"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://cloud.google.com/run/docs/troubleshooting#503-errors",
  "official_doc_section": null,
  "error_code": "HTTP 503",
  "verification_tier": "ai_generated",
  "confidence": 0.87,
  "fix_success_rate": 0.8,
  "resolvable": "partial",
  "first_seen": "2023-09-05",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}