{
  "id": "llm/seed-parameter-ignored-with-low-temp",
  "signature": "Warning: seed parameter may not produce deterministic results with temperature close to 0",
  "signature_zh": "警告：当温度接近 0 时，seed 参数可能无法产生确定性结果",
  "regex": "seed parameter may not produce deterministic results",
  "domain": "llm",
  "category": "runtime_error",
  "subcategory": null,
  "root_cause": "Even with temperature=0, some LLM providers (e.g., OpenAI) do not guarantee full determinism due to GPU non-determinism, batching, or model updates, and seed is only a best-effort hint.",
  "root_cause_type": "generic",
  "root_cause_zh": "即使 temperature=0，某些 LLM 提供商（如 OpenAI）也不能保证完全确定性，因为 GPU 非确定性、批处理或模型更新，seed 仅作为尽力而为的提示。",
  "versions": [
    {
      "version": "openai-python>=1.0.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "gpt-4-turbo-2024-04-09",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "gpt-3.5-turbo-0125",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "This is the standard approach but still fails; the warning indicates it's not a configuration issue but a platform limitation.",
      "fail_rate": 0.6,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "All seeds behave identically; the non-determinism is inherent to the API, not seed-specific.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "Streaming vs non-streaming both exhibit the same non-determinism at the output level.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Accept non-determinism and implement idempotency in your application logic. For testing, compare outputs using fuzzy matching or semantic similarity instead of exact equality.",
      "success_rate": 0.85,
      "how": "Accept non-determinism and implement idempotency in your application logic. For testing, compare outputs using fuzzy matching or semantic similarity instead of exact equality.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use a self-hosted model (e.g., Llama 3 with vLLM) where you can control CUDA determinism flags: `export CUBLAS_WORKSPACE_CONFIG=:4096:8` and set `torch.use_deterministic_algorithms(True)`.",
      "success_rate": 0.9,
      "how": "Use a self-hosted model (e.g., Llama 3 with vLLM) where you can control CUDA determinism flags: `export CUBLAS_WORKSPACE_CONFIG=:4096:8` and set `torch.use_deterministic_algorithms(True)`.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Log the full request parameters and response ID for reproducibility; retry with same parameters if output is anomalous.",
      "success_rate": 0.7,
      "how": "Log the full request parameters and response ID for reproducibility; retry with same parameters if output is anomalous.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Accept non-determinism and implement idempotency in your application logic. For testing, compare outputs using fuzzy matching or semantic similarity instead of exact equality.",
    "Use a self-hosted model (e.g., Llama 3 with vLLM) where you can control CUDA determinism flags: `export CUBLAS_WORKSPACE_CONFIG=:4096:8` and set `torch.use_deterministic_algorithms(True)`.",
    "Log the full request parameters and response ID for reproducibility; retry with same parameters if output is anomalous."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://platform.openai.com/docs/guides/text-generation/reproducible-outputs",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.85,
  "fix_success_rate": 0.75,
  "resolvable": "partial",
  "first_seen": "2024-02-20",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}