{ "id": "llm/seed-parameter-ignored-with-low-temp", "signature": "Warning: seed parameter may not produce deterministic results with temperature close to 0", "signature_zh": "警告：当温度接近 0 时，seed 参数可能无法产生确定性结果", "regex": "seed parameter may not produce deterministic results", "domain": "llm", "category": "runtime_error", "subcategory": null, "root_cause": "Even with temperature=0, some LLM providers (e.g., OpenAI) do not guarantee full determinism due to GPU non-determinism, batching, or model updates, and seed is only a best-effort hint.", "root_cause_type": "generic", "root_cause_zh": "即使 temperature=0，某些 LLM 提供商（如 OpenAI）也不能保证完全确定性，因为 GPU 非确定性、批处理或模型更新，seed 仅作为尽力而为的提示。", "versions": [ { "version": "openai-python>=1.0.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "gpt-4-turbo-2024-04-09", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "gpt-3.5-turbo-0125", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" } ], "os_specific": {}, "dead_ends": [ { "action": "", "why_fails": "This is the standard approach but still fails; the warning indicates it's not a configuration issue but a platform limitation.", "fail_rate": 0.6, "condition": "", "sources": [] }, { "action": "", "why_fails": "All seeds behave identically; the non-determinism is inherent to the API, not seed-specific.", "fail_rate": 0.9, "condition": "", "sources": [] }, { "action": "", "why_fails": "Streaming vs non-streaming both exhibit the same non-determinism at the output level.", "fail_rate": 0.8, "condition": "", "sources": [] } ], "workarounds": [ { "action": "Accept non-determinism and implement idempotency in your application logic. For testing, compare outputs using fuzzy matching or semantic similarity instead of exact equality.", "success_rate": 0.85, "how": "Accept non-determinism and implement idempotency in your application logic. For testing, compare outputs using fuzzy matching or semantic similarity instead of exact equality.", "condition": "", "sources": [] }, { "action": "Use a self-hosted model (e.g., Llama 3 with vLLM) where you can control CUDA determinism flags: `export CUBLAS_WORKSPACE_CONFIG=:4096:8` and set `torch.use_deterministic_algorithms(True)`.", "success_rate": 0.9, "how": "Use a self-hosted model (e.g., Llama 3 with vLLM) where you can control CUDA determinism flags: `export CUBLAS_WORKSPACE_CONFIG=:4096:8` and set `torch.use_deterministic_algorithms(True)`.", "condition": "", "sources": [] }, { "action": "Log the full request parameters and response ID for reproducibility; retry with same parameters if output is anomalous.", "success_rate": 0.7, "how": "Log the full request parameters and response ID for reproducibility; retry with same parameters if output is anomalous.", "condition": "", "sources": [] } ], "workarounds_zh": [ "接受非确定性，并在应用逻辑中实现幂等性。测试时，使用模糊匹配或语义相似度来比较输出，而不是要求完全相等。", "使用自托管模型（例如配合 vLLM 的 Llama 3），以便控制 CUDA 确定性标志：`export CUBLAS_WORKSPACE_CONFIG=:4096:8`，并设置 `torch.use_deterministic_algorithms(True)`。", "记录完整的请求参数和响应 ID 以便复现；如果输出异常，使用相同参数重试。"
], "transition_graph": { "leads_to": [], "preceded_by": [], "frequently_confused_with": [] }, "official_doc_url": "https://platform.openai.com/docs/guides/text-generation/reproducible-outputs", "official_doc_section": null, "error_code": null, "verification_tier": "ai_generated", "confidence": 0.85, "fix_success_rate": 0.75, "resolvable": "partial", "first_seen": "2024-02-20", "last_confirmed": "2024-06-01", "last_updated": "2024-06-01", "evidence_count": 1, "tags": [], "locale": "en", "aliases": [] }