{ "id": "huggingface/tokenizer-decoder-max-length-overflow", "signature": "Token indices sequence length is longer than the specified maximum sequence length for this model (2048 > 1024). Running this sequence through the model will result in indexing errors", "signature_zh": "令牌索引序列长度超过模型指定的最大序列长度 (2048 > 1024)。通过模型运行此序列将导致索引错误", "regex": "Token indices sequence length is longer than the specified maximum sequence length for this model \\(\\d+ > \\d+\\).*", "domain": "huggingface", "category": "runtime_error", "subcategory": null, "root_cause": "Input text is too long for the model's max_position_embeddings, causing tokenizer to truncate incorrectly or overflow without proper truncation settings.", "root_cause_type": "generic", "root_cause_zh": "输入文本长度超过模型的 max_position_embeddings，导致分词器未正确截断或溢出。", "versions": [ { "version": "transformers>=4.30.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "tokenizers>=0.13.0", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" }, { "version": "python>=3.8", "introduced": null, "deprecated": null, "removed": null, "behavior_change": null, "status": "active" } ], "os_specific": {}, "dead_ends": [ { "action": "", "why_fails": "truncation=False disables truncation entirely, leading to a hard crash rather than graceful handling.", "fail_rate": 0.6, "condition": "", "sources": [] }, { "action": "", "why_fails": "Model's learned positional embeddings only support up to max_position_embeddings; exceeding it leads to out-of-range errors.", "fail_rate": 0.8, "condition": "", "sources": [] } ], "workarounds": [ { "action": "Set truncation=True and max_length=512 when encoding inputs. Example: tokenizer(text, truncation=True, max_length=512, return_tensors='pt')", "success_rate": 0.9, "how": "Set truncation=True and max_length=512 when encoding inputs. \nExample: tokenizer(text, truncation=True, max_length=512, return_tensors='pt')", "condition": "", "sources": [] }, { "action": "Use a model with larger max_position_embeddings (e.g., 4096) or switch to a long-context model like Longformer.", "success_rate": 0.8, "how": "Use a model with larger max_position_embeddings (e.g., 4096) or switch to a long-context model like Longformer.", "condition": "", "sources": [] } ], "workarounds_zh": [ "在编码输入时设置 truncation=True 和 max_length=512。示例：tokenizer(text, truncation=True, max_length=512, return_tensors='pt')", "使用具有更大 max_position_embeddings（如 4096）的模型，或切换到长上下文模型如 Longformer。" ], "transition_graph": { "leads_to": [], "preceded_by": [], "frequently_confused_with": [] }, "official_doc_url": "https://huggingface.co/docs/transformers/main/en/llm_tutorial#truncation", "official_doc_section": null, "error_code": null, "verification_tier": "ai_generated", "confidence": 0.85, "fix_success_rate": 0.85, "resolvable": "true", "first_seen": "2023-11-15", "last_confirmed": "2024-06-01", "last_updated": "2024-06-01", "evidence_count": 1, "tags": [], "locale": "en", "aliases": [] }