{
  "id": "llm/embedding-length-mismatch-on-insert",
  "signature": "chromadb.errors.DimensionError: Inserted embedding dimension (1536) does not match collection dimension (768)",
  "signature_zh": "chromadb.errors.DimensionError: 插入的嵌入向量维度 (1536) 与集合维度 (768) 不匹配",
  "regex": "Inserted embedding dimension \\(\\d+\\) does not match collection dimension \\(\\d+\\)",
  "domain": "llm",
  "category": "data_error",
  "subcategory": null,
  "root_cause": "The embedding model used for insertion produces vectors of a different size than the collection's expected dimension, often due to switching embedding models or mismatched model versions.",
  "root_cause_type": "generic",
  "root_cause_zh": "用于插入的嵌入模型产生的向量大小与集合期望的维度不同，通常是由于切换了嵌入模型或模型版本不匹配。",
  "versions": [
    {
      "version": "chromadb>=0.4.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "sentence-transformers>=2.2.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "text-embedding-3-small",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "text-embedding-ada-002",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
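    {
      "action": "Switch from `collection.add()` to `collection.upsert()` on the existing collection, expecting it to accept embeddings of the new dimension.",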
      "why_fails": "The collection dimension is fixed at creation time; upserting doesn't change the schema.",
      "fail_rate": 1.0,
      "condition": "",
      "sources": []
    },
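    {
      "action": "Pad (with zeros) or truncate the embedding vectors so their length matches the collection dimension.",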
      "why_fails": "Padding or truncating destroys semantic meaning and leads to poor retrieval results; the vector space becomes inconsistent.",
      "fail_rate": 0.95,
      "condition": "",
      "sources": []
    },
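    {
      "action": "Swap in a different embedding model for new inserts, without checking its output dimension, while continuing to write to the same collection.",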
      "why_fails": "Different models have different output dimensions; you must use the same model for all inserts in a collection.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Create a new collection with the correct dimension and re-embed all documents. Example: `collection = client.create_collection(name=\"my_collection\", embedding_function=embedding_function, metadata={\"hnsw:space\": \"cosine\"})` where `embedding_function` outputs 1536 dimensions.",
      "success_rate": 0.95,
      "how": "Create a new collection with the correct dimension and re-embed all documents. Example: `collection = client.create_collection(name=\"my_collection\", embedding_function=embedding_function, metadata={\"hnsw:space\": \"cosine\"})` where `embedding_function` outputs 1536 dimensions.",
      "condition": "",
      "sources": []
    },
    {
      "action": "If using a different embedding model temporarily, keep a mapping of model to collection, or use a router that selects the correct collection based on the model.",
      "success_rate": 0.8,
      "how": "If using a different embedding model temporarily, keep a mapping of model to collection, or use a router that selects the correct collection based on the model.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use a unified embedding model that supports variable dimensions (e.g., text-embedding-3-small with `dimensions` parameter) to enforce consistency.",
      "success_rate": 0.7,
      "how": "Use a unified embedding model that supports variable dimensions (e.g., text-embedding-3-small with `dimensions` parameter) to enforce consistency.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "创建一个维度正确的新集合，并重新嵌入所有文档。示例：`collection = client.create_collection(name=\"my_collection\", embedding_function=embedding_function, metadata={\"hnsw:space\": \"cosine\"})`，其中 `embedding_function` 输出 1536 维向量。",
    "如果临时使用不同的嵌入模型，请维护模型到集合的映射，或使用路由器根据模型选择正确的集合。",
    "使用支持可变维度的统一嵌入模型（例如带 `dimensions` 参数的 text-embedding-3-small）以保证一致性。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://docs.trychroma.com/usage-guide#creating-collections",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.9,
  "fix_success_rate": 0.95,
  "resolvable": "true",
  "first_seen": "2023-11-05",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}