{
  "id": "data/csv-encoding-utf8-with-bom-silent-corruption",
  "signature": "CSV file with UTF-8 BOM causes silent data corruption in Excel on Windows",
  "signature_zh": "带 UTF-8 BOM 的 CSV 文件在 Windows 上的 Excel 中导致静默数据损坏",
  "regex": ".*UTF-8.*BOM.*Excel.*corrupt.*|.*CSV.*encoding.*Excel.*",
  "domain": "data",
  "category": "encoding_error",
  "subcategory": null,
  "root_cause": "Excel on Windows interprets BOM-less UTF-8 CSV files as ANSI (Windows-1252), corrupting non-ASCII characters. Adding BOM fixes encoding detection but may cause issues with other tools that don't expect BOM.",
  "root_cause_type": "generic",
  "root_cause_zh": "Windows 上的 Excel 将无 BOM 的 UTF-8 CSV 文件解释为 ANSI（Windows-1252），损坏非 ASCII 字符。添加 BOM 可修复编码检测，但可能导致其他不期望 BOM 的工具出现问题。",
  "versions": [
    {
      "version": "Excel 2019",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Excel 365",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Excel 2021",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "",
      "why_fails": "This option adds BOM but also changes the file format slightly (e.g., quoting rules), and the file may not be re-importable correctly.",
      "fail_rate": 0.55,
      "condition": "",
      "sources": []
    },
    {
      "action": "",
      "why_fails": "UTF-16 is not widely supported by CSV parsers and will cause issues with most data processing tools. It also doubles file size.",
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Add UTF-8 BOM to CSV files before opening in Excel. In Python: with open('output.csv', 'w', encoding='utf-8-sig') as f: writer = csv.writer(f); writer.writerows(data). The 'utf-8-sig' encoding adds BOM automatically. In command line: sed '1s/^/\\xef\\xbb\\xbf/' input.csv > output.csv",
      "success_rate": 0.95,
      "how": "Add UTF-8 BOM to CSV files before opening in Excel. In Python: with open('output.csv', 'w', encoding='utf-8-sig') as f: writer = csv.writer(f); writer.writerows(data). The 'utf-8-sig' encoding adds BOM automatically. In command line: sed '1s/^/\\xef\\xbb\\xbf/' input.csv > output.csv",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use Excel's 'Get Data from Text/CSV' feature instead of double-clicking: Data tab > Get Data > From File > From Text/CSV. Then choose UTF-8 encoding explicitly in the import wizard.",
      "success_rate": 0.9,
      "how": "Use Excel's 'Get Data from Text/CSV' feature instead of double-clicking: Data tab > Get Data > From File > From Text/CSV. Then choose UTF-8 encoding explicitly in the import wizard.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "Add UTF-8 BOM to CSV files before opening in Excel. In Python: with open('output.csv', 'w', encoding='utf-8-sig') as f: writer = csv.writer(f); writer.writerows(data). The 'utf-8-sig' encoding adds BOM automatically. In command line: sed '1s/^/\\xef\\xbb\\xbf/' input.csv > output.csv",
    "Use Excel's 'Get Data from Text/CSV' feature instead of double-clicking: Data tab > Get Data > From File > From Text/CSV. Then choose UTF-8 encoding explicitly in the import wizard."
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://support.microsoft.com/en-us/office/import-or-export-text-txt-or-csv-files-5250ac4c-663c-47ce-937b-339e391393ba",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.9,
  "fix_success_rate": 0.92,
  "resolvable": "true",
  "first_seen": "2023-05-18",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}