{
  "id": "data/csv-encoding-iso-8859-1-vs-utf-8",
  "signature": "CSV parsing error: UnicodeDecodeError ('utf-8' codec can't decode byte) when reading ISO-8859-1 encoded file as UTF-8",
  "signature_zh": "CSV解析错误：将ISO-8859-1编码文件作为UTF-8读取时出现UnicodeDecodeError（'utf-8' codec can't decode byte）",
  "regex": "UnicodeDecodeError.*'(utf-8|charmap)' codec can't decode byte.*in position",
  "domain": "data",
  "category": "encoding_error",
  "subcategory": null,
  "root_cause": "A CSV file encoded in ISO-8859-1 (Latin-1) stores accented characters as single bytes (e.g., 0xE9 for 'é', 0xF1 for 'ñ') that do not form valid UTF-8 sequences, causing the default UTF-8 decoder to raise a UnicodeDecodeError.",
  "root_cause_type": "generic",
  "root_cause_zh": "一个以ISO-8859-1（Latin-1）编码的CSV文件将重音字符存储为单个字节（例如'é'为0xE9，'ñ'为0xF1），这些字节无法构成有效的UTF-8序列，导致默认的UTF-8解码器引发UnicodeDecodeError。",
  "versions": [
    {
      "version": "Python 3.10+",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "pandas 2.0+",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "Python csv module",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
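      "action": "Suppress the error by decoding with `errors='ignore'`, e.g. `open('file.csv', encoding='utf-8', errors='ignore')` or `pd.read_csv('file.csv', encoding_errors='ignore')`.",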
      "why_fails": "Ignoring errors silently drops characters, leading to data corruption. For example, 'José' becomes 'Jos'.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
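      "action": "Open the file in Notepad++ and convert or re-save it as UTF-8, relying on Notepad++'s encoding auto-detection.",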
      "why_fails": "Notepad++ may misinterpret the original encoding if auto-detect is wrong, or double-encode characters, producing mojibake.",
      "fail_rate": 0.5,
      "condition": "",
      "sources": []
    },
    {
      "action": "Open the CSV in Excel and re-save it as a UTF-8 CSV.",
      "why_fails": "Excel may add a BOM, change delimiter to semicolon based on locale, or truncate leading zeros in numeric fields.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Detect and specify the correct encoding. Use `chardet` to auto-detect: run `import chardet`, then `with open('file.csv', 'rb') as f: result = chardet.detect(f.read(10000)); encoding = result['encoding']`. Then read with `pandas.read_csv('file.csv', encoding=encoding)`.",
      "success_rate": 0.9,
      "how": "Detect and specify the correct encoding. Use `chardet` to auto-detect: run `import chardet`, then `with open('file.csv', 'rb') as f: result = chardet.detect(f.read(10000)); encoding = result['encoding']`. Then read with `pandas.read_csv('file.csv', encoding=encoding)`.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Convert the file to UTF-8 using `iconv` command: `iconv -f ISO-8859-1 -t UTF-8 original.csv > converted.csv`. Then read the converted file with default UTF-8 encoding.",
      "success_rate": 0.95,
      "how": "Convert the file to UTF-8 using `iconv` command: `iconv -f ISO-8859-1 -t UTF-8 original.csv > converted.csv`. Then read the converted file with default UTF-8 encoding.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Read with `encoding='ISO-8859-1'` in pandas: `df = pd.read_csv('file.csv', encoding='ISO-8859-1')`.",
      "success_rate": 1.0,
      "how": "Read with `encoding='ISO-8859-1'` in pandas: `df = pd.read_csv('file.csv', encoding='ISO-8859-1')`.",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "检测并指定正确的编码。使用`chardet`自动检测：先执行`import chardet`，再执行`with open('file.csv', 'rb') as f: result = chardet.detect(f.read(10000)); encoding = result['encoding']`。然后使用`pandas.read_csv('file.csv', encoding=encoding)`读取。",
    "使用`iconv`命令将文件转换为UTF-8：`iconv -f ISO-8859-1 -t UTF-8 original.csv > converted.csv`。然后用默认UTF-8编码读取转换后的文件。",
    "在pandas中使用`encoding='ISO-8859-1'`读取：`df = pd.read_csv('file.csv', encoding='ISO-8859-1')`。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://docs.python.org/3/library/csv.html#csv.reader",
  "official_doc_section": null,
  "error_code": "UnicodeDecodeError",
  "verification_tier": "ai_generated",
  "confidence": 0.9,
  "fix_success_rate": 0.95,
  "resolvable": "true",
  "first_seen": "2023-08-22",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}