{
  "id": "data/csv-null-vs-empty-string-ambiguity",
  "signature": "CSV null vs empty string ambiguity — \"\" and no-value both become NaN in pandas",
  "signature_zh": "CSV 空值与空字符串歧义——\"\" 和无值在 pandas 中均变为 NaN",
  "regex": ".*empty.*string.*null.*pandas.*|.*CSV.*null.*empty.*",
  "domain": "data",
  "category": "data_error",
  "subcategory": null,
  "root_cause": "Pandas read_csv treats both empty quoted strings and missing fields as NaN by default, losing the distinction between empty strings and null values.",
  "root_cause_type": "generic",
  "root_cause_zh": "Pandas read_csv 默认将空引号字符串和缺失字段均视为 NaN，丢失了空字符串与空值之间的区别。",
  "versions": [
    {
      "version": "pandas 1.5.3",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "pandas 2.0.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "pandas 2.1.4",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
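    {
      "action": "Pass keep_default_na=False to pd.read_csv without any further handling",
      "why_fails": "This makes pandas read no-value cells as empty strings too, so empty quoted strings and missing fields remain indistinguishable after loading.",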
      "fail_rate": 0.65,
      "condition": "",
      "sources": []
    },
    {
      "action": "Pass na_filter=False to pd.read_csv",
      "why_fails": "Disables all NA detection, but also prevents legitimate NaN values from being recognized, breaking downstream null handling.",
      "fail_rate": 0.7,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Use pd.read_csv(..., keep_default_na=False, dtype=str) so that empty fields are kept as empty strings instead of NaN, then convert empty strings to pd.NA only in the columns where an empty value means null. Example: df = pd.read_csv('data.csv', keep_default_na=False, dtype={'col1': str}); df['col1'] = df['col1'].replace('', pd.NA)",
      "success_rate": 0.85,
      "how": "Use pd.read_csv(..., keep_default_na=False, dtype=str) so that empty fields are kept as empty strings instead of NaN, then convert empty strings to pd.NA only in the columns where an empty value means null. Example: df = pd.read_csv('data.csv', keep_default_na=False, dtype={'col1': str}); df['col1'] = df['col1'].replace('', pd.NA)",
      "condition": "",
      "sources": []
    },
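    {
      "action": "Pre-process the CSV so that empty quoted fields are replaced with a sentinel such as '__EMPTY__', read the result with pandas (truly missing fields become NaN), then map the sentinel back to an empty string. Example: in a shell, sed 's/\"\"/__EMPTY__/g' input.csv > tagged.csv; then in Python, df = pd.read_csv('tagged.csv').replace('__EMPTY__', ''). Caveat: this simple sed pattern also rewrites escaped quotes (\"\") inside quoted fields, so use it only on data without embedded quotes.",
      "success_rate": 0.78,
      "how": "Pre-process the CSV so that empty quoted fields are replaced with a sentinel such as '__EMPTY__', read the result with pandas (truly missing fields become NaN), then map the sentinel back to an empty string. Example: in a shell, sed 's/\"\"/__EMPTY__/g' input.csv > tagged.csv; then in Python, df = pd.read_csv('tagged.csv').replace('__EMPTY__', ''). Caveat: this simple sed pattern also rewrites escaped quotes (\"\") inside quoted fields, so use it only on data without embedded quotes.",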
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "使用 pd.read_csv(..., keep_default_na=False, dtype=str)，使空字段保留为空字符串而不是 NaN，然后仅在空值表示 null 的列中将空字符串转换为 pd.NA。示例：df = pd.read_csv('data.csv', keep_default_na=False, dtype={'col1': str}); df['col1'] = df['col1'].replace('', pd.NA)",
    "预处理 CSV，将空的带引号字段替换为 '__EMPTY__' 之类的哨兵值，再用 pandas 读取（真正缺失的字段会变为 NaN），最后把哨兵值映射回空字符串。示例：先在 shell 中执行 sed 's/\"\"/__EMPTY__/g' input.csv > tagged.csv；然后在 Python 中执行 df = pd.read_csv('tagged.csv').replace('__EMPTY__', '')。注意：这个简单的 sed 模式也会改写带引号字段内部的转义引号（\"\"），因此仅适用于不含内嵌引号的数据。"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://pandas.pydata.org/docs/user_guide/io.html#io-read-csv-table",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.85,
  "fix_success_rate": 0.82,
  "resolvable": "true",
  "first_seen": "2023-03-15",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}