{
  "id": "php/domdocument-load-html-entity-warning",
  "signature": "Warning: DOMDocument::loadHTML(): htmlParseEntityRef: expecting ';' in Entity, line: 42 in /var/www/app/src/Parser/HtmlSanitizer.php:18",
  "signature_zh": "警告：DOMDocument::loadHTML()：htmlParseEntityRef：在实体中期望 ';'，行：42，位于 /var/www/app/src/Parser/HtmlSanitizer.php:18",
  "regex": "Warning: DOMDocument::loadHTML\\(\\): htmlParseEntityRef: expecting ';'",
  "domain": "php",
  "category": "encoding_error",
  "subcategory": null,
  "root_cause": "The HTML string passed to DOMDocument::loadHTML() contains a malformed HTML entity (e.g., &nbsp instead of &nbsp;), which causes the HTML parser to emit a warning and may result in incomplete parsing.",
  "root_cause_type": "generic",
  "root_cause_zh": "传递给 DOMDocument::loadHTML() 的 HTML 字符串包含格式错误的 HTML 实体（例如 &nbsp 而不是 &nbsp;），导致 HTML 解析器发出警告并可能导致解析不完整。",
  "versions": [
    {
      "version": "php:8.1.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "php:8.2.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    },
    {
      "version": "php:8.3.0",
      "introduced": null,
      "deprecated": null,
      "removed": null,
      "behavior_change": null,
      "status": "active"
    }
  ],
  "os_specific": {},
  "dead_ends": [
    {
      "action": "Suppress the warning with the @ error-control operator, e.g. @$dom->loadHTML($html)",
      "why_fails": "Suppressing the warning with @ (e.g., @$dom->loadHTML($html)) hides the error but does not fix the malformed entity, which can lead to corrupted DOM trees and unexpected behavior when traversing or querying the document.",
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    },
    {
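      "action": "Run htmlspecialchars() over the entire HTML input before passing it to loadHTML()",
      "why_fails": "Using htmlspecialchars() on the entire HTML input escapes every ampersand, including those that are part of valid entities (e.g., &amp; becomes &amp;amp;), and also escapes < and >, so the markup itself is turned into literal text instead of being parsed as elements.",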
      "fail_rate": 0.8,
      "condition": "",
      "sources": []
    },
    {
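      "action": "Switch from loadHTML() to loadXML() to parse the same HTML input",
      "why_fails": "Switching to loadXML() instead of loadHTML() fails because HTML documents with unclosed tags, HTML-only named entities such as &nbsp;, or other non-well-formed structures are not valid XML; loadXML() emits parser warnings and returns false instead of producing a usable document.",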
      "fail_rate": 0.9,
      "condition": "",
      "sources": []
    }
  ],
  "workarounds": [
    {
      "action": "Pre-process the HTML to fix common malformed entities using a regex: $html = preg_replace('/&(?![a-zA-Z0-9#]+;)/', '&amp;', $html); $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);",
      "success_rate": 0.85,
      "how": "Pre-process the HTML to fix common malformed entities using a regex: $html = preg_replace('/&(?![a-zA-Z0-9#]+;)/', '&amp;', $html); $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use the LIBXML_NOERROR flag to suppress the warning but still parse the document: $dom->loadHTML($html, LIBXML_NOERROR); however, be aware that this may hide other parsing issues.",
      "success_rate": 0.8,
      "how": "Use the LIBXML_NOERROR flag to suppress the warning but still parse the document: $dom->loadHTML($html, LIBXML_NOERROR); however, be aware that this may hide other parsing issues.",
      "condition": "",
      "sources": []
    },
    {
      "action": "Use a more forgiving HTML parser like html5-php (GitHub: Masterminds/html5-php; Composer package: masterminds/html5), which handles malformed entities gracefully: $html5 = new Masterminds\\HTML5(); $dom = $html5->loadHTML($html);",
      "success_rate": 0.9,
      "how": "Use a more forgiving HTML parser like html5-php (GitHub: Masterminds/html5-php; Composer package: masterminds/html5), which handles malformed entities gracefully: $html5 = new Masterminds\\HTML5(); $dom = $html5->loadHTML($html);",
      "condition": "",
      "sources": []
    }
  ],
  "workarounds_zh": [
    "使用正则表达式预处理 HTML 以修复常见的格式错误的实体：$html = preg_replace('/&(?![a-zA-Z0-9#]+;)/', '&amp;', $html); $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);",
    "使用 LIBXML_NOERROR 标志来抑制警告但仍然解析文档：$dom->loadHTML($html, LIBXML_NOERROR); 但请注意，这可能会隐藏其他解析问题。",
    "使用更宽容的 HTML 解析器，如 html5-php（GitHub：Masterminds/html5-php；Composer 包：masterminds/html5），它可以优雅地处理格式错误的实体：$html5 = new Masterminds\\HTML5(); $dom = $html5->loadHTML($html);"
  ],
  "transition_graph": {
    "leads_to": [],
    "preceded_by": [],
    "frequently_confused_with": []
  },
  "official_doc_url": "https://www.php.net/manual/en/domdocument.loadhtml.php",
  "official_doc_section": null,
  "error_code": null,
  "verification_tier": "ai_generated",
  "confidence": 0.83,
  "fix_success_rate": 0.78,
  "resolvable": "true",
  "first_seen": "2023-07-22",
  "last_confirmed": "2024-06-01",
  "last_updated": "2024-06-01",
  "evidence_count": 1,
  "tags": [],
  "locale": "en",
  "aliases": []
}