From 080fd14458d3185b879ee2d0fbb6987b90539469 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 28 Aug 2025 21:55:33 +0200 Subject: [PATCH] Fix GH-19612: Mitigate libxml2 tree dictionary bug This code is very similar to code on PHP 8.4 and higher, but the mitigation is extended to entity references and to attribute children. Closes GH-19620. --- NEWS | 3 ++ ext/dom/document.c | 61 ++++++++++++++++++++++++++++++++++++-- ext/dom/tests/gh19612.phpt | 30 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/gh19612.phpt diff --git a/NEWS b/NEWS index a85a48b3e0f..0435cb4a8c5 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,9 @@ PHP NEWS . Fixed date_sunrise() and date_sunset() with partial-hour UTC offset. (ilutov) +- DOM: + . Fixed bug GH-19612 (Mitigate libxml2 tree dictionary bug). (nielsdos) + - FPM: . Fixed failed debug assertion when php_admin_value setting fails. (ilutov) diff --git a/ext/dom/document.c b/ext/dom/document.c index e622a09309b..e48cafbabe9 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1076,10 +1076,62 @@ static void php_dom_transfer_document_ref(xmlNodePtr node, php_libxml_ref_obj *n } } +/* Workaround for bug that was fixed in https://github.com/GNOME/libxml2/commit/4bc3ebf3eaba352fbbce2ef70ad00a3c7752478a + * and https://github.com/GNOME/libxml2/commit/bc7ab5a2e61e4b36accf6803c5b0e245c11154b1 */ +#if LIBXML_VERSION < 21300 +static xmlChar *libxml_copy_dicted_string(xmlDictPtr src_dict, xmlDictPtr dst_dict, xmlChar *str) +{ + if (str == NULL) { + return NULL; + } + if (xmlDictOwns(src_dict, str) == 1) { + if (dst_dict == NULL) { + return xmlStrdup(str); + } + return BAD_CAST xmlDictLookup(dst_dict, str, -1); + } + return str; +} + +static void libxml_fixup_name_and_content(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +{ + if (node->type == XML_ENTITY_REF_NODE) { + node->children = NULL; /* Break link with original document. */ + } + if (src_doc != NULL && src_doc->dict != NULL) { + ZEND_ASSERT(dst_doc != src_doc); + node->name = libxml_copy_dicted_string(src_doc->dict, dst_doc->dict, BAD_CAST node->name); + node->content = libxml_copy_dicted_string(src_doc->dict, NULL, node->content); + } +} + +static void libxml_fixup_name_and_content_outer(xmlDocPtr src_doc, xmlDocPtr dst_doc, xmlNodePtr node) +{ + libxml_fixup_name_and_content(src_doc, dst_doc, node); + + if (node->type == XML_ELEMENT_NODE) { + for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, (xmlNodePtr) attr); + for (xmlNodePtr attr_child = attr->children; attr_child != NULL; attr_child = attr_child->next) { + libxml_fixup_name_and_content(src_doc, dst_doc, attr_child); + } + } + } + + if (node->type == XML_ELEMENT_NODE || node->type == XML_ATTRIBUTE_NODE) { + for (xmlNodePtr child = node->children; child != NULL; child = child->next) { + libxml_fixup_name_and_content_outer(src_doc, dst_doc, child); + } + } +} +#endif + bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, xmlDocPtr new_document) { - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); - if (nodep->doc != new_document) { + xmlDocPtr old_doc = nodep->doc; + + php_libxml_invalidate_node_list_cache_from_doc(old_doc); + if (old_doc != new_document) { php_libxml_invalidate_node_list_cache(dom_object_new_document->document); /* Note for ATTRIBUTE_NODE: specified is always true in ext/dom, @@ -1089,6 +1141,11 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x return false; } +#if LIBXML_VERSION < 21300 + /* Must be first before transferring the ref to ensure the old document dictionary stays alive. */ + libxml_fixup_name_and_content_outer(old_doc, new_document, nodep); +#endif + php_dom_transfer_document_ref(nodep, dom_object_new_document->document); } else { xmlUnlinkNode(nodep); diff --git a/ext/dom/tests/gh19612.phpt b/ext/dom/tests/gh19612.phpt new file mode 100644 index 00000000000..38554f3c836 --- /dev/null +++ b/ext/dom/tests/gh19612.phpt @@ -0,0 +1,30 @@ +--TEST-- +GH-19612 (Mitigate libxml2 tree dictionary bug) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< +]> + +XML); +$html = new DOMDocument; +$html->loadHTML('

foo

', LIBXML_NOERROR); +$p = $html->documentElement->firstChild->firstChild; +$p->appendChild($html->adoptNode($xml->documentElement->firstElementChild->cloneNode(true))); + +echo $html->saveXML(); +echo $xml->saveXML(); +?> +--EXPECT-- + + +

foo

+ + +]> +