From 7eb3e9cd173fbdd39eefa791aab610858e76399d Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Sat, 17 Jun 2023 00:37:48 +0200 Subject: [PATCH] Fix GH-11404: DOMDocument::savexml and friends ommit xmlns="" declaration for null namespace, creating incorrect xml representation of the DOM The NULL namespace is only correct when there is no default namespace override. When there is, we need to manually set it to the empty string namespace. Closes GH-11428. --- NEWS | 2 + ext/dom/document.c | 4 + ext/dom/element.c | 4 + ext/dom/node.c | 16 ++-- ext/dom/php_dom.c | 38 +++++++++ ext/dom/tests/bug47530.phpt | 2 +- ext/dom/tests/gh11404.phpt | 160 ++++++++++++++++++++++++++++++++++++ 7 files changed, 216 insertions(+), 10 deletions(-) create mode 100644 ext/dom/tests/gh11404.phpt diff --git a/NEWS b/NEWS index b89fb82ac44..f0cbc5a6c97 100644 --- a/NEWS +++ b/NEWS @@ -38,6 +38,8 @@ PHP NEWS . Fix "invalid state error" with cloned namespace declarations. (nielsdos) . Fixed bug #55294 and #47530 and #47847 (various namespace reconciliation issues). (nielsdos) + . Fixed bug GH-11404 (DOMDocument::saveXML and friends omit xmlns="" + declaration for null namespace). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/document.c b/ext/dom/document.c index 93091df83a0..199c39b5ff5 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -878,6 +878,10 @@ PHP_METHOD(DOMDocument, createElementNS) if (errorcode == 0) { if (xmlValidateName((xmlChar *) localname, 0) == 0) { + /* https://dom.spec.whatwg.org/#validate-and-extract: demands us to set an empty string uri to NULL */ + if (uri_len == 0) { + uri = NULL; + } nodep = xmlNewDocNode(docp, NULL, (xmlChar *) localname, (xmlChar *) value); if (nodep != NULL && uri != NULL) { nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri); diff --git a/ext/dom/element.c b/ext/dom/element.c index 44c576a0736..79786ed9078 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -56,6 +56,10 @@ PHP_METHOD(DOMElement, __construct) if (uri_len > 0) { errorcode = dom_check_qname(name, &localname, &prefix, uri_len, name_len); if (errorcode == 0) { + /* https://dom.spec.whatwg.org/#validate-and-extract: demands us to set an empty string uri to NULL */ + if (uri_len == 0) { + uri = NULL; + } nodep = xmlNewNode (NULL, (xmlChar *)localname); if (nodep != NULL && uri != NULL) { nsptr = dom_get_ns(nodep, uri, &errorcode, prefix); diff --git a/ext/dom/node.c b/ext/dom/node.c index bcf4ee487d3..dc8c96b85ff 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -531,7 +531,6 @@ Since: DOM Level 2 int dom_node_namespace_uri_read(dom_object *obj, zval *retval) { xmlNode *nodep = dom_object_get_node(obj); - char *str = NULL; if (nodep == NULL) { php_dom_throw_error(INVALID_STATE_ERR, 1); @@ -543,20 +542,19 @@ int dom_node_namespace_uri_read(dom_object *obj, zval *retval) case XML_ATTRIBUTE_NODE: case XML_NAMESPACE_DECL: if (nodep->ns != NULL) { - str = (char *) nodep->ns->href; + char *str = (char *) nodep->ns->href; + /* https://dom.spec.whatwg.org/#concept-attribute: namespaceUri is "null or a non-empty string" */ + if (str != NULL && str[0] != '\0') { + ZVAL_STRING(retval, str); + return SUCCESS; + } } break; default: - str = NULL; break; } - if (str != NULL) { - ZVAL_STRING(retval, str); - } else { - ZVAL_NULL(retval); - } - + ZVAL_NULL(retval); return SUCCESS; } diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 4a6ab2fee9e..1cd035b62f5 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1478,6 +1478,22 @@ static void dom_libxml_reconcile_ensure_namespaces_are_declared(xmlNodePtr nodep xmlDOMWrapReconcileNamespaces(&dummy_ctxt, nodep, /* options */ 0); } +static bool dom_must_replace_namespace_by_empty_default(xmlDocPtr doc, xmlNodePtr nodep) +{ + xmlNsPtr default_ns = xmlSearchNs(doc, nodep->parent, NULL); + return default_ns != NULL && default_ns->href != NULL && default_ns->href[0] != '\0'; +} + +static void dom_replace_namespace_by_empty_default(xmlDocPtr doc, xmlNodePtr nodep) +{ + ZEND_ASSERT(nodep->ns == NULL); + /* The node uses the default empty namespace, but the current default namespace is non-empty. + * We can't unconditionally do this because otherwise libxml2 creates an xmlns="" declaration. + * Note: there's no point searching the oldNs list, because we haven't found it in the tree anyway. + * Ideally this would be pre-allocated but unfortunately libxml2 doesn't offer such a functionality. */ + xmlSetNs(nodep, xmlNewNs(nodep, (const xmlChar *) "", NULL)); +} + void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ { /* Although the node type will be checked by the libxml2 API, @@ -1485,6 +1501,10 @@ void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ if (nodep->type == XML_ELEMENT_NODE) { dom_reconcile_ns_internal(doc, nodep, nodep->parent); dom_libxml_reconcile_ensure_namespaces_are_declared(nodep); + /* Check nodep->ns first to avoid an expensive lookup. */ + if (nodep->ns == NULL && dom_must_replace_namespace_by_empty_default(doc, nodep)) { + dom_replace_namespace_by_empty_default(doc, nodep); + } } } /* }}} */ @@ -1508,12 +1528,30 @@ static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlN void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) { + bool did_compute_must_replace_namespace_by_empty_default = false; + bool must_replace_namespace_by_empty_default = false; + dom_reconcile_ns_list_internal(doc, nodep, last, nodep->parent); + /* The loop is outside of the recursion in the above call because * dom_libxml_reconcile_ensure_namespaces_are_declared() performs its own recursion. */ while (true) { /* The internal libxml2 call will already check the node type, no need for us to do it here. */ dom_libxml_reconcile_ensure_namespaces_are_declared(nodep); + + /* We don't have to handle the children, because if their ns's are NULL they'll just take on the default + * which should've been reconciled before. */ + if (nodep->ns == NULL) { + /* This is an optimistic approach: we assume that most of the time we don't need the result of the computation. */ + if (!did_compute_must_replace_namespace_by_empty_default) { + did_compute_must_replace_namespace_by_empty_default = true; + must_replace_namespace_by_empty_default = dom_must_replace_namespace_by_empty_default(doc, nodep); + } + if (must_replace_namespace_by_empty_default) { + dom_replace_namespace_by_empty_default(doc, nodep); + } + } + if (nodep == last) { break; } diff --git a/ext/dom/tests/bug47530.phpt b/ext/dom/tests/bug47530.phpt index 0fb990e0e7b..ab0b030c94b 100644 --- a/ext/dom/tests/bug47530.phpt +++ b/ext/dom/tests/bug47530.phpt @@ -121,7 +121,7 @@ test_appendChild_with_shadowing(); -- Test document fragment without import -- - + string(7) "foo:bar" string(19) "https://php.net/bar" -- Test document import -- diff --git a/ext/dom/tests/gh11404.phpt b/ext/dom/tests/gh11404.phpt new file mode 100644 index 00000000000..6c868de508c --- /dev/null +++ b/ext/dom/tests/gh11404.phpt @@ -0,0 +1,160 @@ +--TEST-- +GH-11404: DOMDocument::savexml and friends ommit xmlns="" declaration for null namespace, creating incorrect xml representation of the DOM +--EXTENSIONS-- +dom +--FILE-- +createElement('a'); + $nodeB = $dom->createElementNS(null, 'b'); + $nodeC = $dom->createElementNS('', 'c'); + $nodeD = $dom->createElement('d'); + $nodeD->setAttributeNS('some:ns', 'x:attrib', 'val'); + $nodeE = $dom->createElementNS('some:ns', 'e'); + // And these two respect the default ns. + $nodeE->setAttributeNS(null, 'attrib1', 'val'); + $nodeE->setAttributeNS('', 'attrib2', 'val'); + + $dom->documentElement->appendChild($nodeA); + $dom->documentElement->appendChild($nodeB); + $dom->documentElement->appendChild($nodeC); + $dom->documentElement->appendChild($nodeD); + $dom->documentElement->appendChild($nodeE); + + var_dump($nodeA->namespaceURI); + var_dump($nodeB->namespaceURI); + var_dump($nodeC->namespaceURI); + var_dump($nodeD->namespaceURI); + var_dump($nodeE->namespaceURI); + + echo $dom->saveXML(); + + // Create a subtree without using a fragment + $subtree = $dom->createElement('subtree'); + $subtree->appendChild($dom->createElementNS('some:ns', 'subtreechild1')); + $subtree->firstElementChild->appendChild($dom->createElement('subtreechild2')); + $dom->documentElement->appendChild($subtree); + + echo $dom->saveXML(); + + // Create a subtree with the use of a fragment + $subtree = $dom->createDocumentFragment(); + $subtree->appendChild($child3 = $dom->createElement('child3')); + $child3->appendChild($dom->createElement('child4')); + $subtree->appendChild($dom->createElement('child5')); + $dom->documentElement->appendChild($subtree); + + echo $dom->saveXML(); +} + +$dom1 = new DOMDocument; +$dom1->loadXML(''); +testAppendAndAttributes($dom1); + +echo "-- Test append and attributes: without default namespace variation --\n"; + +$dom1 = new DOMDocument; +$dom1->loadXML(''); +testAppendAndAttributes($dom1); + +echo "-- Test import --\n"; + +function testImport(?string $href, string $toBeImported) { + $dom1 = new DOMDocument; + $decl = $href === NULL ? '' : "xmlns=\"$href\""; + $dom1->loadXML(''); + + $dom2 = new DOMDocument; + $dom2->loadXML('' . $toBeImported); + + $dom1->documentElement->append( + $imported = $dom1->importNode($dom2->documentElement, true) + ); + + var_dump($imported->namespaceURI); + + echo $dom1->saveXML(); +} + +testImport(null, ''); +testImport('', ''); +testImport('some:ns', ''); +testImport('', '
'); +testImport('some:ns', '
'); + +echo "-- Namespace URI comparison --\n"; + +$dom1 = new DOMDocument; +$dom1->loadXML('
'); +var_dump($dom1->firstElementChild->namespaceURI); +var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); + +$dom1 = new DOMDocument; +$dom1->appendChild($dom1->createElementNS('a:b', 'parent')); +$dom1->firstElementChild->appendChild($dom1->createElementNS('a:b', 'child1')); +$dom1->firstElementChild->appendChild($second = $dom1->createElement('child2')); +var_dump($dom1->firstElementChild->namespaceURI); +var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); +var_dump($second->namespaceURI); +echo $dom1->saveXML(); + +$dom1 = new DOMDocument; +$dom1->loadXML(''); +var_dump($dom1->firstElementChild->namespaceURI); +$dom1->firstElementChild->appendChild($dom1->createElementNS('a:b', 'tag')); +var_dump($dom1->firstElementChild->firstElementChild->namespaceURI); +?> +--EXPECT-- +-- Test append and attributes: with default namespace variation -- +NULL +NULL +NULL +NULL +string(7) "some:ns" + + + + + + +-- Test append and attributes: without default namespace variation -- +NULL +NULL +NULL +NULL +string(7) "some:ns" + + + + + + +-- Test import -- +NULL + + +NULL + + +NULL + + +NULL + +
+string(7) "some:ns" + +
+-- Namespace URI comparison -- +string(3) "a:b" +string(3) "a:b" +string(3) "a:b" +string(3) "a:b" +NULL + + +string(3) "a:b" +string(3) "a:b"