diff --git a/NEWS b/NEWS index 2c84a7fb780..53d6a71945a 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,8 @@ Core: DOM: . Added DOMNode::compareDocumentPosition(). (nielsdos) + . Implement #53655 (Improve speed of DOMNode::C14N() on large XML documents). + (nielsdos) Intl: . Added IntlDateFormatter::PATTERN constant. (David Carlier) diff --git a/UPGRADING b/UPGRADING index d37d5e78270..54a2f0e8c33 100644 --- a/UPGRADING +++ b/UPGRADING @@ -107,3 +107,6 @@ PHP 8.4 UPGRADE NOTES 14. Performance Improvements ======================================== +* The performance of DOMNode::C14N() is greatly improved for the case without + an xpath query. This can give a time improvement of easily two order of + magnitude for documents with tens of thousands of nodes. diff --git a/ext/dom/node.c b/ext/dom/node.c index 9bcf8384e58..b208a2d8a8f 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -1736,6 +1736,25 @@ PHP_METHOD(DOMNode, lookupNamespaceURI) } /* }}} end dom_node_lookup_namespace_uri */ +static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent) +{ + xmlNodePtr root = user_data; + /* We have to unroll the first iteration because node->parent + * is not necessarily equal to parent due to libxml2 tree rules (ns decls out of the tree for example). */ + if (node == root) { + return 1; + } + node = parent; + while (node != NULL) { + if (node == root) { + return 1; + } + node = node->parent; + } + + return 0; +} + static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ { zval *id; @@ -1779,22 +1798,10 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ php_libxml_invalidate_node_list_cache_from_doc(docp); + bool simple_node_parent_lookup_callback = false; if (xpath_array == NULL) { if (nodep->type != XML_DOCUMENT_NODE) { - ctxp = xmlXPathNewContext(docp); - ctxp->node = nodep; - xpathobjp = xmlXPathEvalExpression((xmlChar *) "(.//. | .//@* | .//namespace::*)", ctxp); - ctxp->node = NULL; - if (xpathobjp && xpathobjp->type == XPATH_NODESET) { - nodeset = xpathobjp->nodesetval; - } else { - if (xpathobjp) { - xmlXPathFreeObject(xpathobjp); - } - xmlXPathFreeContext(ctxp); - zend_throw_error(NULL, "XPath query did not return a nodeset"); - RETURN_THROWS(); - } + simple_node_parent_lookup_callback = true; } } else { /*xpath query from xpath_array */ @@ -1873,8 +1880,11 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ } if (buf != NULL) { - ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes, - with_comments, buf); + if (simple_node_parent_lookup_callback) { + ret = xmlC14NExecute(docp, dom_canonicalize_node_parent_lookup_cb, nodep, exclusive, inclusive_ns_prefixes, with_comments, buf); + } else { + ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes, with_comments, buf); + } } if (inclusive_ns_prefixes != NULL) {