mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
Implement #53655: Improve speed of DOMNode::C14N() on large XML documents
The XPath query is in accordance to spec [1]. However, we can do it in a simpler way. We can use a custom callback function instead of a linear search in XPath to check if a node is visible. Note that comment nodes are handled internally by libxml2 already, so we do not need to differentiate between node types. The callback will do an upwards traversal of the tree until the root of the canonicalization is reached. In practice this will speed up the application a lot. [1] https://www.w3.org/TR/2001/REC-xml-c14n-20010315 section 2.1 Closes GH-12278.
This commit is contained in:
2
NEWS
2
NEWS
@@ -7,6 +7,8 @@ Core:
|
||||
|
||||
DOM:
|
||||
. Added DOMNode::compareDocumentPosition(). (nielsdos)
|
||||
. Implement #53655 (Improve speed of DOMNode::C14N() on large XML documents).
|
||||
(nielsdos)
|
||||
|
||||
Intl:
|
||||
. Added IntlDateFormatter::PATTERN constant. (David Carlier)
|
||||
|
||||
@@ -107,3 +107,6 @@ PHP 8.4 UPGRADE NOTES
|
||||
14. Performance Improvements
|
||||
========================================
|
||||
|
||||
* The performance of DOMNode::C14N() is greatly improved for the case without
|
||||
an xpath query. This can give a time improvement of easily two order of
|
||||
magnitude for documents with tens of thousands of nodes.
|
||||
|
||||
@@ -1736,6 +1736,25 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
|
||||
}
|
||||
/* }}} end dom_node_lookup_namespace_uri */
|
||||
|
||||
static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
|
||||
{
|
||||
xmlNodePtr root = user_data;
|
||||
/* We have to unroll the first iteration because node->parent
|
||||
* is not necessarily equal to parent due to libxml2 tree rules (ns decls out of the tree for example). */
|
||||
if (node == root) {
|
||||
return 1;
|
||||
}
|
||||
node = parent;
|
||||
while (node != NULL) {
|
||||
if (node == root) {
|
||||
return 1;
|
||||
}
|
||||
node = node->parent;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
|
||||
{
|
||||
zval *id;
|
||||
@@ -1779,22 +1798,10 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
|
||||
|
||||
php_libxml_invalidate_node_list_cache_from_doc(docp);
|
||||
|
||||
bool simple_node_parent_lookup_callback = false;
|
||||
if (xpath_array == NULL) {
|
||||
if (nodep->type != XML_DOCUMENT_NODE) {
|
||||
ctxp = xmlXPathNewContext(docp);
|
||||
ctxp->node = nodep;
|
||||
xpathobjp = xmlXPathEvalExpression((xmlChar *) "(.//. | .//@* | .//namespace::*)", ctxp);
|
||||
ctxp->node = NULL;
|
||||
if (xpathobjp && xpathobjp->type == XPATH_NODESET) {
|
||||
nodeset = xpathobjp->nodesetval;
|
||||
} else {
|
||||
if (xpathobjp) {
|
||||
xmlXPathFreeObject(xpathobjp);
|
||||
}
|
||||
xmlXPathFreeContext(ctxp);
|
||||
zend_throw_error(NULL, "XPath query did not return a nodeset");
|
||||
RETURN_THROWS();
|
||||
}
|
||||
simple_node_parent_lookup_callback = true;
|
||||
}
|
||||
} else {
|
||||
/*xpath query from xpath_array */
|
||||
@@ -1873,8 +1880,11 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
|
||||
}
|
||||
|
||||
if (buf != NULL) {
|
||||
ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes,
|
||||
with_comments, buf);
|
||||
if (simple_node_parent_lookup_callback) {
|
||||
ret = xmlC14NExecute(docp, dom_canonicalize_node_parent_lookup_cb, nodep, exclusive, inclusive_ns_prefixes, with_comments, buf);
|
||||
} else {
|
||||
ret = xmlC14NDocSaveTo(docp, nodeset, exclusive, inclusive_ns_prefixes, with_comments, buf);
|
||||
}
|
||||
}
|
||||
|
||||
if (inclusive_ns_prefixes != NULL) {
|
||||
|
||||
Reference in New Issue
Block a user