From eebc528cbfaa219789a6c80368d8180469a0e5aa Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 1 Oct 2023 14:26:24 +0200 Subject: [PATCH] Fix broken cache invalidation with deallocated and reallocated document node The original caching implementation had an oversight in combination with the new lifetime management in DOM for 8.3. The modification counter is stored on the document object itself, but as that can get deallocated when all references disappear, stale cache data can be used. Normally this isn't a problem, unless getElementsByTagName is called not on the document but on a child node. Fix it by moving caching data into the ref object, which will outlive all nodes from a document even if the document object disappears. Closes GH-12338. --- NEWS | 2 ++ ext/dom/document.c | 15 +++++---- ext/dom/node.c | 14 ++++----- ext/dom/parentnode.c | 14 ++++----- ext/dom/php_dom.h | 21 ++++++++++--- ...TagName_liveness_deallocated_document.phpt | 31 +++++++++++++++++++ ext/libxml/libxml.c | 9 ++---- ext/libxml/php_libxml.h | 26 +++++++++------- 8 files changed, 86 insertions(+), 46 deletions(-) create mode 100644 ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt diff --git a/NEWS b/NEWS index 5022360cb72..80fe15b38f6 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,8 @@ PHP NEWS - DOM: . Restore old namespace reconciliation behaviour. (nielsdos) + . Fix broken cache invalidation with deallocated and reallocated document + node. (nielsdos) - Fileinfo: . Fixed bug GH-11891 (fileinfo returns text/xml for some svg files). (usarise) diff --git a/ext/dom/document.c b/ext/dom/document.c index 234b71be1d8..29bc8b8c156 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -818,7 +818,7 @@ PHP_METHOD(DOMDocument, importNode) } } - php_libxml_invalidate_node_list_cache_from_doc(docp); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern); } @@ -1031,7 +1031,7 @@ bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, x { php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); if (nodep->doc != new_document) { - php_libxml_invalidate_node_list_cache_from_doc(new_document); + php_libxml_invalidate_node_list_cache(dom_object_new_document->document); /* Note for ATTRIBUTE_NODE: specified is always true in ext/dom, * and since this unlink it; the owner element will be unset (i.e. parentNode). */ @@ -1101,7 +1101,7 @@ PHP_METHOD(DOMDocument, normalizeDocument) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); - php_libxml_invalidate_node_list_cache_from_doc(docp); + php_libxml_invalidate_node_list_cache(intern->document); dom_normalize((xmlNodePtr) docp); } @@ -1327,7 +1327,7 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt xmlDocPtr docp = (xmlDocPtr) dom_object_get_node(intern); dom_doc_propsptr doc_prop = NULL; if (docp != NULL) { - const php_libxml_doc_ptr *doc_ptr = docp->_private; + const php_libxml_ref_obj *doc_ptr = intern->document; ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); @@ -1348,9 +1348,8 @@ static void dom_finish_loading_document(zval *this, zval *return_value, xmlDocPt php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); /* Since iterators should invalidate, we need to start the modification number from the old counter */ if (old_modification_nr != 0) { - php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ - doc_ptr->cache_tag.modification_nr = old_modification_nr; - php_libxml_invalidate_node_list_cache(doc_ptr); + intern->document->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(intern->document); } RETURN_TRUE; @@ -1611,7 +1610,7 @@ PHP_METHOD(DOMDocument, xinclude) php_dom_remove_xinclude_nodes(root); } - php_libxml_invalidate_node_list_cache_from_doc(docp); + php_libxml_invalidate_node_list_cache(intern->document); if (err) { RETVAL_LONG(err); diff --git a/ext/dom/node.c b/ext/dom/node.c index 627f24cb339..e2b4977d892 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -201,7 +201,7 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) break; } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(obj->document); zend_string_release_ex(str, 0); return SUCCESS; @@ -794,7 +794,7 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) return FAILURE; } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(obj->document); /* Typed property, this is already a string */ ZEND_ASSERT(Z_TYPE_P(newval) == IS_STRING); @@ -919,7 +919,7 @@ PHP_METHOD(DOMNode, insertBefore) php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL); } - php_libxml_invalidate_node_list_cache_from_doc(parentp->doc); + php_libxml_invalidate_node_list_cache(intern->document); if (ref != NULL) { DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj); @@ -1124,7 +1124,7 @@ PHP_METHOD(DOMNode, replaceChild) nodep->doc->intSubset = (xmlDtd *) newchild; } } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ(oldchild, &ret, intern); } /* }}} end dom_node_replace_child */ @@ -1166,7 +1166,7 @@ PHP_METHOD(DOMNode, removeChild) } xmlUnlinkNode(child); - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ(child, &ret, intern); } /* }}} end dom_node_remove_child */ @@ -1271,7 +1271,7 @@ PHP_METHOD(DOMNode, appendChild) dom_reconcile_ns(nodep->doc, new_child); } - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); DOM_RET_OBJ(new_child, &ret, intern); return; @@ -1387,7 +1387,7 @@ PHP_METHOD(DOMNode, normalize) DOM_GET_OBJ(nodep, id, xmlNodePtr, intern); - php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + php_libxml_invalidate_node_list_cache(intern->document); dom_normalize(nodep); diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 8b225d75c92..1c8e1887f64 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -309,7 +309,7 @@ void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc) return; } - php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + php_libxml_invalidate_node_list_cache(context->document); xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -353,7 +353,7 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc) return; } - php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + php_libxml_invalidate_node_list_cache(context->document); xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -404,7 +404,7 @@ void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc) doc = prevsib->doc; - php_libxml_invalidate_node_list_cache_from_doc(doc); + php_libxml_invalidate_node_list_cache(context->document); /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -456,7 +456,7 @@ void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc) doc = nextsib->doc; - php_libxml_invalidate_node_list_cache_from_doc(doc); + php_libxml_invalidate_node_list_cache(context->document); /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -523,7 +523,7 @@ void dom_child_node_remove(dom_object *context) return; } - php_libxml_invalidate_node_list_cache_from_doc(child->doc); + php_libxml_invalidate_node_list_cache(context->document); xmlUnlinkNode(child); } @@ -557,7 +557,7 @@ void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc) } xmlDocPtr doc = parentNode->doc; - php_libxml_invalidate_node_list_cache_from_doc(doc); + php_libxml_invalidate_node_list_cache(context->document); /* Spec step 4: convert nodes into fragment */ xmlNodePtr fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -602,7 +602,7 @@ void dom_parent_node_replace_children(dom_object *context, zval *nodes, uint32_t return; } - php_libxml_invalidate_node_list_cache_from_doc(thisp->doc); + php_libxml_invalidate_node_list_cache(context->document); dom_remove_all_children(thisp); diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 4d415256788..46900e8e5fe 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -200,7 +200,7 @@ int php_dom_get_nodelist_length(dom_object *obj); #define DOM_NODELIST 0 #define DOM_NAMEDNODEMAP 1 -static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr) +static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_ref_obj *doc_ptr) { ZEND_ASSERT(cache_tag != NULL); ZEND_ASSERT(doc_ptr != NULL); @@ -215,15 +215,26 @@ static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node) { ZEND_ASSERT(node != NULL); - return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private); + php_libxml_node_ptr *private = node->_private; + if (!private) { + return true; + } + php_libxml_node_object *object_private = private->_private; + if (!object_private || !object_private->document) { + return true; + } + return php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, object_private->document); } static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node) { ZEND_ASSERT(cache_tag != NULL); - if (node->doc && node->doc->_private) { - const php_libxml_doc_ptr* doc_ptr = node->doc->_private; - cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr; + php_libxml_node_ptr *private = node->_private; + if (private) { + php_libxml_node_object *object_private = private->_private; + if (object_private->document) { + cache_tag->modification_nr = object_private->document->cache_tag.modification_nr; + } } } diff --git a/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt b/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt new file mode 100644 index 00000000000..d926747399b --- /dev/null +++ b/ext/dom/tests/getElementsByTagName_liveness_deallocated_document.phpt @@ -0,0 +1,31 @@ +--TEST-- +getElementsByTagName() liveness with deallocated document +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + +

1

2

3

+
+XML); + +$ps = $dom->documentElement->getElementsByTagName('p'); +$second = $ps->item(1); +var_dump($second->textContent); +var_dump($ps->length); + +unset($dom); +$dom = $second->ownerDocument; + +$second->parentNode->appendChild($dom->createElement('p', '4')); +var_dump($ps->length); + +?> +--EXPECT-- +string(1) "2" +int(3) +int(4) diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 48b981a95a5..db3f696ce03 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1293,13 +1293,7 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, object->node->_private = private_data; } } else { - if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) { - php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr)); - doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ - object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */ - } else { - object->node = emalloc(sizeof(php_libxml_node_ptr)); - } + object->node = emalloc(sizeof(php_libxml_node_ptr)); ret_refcount = 1; object->node->node = node; object->node->refcount = 1; @@ -1344,6 +1338,7 @@ PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object, object->document->ptr = docp; object->document->refcount = ret_refcount; object->document->doc_props = NULL; + object->document->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ } return ret_refcount; diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index 78a5df8f97e..b325f17adc9 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -57,10 +57,15 @@ typedef struct _libxml_doc_props { bool recover; } libxml_doc_props; +typedef struct { + size_t modification_nr; +} php_libxml_cache_tag; + typedef struct _php_libxml_ref_obj { void *ptr; int refcount; libxml_doc_props *doc_props; + php_libxml_cache_tag cache_tag; } php_libxml_ref_obj; typedef struct _php_libxml_node_ptr { @@ -69,16 +74,6 @@ typedef struct _php_libxml_node_ptr { void *_private; } php_libxml_node_ptr; -typedef struct { - size_t modification_nr; -} php_libxml_cache_tag; - -/* extends php_libxml_node_ptr */ -typedef struct { - php_libxml_node_ptr node_ptr; - php_libxml_cache_tag cache_tag; -} php_libxml_doc_ptr; - typedef struct _php_libxml_node_object { php_libxml_node_ptr *node; php_libxml_ref_obj *document; @@ -91,8 +86,11 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object * return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset); } -static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr) +static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_ref_obj *doc_ptr) { + if (!doc_ptr) { + return; + } #if SIZEOF_SIZE_T == 8 /* If one operation happens every nanosecond, then it would still require 584 years to overflow * the counter. So we'll just assume this never happens. */ @@ -108,7 +106,11 @@ static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_ static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp) { if (docp && docp->_private) { /* docp is NULL for detached nodes */ - php_libxml_invalidate_node_list_cache((php_libxml_doc_ptr *)docp->_private); + php_libxml_node_ptr *private = docp->_private; + php_libxml_node_object *object_private = private->_private; + if (object_private) { + php_libxml_invalidate_node_list_cache(object_private->document); + } } }