1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Support templated content

The template element in HTML 5 is special in the sense that it does not
add its contents into the DOM tree, but instead keeps them in a separate
shadow DOM document fragment. Interacting with the DOM tree cannot touch
the elements in the document fragment.

Closes GH-14906.
This commit is contained in:
Niels Dossche
2024-07-10 17:07:12 +02:00
parent 8ad5c64393
commit 6980eba863
35 changed files with 723 additions and 121 deletions

3
NEWS
View File

@@ -6,6 +6,9 @@ PHP NEWS
. Fixed bug GH-14801 (Fix build for armv7). (andypost)
. Implemented property hooks RFC. (ilutov)
- DOM:
. Improve support for template elements. (nielsdos)
- GD:
. Check overflow/underflow for imagescale/imagefilter. (David Carlier)

View File

@@ -27,7 +27,7 @@ if test "$PHP_DOM" != "no"; then
$LEXBOR_DIR/ns/ns.c \
$LEXBOR_DIR/tag/tag.c"
PHP_NEW_EXTENSION(dom, [php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
domexception.c \
parentnode/tree.c parentnode/css_selectors.c \
processinginstruction.c cdatasection.c \

View File

@@ -8,7 +8,7 @@ if (PHP_DOM == "yes") {
CHECK_HEADER_ADD_INCLUDE("libxml/parser.h", "CFLAGS_DOM", PHP_PHP_BUILD + "\\include\\libxml2")
) {
EXTENSION("dom", "php_dom.c attr.c document.c infra.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \
xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c private_data.c \
domexception.c processinginstruction.c \
cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \
node.c characterdata.c documenttype.c \

View File

@@ -23,6 +23,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "xml_serializer.h"
#include "internal_helpers.h"
#include "dom_properties.h"

View File

@@ -23,6 +23,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
/*
* class DOMImplementation

View File

@@ -24,6 +24,7 @@
#include "zend_enum.h"
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "internal_helpers.h"
#include "dom_properties.h"
#include "token_list.h"
@@ -2030,6 +2031,16 @@ PHP_METHOD(Dom_Element, rename)
}
goto cleanup;
}
/* If we currently have a template but the new element type won't be a template, then refuse the rename: the template element hosts its content in a separate document fragment. */
if (is_currently_html_ns && xmlStrEqual(nodep->name, BAD_CAST "template") && !xmlStrEqual(localname, BAD_CAST "template")) {
php_dom_throw_error_with_message(
INVALID_MODIFICATION_ERR,
"It is not possible to rename the template element because it hosts a document fragment",
/* strict */ true
);
goto cleanup;
}
}
php_libxml_invalidate_node_list_cache(intern->document);

View File

@@ -22,12 +22,13 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "html5_parser.h"
#include "private_data.h"
#include <lexbor/html/parser.h>
#include <lexbor/html/interfaces/element.h>
#include <lexbor/html/interfaces/template_element.h>
#include <lexbor/dom/dom.h>
#include <libxml/parserInternals.h>
#include <libxml/HTMLtree.h>
#include <Zend/zend.h>
#define WORK_LIST_INIT_SIZE 128
/* libxml2 reserves 2 pointer-sized words for interned strings */
@@ -63,14 +64,20 @@ static unsigned short sanitize_line_nr(size_t line)
return (unsigned short) line;
}
static const php_dom_ns_magic_token *get_libxml_namespace_href(uintptr_t lexbor_namespace)
/* Bundles what the converter needs to know about one namespace: the magic token plus the
 * namespace URI string and its length. Field order matters: the values are built with
 * positional compound literals. */
struct lxml_ns {
/* Identity token; the converter stores it in the `_private` field of the xmlNs it obtains from the ns mapper. */
const php_dom_ns_magic_token *token;
/* Namespace URI and its length; used to build the zend_string that is looked up in the ns mapper. */
const char *href;
size_t href_len;
};
static struct lxml_ns get_libxml_namespace_href(uintptr_t lexbor_namespace)
{
if (lexbor_namespace == LXB_NS_SVG) {
return php_dom_ns_is_svg_magic_token;
return (struct lxml_ns) { php_dom_ns_is_svg_magic_token, ZEND_STRL(DOM_SVG_NS_URI) };
} else if (lexbor_namespace == LXB_NS_MATH) {
return php_dom_ns_is_mathml_magic_token;
return (struct lxml_ns) { php_dom_ns_is_mathml_magic_token, ZEND_STRL(DOM_MATHML_NS_URI) };
} else {
return php_dom_ns_is_html_magic_token;
return (struct lxml_ns) { php_dom_ns_is_html_magic_token, ZEND_STRL(DOM_XHTML_NS_URI) };
}
}
@@ -102,11 +109,12 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
xmlNodePtr root,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
php_dom_private_data *private_data
)
{
lexbor_libxml2_bridge_status retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OK;
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
xmlNsPtr html_ns = php_dom_libxml_ns_mapper_ensure_html_ns(ns_mapper);
xmlNsPtr xlink_ns = NULL;
xmlNsPtr prefixed_xmlns_ns = NULL;
@@ -146,24 +154,47 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert(
if (entering_namespace == LXB_NS_HTML) {
current_lxml_ns = html_ns;
} else {
const php_dom_ns_magic_token *magic_token = get_libxml_namespace_href(entering_namespace);
zend_string *uri = zend_string_init((char *) magic_token, strlen((char *) magic_token), false);
struct lxml_ns ns = get_libxml_namespace_href(entering_namespace);
zend_string *uri = zend_string_init(ns.href, ns.href_len, false);
current_lxml_ns = php_dom_libxml_ns_mapper_get_ns(ns_mapper, NULL, uri);
zend_string_release_ex(uri, false);
if (EXPECTED(current_lxml_ns != NULL)) {
current_lxml_ns->_private = (void *) magic_token;
current_lxml_ns->_private = (void *) ns.token;
}
}
}
/* Instead of xmlSetNs() because we know the arguments are valid. Prevents overhead. */
lxml_element->ns = current_lxml_ns;
for (lxb_dom_node_t *child_node = element->node.last_child; child_node != NULL; child_node = child_node->prev) {
/* Handle template element by creating a fragment node to contain its children.
* Other types of nodes contain their children directly. */
xmlNodePtr lxml_child_parent = lxml_element;
lxb_dom_node_t *child_node = element->node.last_child;
if (lxb_html_tree_node_is(&element->node, LXB_TAG_TEMPLATE)) {
if (create_default_ns) {
lxml_child_parent = xmlNewDocFragment(lxml_doc);
if (UNEXPECTED(lxml_child_parent == NULL)) {
retval = LEXBOR_LIBXML2_BRIDGE_STATUS_OOM;
break;
}
lxml_child_parent->parent = lxml_element;
dom_add_element_ns_hook(private_data, lxml_element);
php_dom_add_templated_content(private_data, lxml_element, lxml_child_parent);
}
lxb_html_template_element_t *template = lxb_html_interface_template(&element->node);
if (template->content != NULL) {
child_node = template->content->node.last_child;
}
}
for (; child_node != NULL; child_node = child_node->prev) {
lexbor_libxml2_bridge_work_list_item_push(
&work_list,
child_node,
entering_namespace,
lxml_element,
lxml_child_parent,
current_lxml_ns
);
}
@@ -307,7 +338,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
xmlDocPtr *doc_out,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
php_dom_private_data *private_data
)
{
xmlDocPtr lxml_doc = php_dom_create_html_doc();
@@ -320,7 +351,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
(xmlNodePtr) lxml_doc,
compact_text_nodes,
create_default_ns,
ns_mapper
private_data
);
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
xmlFreeDoc(lxml_doc);
@@ -336,7 +367,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
xmlNodePtr *fragment_out,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
php_dom_private_data *private_data
)
{
xmlNodePtr fragment = xmlNewDocFragment(lxml_doc);
@@ -349,7 +380,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
fragment,
compact_text_nodes,
create_default_ns,
ns_mapper
private_data
);
if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) {
xmlFreeNode(fragment);

View File

@@ -71,7 +71,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document(
xmlDocPtr *doc_out,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
php_dom_private_data *private_data
);
lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
lxb_dom_node_t *start_node,
@@ -79,7 +79,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment(
xmlNodePtr *fragment_out,
bool compact_text_nodes,
bool create_default_ns,
php_dom_libxml_ns_mapper *ns_mapper
php_dom_private_data *private_data
);
void lexbor_libxml2_bridge_report_errors(
const lexbor_libxml2_bridge_parse_context *ctx,

View File

@@ -289,9 +289,13 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
case XML_ELEMENT_NODE: {
TRY(dom_html5_serialize_element_start(ctx, node));
if (node->children) {
const xmlNode *children = node->children;
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token) && xmlStrEqual(node->name, BAD_CAST "template")) {
children = php_dom_retrieve_templated_content(ctx->private_data, node);
}
if (children) {
if (!dom_html5_serializes_as_void(node)) {
node = node->children;
node = children;
continue;
}
} else {
@@ -301,6 +305,14 @@ static zend_result dom_html5_serialize_node(dom_html5_serialize_context *ctx, co
break;
}
case XML_DOCUMENT_FRAG_NODE: {
if (node->children) {
node = node->children;
continue;
}
break;
}
/* Only exists for compatibility with XML and old DOM. */
case XML_ENTITY_REF_NODE: {
TRY(dom_html5_serialize_entity_ref(ctx, node));
@@ -346,10 +358,15 @@ zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode
}
/* Step 2 not needed because we're not using a string to store the serialized data */
/* Step 3 not needed because we don't support template contents yet */
/* Step 3. If the node is a template element, then let the node instead be the template element's template contents (a DocumentFragment node). */
xmlNodePtr children = php_dom_retrieve_templated_content(ctx->private_data, node);
if (!children) {
children = node->children;
}
/* Step 4 */
return dom_html5_serialize_node(ctx, node->children, node);
return dom_html5_serialize_node(ctx, children, node);
}
/* Variant on the above that is equivalent to the "outer HTML". */

View File

@@ -19,11 +19,13 @@
#include <Zend/zend_types.h>
#include <libxml/tree.h>
#include "private_data.h"
typedef struct {
zend_result (*write_string)(void *application_data, const char *buf);
zend_result (*write_string_len)(void *application_data, const char *buf, size_t len);
void *application_data;
php_dom_private_data *private_data;
} dom_html5_serialize_context;
zend_result dom_html5_serialize(dom_html5_serialize_context *ctx, const xmlNode *node);

View File

@@ -25,6 +25,7 @@
#include "html5_parser.h"
#include "html5_serializer.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "dom_properties.h"
#include <Zend/zend_smart_string.h>
#include <lexbor/html/encoding.h>
@@ -879,7 +880,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
}
php_dom_private_data *private_data = php_dom_private_data_create();
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
xmlDocPtr lxml_doc;
lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
@@ -887,7 +887,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromString)
&lxml_doc,
options & XML_PARSE_COMPACT,
!(options & DOM_HTML_NO_DEFAULT_NS),
ns_mapper
private_data
);
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
@@ -1071,7 +1071,6 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
}
private_data = php_dom_private_data_create();
php_dom_libxml_ns_mapper *ns_mapper = php_dom_ns_mapper_from_private(private_data);
xmlDocPtr lxml_doc;
lexbor_libxml2_bridge_status bridge_status = lexbor_libxml2_bridge_convert_document(
@@ -1079,7 +1078,7 @@ PHP_METHOD(Dom_HTMLDocument, createFromFile)
&lxml_doc,
options & XML_PARSE_COMPACT,
!(options & DOM_HTML_NO_DEFAULT_NS),
ns_mapper
private_data
);
lexbor_libxml2_bridge_copy_observations(parser->tree, &ctx.observations);
if (UNEXPECTED(bridge_status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
@@ -1206,7 +1205,7 @@ static zend_result dom_saveHTML_write_string(void *application_data, const char
return dom_saveHTML_write_string_len(application_data, buf, strlen(buf));
}
static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *docp, const xmlNode *node)
static zend_result dom_common_save(dom_output_ctx *output_ctx, dom_object *intern, const xmlDoc *docp, const xmlNode *node)
{
/* Initialize everything related to encoding & decoding */
const lxb_encoding_data_t *decoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8);
@@ -1239,6 +1238,7 @@ static zend_result dom_common_save(dom_output_ctx *output_ctx, const xmlDoc *doc
ctx.write_string_len = dom_saveHTML_write_string_len;
ctx.write_string = dom_saveHTML_write_string;
ctx.application_data = output_ctx;
ctx.private_data = php_dom_get_private_data(intern);
if (UNEXPECTED(dom_html5_serialize_outer(&ctx, node) != SUCCESS)) {
return FAILURE;
}
@@ -1297,7 +1297,7 @@ PHP_METHOD(Dom_HTMLDocument, saveHtmlFile)
dom_output_ctx output_ctx;
output_ctx.output_data = stream;
output_ctx.write_output = dom_write_output_stream;
if (UNEXPECTED(dom_common_save(&output_ctx, docp, (const xmlNode *) docp) != SUCCESS)) {
if (UNEXPECTED(dom_common_save(&output_ctx, intern, docp, (const xmlNode *) docp) != SUCCESS)) {
php_stream_close(stream);
RETURN_FALSE;
}
@@ -1336,7 +1336,7 @@ PHP_METHOD(Dom_HTMLDocument, saveHtml)
output_ctx.output_data = &buf;
output_ctx.write_output = dom_write_output_smart_str;
/* Can't fail because dom_write_output_smart_str() can't fail. */
zend_result result = dom_common_save(&output_ctx, docp, node);
zend_result result = dom_common_save(&output_ctx, intern, docp, node);
ZEND_ASSERT(result == SUCCESS);
RETURN_STR(smart_str_extract(&buf));
@@ -1644,4 +1644,19 @@ zend_result dom_html_document_title_write(dom_object *obj, zval *newval)
return SUCCESS;
}
#if ZEND_DEBUG
PHP_METHOD(Dom_HTMLDocument, debugGetTemplateCount)
{
xmlDocPtr doc;
dom_object *intern;
ZEND_PARSE_PARAMETERS_NONE();
DOM_GET_OBJ(doc, ZEND_THIS, xmlDocPtr, intern);
ZEND_IGNORE_VALUE(doc);
RETURN_LONG((zend_long) php_dom_get_template_count((const php_dom_private_data *) intern->document->private_data));
}
#endif
#endif /* HAVE_LIBXML && HAVE_DOM */

View File

@@ -68,6 +68,7 @@ zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
if (context_document->type == XML_HTML_DOCUMENT_NODE) {
smart_str output = {0};
dom_html5_serialize_context ctx;
ctx.private_data = php_dom_get_private_data(obj);
ctx.application_data = &output;
ctx.write_string = dom_inner_html_write_string;
ctx.write_string_len = dom_inner_html_write_string_len;
@@ -86,11 +87,12 @@ zend_result dom_element_inner_html_read(dom_object *obj, zval *retval)
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler("UTF-8");
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(dom_write_smart_str, NULL, &str, handler);
if (EXPECTED(out != NULL)) {
php_dom_private_data *private_data = php_dom_get_private_data(obj);
/* Note: the innerHTML mixin sets the well-formed flag to true. */
xmlNodePtr child = node->children;
status = 0;
while (child != NULL && status == 0) {
status = dom_xml_serialize(ctxt, out, child, false, true);
status = dom_xml_serialize(ctxt, out, child, false, true, private_data);
child = child->next;
}
status |= xmlOutputBufferFlush(out);
@@ -205,7 +207,7 @@ static xmlNodePtr dom_html_fragment_parsing_algorithm(dom_object *obj, xmlNodePt
xmlNodePtr fragment = NULL;
if (node != NULL) {
/* node->last_child could be NULL, but that is allowed. */
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_ns_mapper(obj));
lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_private_data(obj));
if (UNEXPECTED(status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) {
php_dom_throw_error(INVALID_STATE_ERR, true);
}
@@ -349,6 +351,14 @@ zend_result dom_element_inner_html_write(dom_object *obj, zval *newval)
return FAILURE;
}
if (php_dom_ns_is_fast(context_node, php_dom_ns_is_html_magic_token) && xmlStrEqual(context_node->name, BAD_CAST "template")) {
context_node = php_dom_ensure_templated_content(php_dom_get_private_data(obj), context_node);
if (context_node == NULL) {
xmlFreeNode(fragment);
return FAILURE;
}
}
/* If the context node is a template element, it was replaced above by its templated content fragment, so the children are replaced inside that fragment. */
dom_remove_all_children(context_node);
return php_dom_pre_insert(obj->document, fragment, context_node, NULL) ? SUCCESS : FAILURE;

View File

@@ -89,4 +89,12 @@ static zend_always_inline bool dom_is_document_cache_modified_since_parsing(php_
return !doc_ptr || doc_ptr->cache_tag.modification_nr > dom_minimum_modification_nr_since_parsing(doc_ptr);
}
/* Turns a pointer into an integer hash key by rotating its bits to the right by 4 positions
 * when SIZEOF_ZEND_LONG is 8 and by 3 positions otherwise. The low 3/4 bits of a pointer are
 * normally 0, so moving them to the top gives a better hash distribution. */
static zend_always_inline zend_long dom_mangle_pointer_for_key(const void *ptr)
{
	const zend_ulong bits = (zend_ulong) (uintptr_t) ptr;
	const size_t shift = (SIZEOF_ZEND_LONG == 8) ? 4 : 3;
	const size_t width = sizeof(bits) * 8;

	/* Right rotation: the low `shift` bits wrap around to the top of the word. */
	return (bits << (width - shift)) | (bits >> shift);
}
#endif

View File

@@ -22,27 +22,25 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "internal_helpers.h"
PHP_DOM_EXPORT const php_dom_ns_magic_token *php_dom_ns_is_html_magic_token = (const php_dom_ns_magic_token *) DOM_XHTML_NS_URI;
PHP_DOM_EXPORT const php_dom_ns_magic_token *php_dom_ns_is_mathml_magic_token = (const php_dom_ns_magic_token *) DOM_MATHML_NS_URI;
PHP_DOM_EXPORT const php_dom_ns_magic_token *php_dom_ns_is_svg_magic_token = (const php_dom_ns_magic_token *) DOM_SVG_NS_URI;
PHP_DOM_EXPORT const php_dom_ns_magic_token *php_dom_ns_is_xlink_magic_token = (const php_dom_ns_magic_token *) DOM_XLINK_NS_URI;
PHP_DOM_EXPORT const php_dom_ns_magic_token *php_dom_ns_is_xml_magic_token = (const php_dom_ns_magic_token *) DOM_XML_NS_URI;
PHP_DOM_EXPORT const php_dom_ns_magic_token *php_dom_ns_is_xmlns_magic_token = (const php_dom_ns_magic_token *) DOM_XMLNS_NS_URI;
struct php_dom_libxml_ns_mapper {
/* This is used almost all the time for HTML documents, so it makes sense to cache this. */
xmlNsPtr html_ns;
/* Used for every prefixless namespace declaration in XML, so also very common. */
xmlNsPtr prefixless_xmlns_ns;
HashTable uri_to_prefix_map;
};
typedef struct php_dom_private_data {
php_libxml_private_data_header header;
struct php_dom_libxml_ns_mapper ns_mapper;
} php_dom_private_data;
/* The actual value of these doesn't matter as long as they serve as a unique ID.
 * They need to be pointers because the `_private` field is a pointer, however we can choose the contents ourselves.
 * We need to keep these at least 4-byte aligned because the pointer may be tagged (although for now 2-byte alignment works too).
 * We use a trick: we declare a struct with a double member to force the alignment.
 * The URI string is the first member of that struct, so a token pointer can also be read as the
 * namespace URI string (the slow path of the token check compares it against the namespace href). */
#define DECLARE_NS_TOKEN(name, uri) \
static const struct { \
char val[sizeof(uri)]; \
double align; \
} decl_##name = { uri, 0.0 }; \
PHP_DOM_EXPORT const php_dom_ns_magic_token *(name) = (const php_dom_ns_magic_token *) &decl_##name;
/* One exported token per namespace URI. */
DECLARE_NS_TOKEN(php_dom_ns_is_html_magic_token, DOM_XHTML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_mathml_magic_token, DOM_MATHML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_svg_magic_token, DOM_SVG_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xlink_magic_token, DOM_XLINK_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xml_magic_token, DOM_XML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xmlns_magic_token, DOM_XMLNS_NS_URI);
static void php_dom_libxml_ns_mapper_prefix_map_element_dtor(zval *zv)
{
@@ -73,27 +71,6 @@ static HashTable *php_dom_libxml_ns_mapper_ensure_prefix_map(php_dom_libxml_ns_m
return prefix_map;
}
static void php_dom_libxml_private_data_destroy(php_libxml_private_data_header *header)
{
php_dom_private_data_destroy((php_dom_private_data *) header);
}
PHP_DOM_EXPORT php_dom_private_data *php_dom_private_data_create(void)
{
php_dom_private_data *mapper = emalloc(sizeof(*mapper));
mapper->header.dtor = php_dom_libxml_private_data_destroy;
mapper->ns_mapper.html_ns = NULL;
mapper->ns_mapper.prefixless_xmlns_ns = NULL;
zend_hash_init(&mapper->ns_mapper.uri_to_prefix_map, 0, NULL, ZVAL_PTR_DTOR, false);
return mapper;
}
void php_dom_private_data_destroy(php_dom_private_data *data)
{
zend_hash_destroy(&data->ns_mapper.uri_to_prefix_map);
efree(data);
}
static xmlNsPtr php_dom_libxml_ns_mapper_ensure_cached_ns(php_dom_libxml_ns_mapper *mapper, xmlNsPtr *ptr, const char *uri, size_t length, const php_dom_ns_magic_token *token)
{
if (EXPECTED(*ptr != NULL)) {
@@ -233,21 +210,6 @@ static xmlNsPtr php_dom_libxml_ns_mapper_store_and_normalize_parsed_ns(php_dom_l
return ns;
}
PHP_DOM_EXPORT php_libxml_private_data_header *php_dom_libxml_private_data_header(php_dom_private_data *private_data)
{
return private_data == NULL ? NULL : &private_data->header;
}
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_ns_mapper_from_private(php_dom_private_data *private_data)
{
return private_data == NULL ? NULL : &private_data->ns_mapper;
}
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_get_ns_mapper(dom_object *object)
{
return &php_dom_get_private_data(object)->ns_mapper;
}
typedef struct {
/* Fast lookup for created mappings. */
HashTable old_ns_to_new_ns_ptr;
@@ -257,6 +219,11 @@ typedef struct {
php_dom_libxml_ns_mapper *ns_mapper;
} dom_libxml_reconcile_ctx;
/* Returns the namespace mapper embedded in the private data that php_dom_get_private_data()
 * yields for `object`. The private data pointer is dereferenced without a NULL check. */
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_get_ns_mapper(dom_object *object)
{
	php_dom_private_data *private_data = php_dom_get_private_data(object);

	return &private_data->ns_mapper;
}
PHP_DOM_EXPORT xmlAttrPtr php_dom_ns_compat_mark_attribute(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node, xmlNsPtr ns)
{
xmlNsPtr xmlns_ns;
@@ -316,13 +283,16 @@ PHP_DOM_EXPORT bool php_dom_ns_is_fast_ex(xmlNsPtr ns, const php_dom_ns_magic_to
/* cached for fast checking */
if (ns->_private == magic_token) {
return true;
} else if (ns->_private != NULL) {
} else if (ns->_private != NULL && ((uintptr_t) ns->_private & 1) == 0) {
/* Other token stored */
return false;
}
/* Slow path */
if (xmlStrEqual(ns->href, BAD_CAST magic_token)) {
ns->_private = (void *) magic_token;
if (ns->_private == NULL) {
/* Only overwrite the private data if there is no other token stored. */
ns->_private = (void *) magic_token;
}
return true;
}
return false;
@@ -366,14 +336,6 @@ PHP_DOM_EXPORT void php_dom_reconcile_attribute_namespace_after_insertion(xmlAtt
}
}
static zend_always_inline zend_long dom_mangle_pointer_for_key(void *ptr)
{
zend_ulong value = (zend_ulong) (uintptr_t) ptr;
/* Rotate 3/4 bits for better hash distribution because the low 3/4 bits are normally 0. */
const size_t rol_amount = (SIZEOF_ZEND_LONG == 8) ? 4 : 3;
return (value >> rol_amount) | (value << (sizeof(value) * 8 - rol_amount));
}
static zend_always_inline void php_dom_libxml_reconcile_modern_single_node(dom_libxml_reconcile_ctx *ctx, xmlNodePtr node)
{
ZEND_ASSERT(node->ns != NULL);

View File

@@ -33,12 +33,6 @@ typedef struct php_dom_ns_magic_token php_dom_ns_magic_token;
struct php_dom_libxml_ns_mapper;
typedef struct php_dom_libxml_ns_mapper php_dom_libxml_ns_mapper;
struct php_dom_private_data;
typedef struct php_dom_private_data php_dom_private_data;
typedef struct php_libxml_private_data_header php_libxml_private_data_header;
struct php_libxml_private_data_header;
PHP_DOM_EXPORT extern const php_dom_ns_magic_token *php_dom_ns_is_html_magic_token;
PHP_DOM_EXPORT extern const php_dom_ns_magic_token *php_dom_ns_is_mathml_magic_token;
PHP_DOM_EXPORT extern const php_dom_ns_magic_token *php_dom_ns_is_svg_magic_token;
@@ -49,17 +43,13 @@ PHP_DOM_EXPORT extern const php_dom_ns_magic_token *php_dom_ns_is_xmlns_magic_to
/* These functions make it possible to make a namespace declaration also visible as an attribute by
* creating an equivalent attribute node. */
PHP_DOM_EXPORT php_dom_private_data *php_dom_private_data_create(void);
PHP_DOM_EXPORT void php_dom_private_data_destroy(php_dom_private_data *data);
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_html_ns(php_dom_libxml_ns_mapper *mapper);
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_prefixless_xmlns_ns(php_dom_libxml_ns_mapper *mapper);
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns(php_dom_libxml_ns_mapper *mapper, zend_string *prefix, zend_string *uri);
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(php_dom_libxml_ns_mapper *mapper, const xmlChar *prefix, size_t prefix_len, zend_string *uri);
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(php_dom_libxml_ns_mapper *mapper, const char *prefix, const char *uri);
PHP_DOM_EXPORT php_libxml_private_data_header *php_dom_libxml_private_data_header(php_dom_private_data *private_data);
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_get_ns_mapper(dom_object *object);
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_ns_mapper_from_private(php_dom_private_data *private_data);
PHP_DOM_EXPORT void php_dom_ns_compat_mark_attribute_list(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node);
PHP_DOM_EXPORT void php_dom_libxml_reconcile_modern(php_dom_libxml_ns_mapper *ns_mapper, xmlNodePtr node);
PHP_DOM_EXPORT void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp);

View File

@@ -23,6 +23,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "internal_helpers.h"
#include "dom_properties.h"

View File

@@ -27,6 +27,7 @@
#include "nodelist.h"
#include "html_collection.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "internal_helpers.h"
#include "php_dom_arginfo.h"
#include "dom_properties.h"
@@ -1389,8 +1390,11 @@ void dom_objects_free_storage(zend_object *object)
zend_object_std_dtor(&intern->std);
if (intern->ptr != NULL && ((php_libxml_node_ptr *)intern->ptr)->node != NULL) {
if (((xmlNodePtr) ((php_libxml_node_ptr *)intern->ptr)->node)->type != XML_DOCUMENT_NODE && ((xmlNodePtr) ((php_libxml_node_ptr *)intern->ptr)->node)->type != XML_HTML_DOCUMENT_NODE) {
php_libxml_node_ptr *ptr = intern->ptr;
if (ptr != NULL && ptr->node != NULL) {
xmlNodePtr node = ptr->node;
if (node->type != XML_DOCUMENT_NODE && node->type != XML_HTML_DOCUMENT_NODE) {
php_libxml_node_decrement_resource((php_libxml_node_object *) intern);
} else {
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);

View File

@@ -1643,6 +1643,10 @@ namespace Dom
public function saveHtml(?Node $node = null): string {}
public function saveHtmlFile(string $filename): int|false {}
#if ZEND_DEBUG
public function debugGetTemplateCount(): int {}
#endif
}
final class XMLDocument extends Document

View File

@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 1af73c3b63ebeb5e59948990892dcf6b627a1671 */
* Stub hash: 9a1e6842b2c5b891e11087d40aa8c9f56a2269a3 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -1059,6 +1059,11 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_Dom_HTMLDocument_saveHtmlF
ZEND_ARG_TYPE_INFO(0, filename, IS_STRING, 0)
ZEND_END_ARG_INFO()
#if ZEND_DEBUG
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_Dom_HTMLDocument_debugGetTemplateCount, 0, 0, IS_LONG, 0)
ZEND_END_ARG_INFO()
#endif
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_XMLDocument_createEmpty, 0, 0, Dom\\XMLDocument, 0)
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, version, IS_STRING, 0, "\"1.0\"")
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"")
@@ -1367,6 +1372,9 @@ ZEND_METHOD(Dom_HTMLDocument, createFromString);
ZEND_METHOD(Dom_XMLDocument, saveXml);
ZEND_METHOD(Dom_HTMLDocument, saveHtml);
ZEND_METHOD(Dom_HTMLDocument, saveHtmlFile);
#if ZEND_DEBUG
ZEND_METHOD(Dom_HTMLDocument, debugGetTemplateCount);
#endif
ZEND_METHOD(Dom_XMLDocument, createEmpty);
ZEND_METHOD(Dom_XMLDocument, createFromFile);
ZEND_METHOD(Dom_XMLDocument, createFromString);
@@ -1885,6 +1893,9 @@ static const zend_function_entry class_Dom_HTMLDocument_methods[] = {
ZEND_RAW_FENTRY("saveXmlFile", zim_DOMDocument_save, arginfo_class_Dom_HTMLDocument_saveXmlFile, ZEND_ACC_PUBLIC, NULL, NULL)
ZEND_ME(Dom_HTMLDocument, saveHtml, arginfo_class_Dom_HTMLDocument_saveHtml, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_HTMLDocument, saveHtmlFile, arginfo_class_Dom_HTMLDocument_saveHtmlFile, ZEND_ACC_PUBLIC)
#if ZEND_DEBUG
ZEND_ME(Dom_HTMLDocument, debugGetTemplateCount, arginfo_class_Dom_HTMLDocument_debugGetTemplateCount, ZEND_ACC_PUBLIC)
#endif
ZEND_FE_END
};

169
ext/dom/private_data.c Normal file
View File

@@ -0,0 +1,169 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "private_data.h"
#include "internal_helpers.h"
/* Destructor callback stored in the private data header (`header.dtor`).
 * Casts the header back to the full private data object and destroys it. */
static void php_dom_libxml_private_data_destroy(php_libxml_private_data_header *header)
{
	php_dom_private_data *private_data = (php_dom_private_data *) header;

	php_dom_private_data_destroy(private_data);
}
/* Node hook callback stored in the private data header (`header.ns_hook`).
 * Drops any templated content registered for `node`. */
static void php_dom_libxml_private_data_ns_hook(php_libxml_private_data_header *header, xmlNodePtr node)
{
	php_dom_private_data *private_data = (php_dom_private_data *) header;

	php_dom_remove_templated_content(private_data, node);
}
/* Returns the header embedded in `private_data`, or NULL when `private_data` is NULL. */
php_libxml_private_data_header *php_dom_libxml_private_data_header(php_dom_private_data *private_data)
{
	if (private_data == NULL) {
		return NULL;
	}

	return &private_data->header;
}
/* Returns the namespace mapper embedded in `private_data`, or NULL when `private_data` is NULL. */
php_dom_libxml_ns_mapper *php_dom_ns_mapper_from_private(php_dom_private_data *private_data)
{
	if (private_data == NULL) {
		return NULL;
	}

	return &private_data->ns_mapper;
}
/* Allocates a new private data object with its header callbacks installed, an empty
 * namespace mapper, and no template fragment table (that table is created lazily).
 * The result is released with php_dom_private_data_destroy(). */
php_dom_private_data *php_dom_private_data_create(void)
{
	php_dom_private_data *data = emalloc(sizeof(*data));

	/* Header callbacks. */
	data->header.dtor = php_dom_libxml_private_data_destroy;
	data->header.ns_hook = php_dom_libxml_private_data_ns_hook;

	/* Namespace mapper: nothing cached yet, empty URI-to-prefix map. */
	data->ns_mapper.html_ns = NULL;
	data->ns_mapper.prefixless_xmlns_ns = NULL;
	zend_hash_init(&data->ns_mapper.uri_to_prefix_map, 0, NULL, ZVAL_PTR_DTOR, false);

	/* NULL means "no templated content registered". */
	data->template_fragments = NULL;

	return data;
}
/* Releases everything owned by `data`: the namespace mapper's URI-to-prefix map, every
 * fragment node still registered in the template fragment table, the table itself, and
 * finally `data`. */
void php_dom_private_data_destroy(php_dom_private_data *data)
{
	zend_hash_destroy(&data->ns_mapper.uri_to_prefix_map);

	HashTable *fragments = data->template_fragments;
	if (fragments != NULL) {
		/* The table was created without a destructor, so the fragment nodes are freed by hand. */
		xmlNodePtr fragment;
		ZEND_HASH_MAP_FOREACH_PTR(fragments, fragment) {
			xmlFreeNode(fragment);
		} ZEND_HASH_FOREACH_END();

		zend_hash_destroy(fragments);
		FREE_HASHTABLE(fragments);
	}

	efree(data);
}
/* Frees a template content fragment that was already unregistered from the fragment table.
 * Nested templates inside the fragment have their own registered content, which is released first.
 * Note: it's not possible to obtain a userland reference to these nodes yet, so they can be freed
 * without worrying about their proxies. */
static void php_dom_free_templated_content(php_dom_private_data *private_data, xmlNodePtr base)
{
	/* Only walk the fragment when other template content is still registered;
	 * this is checked once, before the walk starts. */
	if (zend_hash_num_elements(private_data->template_fragments) != 0) {
		for (xmlNodePtr cursor = base->children; cursor != NULL; cursor = php_dom_next_in_tree_order(cursor, base)) {
			if (cursor->type != XML_ELEMENT_NODE) {
				continue;
			}
			/* This releases the content registered for the element, not the element itself,
			 * so the walk can safely continue from this node. */
			php_dom_remove_templated_content(private_data, cursor);
		}
	}

	xmlFreeNode(base);
}
/* Registers `fragment` as the content of `template_node`.
 * The fragment table is created on first use and is keyed by the mangled node pointer.
 * The entry is added with the "add new" variant, so the template must not be registered already. */
void php_dom_add_templated_content(php_dom_private_data *private_data, const xmlNode *template_node, xmlNodePtr fragment)
{
	if (private_data->template_fragments == NULL) {
		/* Lazily set up the table; it has no element destructor because the fragments are freed explicitly. */
		ALLOC_HASHTABLE(private_data->template_fragments);
		zend_hash_init(private_data->template_fragments, 0, NULL, NULL, false);
		zend_hash_real_init_mixed(private_data->template_fragments);
	}

	zend_hash_index_add_new_ptr(
		private_data->template_fragments,
		dom_mangle_pointer_for_key(template_node),
		fragment
	);
}
/* Looks up the content fragment registered for `template_node`.
 * Returns NULL when no fragment table exists yet or when the node has no registered content. */
xmlNodePtr php_dom_retrieve_templated_content(php_dom_private_data *private_data, const xmlNode *template_node)
{
	return private_data->template_fragments != NULL
		? zend_hash_index_find_ptr(private_data->template_fragments, dom_mangle_pointer_for_key(template_node))
		: NULL;
}
/* Returns the content fragment of `template_node`, creating and registering an empty one if none exists.
 * Returns NULL only when the fragment could not be created. */
xmlNodePtr php_dom_ensure_templated_content(php_dom_private_data *private_data, xmlNodePtr template_node)
{
	xmlNodePtr fragment = php_dom_retrieve_templated_content(private_data, template_node);
	if (fragment != NULL) {
		return fragment;
	}

	fragment = xmlNewDocFragment(template_node->doc);
	if (EXPECTED(fragment != NULL)) {
		/* The fragment points at its host element, but is not linked into the host's child list. */
		fragment->parent = template_node;
		/* Install the hook namespace so the content is released when the element gets freed. */
		dom_add_element_ns_hook(private_data, template_node);
		php_dom_add_templated_content(private_data, template_node, fragment);
	}

	return fragment;
}
/* Unregisters and frees the content fragment of `template_node`, if there is one.
 * The deletion cannot be done through a hash table destructor because the private data
 * is not accessible from there. */
void php_dom_remove_templated_content(php_dom_private_data *private_data, const xmlNode *template_node)
{
	if (private_data->template_fragments == NULL) {
		return;
	}

	zval *entry = zend_hash_index_find(private_data->template_fragments, dom_mangle_pointer_for_key(template_node));
	if (entry == NULL) {
		return;
	}

	/* Grab the fragment before the table entry disappears. */
	xmlNodePtr fragment = Z_PTR_P(entry);

	ZEND_ASSERT(offsetof(Bucket, val) == 0 && "Type cast only works if this is true");
	Bucket *bucket = (Bucket *) entry;

	/* The bucket must be removed before the content is freed: freeing can recurse into nested templates
	 * and change the hash table structure, which could invalidate the bucket pointer. */
	zend_hash_del_bucket(private_data->template_fragments, bucket);
	php_dom_free_templated_content(private_data, fragment);
}
/* Returns how many template content fragments are currently registered (0 when no table exists yet). */
uint32_t php_dom_get_template_count(const php_dom_private_data *private_data)
{
	if (private_data->template_fragments == NULL) {
		return 0;
	}

	return zend_hash_num_elements(private_data->template_fragments);
}
/* Replaces the namespace of `element` with a private copy (same prefix and href) whose `_private` field
 * carries the tagged private data pointer. ext/libxml checks that tag to decide whether to call the
 * namespace hook for the element.
 * The element must already have a namespace: `element->ns` is dereferenced unconditionally. */
void dom_add_element_ns_hook(php_dom_private_data *private_data, xmlNodePtr element)
{
	xmlNsPtr original_ns = element->ns;

	xmlNsPtr hook_ns = pemalloc(sizeof(*hook_ns), true);
	memset(hook_ns, 0, sizeof(*hook_ns));
	hook_ns->type = XML_LOCAL_NAMESPACE;
	hook_ns->href = xmlStrdup(original_ns->href);
	hook_ns->prefix = xmlStrdup(original_ns->prefix);
	/* The private data is a tagged data structure where only tag 1 is defined by ext/libxml to register a hook. */
	hook_ns->_private = (void *) ((uintptr_t) private_data | LIBXML_NS_TAG_HOOK);

	element->ns = hook_ns;

	/* NOTE(review): presumably this hands the namespace over to the document so that it is freed
	 * together with it; confirm against php_libxml_set_old_ns() in ext/libxml. */
	php_libxml_set_old_ns(element->doc, hook_ns);
}
#endif /* HAVE_LIBXML && HAVE_DOM */

56
ext/dom/private_data.h Normal file
View File

@@ -0,0 +1,56 @@
/*
+----------------------------------------------------------------------+
| Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Niels Dossche <nielsdos@php.net> |
+----------------------------------------------------------------------+
*/
#ifndef PRIVATE_DATA_H
#define PRIVATE_DATA_H
#include "xml_common.h"
/* Namespace bookkeeping embedded in php_dom_private_data.
 * The two cached pointers start out as NULL and the map starts out empty (see php_dom_private_data_create()). */
struct php_dom_libxml_ns_mapper {
/* This is used almost all the time for HTML documents, so it makes sense to cache this. */
xmlNsPtr html_ns;
/* Used for every prefixless namespace declaration in XML, so also very common. */
xmlNsPtr prefixless_xmlns_ns;
/* NOTE(review): presumably keyed by namespace URI and holding the per-prefix entries;
 * confirm against the namespace mapper implementation. */
HashTable uri_to_prefix_map;
};
/* Private data owned by ext/dom and exposed to ext/libxml through its embedded header. */
typedef struct php_dom_private_data {
/* Must remain the first member: the libxml callbacks cast the header pointer back to php_dom_private_data. */
php_libxml_private_data_header header;
struct php_dom_libxml_ns_mapper ns_mapper;
/* Maps a (mangled) template element pointer to its content fragment.
 * NULL until the first fragment is registered. */
HashTable *template_fragments;
} php_dom_private_data;
/* Forward declarations and typedefs for the types used by the prototypes below. */
typedef struct php_libxml_private_data_header php_libxml_private_data_header;
struct php_libxml_private_data_header;
struct php_dom_private_data;
typedef struct php_dom_private_data php_dom_private_data;
struct php_dom_libxml_ns_mapper;
typedef struct php_dom_libxml_ns_mapper php_dom_libxml_ns_mapper;
/* Returns the embedded libxml header, or NULL if private_data is NULL. */
php_libxml_private_data_header *php_dom_libxml_private_data_header(php_dom_private_data *private_data);
/* Returns the embedded namespace mapper, or NULL if private_data is NULL. */
php_dom_libxml_ns_mapper *php_dom_ns_mapper_from_private(php_dom_private_data *private_data);
/* Allocates and initializes a new private data structure. */
php_dom_private_data *php_dom_private_data_create(void);
/* Frees the private data together with all template content fragments still registered in it. */
void php_dom_private_data_destroy(php_dom_private_data *data);
/* Registers fragment as the content of template_node; the template must not be registered yet. */
void php_dom_add_templated_content(php_dom_private_data *private_data, const xmlNode *template_node, xmlNodePtr fragment);
/* Returns the content fragment registered for template_node, or NULL if there is none. */
xmlNodePtr php_dom_retrieve_templated_content(php_dom_private_data *private_data, const xmlNode *template_node);
/* Like php_dom_retrieve_templated_content(), but creates and registers an empty fragment when none exists.
 * Returns NULL if the fragment could not be created. */
xmlNodePtr php_dom_ensure_templated_content(php_dom_private_data *private_data, xmlNodePtr template_node);
/* Unregisters and frees the content fragment of template_node (including nested template content), if any. */
void php_dom_remove_templated_content(php_dom_private_data *private_data, const xmlNode *template_node);
/* Returns the number of registered template content fragments. */
uint32_t php_dom_get_template_count(const php_dom_private_data *private_data);
/* Gives element a private namespace copy that is tagged so ext/libxml calls the namespace hook for it.
 * The element must already have a namespace. */
void dom_add_element_ns_hook(php_dom_private_data *private_data, xmlNodePtr element);
#endif

View File

@@ -0,0 +1,14 @@
--TEST--
Template cloning
--EXTENSIONS--
dom
--FILE--
<?php
// The parsed <template> holds the text "x" as its content.
$dom = Dom\HTMLDocument::createFromString('<template>x</template>', LIBXML_NOERROR);
// Shallow-clone the first child of <head>, i.e. the <template> element.
$a = $dom->head->firstChild->cloneNode(false);
// Per --EXPECT--, neither serialization of the shallow clone contains the "x" content.
echo $dom->saveXML($a), "\n";
echo $dom->saveHTML($a), "\n";
?>
--EXPECT--
<template xmlns="http://www.w3.org/1999/xhtml"></template>
<template></template>

View File

@@ -0,0 +1,22 @@
--TEST--
template content indirect removal
--EXTENSIONS--
dom
--SKIPIF--
<?php
if (!PHP_DEBUG) { die ("skip only for debug build"); }
?>
--FILE--
<?php
// Two templates get counted: the outer one and the one nested inside its content.
$dom = Dom\HTMLDocument::createFromString('<template>foo<template>nested</template></template>', LIBXML_NOERROR);
$head = $dom->head;
var_dump($dom->debugGetTemplateCount());
// Detaching <head> does not change the count while $head still references the subtree.
$head->remove();
var_dump($dom->debugGetTemplateCount());
// Only dropping the last reference brings the count down to 0 (see --EXPECT--).
unset($head);
var_dump($dom->debugGetTemplateCount());
?>
--EXPECT--
int(2)
int(2)
int(0)

View File

@@ -0,0 +1,36 @@
--TEST--
<template> element manual creation
--EXTENSIONS--
dom
--FILE--
<?php
echo "=== After creation ===\n";
$dom = Dom\HTMLDocument::createEmpty();
// A template created through createElement() starts out with empty content.
$template = $dom->appendChild($dom->createElement("template"));
var_dump($template->innerHTML);
echo $dom->saveXML(), "\n";
echo $dom->saveHTML(), "\n";
echo "=== After setting content ===\n";
// Per --EXPECT--, the stray </template> in the assigned markup does not show up in the resulting content.
$template->innerHTML = "<p>hello</template></p>";
var_dump($template->innerHTML);
// The content is not a child of the element itself, hence NULL is expected here.
var_dump($template->firstChild);
echo $dom->saveXML(), "\n";
echo $dom->saveHTML(), "\n";
?>
--EXPECT--
=== After creation ===
string(0) ""
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<template xmlns="http://www.w3.org/1999/xhtml"></template>
<template></template>
=== After setting content ===
string(12) "<p>hello</p>"
NULL
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<template xmlns="http://www.w3.org/1999/xhtml"><p>hello</p></template>
<template><p>hello</p></template>

View File

@@ -0,0 +1,29 @@
--TEST--
<template> element nesting
--EXTENSIONS--
dom
--FILE--
<?php
// A template whose content contains another template.
$html = <<<HTML
<!DOCTYPE html>
<html>
<body>
<template>foo<template>bar</template></template>
</body>
</html>
HTML;
$dom = Dom\HTMLDocument::createFromString($html);
$template = $dom->body->firstElementChild;
// Both innerHTML and the XML serialization must include the nested template and its content.
var_dump($template->innerHTML);
echo $dom->saveXML();
?>
--EXPECT--
string(27) "foo<template>bar</template>"
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body>
<template>foo<template>bar</template></template>
</body></html>

View File

@@ -0,0 +1,24 @@
--TEST--
<template> element no default namespace
--EXTENSIONS--
dom
--FILE--
<?php
$html = <<<HTML
<!DOCTYPE html>
<html>
<body>
<template>a<div>foo</div>b</template>
</body>
</html>
HTML;
// Parse without assigning the default HTML namespace to the elements.
$dom = Dom\HTMLDocument::createFromString($html, Dom\HTML_NO_DEFAULT_NS);
$template = $dom->getElementsByTagName('template')[0];
var_dump($template->innerHTML);
// Per --EXPECT--, the <div> is reachable as a regular element child in this mode.
var_dump($template->firstElementChild->tagName);
?>
--EXPECT--
string(16) "a<div>foo</div>b"
string(3) "div"

View File

@@ -0,0 +1,83 @@
--TEST--
<template> element contents do not participate in DOM
--EXTENSIONS--
dom
--FILE--
<?php
$html = <<<HTML
<!DOCTYPE html>
<html>
<body>
<template>a<div>foo</div>b</template>
</body>
</html>
HTML;
$dom = Dom\HTMLDocument::createFromString($html);
$template = $dom->body->firstElementChild;
echo "=== Manipulation ===\n";
// The parsed content is not exposed as children of the template element.
echo "First child of template: ";
var_dump($template->firstChild?->nodeName);
// Appending to the element adds a regular child; per --EXPECT-- it does not show up in innerHTML or the serializations below.
$template->append($dom->createElement('invisible'));
echo "First child of template after appending: ";
var_dump($template->firstChild->nodeName);
// Round-tripping innerHTML must leave the content unchanged.
$template->innerHTML = $template->innerHTML;
echo "Inner HTML after idempotent modification: ";
var_dump($template->innerHTML);
echo "Selector should not find div element in shadow DOM: ";
var_dump($template->querySelector('div'));
echo "XPath should not find div element in shadow DOM:\n";
$xpath = new Dom\XPath($dom);
var_dump($xpath->query('//div'));
// Both serializers must still emit the template content.
echo "=== HTML serialization ===\n";
echo $dom->saveHTML(), "\n";
echo "=== HTML serialization of <template> ===\n";
echo $dom->saveHTML($template), "\n";
echo "=== XML serialization ===\n";
echo $dom->saveXML(), "\n";
echo "=== XML serialization of <template> ===\n";
echo $dom->saveXML($template), "\n";
// Should not crash
$template->remove();
unset($template);
// A freshly created template must start out empty, whatever the removed one contained.
echo "=== Creating a new template should not leak the old contents ===\n";
$template = $dom->createElement('template');
var_dump($template->innerHTML);
?>
--EXPECT--
=== Manipulation ===
First child of template: NULL
First child of template after appending: string(9) "INVISIBLE"
Inner HTML after idempotent modification: string(16) "a<div>foo</div>b"
Selector should not find div element in shadow DOM: NULL
XPath should not find div element in shadow DOM:
object(Dom\NodeList)#4 (1) {
["length"]=>
int(0)
}
=== HTML serialization ===
<!DOCTYPE html><html><head></head><body>
<template>a<div>foo</div>b</template>
</body></html>
=== HTML serialization of <template> ===
<template>a<div>foo</div>b</template>
=== XML serialization ===
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body>
<template>a<div>foo</div>b</template>
</body></html>
=== XML serialization of <template> ===
<template xmlns="http://www.w3.org/1999/xhtml">a<div>foo</div>b</template>
=== Creating a new template should not leak the old contents ===
string(0) ""

View File

@@ -0,0 +1,35 @@
--TEST--
<template> element renaming
--EXTENSIONS--
dom
--FILE--
<?php
$html = <<<HTML
<!DOCTYPE html>
<html>
<body>
<template>a<div>foo</div>b</template>
</body>
</html>
HTML;
$dom = Dom\HTMLDocument::createFromString($html);
$template = $dom->body->firstElementChild;
var_dump($template->innerHTML);
// Renaming a template that hosts content is expected to throw a DOMException (message in --EXPECT--).
try {
$template->rename($template->namespaceURI, 'screwthis');
} catch (DOMException $e) {
echo $e->getMessage(), "\n";
}
// These shouldn't be changed!
var_dump($template->nodeName);
var_dump($template->innerHTML);
?>
--EXPECT--
string(16) "a<div>foo</div>b"
It is not possible to rename the template element because it hosts a document fragment
string(8) "TEMPLATE"
string(16) "a<div>foo</div>b"

View File

@@ -0,0 +1,30 @@
--TEST--
SimpleXML and template content
--EXTENSIONS--
dom
simplexml
--SKIPIF--
<?php
if (!PHP_DEBUG) { die ("skip only for debug build"); }
?>
--FILE--
<?php
// Two templates get counted: the outer one and the one nested inside its content.
$dom = Dom\HTMLDocument::createFromString('<template>foo<template>nested</template></template>', LIBXML_NOERROR);
$head = $dom->head;
// View the same <head> node through SimpleXML.
$head_sxe = simplexml_import_dom($head);
var_dump($head_sxe);
var_dump($dom->debugGetTemplateCount());
// Removing the template through SimpleXML must also release its content, nested template included (count drops to 0).
unset($head_sxe->template);
var_dump($head_sxe);
var_dump($dom->debugGetTemplateCount());
?>
--EXPECTF--
object(SimpleXMLElement)#%d (1) {
["template"]=>
object(SimpleXMLElement)#%d (0) {
}
}
int(2)
object(SimpleXMLElement)#%d (0) {
}
int(0)

View File

@@ -22,6 +22,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "xml_serializer.h"
#include <libxml/xmlsave.h>
@@ -259,6 +260,12 @@ static int php_new_dom_write_smart_str(void *context, const char *buffer, int le
return len;
}
/* Resolves the DOM private data through the object attached to `node`.
 * Returns NULL when php_dom_object_get_data() yields no object for the node. */
static php_dom_private_data *get_private_data_from_node(xmlNodePtr node)
{
	dom_object *intern = php_dom_object_get_data(node);
	if (intern == NULL) {
		return NULL;
	}

	return php_dom_get_private_data(intern);
}
static zend_string *php_new_dom_dump_node_to_str_ex(xmlNodePtr node, int options, bool format, const char *encoding)
{
smart_str str = {0};
@@ -269,7 +276,7 @@ static zend_string *php_new_dom_dump_node_to_str_ex(xmlNodePtr node, int options
xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
if (EXPECTED(out != NULL)) {
status = dom_xml_serialize(ctxt, out, node, format, false);
status = dom_xml_serialize(ctxt, out, node, format, false, get_private_data_from_node(node));
status |= xmlOutputBufferFlush(out);
status |= xmlOutputBufferClose(out);
} else {
@@ -310,7 +317,7 @@ zend_long php_new_dom_dump_node_to_file(const char *filename, xmlDocPtr doc, xml
int status = -1;
xmlSaveCtxtPtr ctxt = xmlSaveToIO(out->writecallback, NULL, stream, encoding, XML_SAVE_AS_XML);
if (EXPECTED(ctxt != NULL)) {
status = dom_xml_serialize(ctxt, out, node, format, false);
status = dom_xml_serialize(ctxt, out, node, format, false, get_private_data_from_node(node));
status |= xmlOutputBufferFlush(out);
(void) xmlSaveClose(ctxt);
}

View File

@@ -21,6 +21,7 @@
#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "xml_serializer.h"
#include "private_data.h"
#include "namespace_compat.h"
#include "serialize_common.h"
#include "internal_helpers.h"
@@ -69,6 +70,7 @@ typedef struct {
/* State shared by the XML serialization routines. */
typedef struct dom_xml_serialize_ctx {
xmlSaveCtxtPtr ctxt;
xmlOutputBufferPtr out;
/* Used to look up the content of HTML template elements; may be NULL, in which case no template content is serialized. */
php_dom_private_data *private_data;
} dom_xml_serialize_ctx;
static int dom_xml_serialization_algorithm(
@@ -1128,16 +1130,26 @@ static int dom_xml_serialize_element_node(
/* 17. If the value of skip end tag is true, then return the value of markup and skip the remaining steps. */
if (!skip_end_tag) {
/* Step 18 deals with template elements which we don't support. */
if (should_format) {
indent++;
} else {
indent = -1;
}
/* 18. If ns is the HTML namespace, and the node's localName matches the string "template",
* then this is a template element.
* Append to markup the result of XML serializing a DocumentFragment node. */
xmlNodePtr child = NULL;
if (php_dom_ns_is_fast(element, php_dom_ns_is_html_magic_token) && xmlStrEqual(element->name, BAD_CAST "template")) {
if (ctx->private_data != NULL) {
child = php_dom_retrieve_templated_content(ctx->private_data, element);
}
} else {
child = element->children;
}
/* 19. Otherwise, append to markup the result of running the XML serialization algorithm on each of node's children. */
for (xmlNodePtr child = element->children; child != NULL; child = child->next) {
for (; child != NULL; child = child->next) {
if (should_format) {
TRY_OR_CLEANUP(dom_xml_output_indents(ctx->out, indent));
}
@@ -1281,7 +1293,7 @@ static int dom_xml_serialization_algorithm(
}
/* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization */
int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed)
int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed, php_dom_private_data *private_data)
{
/* 1. Let namespace be a context namespace with value null. */
const xmlChar *namespace = NULL;
@@ -1300,6 +1312,7 @@ int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr no
dom_xml_serialize_ctx ctx;
ctx.out = out;
ctx.ctxt = ctxt;
ctx.private_data = private_data;
int indent = format ? 0 : -1;
int result = dom_xml_serialization_algorithm(&ctx, &namespace_prefix_map, node, namespace, &prefix_index, indent, require_well_formed);

View File

@@ -22,6 +22,9 @@
#include <libxml/xmlsave.h>
#include <libxml/xmlIO.h>
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed);
struct php_dom_private_data;
typedef struct php_dom_private_data php_dom_private_data;
int dom_xml_serialize(xmlSaveCtxtPtr ctx, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed, php_dom_private_data *private_data);
#endif

View File

@@ -23,6 +23,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#define PHP_DOM_XPATH_QUERY 0
#define PHP_DOM_XPATH_EVALUATE 1

View File

@@ -276,7 +276,12 @@ static void php_libxml_node_free(xmlNodePtr node)
xmlFreeDtd(dtd);
break;
}
case XML_ELEMENT_NODE:
case XML_ELEMENT_NODE: {
if (node->ns && (((uintptr_t) node->ns->_private) & 1) == LIBXML_NS_TAG_HOOK) {
/* Special destruction routine hook should be called because it belongs to a "special" namespace. */
php_libxml_private_data_header *header = (php_libxml_private_data_header *) (((uintptr_t) node->ns->_private) & ~1);
header->ns_hook(header, node);
}
if (node->nsDef && node->doc) {
/* Make the namespace declaration survive the destruction of the holding element.
* This prevents a use-after-free on the namespace declaration.
@@ -308,6 +313,7 @@ static void php_libxml_node_free(xmlNodePtr node)
}
xmlFreeNode(node);
break;
}
default:
xmlFreeNode(node);
break;
@@ -1369,6 +1375,9 @@ PHP_LIBXML_API int php_libxml_decrement_doc_ref_directly(php_libxml_ref_obj *doc
{
int ret_refcount = --document->refcount;
if (ret_refcount == 0) {
if (document->private_data != NULL) {
document->private_data->dtor(document->private_data);
}
if (document->ptr != NULL) {
xmlFreeDoc((xmlDoc *) document->ptr);
}
@@ -1379,9 +1388,6 @@ PHP_LIBXML_API int php_libxml_decrement_doc_ref_directly(php_libxml_ref_obj *doc
}
efree(document->doc_props);
}
if (document->private_data != NULL) {
document->private_data->dtor(document->private_data);
}
efree(document);
}

View File

@@ -39,6 +39,8 @@ extern zend_module_entry libxml_module_entry;
/* Option flag occupying bit 2 (value 4).
 * The expansion is parenthesized so that the shift cannot re-associate with operators at the use site:
 * without the parentheses, `LIBXML_SAVE_NOEMPTYTAG * 2` would expand to `1 << (2 * 2)`. */
#define LIBXML_SAVE_NOEMPTYTAG (1<<2)
/* Tag stored in the lowest bit of xmlNs::_private. When it is set, the remaining bits are a pointer to a
 * php_libxml_private_data_header whose ns_hook is called for element nodes using that namespace. */
#define LIBXML_NS_TAG_HOOK 1
ZEND_BEGIN_MODULE_GLOBALS(libxml)
zval stream_context;
smart_str error_buffer;
@@ -67,6 +69,7 @@ typedef struct {
/* Common header for extension-specific private data attached to a document.
 * Extensions embed it at the start of their own structure ("extra fields" below). */
typedef struct php_libxml_private_data_header {
/* Called with the header itself to destroy the private data. */
void (*dtor)(struct php_libxml_private_data_header *);
/* Called with the header and an element node whose namespace carries the LIBXML_NS_TAG_HOOK tag. */
void (*ns_hook)(struct php_libxml_private_data_header *, xmlNodePtr);
/* extra fields */
} php_libxml_private_data_header;