From 768900b180c8e07552b5a579a4b4d7a7a0d02c22 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:57:02 +0200 Subject: [PATCH] Implement Dom $innerHTML property --- ext/dom/config.m4 | 2 +- ext/dom/config.w32 | 2 +- ext/dom/dom_properties.h | 2 + ext/dom/html5_parser.c | 45 ++- ext/dom/html5_parser.h | 10 +- ext/dom/inner_html_mixin.c | 357 ++++++++++++++++++ ext/dom/namespace_compat.c | 4 +- ext/dom/namespace_compat.h | 2 +- ext/dom/node.c | 2 +- ext/dom/php_dom.c | 1 + ext/dom/php_dom.h | 2 + ext/dom/php_dom.stub.php | 2 + ext/dom/php_dom_arginfo.h | 8 +- .../modern/html/parser/Element_innerHTML.phpt | 65 ++++ .../html/serializer/Element_innerHTML.phpt | 21 ++ .../modern/xml/Element_innerHTML_reading.phpt | 65 ++++ .../xml/Element_innerHTML_reading_errors.phpt | 108 ++++++ .../modern/xml/Element_innerHTML_writing.phpt | 86 +++++ .../xml/Element_innerHTML_writing_errors.phpt | 47 +++ ...MLDocument_createFromFile_empty_input.phpt | 2 +- ext/dom/xml_document.c | 8 +- ext/dom/xml_serializer.c | 251 +++++++++--- ext/dom/xml_serializer.h | 2 +- ext/dom/xpath.c | 2 +- ext/libxml/libxml.c | 2 +- ext/libxml/php_libxml.h | 8 +- 26 files changed, 1025 insertions(+), 81 deletions(-) create mode 100644 ext/dom/inner_html_mixin.c create mode 100644 ext/dom/tests/modern/html/parser/Element_innerHTML.phpt create mode 100644 ext/dom/tests/modern/html/serializer/Element_innerHTML.phpt create mode 100644 ext/dom/tests/modern/xml/Element_innerHTML_reading.phpt create mode 100644 ext/dom/tests/modern/xml/Element_innerHTML_reading_errors.phpt create mode 100644 ext/dom/tests/modern/xml/Element_innerHTML_writing.phpt create mode 100644 ext/dom/tests/modern/xml/Element_innerHTML_writing_errors.phpt diff --git a/ext/dom/config.m4 b/ext/dom/config.m4 index 4c5a5eb121d..e67d8dcbfe9 100644 --- a/ext/dom/config.m4 +++ b/ext/dom/config.m4 @@ -32,7 +32,7 @@ if test "$PHP_DOM" != "no"; then parentnode/tree.c 
parentnode/css_selectors.c \ processinginstruction.c cdatasection.c \ documentfragment.c domimplementation.c \ - element.c node.c characterdata.c \ + element.c node.c characterdata.c inner_html_mixin.c \ documenttype.c entity.c \ nodelist.c html_collection.c text.c comment.c \ entityreference.c \ diff --git a/ext/dom/config.w32 b/ext/dom/config.w32 index 87096903daa..16fe4b25306 100644 --- a/ext/dom/config.w32 +++ b/ext/dom/config.w32 @@ -10,7 +10,7 @@ if (PHP_DOM == "yes") { EXTENSION("dom", "php_dom.c attr.c document.c infra.c \ xml_document.c html_document.c xml_serializer.c html5_serializer.c html5_parser.c namespace_compat.c \ domexception.c processinginstruction.c \ - cdatasection.c documentfragment.c domimplementation.c element.c \ + cdatasection.c documentfragment.c domimplementation.c element.c inner_html_mixin.c \ node.c characterdata.c documenttype.c \ entity.c nodelist.c html_collection.c text.c comment.c \ entityreference.c \ diff --git a/ext/dom/dom_properties.h b/ext/dom/dom_properties.h index a9b6df87594..eb0bdb96f73 100644 --- a/ext/dom/dom_properties.h +++ b/ext/dom/dom_properties.h @@ -83,6 +83,8 @@ zend_result dom_element_class_name_write(dom_object *obj, zval *newval); zend_result dom_element_id_read(dom_object *obj, zval *retval); zend_result dom_element_id_write(dom_object *obj, zval *newval); zend_result dom_element_schema_type_info_read(dom_object *obj, zval *retval); +zend_result dom_element_inner_html_read(dom_object *obj, zval *retval); +zend_result dom_element_inner_html_write(dom_object *obj, zval *newval); /* entity properties */ zend_result dom_entity_public_id_read(dom_object *obj, zval *retval); diff --git a/ext/dom/html5_parser.c b/ext/dom/html5_parser.c index a8d75d56860..b4c3eadfdf8 100644 --- a/ext/dom/html5_parser.c +++ b/ext/dom/html5_parser.c @@ -99,6 +99,7 @@ static zend_always_inline xmlNodePtr lexbor_libxml2_bridge_new_text_node_fast(xm static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert( lxb_dom_node_t 
*start_node, xmlDocPtr lxml_doc, + xmlNodePtr root, bool compact_text_nodes, bool create_default_ns, php_dom_libxml_ns_mapper *ns_mapper @@ -114,7 +115,7 @@ static lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert( lexbor_array_obj_init(&work_list, WORK_LIST_INIT_SIZE, sizeof(work_list_item)); for (lxb_dom_node_t *node = start_node; node != NULL; node = node->prev) { - lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, (xmlNodePtr) lxml_doc, NULL); + lexbor_libxml2_bridge_work_list_item_push(&work_list, node, LXB_NS__UNDEF, root, NULL); } work_list_item *current_stack_item; @@ -316,6 +317,7 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document( lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert( lxb_dom_interface_node(document)->last_child, lxml_doc, + (xmlNodePtr) lxml_doc, compact_text_nodes, create_default_ns, ns_mapper @@ -328,6 +330,35 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document( return LEXBOR_LIBXML2_BRIDGE_STATUS_OK; } +lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment( + lxb_dom_node_t *start_node, + xmlDocPtr lxml_doc, + xmlNodePtr *fragment_out, + bool compact_text_nodes, + bool create_default_ns, + php_dom_libxml_ns_mapper *ns_mapper +) +{ + xmlNodePtr fragment = xmlNewDocFragment(lxml_doc); + if (UNEXPECTED(fragment == NULL)) { + return LEXBOR_LIBXML2_BRIDGE_STATUS_OOM; + } + lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert( + start_node, + lxml_doc, + fragment, + compact_text_nodes, + create_default_ns, + ns_mapper + ); + if (status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK) { + xmlFreeNode(fragment); + return status; + } + *fragment_out = fragment; + return LEXBOR_LIBXML2_BRIDGE_STATUS_OK; +} + void lexbor_libxml2_bridge_report_errors( const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, @@ -376,12 +407,22 @@ void lexbor_libxml2_bridge_report_errors( *error_index_offset_tree = index; } +static 
php_libxml_quirks_mode dom_translate_quirks_mode(lxb_dom_document_cmode_t quirks_mode) +{ + switch (quirks_mode) { + case LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS: return PHP_LIBXML_NO_QUIRKS; + case LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS: return PHP_LIBXML_LIMITED_QUIRKS; + case LXB_DOM_DOCUMENT_CMODE_QUIRKS: return PHP_LIBXML_QUIRKS; + EMPTY_SWITCH_DEFAULT_CASE(); + } +} + void lexbor_libxml2_bridge_copy_observations(lxb_html_tree_t *tree, lexbor_libxml2_bridge_extracted_observations *observations) { observations->has_explicit_html_tag = tree->has_explicit_html_tag; observations->has_explicit_head_tag = tree->has_explicit_head_tag; observations->has_explicit_body_tag = tree->has_explicit_body_tag; - observations->quirks_mode = lxb_dom_interface_document(tree->document)->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS; + observations->quirks_mode = dom_translate_quirks_mode(lxb_dom_interface_document(tree->document)->compat_mode); } #endif /* HAVE_LIBXML && HAVE_DOM */ diff --git a/ext/dom/html5_parser.h b/ext/dom/html5_parser.h index 6b01a187605..86b4b106b97 100644 --- a/ext/dom/html5_parser.h +++ b/ext/dom/html5_parser.h @@ -47,7 +47,7 @@ typedef struct _lexbor_libxml2_bridge_extracted_observations { bool has_explicit_html_tag; bool has_explicit_head_tag; bool has_explicit_body_tag; - bool quirks_mode; + php_libxml_quirks_mode quirks_mode; } lexbor_libxml2_bridge_extracted_observations; typedef struct _lexbor_libxml2_bridge_parse_context { @@ -73,6 +73,14 @@ lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_document( bool create_default_ns, php_dom_libxml_ns_mapper *ns_mapper ); +lexbor_libxml2_bridge_status lexbor_libxml2_bridge_convert_fragment( + lxb_dom_node_t *start_node, + xmlDocPtr lxml_doc, + xmlNodePtr *fragment_out, + bool compact_text_nodes, + bool create_default_ns, + php_dom_libxml_ns_mapper *ns_mapper +); void lexbor_libxml2_bridge_report_errors( const lexbor_libxml2_bridge_parse_context *ctx, lxb_html_parser_t *parser, diff --git 
a/ext/dom/inner_html_mixin.c b/ext/dom/inner_html_mixin.c new file mode 100644 index 00000000000..6db71462d1a --- /dev/null +++ b/ext/dom/inner_html_mixin.c @@ -0,0 +1,357 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#if defined(HAVE_LIBXML) && defined(HAVE_DOM) +#include "php_dom.h" +#include "dom_properties.h" +#include "html5_parser.h" +#include "html5_serializer.h" +#include "xml_serializer.h" +#include "domexception.h" +#include +#include +#include +#include +#include + +/* Spec date: 2024-04-14 */ + +static zend_result dom_inner_html_write_string(void *application_data, const char *buf) +{ + smart_str *output = application_data; + smart_str_appends(output, buf); + return SUCCESS; +} + +static zend_result dom_inner_html_write_string_len(void *application_data, const char *buf, size_t len) +{ + smart_str *output = application_data; + smart_str_appendl(output, buf, len); + return SUCCESS; +} + +static int dom_write_smart_str(void *context, const char *buffer, int len) +{ + smart_str *str = context; + smart_str_appendl(str, buffer, len); + return len; +} + +/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin + * and 
https://w3c.github.io/DOM-Parsing/#dfn-fragment-serializing-algorithm */ +zend_result dom_element_inner_html_read(dom_object *obj, zval *retval) +{ + DOM_PROP_NODE(xmlNodePtr, node, obj); + + /* 1. Let context document be the value of node's node document. */ + const xmlDoc *context_document = node->doc; + + /* 2. If context document is an HTML document, return an HTML serialization of node. */ + if (context_document->type == XML_HTML_DOCUMENT_NODE) { + smart_str output = {0}; + dom_html5_serialize_context ctx; + ctx.application_data = &output; + ctx.write_string = dom_inner_html_write_string; + ctx.write_string_len = dom_inner_html_write_string_len; + dom_html5_serialize(&ctx, node); + ZVAL_STR(retval, smart_str_extract(&output)); + } + /* 3. Otherwise, context document is an XML document; return an XML serialization of node passing the flag require well-formed. */ + else { + ZEND_ASSERT(context_document->type == XML_DOCUMENT_NODE); + + int status = -1; + smart_str str = {0}; + /* str is filled through dom_write_smart_str(); a failed xmlSaveToIO() is handled by the NULL check below. */ + xmlSaveCtxtPtr ctxt = xmlSaveToIO(dom_write_smart_str, NULL, &str, "UTF-8", XML_SAVE_AS_XML); + if (EXPECTED(ctxt != NULL)) { + xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler("UTF-8"); + xmlOutputBufferPtr out = xmlOutputBufferCreateIO(dom_write_smart_str, NULL, &str, handler); + if (EXPECTED(out != NULL)) { + /* Note: the innerHTML mixin sets the well-formed flag to true. 
*/ + xmlNodePtr child = node->children; + status = 0; + while (child != NULL && status == 0) { + status = dom_xml_serialize(ctxt, out, child, false, true); + child = child->next; + } + status |= xmlOutputBufferFlush(out); + status |= xmlOutputBufferClose(out); + } + (void) xmlSaveClose(ctxt); + xmlCharEncCloseFunc(handler); + } + if (UNEXPECTED(status < 0)) { + smart_str_free_ex(&str, false); + php_dom_throw_error_with_message(SYNTAX_ERR, "The resulting XML serialization is not well-formed", true); + return FAILURE; + } + ZVAL_STR(retval, smart_str_extract(&str)); + } + + return SUCCESS; +} + +static lxb_dom_node_t *dom_html_fragment_lexbor_parse(lxb_html_document_t *document, lxb_dom_element_t *element, const zend_string *input) +{ + lxb_status_t status = lxb_html_document_parse_fragment_chunk_begin(document, element); + if (status != LXB_STATUS_OK) { + return NULL; + } + + const lxb_encoding_data_t *encoding_data = lxb_encoding_data(LXB_ENCODING_UTF_8); + lxb_encoding_decode_t decode; + lxb_encoding_decode_init_single(&decode, encoding_data); + + const lxb_char_t *buf_ref = (const lxb_char_t *) ZSTR_VAL(input); + if (ZSTR_IS_VALID_UTF8(input)) { + /* If we know the input is valid UTF-8, we don't have to perform checks and replace invalid sequences. */ + status = lxb_html_document_parse_fragment_chunk(document, buf_ref, ZSTR_LEN(input)); + if (UNEXPECTED(status != LXB_STATUS_OK)) { + return NULL; + } + } else { + /* See dom_decode_encode_fast_path(), simplified version for in-memory use-case. 
*/ + const lxb_char_t *buf_end = buf_ref + ZSTR_LEN(input); + const lxb_char_t *last_output = buf_ref; + while (buf_ref < buf_end) { + if (decode.u.utf_8.need == 0 && *buf_ref < 0x80) { + buf_ref++; + continue; + } + + const lxb_char_t *buf_ref_backup = buf_ref; + lxb_codepoint_t codepoint = lxb_encoding_decode_utf_8_single(&decode, &buf_ref, buf_end); + if (UNEXPECTED(codepoint > LXB_ENCODING_MAX_CODEPOINT)) { + status = lxb_html_document_parse_fragment_chunk(document, last_output, buf_ref_backup - last_output); + if (UNEXPECTED(status != LXB_STATUS_OK)) { + return NULL; + } + + status = lxb_html_document_parse_fragment_chunk(document, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE); + if (UNEXPECTED(status != LXB_STATUS_OK)) { + return NULL; + } + + last_output = buf_ref; + } + } + + if (buf_ref != last_output) { + status = lxb_html_document_parse_fragment_chunk(document, last_output, buf_ref - last_output); + if (UNEXPECTED(status != LXB_STATUS_OK)) { + return NULL; + } + } + } + + return lxb_html_document_parse_fragment_chunk_end(document); +} + +static lxb_dom_document_cmode_t dom_translate_quirks_mode(php_libxml_quirks_mode quirks_mode) +{ + switch (quirks_mode) { + case PHP_LIBXML_NO_QUIRKS: return LXB_DOM_DOCUMENT_CMODE_NO_QUIRKS; + case PHP_LIBXML_LIMITED_QUIRKS: return LXB_DOM_DOCUMENT_CMODE_LIMITED_QUIRKS; + case PHP_LIBXML_QUIRKS: return LXB_DOM_DOCUMENT_CMODE_QUIRKS; + EMPTY_SWITCH_DEFAULT_CASE(); + } +} + +/* https://html.spec.whatwg.org/#html-fragment-parsing-algorithm */ +static xmlNodePtr dom_html_fragment_parsing_algorithm(dom_object *obj, xmlNodePtr context_node, const zend_string *input, php_libxml_quirks_mode quirks_mode) +{ + /* The whole algorithm is implemented in Lexbor, we just have to be the adapter between the + * data structures used in PHP and what Lexbor expects. 
*/ + + lxb_html_document_t *document = lxb_html_document_create(); + document->dom_document.compat_mode = dom_translate_quirks_mode(quirks_mode); + lxb_dom_element_t *element = lxb_dom_element_interface_create(&document->dom_document); + + const lxb_tag_data_t *tag_data = lxb_tag_data_by_name(document->dom_document.tags, (lxb_char_t *) context_node->name, xmlStrlen(context_node->name)); + element->node.local_name = tag_data == NULL ? LXB_TAG__UNDEF : tag_data->tag_id; + + const lxb_char_t *ns_uri; + size_t ns_uri_len; + if (context_node->ns == NULL || context_node->ns->href == NULL) { + ns_uri = (lxb_char_t *) ""; + ns_uri_len = 0; + } else { + ns_uri = context_node->ns->href; + ns_uri_len = xmlStrlen(ns_uri); + } + const lxb_ns_data_t *ns_data = lxb_ns_data_by_link(document->dom_document.ns, ns_uri, ns_uri_len); + element->node.ns = ns_data == NULL ? LXB_NS__UNDEF : ns_data->ns_id; + + lxb_dom_node_t *node = dom_html_fragment_lexbor_parse(document, element, input); + xmlNodePtr fragment = NULL; + if (node != NULL) { + /* node->last_child could be NULL, but that is allowed. 
*/ + lexbor_libxml2_bridge_status status = lexbor_libxml2_bridge_convert_fragment(node->last_child, context_node->doc, &fragment, true, true, php_dom_get_ns_mapper(obj)); + if (UNEXPECTED(status != LEXBOR_LIBXML2_BRIDGE_STATUS_OK)) { + php_dom_throw_error(INVALID_STATE_ERR, true); + } + } else { + php_dom_throw_error(INVALID_STATE_ERR, true); + } + + lxb_html_document_destroy(document); + + return fragment; +} + +static void dom_xml_parser_tag_name(const xmlNode *context_node, xmlParserCtxtPtr parser) +{ + if (context_node->ns != NULL && context_node->ns->prefix != NULL) { + xmlParseChunk(parser, (const char *) context_node->ns->prefix, xmlStrlen(context_node->ns->prefix), 0); + xmlParseChunk(parser, ":", 1, 0); + } + + xmlParseChunk(parser, (const char *) context_node->name, xmlStrlen(context_node->name), 0); +} + +static void dom_xml_fragment_parsing_algorithm_parse(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *context_node, const zend_string *input, xmlParserCtxtPtr parser) +{ + xmlParseChunk(parser, "<", 1, 0); + dom_xml_parser_tag_name(context_node, parser); + + /* Namespaces: we have to declare all in-scope namespaces including the default namespace */ + /* xmlns attributes */ + php_dom_in_scope_ns in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, context_node, true); + for (size_t i = 0; i < in_scope_ns.count; i++) { + const xmlNs *ns = in_scope_ns.list[i]; + xmlParseChunk(parser, " xmlns:", 7, 0); + ZEND_ASSERT(ns->prefix != NULL); + xmlParseChunk(parser, (const char *) ns->prefix, xmlStrlen(ns->prefix), 0); + xmlParseChunk(parser, "=\"", 2, 0); + xmlParseChunk(parser, (const char *) ns->href, xmlStrlen(ns->href), 0); + xmlParseChunk(parser, "\"", 1, 0); + } + php_dom_in_scope_ns_destroy(&in_scope_ns); + /* default namespace */ + const char *default_ns = dom_locate_a_namespace(context_node, NULL); + if (default_ns != NULL) { + xmlParseChunk(parser, " xmlns=\"", 8, 0); + xmlParseChunk(parser, default_ns, strlen(default_ns), 0); + xmlParseChunk(parser, 
"\"", 1, 0); + } + + xmlParseChunk(parser, ">", 1, 0); + + xmlParseChunk(parser, (const char *) ZSTR_VAL(input), ZSTR_LEN(input), 0); + + xmlParseChunk(parser, "", 1, 1); +} + +/* https://html.spec.whatwg.org/#xml-fragment-parsing-algorithm */ +static xmlNodePtr dom_xml_fragment_parsing_algorithm(dom_object *obj, const xmlNode *context_node, const zend_string *input) +{ + /* Steps 1-4 below */ + xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL); + if (UNEXPECTED(parser == NULL)) { + php_dom_throw_error(INVALID_STATE_ERR, true); + return NULL; + } + + /* This is not only good to avoid a performance cost of changing the tree, but also to work around an old bug + * in xmlSetTreeDoc(). */ + xmlDictFree(parser->dict); + if (context_node->doc->dict == NULL) { + context_node->doc->dict = xmlDictCreate(); + xmlDictSetLimit(context_node->doc->dict, XML_MAX_DICTIONARY_LIMIT); + } + parser->dict = context_node->doc->dict; + + php_libxml_sanitize_parse_ctxt_options(parser); + xmlCtxtUseOptions(parser, XML_PARSE_IGNORE_ENC | XML_PARSE_NOERROR | XML_PARSE_NOWARNING); + + xmlCharEncodingHandlerPtr encoding = xmlFindCharEncodingHandler("UTF-8"); + (void) xmlSwitchToEncoding(parser, encoding); + + php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(obj); + dom_xml_fragment_parsing_algorithm_parse(ns_mapper, context_node, input, parser); + + /* 5. If there is an XML well-formedness or XML namespace well-formedness error, then throw a "SyntaxError" DOMException. */ + if (!parser->wellFormed || !parser->nsWellFormed) { + parser->dict = NULL; + xmlFreeDoc(parser->myDoc); + xmlFreeParserCtxt(parser); + php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true); + return NULL; + } + + xmlDocPtr doc = parser->myDoc; + xmlFreeParserCtxt(parser); + + if (EXPECTED(doc != NULL)) { + doc->dict = NULL; + + /* 6. If the document element of the resulting Document has any sibling nodes, then throw a "SyntaxError" DOMException. 
*/ + xmlNodePtr document_element = doc->children; + if (document_element == NULL || document_element->next != NULL) { + xmlFreeDoc(doc); + php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true); + return NULL; + } + + /* 7. Return the child nodes of the document element of the resulting Document, in tree order. */ + xmlNodePtr fragment = xmlNewDocFragment(context_node->doc); + if (EXPECTED(fragment != NULL)) { + xmlNodePtr child = document_element->children; + /* Yes, we have to call xmlSetTreeDoc() prior to xmlAddChildList() + * because xmlAddChildList() _only_ sets the tree for the topmost elements in the subtree! */ + xmlSetTreeDoc(document_element, context_node->doc); + xmlAddChildList(fragment, child); + dom_mark_namespaces_as_attributes_too(ns_mapper, doc); + document_element->children = NULL; + document_element->last = NULL; + } + xmlFreeDoc(doc); + return fragment; /* NOTE(review): NULL here (xmlNewDocFragment() failed) is returned without throwing in this function - confirm callers cope with FAILURE and no pending exception. */ + } + return NULL; +} + +/* https://w3c.github.io/DOM-Parsing/#the-innerhtml-mixin + * and https://w3c.github.io/DOM-Parsing/#dfn-fragment-parsing-algorithm */ +zend_result dom_element_inner_html_write(dom_object *obj, zval *newval) +{ + DOM_PROP_NODE(xmlNodePtr, context_node, obj); + + xmlNodePtr fragment; + if (context_node->doc->type == XML_DOCUMENT_NODE) { + fragment = dom_xml_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval)); + } else { + fragment = dom_html_fragment_parsing_algorithm(obj, context_node, Z_STR_P(newval), obj->document->quirks_mode); + } + + if (fragment == NULL) { + return FAILURE; + } + + /* We skip the steps involving the template element as context node since we don't do special handling for that. */ + dom_remove_all_children(context_node); + return php_dom_pre_insert(obj->document, fragment, context_node, NULL) ? 
SUCCESS : FAILURE; +} + +#endif diff --git a/ext/dom/namespace_compat.c b/ext/dom/namespace_compat.c index 232aa521415..69991c1427f 100644 --- a/ext/dom/namespace_compat.c +++ b/ext/dom/namespace_compat.c @@ -447,7 +447,7 @@ PHP_DOM_EXPORT void php_dom_libxml_reconcile_modern(php_dom_libxml_ns_mapper *ns zend_hash_destroy(&ctx.old_ns_to_new_ns_ptr); } -PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node) +PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements) { ZEND_ASSERT(node != NULL); @@ -464,7 +464,7 @@ PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_map for (const xmlNode *cur = node; cur != NULL; cur = cur->parent) { if (cur->type == XML_ELEMENT_NODE) { /* Register namespace of element */ - if (cur->ns != NULL && cur->ns->prefix != NULL) { + if (!ignore_elements && cur->ns != NULL && cur->ns->prefix != NULL) { const char *prefix = (const char *) cur->ns->prefix; zend_hash_str_add_ptr(&tmp_prefix_to_ns_table, prefix, strlen(prefix), cur->ns); } diff --git a/ext/dom/namespace_compat.h b/ext/dom/namespace_compat.h index 1147c628922..1016bf1e5ee 100644 --- a/ext/dom/namespace_compat.h +++ b/ext/dom/namespace_compat.h @@ -70,7 +70,7 @@ typedef struct _php_dom_in_scope_ns { bool origin_is_ns_compat; } php_dom_in_scope_ns; -PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node); +PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements); PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns_legacy(const xmlNode *node); PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns); diff --git a/ext/dom/node.c b/ext/dom/node.c index 0a63aa61eb9..ef669963de7 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -1881,7 +1881,7 @@ 
PHP_METHOD(Dom_Node, lookupPrefix) /* }}} end dom_node_lookup_prefix */ /* https://dom.spec.whatwg.org/#locate-a-namespace */ -static const char *dom_locate_a_namespace(xmlNodePtr node, const zend_string *prefix) +const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix) { /* switch on the interface node implements: */ if (node->type == XML_ELEMENT_NODE) { diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 7ebafd92072..c43f617c35c 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1039,6 +1039,7 @@ PHP_MINIT_FUNCTION(dom) DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "childElementCount", dom_parent_node_child_element_count, NULL); DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "previousElementSibling", dom_node_previous_element_sibling_read, NULL); DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "nextElementSibling", dom_node_next_element_sibling_read, NULL); + DOM_REGISTER_PROP_HANDLER(&dom_modern_element_prop_handlers, "innerHTML", dom_element_inner_html_read, dom_element_inner_html_write); zend_hash_merge(&dom_modern_element_prop_handlers, &dom_modern_node_prop_handlers, NULL, false); DOM_OVERWRITE_PROP_HANDLER(&dom_modern_element_prop_handlers, "textContent", dom_node_text_content_read, dom_node_text_content_write); zend_hash_add_new_ptr(&classes, dom_modern_element_class_entry->name, &dom_modern_element_prop_handlers); diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index ec4d3d92d5c..cf0667e0692 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -171,6 +171,8 @@ dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_ent xmlDocPtr php_dom_create_html_doc(void); xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference); void dom_set_xml_class(php_libxml_ref_obj *document); +const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix); +void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper 
*ns_mapper, xmlDocPtr doc); bool dom_compare_value(const xmlAttr *attr, const xmlChar *value); void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp); diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index 08313bc7b8a..a2f1d8991dd 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -1380,6 +1380,8 @@ namespace Dom public function querySelectorAll(string $selectors): NodeList {} public function closest(string $selectors): ?Element {} public function matches(string $selectors): bool {} + + public string $innerHTML; } class HTMLElement extends Element diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index 0284b73d959..f3f28bcf272 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 28365949d78a2d0254cfdb0da6549e282d2eb436 */ + * Stub hash: 9065d5c713a6fb879f8116821eaabc3a01a4db20 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) @@ -3121,6 +3121,12 @@ static zend_class_entry *register_class_Dom_Element(zend_class_entry *class_entr zend_declare_typed_property(class_entry, property_nextElementSibling_name, &property_nextElementSibling_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_CLASS(property_nextElementSibling_class_Dom_Element, 0, MAY_BE_NULL)); zend_string_release(property_nextElementSibling_name); + zval property_innerHTML_default_value; + ZVAL_UNDEF(&property_innerHTML_default_value); + zend_string *property_innerHTML_name = zend_string_init("innerHTML", sizeof("innerHTML") - 1, 1); + zend_declare_typed_property(class_entry, property_innerHTML_name, &property_innerHTML_default_value, ZEND_ACC_PUBLIC, NULL, (zend_type) ZEND_TYPE_INIT_MASK(MAY_BE_STRING)); + zend_string_release(property_innerHTML_name); + return class_entry; } diff --git 
a/ext/dom/tests/modern/html/parser/Element_innerHTML.phpt b/ext/dom/tests/modern/html/parser/Element_innerHTML.phpt new file mode 100644 index 00000000000..058d4432b49 --- /dev/null +++ b/ext/dom/tests/modern/html/parser/Element_innerHTML.phpt @@ -0,0 +1,65 @@ +--TEST-- +Test writing Element::$innerHTML on HTML documents +--EXTENSIONS-- +dom +--FILE-- +createElement('div'); +$dom->appendChild($el); +$el->innerHTML = '

foo

'; +echo $dom->saveXML(), "\n"; +$el->innerHTML = ''; +echo $dom->saveXML(), "\n"; +$el->innerHTML = ''; +echo $dom->saveXML(), "\n"; +$el->innerHTML = '
 

'; +echo $dom->saveXML(), "\n"; +$el->innerHTML = "invalid\xffutf-8๐ˆ๐ˆ๐ˆ"; +echo $dom->saveXML(), "\n"; + +// Create a non-interned string that gets the UTF-8 validity flag added +$str = str_repeat("my valid string", random_int(1, 1)); +preg_match('/^.*$/u', $str); +$el->innerHTML = $str; +echo $dom->saveXML(), "\n"; + +$dom = DOM\HTMLDocument::createEmpty(); +$el = $dom->createElement('style'); +$dom->appendChild($el); +$el->innerHTML = '

foo

'; +echo $dom->saveXML(), "\n"; + +$dom = DOM\HTMLDocument::createEmpty(); +$el = $dom->createElementNS('urn:a', 'style'); +$dom->appendChild($el); +$el->innerHTML = '

foo

'; +echo $dom->saveXML(), "\n"; + +$dom = DOM\HTMLDocument::createEmpty(); +$el = $dom->createElement('textarea'); +$dom->appendChild($el); +$el->innerHTML = "\0-->"; +echo $dom->saveXML(), "\n"; + +?> +--EXPECT-- + +

foo

+ +
+ +
+ +
ย 

+ +
invalid๏ฟฝutf-8๐ˆ๐ˆ๐ˆ
+ +
my valid string
+ + + + + + diff --git a/ext/dom/tests/modern/html/serializer/Element_innerHTML.phpt b/ext/dom/tests/modern/html/serializer/Element_innerHTML.phpt new file mode 100644 index 00000000000..637008e30b7 --- /dev/null +++ b/ext/dom/tests/modern/html/serializer/Element_innerHTML.phpt @@ -0,0 +1,21 @@ +--TEST-- +Test reading Element::$innerHTML on HTML documents +--EXTENSIONS-- +dom +--FILE-- +Test

Hello, World!

'); +var_dump($dom->getElementsByTagName('body')[0]->innerHTML); +var_dump($dom->getElementsByTagName('head')[0]->innerHTML); +var_dump($dom->getElementsByTagName('html')[0]->innerHTML); +var_dump($dom->getElementsByTagName('div')[0]->innerHTML); +var_dump($dom->getElementsByTagName('p')[0]->innerHTML); + +?> +--EXPECT-- +string(31) "

Hello, World!

" +string(19) "Test" +string(76) "Test

Hello, World!

" +string(0) "" +string(13) "Hello, World!" diff --git a/ext/dom/tests/modern/xml/Element_innerHTML_reading.phpt b/ext/dom/tests/modern/xml/Element_innerHTML_reading.phpt new file mode 100644 index 00000000000..b096fc2c6cc --- /dev/null +++ b/ext/dom/tests/modern/xml/Element_innerHTML_reading.phpt @@ -0,0 +1,65 @@ +--TEST-- +Test reading Element::$innerHTML on XML documents +--EXTENSIONS-- +dom +--FILE-- +createElement("container"); + return $element; +} + +$container = createContainer(); +$container->append("Hello, world!"); +var_dump($container->innerHTML); + +$container = createContainer(); +$container->append($dom->createComment("This is -a- comment")); +var_dump($container->innerHTML); + +$container = createContainer(); +// Note: intentionally typo'd to check whether the string matching against "xml" happens correctly +// i.e. no bugs with prefix-matching only. +$container->append($dom->createProcessingInstruction("xmll", "")); +var_dump($container->innerHTML); + +$container = createContainer(); +$container->append($dom->createProcessingInstruction("almostmalformed", ">?")); +var_dump($container->innerHTML); + +$container = createContainer(); +$element = $container->appendChild(createContainer()); +$element->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns', 'http://example.com/'); +var_dump($container->innerHTML); + +$container = createContainer(); +$element = $container->appendChild(createContainer()); +$element->setAttributeNS('urn:a', 'name', ''); +$element->setAttributeNS('urn:b', 'name', ''); +var_dump($container->innerHTML); + +$dom = DOM\XMLDocument::createFromFile(__DIR__ . '/../../book.xml'); +var_dump($dom->documentElement->innerHTML); + +?> +--EXPECT-- +string(13) "Hello, world!" 
+string(26) "" +string(9) "" +string(22) "??>" +string(12) "" +string(72) "" +string(167) " + + The Grapes of Wrath + John Steinbeck + + + The Pearl + John Steinbeck + +" diff --git a/ext/dom/tests/modern/xml/Element_innerHTML_reading_errors.phpt b/ext/dom/tests/modern/xml/Element_innerHTML_reading_errors.phpt new file mode 100644 index 00000000000..04b9971186c --- /dev/null +++ b/ext/dom/tests/modern/xml/Element_innerHTML_reading_errors.phpt @@ -0,0 +1,108 @@ +--TEST-- +Test reading Element::$innerHTML on XML documents - error cases +--EXTENSIONS-- +dom +--FILE-- +createElement("container"); + return $element; +} + +function test($container) { + try { + var_dump($container->innerHTML); + } catch (DOMException $e) { + echo $e->getMessage(), "\n"; + } +} + +$container = createContainer(); +$container->append("Hello, \x01 world!"); +test($container); + +$container = createContainer(); +$container->append($dom->createComment('Hello -- world')); +test($container); + +$container = createContainer(); +$container->append($dom->createComment('Hello world-')); +test($container); + +$container = createContainer(); +$container->append($dom->createComment('Hello world-')); +test($container); + +$container = createContainer(); +$container->append($dom->createComment("\x01")); +test($container); + +$container = createContainer(); +$legacy = new DOMDocument; +$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo:bar', '?>'))); +test($container); + +$container = createContainer(); +$legacy = new DOMDocument; +$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo', '?>'))); +test($container); + +$container = createContainer(); +$legacy = new DOMDocument; +$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('xml', ''))); +test($container); + +$container = createContainer(); +$legacy = new DOMDocument; +$container->append($dom->importLegacyNode($legacy->createProcessingInstruction('foo', "\x01"))); 
+test($container); + +$container = createContainer(); +$container->append($dom->createElement("with:colon")); +test($container); + +$container = createContainer(); +$container->append($dom->createElementNS("http://www.w3.org/2000/xmlns/", "xmlns:colon")); +test($container); + +$container = createContainer(); +$element = $container->appendChild(createContainer()); +$element->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:x", "http://www.w3.org/2000/xmlns/"); +test($container); + +$container = createContainer(); +$element = $container->appendChild(createContainer()); +$element->setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:x", ""); +test($container); + +$container = createContainer(); +$element = $container->appendChild(createContainer()); +$element->setAttribute("with:colon", "value"); +test($container); + +$container = createContainer(); +$element = $container->appendChild(createContainer()); +$element->setAttribute("xmlns", "value"); +test($container); + +?> +--EXPECT-- +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed +The resulting XML serialization is not well-formed diff --git a/ext/dom/tests/modern/xml/Element_innerHTML_writing.phpt b/ext/dom/tests/modern/xml/Element_innerHTML_writing.phpt new file mode 100644 index 00000000000..f820b42c7de --- /dev/null +++ 
b/ext/dom/tests/modern/xml/Element_innerHTML_writing.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test writing Element::$innerHTML on XML documents +--EXTENSIONS-- +dom +--FILE-- +createElementNS('urn:a', 'root'); +$dom->appendChild($el); +$el->innerHTML = '

foo

bar

'; +echo $dom->saveXML(), "\n"; +$el->innerHTML = ''; +echo $dom->saveXML(), "\n"; +$el->innerHTML = '&'; +echo $dom->saveXML(), "\n"; +$el->innerHTML = '<foo>'; +echo $dom->saveXML(), "\n"; + +echo "----------------\n"; + +$dom = DOM\XMLDocument::createFromString(''); +$child = $dom->documentElement->appendChild($dom->createElementNS('urn:a', 'child')); +$child->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns', 'urn:b'); +$child->innerHTML = ''; +echo $dom->saveXML(), "\n"; +var_dump($child->namespaceURI); +var_dump($child->firstChild->namespaceURI); + +echo "----------------\n"; + +$dom = DOM\XMLDocument::createFromString(<< + + + + + + + + +XML); +$dom->documentElement->innerHTML = $dom->documentElement->innerHTML; +echo $dom->saveXML(), "\n"; + +echo "----------------\n"; +$dom->documentElement->innerHTML = << + + + +XML; +echo $dom->saveXML(), "\n"; + +?> +--EXPECT-- + +

foo

bar

+ + + +& + +<foo> +---------------- + + +string(5) "urn:a" +string(5) "urn:a" +---------------- + + + + + + + + + + +---------------- + + + + + diff --git a/ext/dom/tests/modern/xml/Element_innerHTML_writing_errors.phpt b/ext/dom/tests/modern/xml/Element_innerHTML_writing_errors.phpt new file mode 100644 index 00000000000..0316ac88439 --- /dev/null +++ b/ext/dom/tests/modern/xml/Element_innerHTML_writing_errors.phpt @@ -0,0 +1,47 @@ +--TEST-- +Test writing Element::$innerHTML on XML documents - error cases +--EXTENSIONS-- +dom +--FILE-- + +]> + +XML); +$child = $dom->documentElement->appendChild($dom->createElementNS('urn:a', 'child')); +$original = $dom->saveXML(); + +function test($child, $html) { + global $dom, $original; + try { + $child->innerHTML = $html; + } catch (DOMException $e) { + echo $e->getMessage(), "\n"; + } + var_dump($dom->saveXML() === $original); +} + +test($child, '&foo;'); +test($child, ''); +test($child, '