From 3cc36b0b5e76da23846452c04f3883a998a02836 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+ndossche@users.noreply.github.com> Date: Tue, 11 Nov 2025 22:47:15 +0100 Subject: [PATCH] Fix GH-20439: xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback (#20453) We would need to escape the attributes, but there's no builtin method that we can call in libxml2 to do so in a way consistent with the attribute escape rules and expat. In fact, expat just repeats the input, while we reconstruct it. To fix the issue, and fix consistency with expat, we repeat the input as well. This works by seeking to the start and end of the tag and passing it to the default handler. This is fine for the parser because the parser used in ext/xml is always in non-progressive mode, so we have access to the entire input buffer. --- NEWS | 4 ++ ext/xml/compat.c | 106 ++++++++--------------------------- ext/xml/tests/gh20439_1.phpt | 24 ++++++++ ext/xml/tests/gh20439_2.phpt | 22 ++++++++ 4 files changed, 74 insertions(+), 82 deletions(-) create mode 100644 ext/xml/tests/gh20439_1.phpt create mode 100644 ext/xml/tests/gh20439_2.phpt diff --git a/NEWS b/NEWS index 28263cc2c0d..7d0fc8ce630 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,10 @@ PHP NEWS - Tidy: . Fixed bug GH-20374 (PHP with tidy and custom-tags). (ndossche) +- XML: + . Fixed bug GH-20439 (xml_set_default_handler() does not properly handle + special characters in attributes when passing data to callback). (ndossche) + 20 Nov 2025, PHP 8.3.28 - Core: diff --git a/ext/xml/compat.c b/ext/xml/compat.c index 3de77d0723e..25add45f034 100644 --- a/ext/xml/compat.c +++ b/ext/xml/compat.c @@ -45,6 +45,28 @@ _qualify_namespace(XML_Parser parser, const xmlChar *name, const xmlChar *URI, x } } +static void start_element_emit_default(XML_Parser parser) +{ + if (parser->h_default) { + /* Grammar does not allow embedded '<' and '>' in elements, so we can seek to the start and end positions. + * Since the parser in the current mode mode is non-progressive, it contains the entire input. */ + const xmlChar *cur = parser->parser->input->cur; + const xmlChar *end = cur; + for (const xmlChar *base = parser->parser->input->base; cur > base && *cur != '<'; cur--); + if (*end == '/') { + /* BC: Keep split between start & end element. + * TODO: In the future this could be aligned with expat and only emit a start event, or vice versa. + * See gh20439_2.phpt */ + xmlChar *tmp = BAD_CAST estrndup((const char *) cur, end - cur + 1); + tmp[end - cur] = '>'; + parser->h_default(parser->user, tmp, end - cur + 1); + efree(tmp); + } else { + parser->h_default(parser->user, cur, end - cur + 1); + } + } +} + static void _start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes) { @@ -52,29 +74,7 @@ _start_element_handler(void *user, const xmlChar *name, const xmlChar **attribut xmlChar *qualified_name = NULL; if (parser->h_start_element == NULL) { - if (parser->h_default) { - int attno = 0; - - qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name)); - if (attributes) { - while (attributes[attno] != NULL) { - int att_len; - char *att_string, *att_name, *att_value; - - att_name = (char *)attributes[attno++]; - att_value = (char *)attributes[attno++]; - - att_len = spprintf(&att_string, 0, " %s=\"%s\"", att_name, att_value); - - qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len); - efree(att_string); - } - - } - qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1); - parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name)); - xmlFree(qualified_name); - } + start_element_emit_default(parser); return; } @@ -104,65 +104,7 @@ _start_element_handler_ns(void *user, const xmlChar *name, const xmlChar *prefix } if (parser->h_start_element == NULL) { - if (parser->h_default) { - - if (prefix) { - qualified_name = xmlStrncatNew((xmlChar *)"<", prefix, xmlStrlen(prefix)); - qualified_name = xmlStrncat(qualified_name, (xmlChar *)":", 1); - qualified_name = xmlStrncat(qualified_name, name, xmlStrlen(name)); - } else { - qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name)); - } - - if (namespaces) { - int i, j; - for (i = 0,j = 0;j < nb_namespaces;j++) { - int ns_len; - char *ns_string, *ns_prefix, *ns_url; - - ns_prefix = (char *) namespaces[i++]; - ns_url = (char *) namespaces[i++]; - - if (ns_prefix) { - ns_len = spprintf(&ns_string, 0, " xmlns:%s=\"%s\"", ns_prefix, ns_url); - } else { - ns_len = spprintf(&ns_string, 0, " xmlns=\"%s\"", ns_url); - } - qualified_name = xmlStrncat(qualified_name, (xmlChar *)ns_string, ns_len); - - efree(ns_string); - } - } - - if (attributes) { - for (i = 0; i < nb_attributes; i += 1) { - int att_len; - char *att_string, *att_name, *att_value, *att_prefix, *att_valueend; - - att_name = (char *) attributes[y++]; - att_prefix = (char *)attributes[y++]; - y++; - att_value = (char *)attributes[y++]; - att_valueend = (char *)attributes[y++]; - - if (att_prefix) { - att_len = spprintf(&att_string, 0, " %s:%s=\"", att_prefix, att_name); - } else { - att_len = spprintf(&att_string, 0, " %s=\"", att_name); - } - - qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len); - qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_value, att_valueend - att_value); - qualified_name = xmlStrncat(qualified_name, (xmlChar *)"\"", 1); - - efree(att_string); - } - - } - qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1); - parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name)); - xmlFree(qualified_name); - } + start_element_emit_default(parser); return; } _qualify_namespace(parser, name, URI, &qualified_name); diff --git a/ext/xml/tests/gh20439_1.phpt b/ext/xml/tests/gh20439_1.phpt new file mode 100644 index 00000000000..cda6803e9d0 --- /dev/null +++ b/ext/xml/tests/gh20439_1.phpt @@ -0,0 +1,24 @@ +--TEST-- +GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback) +--EXTENSIONS-- +xml +--FILE-- +"; +$inputs = str_split($input); + +// Test chunked parser wrt non-progressive parser +foreach ($inputs as $input) { + xml_parse($x, $input, false); +} +xml_parse($x, "", true); + +?> +--EXPECT-- +string(12) "" +string(71) "" +string(6) "" diff --git a/ext/xml/tests/gh20439_2.phpt b/ext/xml/tests/gh20439_2.phpt new file mode 100644 index 00000000000..dce4f5976a1 --- /dev/null +++ b/ext/xml/tests/gh20439_2.phpt @@ -0,0 +1,22 @@ +--TEST-- +GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback) - closing solidus variant +--EXTENSIONS-- +xml +--SKIPIF-- + +--FILE-- +"; +xml_parse($x, $input, true); + +?> +--EXPECT-- +string(29) "" +string(10) ""