From 3cc36b0b5e76da23846452c04f3883a998a02836 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+ndossche@users.noreply.github.com>
Date: Tue, 11 Nov 2025 22:47:15 +0100
Subject: [PATCH] Fix GH-20439: xml_set_default_handler() does not properly
handle special characters in attributes when passing data to callback
(#20453)
We would need to escape the attributes, but there's no builtin method
that we can call in libxml2 to do so in a way consistent with the
attribute escape rules and expat.
In fact, expat just repeats the input, while we reconstruct it.
To fix the issue, and fix consistency with expat, we repeat the input as
well. This works by seeking to the start and end of the tag and passing
it to the default handler. This is fine for the parser because the
parser used in ext/xml is always in non-progressive mode, so we have
access to the entire input buffer.
---
NEWS | 4 ++
ext/xml/compat.c | 106 ++++++++---------------------------
ext/xml/tests/gh20439_1.phpt | 24 ++++++++
ext/xml/tests/gh20439_2.phpt | 22 ++++++++
4 files changed, 74 insertions(+), 82 deletions(-)
create mode 100644 ext/xml/tests/gh20439_1.phpt
create mode 100644 ext/xml/tests/gh20439_2.phpt
diff --git a/NEWS b/NEWS
index 28263cc2c0d..7d0fc8ce630 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,10 @@ PHP NEWS
- Tidy:
. Fixed bug GH-20374 (PHP with tidy and custom-tags). (ndossche)
+- XML:
+ . Fixed bug GH-20439 (xml_set_default_handler() does not properly handle
+ special characters in attributes when passing data to callback). (ndossche)
+
20 Nov 2025, PHP 8.3.28
- Core:
diff --git a/ext/xml/compat.c b/ext/xml/compat.c
index 3de77d0723e..25add45f034 100644
--- a/ext/xml/compat.c
+++ b/ext/xml/compat.c
@@ -45,6 +45,28 @@ _qualify_namespace(XML_Parser parser, const xmlChar *name, const xmlChar *URI, x
}
}
+static void start_element_emit_default(XML_Parser parser)
+{
+ if (parser->h_default) {
+ /* Grammar does not allow embedded '<' and '>' in elements, so we can seek to the start and end positions.
+ * Since the parser in the current mode mode is non-progressive, it contains the entire input. */
+ const xmlChar *cur = parser->parser->input->cur;
+ const xmlChar *end = cur;
+ for (const xmlChar *base = parser->parser->input->base; cur > base && *cur != '<'; cur--);
+ if (*end == '/') {
+ /* BC: Keep split between start & end element.
+ * TODO: In the future this could be aligned with expat and only emit a start event, or vice versa.
+ * See gh20439_2.phpt */
+ xmlChar *tmp = BAD_CAST estrndup((const char *) cur, end - cur + 1);
+ tmp[end - cur] = '>';
+ parser->h_default(parser->user, tmp, end - cur + 1);
+ efree(tmp);
+ } else {
+ parser->h_default(parser->user, cur, end - cur + 1);
+ }
+ }
+}
+
static void
_start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes)
{
@@ -52,29 +74,7 @@ _start_element_handler(void *user, const xmlChar *name, const xmlChar **attribut
xmlChar *qualified_name = NULL;
if (parser->h_start_element == NULL) {
- if (parser->h_default) {
- int attno = 0;
-
- qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
- if (attributes) {
- while (attributes[attno] != NULL) {
- int att_len;
- char *att_string, *att_name, *att_value;
-
- att_name = (char *)attributes[attno++];
- att_value = (char *)attributes[attno++];
-
- att_len = spprintf(&att_string, 0, " %s=\"%s\"", att_name, att_value);
-
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
- efree(att_string);
- }
-
- }
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
- parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
- xmlFree(qualified_name);
- }
+ start_element_emit_default(parser);
return;
}
@@ -104,65 +104,7 @@ _start_element_handler_ns(void *user, const xmlChar *name, const xmlChar *prefix
}
if (parser->h_start_element == NULL) {
- if (parser->h_default) {
-
- if (prefix) {
- qualified_name = xmlStrncatNew((xmlChar *)"<", prefix, xmlStrlen(prefix));
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)":", 1);
- qualified_name = xmlStrncat(qualified_name, name, xmlStrlen(name));
- } else {
- qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
- }
-
- if (namespaces) {
- int i, j;
- for (i = 0,j = 0;j < nb_namespaces;j++) {
- int ns_len;
- char *ns_string, *ns_prefix, *ns_url;
-
- ns_prefix = (char *) namespaces[i++];
- ns_url = (char *) namespaces[i++];
-
- if (ns_prefix) {
- ns_len = spprintf(&ns_string, 0, " xmlns:%s=\"%s\"", ns_prefix, ns_url);
- } else {
- ns_len = spprintf(&ns_string, 0, " xmlns=\"%s\"", ns_url);
- }
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)ns_string, ns_len);
-
- efree(ns_string);
- }
- }
-
- if (attributes) {
- for (i = 0; i < nb_attributes; i += 1) {
- int att_len;
- char *att_string, *att_name, *att_value, *att_prefix, *att_valueend;
-
- att_name = (char *) attributes[y++];
- att_prefix = (char *)attributes[y++];
- y++;
- att_value = (char *)attributes[y++];
- att_valueend = (char *)attributes[y++];
-
- if (att_prefix) {
- att_len = spprintf(&att_string, 0, " %s:%s=\"", att_prefix, att_name);
- } else {
- att_len = spprintf(&att_string, 0, " %s=\"", att_name);
- }
-
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_value, att_valueend - att_value);
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)"\"", 1);
-
- efree(att_string);
- }
-
- }
- qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
- parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
- xmlFree(qualified_name);
- }
+ start_element_emit_default(parser);
return;
}
_qualify_namespace(parser, name, URI, &qualified_name);
diff --git a/ext/xml/tests/gh20439_1.phpt b/ext/xml/tests/gh20439_1.phpt
new file mode 100644
index 00000000000..cda6803e9d0
--- /dev/null
+++ b/ext/xml/tests/gh20439_1.phpt
@@ -0,0 +1,24 @@
+--TEST--
+GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback)
+--EXTENSIONS--
+xml
+--FILE--
+";
+$inputs = str_split($input);
+
+// Test chunked parser wrt non-progressive parser
+foreach ($inputs as $input) {
+ xml_parse($x, $input, false);
+}
+xml_parse($x, "", true);
+
+?>
+--EXPECT--
+string(12) ""
+string(71) ""
+string(6) ""
diff --git a/ext/xml/tests/gh20439_2.phpt b/ext/xml/tests/gh20439_2.phpt
new file mode 100644
index 00000000000..dce4f5976a1
--- /dev/null
+++ b/ext/xml/tests/gh20439_2.phpt
@@ -0,0 +1,22 @@
+--TEST--
+GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback) - closing solidus variant
+--EXTENSIONS--
+xml
+--SKIPIF--
+
+--FILE--
+";
+xml_parse($x, $input, true);
+
+?>
+--EXPECT--
+string(29) ""
+string(10) ""