1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Fix GH-20439: xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback (#20453)

We would need to escape the attributes, but there's no builtin method
that we can call in libxml2 to do so in a way consistent with the
attribute escape rules and expat.
In fact, expat just repeats the input, while we reconstruct it.
To fix the issue, and fix consistency with expat, we repeat the input as
well. This works by seeking to the start and end of the tag and passing
it to the default handler. This is fine for the parser because the
parser used in ext/xml is always in non-progressive mode, so we have
access to the entire input buffer.
This commit is contained in:
Niels Dossche
2025-11-11 22:47:15 +01:00
committed by GitHub
parent 33a2acba44
commit 3cc36b0b5e
4 changed files with 74 additions and 82 deletions

4
NEWS
View File

@@ -25,6 +25,10 @@ PHP NEWS
- Tidy:
. Fixed bug GH-20374 (PHP with tidy and custom-tags). (ndossche)
- XML:
. Fixed bug GH-20439 (xml_set_default_handler() does not properly handle
special characters in attributes when passing data to callback). (ndossche)
20 Nov 2025, PHP 8.3.28
- Core:

View File

@@ -45,6 +45,28 @@ _qualify_namespace(XML_Parser parser, const xmlChar *name, const xmlChar *URI, x
}
}
/* Passes the raw source text of the start tag currently being parsed to the default handler,
 * if one is installed. The text is repeated from the parser's input buffer instead of being
 * rebuilt from the parsed name and attributes. */
static void start_element_emit_default(XML_Parser parser)
{
	if (!parser->h_default) {
		return;
	}

	/* The current input position is used as the last byte of the tag. The grammar does not allow
	 * an embedded '<' inside a tag, so walking backwards to the nearest '<' locates its start.
	 * Since the parser in the current mode is non-progressive, it contains the entire input. */
	const xmlChar *tag_last = parser->parser->input->cur;
	const xmlChar *buffer_start = parser->parser->input->base;
	const xmlChar *tag_first = tag_last;
	while (tag_first > buffer_start && *tag_first != '<') {
		tag_first--;
	}

	if (*tag_last != '/') {
		/* Regular start tag: hand the bytes from '<' through the current position over as-is. */
		parser->h_default(parser->user, tag_first, tag_last - tag_first + 1);
		return;
	}

	/* Self-closing tag: the current position is the '/'.
	 * BC: Keep split between start & end element by emitting a copy in which the '/' is
	 * overwritten with '>'.
	 * TODO: In the future this could be aligned with expat and only emit a start event, or vice versa.
	 * See gh20439_2.phpt */
	xmlChar *start_tag = BAD_CAST estrndup((const char *) tag_first, tag_last - tag_first + 1);
	start_tag[tag_last - tag_first] = '>';
	parser->h_default(parser->user, start_tag, tag_last - tag_first + 1);
	efree(start_tag);
}
static void
_start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes)
{
@@ -52,29 +74,7 @@ _start_element_handler(void *user, const xmlChar *name, const xmlChar **attribut
xmlChar *qualified_name = NULL;
if (parser->h_start_element == NULL) {
if (parser->h_default) {
int attno = 0;
qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
if (attributes) {
while (attributes[attno] != NULL) {
int att_len;
char *att_string, *att_name, *att_value;
att_name = (char *)attributes[attno++];
att_value = (char *)attributes[attno++];
att_len = spprintf(&att_string, 0, " %s=\"%s\"", att_name, att_value);
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
efree(att_string);
}
}
qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
xmlFree(qualified_name);
}
start_element_emit_default(parser);
return;
}
@@ -104,65 +104,7 @@ _start_element_handler_ns(void *user, const xmlChar *name, const xmlChar *prefix
}
if (parser->h_start_element == NULL) {
if (parser->h_default) {
if (prefix) {
qualified_name = xmlStrncatNew((xmlChar *)"<", prefix, xmlStrlen(prefix));
qualified_name = xmlStrncat(qualified_name, (xmlChar *)":", 1);
qualified_name = xmlStrncat(qualified_name, name, xmlStrlen(name));
} else {
qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
}
if (namespaces) {
int i, j;
for (i = 0,j = 0;j < nb_namespaces;j++) {
int ns_len;
char *ns_string, *ns_prefix, *ns_url;
ns_prefix = (char *) namespaces[i++];
ns_url = (char *) namespaces[i++];
if (ns_prefix) {
ns_len = spprintf(&ns_string, 0, " xmlns:%s=\"%s\"", ns_prefix, ns_url);
} else {
ns_len = spprintf(&ns_string, 0, " xmlns=\"%s\"", ns_url);
}
qualified_name = xmlStrncat(qualified_name, (xmlChar *)ns_string, ns_len);
efree(ns_string);
}
}
if (attributes) {
for (i = 0; i < nb_attributes; i += 1) {
int att_len;
char *att_string, *att_name, *att_value, *att_prefix, *att_valueend;
att_name = (char *) attributes[y++];
att_prefix = (char *)attributes[y++];
y++;
att_value = (char *)attributes[y++];
att_valueend = (char *)attributes[y++];
if (att_prefix) {
att_len = spprintf(&att_string, 0, " %s:%s=\"", att_prefix, att_name);
} else {
att_len = spprintf(&att_string, 0, " %s=\"", att_name);
}
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_value, att_valueend - att_value);
qualified_name = xmlStrncat(qualified_name, (xmlChar *)"\"", 1);
efree(att_string);
}
}
qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
xmlFree(qualified_name);
}
start_element_emit_default(parser);
return;
}
_qualify_namespace(parser, name, URI, &qualified_name);

View File

@@ -0,0 +1,24 @@
--TEST--
GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback)
--EXTENSIONS--
xml
--FILE--
<?php
// Dump every piece of data that reaches the default handler.
$dump = function ($_parser, $data) {
    var_dump($data);
};

$parser = xml_parser_create_ns('utf-8');
xml_set_default_handler($parser, $dump);

$document = "<!-- xxx --><foo attr1='\"&lt;&quot;&#9;&#x0A;&#x0D;&#13;𐍅' attr2=\"&quot;&lt;\"></foo>";

// Test chunked parser wrt non-progressive parser
foreach (str_split($document) as $chunk) {
    xml_parse($parser, $chunk, false);
}
// Signal the end of the input with an empty final chunk.
xml_parse($parser, "", true);
?>
--EXPECT--
string(12) "<!-- xxx -->"
string(71) "<foo attr1='"&lt;&quot;&#9;&#x0A;&#x0D;&#13;𐍅' attr2="&quot;&lt;">"
string(6) "</foo>"

View File

@@ -0,0 +1,22 @@
--TEST--
GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback) - closing solidus variant
--EXTENSIONS--
xml
--SKIPIF--
<?php
// Pull in the shared skip helper used to select the XML backend this test applies to.
require __DIR__ . '/libxml_expat_skipif.inc';
// NOTE(review): skipif() is defined in the include above and is not visible here; presumably
// want_expat: false skips this test on expat-based builds — confirm against the helper.
skipif(want_expat: false);
?>
--FILE--
<?php
// Dump every piece of data that reaches the default handler.
$dump = function ($_parser, $data) {
    var_dump($data);
};

$parser = xml_parser_create_ns('utf-8');
xml_set_default_handler($parser, $dump);

// Parse a self-closing element in a single, final call.
xml_parse($parser, "<ns:test xmlns:ns='urn:x' />", true);
?>
--EXPECT--
string(29) "<ns:test xmlns:ns='urn:x' >"
string(10) "</ns:test>"