mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
Fix GH-20439: xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback (#20453)
We would need to escape the attributes, but there's no builtin method that we can call in libxml2 to do so in a way consistent with the attribute escape rules and expat. In fact, expat just repeats the input, while we reconstruct it. To fix the issue, and fix consistency with expat, we repeat the input as well. This works by seeking to the start and end of the tag and passing it to the default handler. This is fine for the parser because the parser used in ext/xml is always in non-progressive mode, so we have access to the entire input buffer.
This commit is contained in:
4
NEWS
4
NEWS
@@ -25,6 +25,10 @@ PHP NEWS
|
||||
- Tidy:
|
||||
. Fixed bug GH-20374 (PHP with tidy and custom-tags). (ndossche)
|
||||
|
||||
- XML:
|
||||
. Fixed bug GH-20439 (xml_set_default_handler() does not properly handle
|
||||
special characters in attributes when passing data to callback). (ndossche)
|
||||
|
||||
20 Nov 2025, PHP 8.3.28
|
||||
|
||||
- Core:
|
||||
|
||||
106
ext/xml/compat.c
106
ext/xml/compat.c
@@ -45,6 +45,28 @@ _qualify_namespace(XML_Parser parser, const xmlChar *name, const xmlChar *URI, x
|
||||
}
|
||||
}
|
||||
|
||||
static void start_element_emit_default(XML_Parser parser)
|
||||
{
|
||||
if (parser->h_default) {
|
||||
/* Grammar does not allow embedded '<' and '>' in elements, so we can seek to the start and end positions.
|
||||
* Since the parser in the current mode mode is non-progressive, it contains the entire input. */
|
||||
const xmlChar *cur = parser->parser->input->cur;
|
||||
const xmlChar *end = cur;
|
||||
for (const xmlChar *base = parser->parser->input->base; cur > base && *cur != '<'; cur--);
|
||||
if (*end == '/') {
|
||||
/* BC: Keep split between start & end element.
|
||||
* TODO: In the future this could be aligned with expat and only emit a start event, or vice versa.
|
||||
* See gh20439_2.phpt */
|
||||
xmlChar *tmp = BAD_CAST estrndup((const char *) cur, end - cur + 1);
|
||||
tmp[end - cur] = '>';
|
||||
parser->h_default(parser->user, tmp, end - cur + 1);
|
||||
efree(tmp);
|
||||
} else {
|
||||
parser->h_default(parser->user, cur, end - cur + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes)
|
||||
{
|
||||
@@ -52,29 +74,7 @@ _start_element_handler(void *user, const xmlChar *name, const xmlChar **attribut
|
||||
xmlChar *qualified_name = NULL;
|
||||
|
||||
if (parser->h_start_element == NULL) {
|
||||
if (parser->h_default) {
|
||||
int attno = 0;
|
||||
|
||||
qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
|
||||
if (attributes) {
|
||||
while (attributes[attno] != NULL) {
|
||||
int att_len;
|
||||
char *att_string, *att_name, *att_value;
|
||||
|
||||
att_name = (char *)attributes[attno++];
|
||||
att_value = (char *)attributes[attno++];
|
||||
|
||||
att_len = spprintf(&att_string, 0, " %s=\"%s\"", att_name, att_value);
|
||||
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
|
||||
efree(att_string);
|
||||
}
|
||||
|
||||
}
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
|
||||
parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
|
||||
xmlFree(qualified_name);
|
||||
}
|
||||
start_element_emit_default(parser);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -104,65 +104,7 @@ _start_element_handler_ns(void *user, const xmlChar *name, const xmlChar *prefix
|
||||
}
|
||||
|
||||
if (parser->h_start_element == NULL) {
|
||||
if (parser->h_default) {
|
||||
|
||||
if (prefix) {
|
||||
qualified_name = xmlStrncatNew((xmlChar *)"<", prefix, xmlStrlen(prefix));
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)":", 1);
|
||||
qualified_name = xmlStrncat(qualified_name, name, xmlStrlen(name));
|
||||
} else {
|
||||
qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
|
||||
}
|
||||
|
||||
if (namespaces) {
|
||||
int i, j;
|
||||
for (i = 0,j = 0;j < nb_namespaces;j++) {
|
||||
int ns_len;
|
||||
char *ns_string, *ns_prefix, *ns_url;
|
||||
|
||||
ns_prefix = (char *) namespaces[i++];
|
||||
ns_url = (char *) namespaces[i++];
|
||||
|
||||
if (ns_prefix) {
|
||||
ns_len = spprintf(&ns_string, 0, " xmlns:%s=\"%s\"", ns_prefix, ns_url);
|
||||
} else {
|
||||
ns_len = spprintf(&ns_string, 0, " xmlns=\"%s\"", ns_url);
|
||||
}
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)ns_string, ns_len);
|
||||
|
||||
efree(ns_string);
|
||||
}
|
||||
}
|
||||
|
||||
if (attributes) {
|
||||
for (i = 0; i < nb_attributes; i += 1) {
|
||||
int att_len;
|
||||
char *att_string, *att_name, *att_value, *att_prefix, *att_valueend;
|
||||
|
||||
att_name = (char *) attributes[y++];
|
||||
att_prefix = (char *)attributes[y++];
|
||||
y++;
|
||||
att_value = (char *)attributes[y++];
|
||||
att_valueend = (char *)attributes[y++];
|
||||
|
||||
if (att_prefix) {
|
||||
att_len = spprintf(&att_string, 0, " %s:%s=\"", att_prefix, att_name);
|
||||
} else {
|
||||
att_len = spprintf(&att_string, 0, " %s=\"", att_name);
|
||||
}
|
||||
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_value, att_valueend - att_value);
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)"\"", 1);
|
||||
|
||||
efree(att_string);
|
||||
}
|
||||
|
||||
}
|
||||
qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
|
||||
parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
|
||||
xmlFree(qualified_name);
|
||||
}
|
||||
start_element_emit_default(parser);
|
||||
return;
|
||||
}
|
||||
_qualify_namespace(parser, name, URI, &qualified_name);
|
||||
|
||||
24
ext/xml/tests/gh20439_1.phpt
Normal file
24
ext/xml/tests/gh20439_1.phpt
Normal file
@@ -0,0 +1,24 @@
|
||||
--TEST--
|
||||
GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback)
|
||||
--EXTENSIONS--
|
||||
xml
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$x = xml_parser_create_ns('utf-8');
|
||||
xml_set_default_handler($x, function( $_parser, $data ) { var_dump($data); });
|
||||
|
||||
$input = "<!-- xxx --><foo attr1='\"<"	

 𐍅' attr2=\""<\"></foo>";
|
||||
$inputs = str_split($input);
|
||||
|
||||
// Test chunked parser wrt non-progressive parser
|
||||
foreach ($inputs as $input) {
|
||||
xml_parse($x, $input, false);
|
||||
}
|
||||
xml_parse($x, "", true);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
string(12) "<!-- xxx -->"
|
||||
string(71) "<foo attr1='"<"	

 𐍅' attr2=""<">"
|
||||
string(6) "</foo>"
|
||||
22
ext/xml/tests/gh20439_2.phpt
Normal file
22
ext/xml/tests/gh20439_2.phpt
Normal file
@@ -0,0 +1,22 @@
|
||||
--TEST--
|
||||
GH-20439 (xml_set_default_handler() does not properly handle special characters in attributes when passing data to callback) - closing solidus variant
|
||||
--EXTENSIONS--
|
||||
xml
|
||||
--SKIPIF--
|
||||
<?php
|
||||
require __DIR__ . '/libxml_expat_skipif.inc';
|
||||
skipif(want_expat: false);
|
||||
?>
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$x = xml_parser_create_ns('utf-8');
|
||||
xml_set_default_handler($x, function( $_parser, $data ) { var_dump($data); });
|
||||
|
||||
$input = "<ns:test xmlns:ns='urn:x' />";
|
||||
xml_parse($x, $input, true);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
string(29) "<ns:test xmlns:ns='urn:x' >"
|
||||
string(10) "</ns:test>"
|
||||
Reference in New Issue
Block a user