1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Implement GH-18550: Implement getElementsByClassName() (#19108)

Spec: https://dom.spec.whatwg.org/#ref-for-dom-element-getelementsbyclassname
This commit is contained in:
Niels Dossche
2025-07-14 21:53:37 +02:00
committed by GitHub
parent 114fc16266
commit 9121b015c1
12 changed files with 364 additions and 4 deletions

1
NEWS
View File

@@ -8,6 +8,7 @@ PHP NEWS
- DOM:
. Make cloning DOM node lists, maps, and collections fail. (nielsdos)
. Added Dom\Element::getElementsByClassName(). (nielsdos)
- PDO_ODBC
. Fetch larger block sizes and better handle SQL_NO_TOTAL when calling

View File

@@ -408,6 +408,7 @@ PHP 8.5 UPGRADE NOTES
RFC: https://wiki.php.net/rfc/curl_share_persistence_improvement
- DOM:
. Added Dom\Element::getElementsByClassName().
. Added Dom\Element::insertAdjacentHTML().
- Enchant:

View File

@@ -842,6 +842,44 @@ PHP_METHOD(Dom_Element, getElementsByTagName)
}
/* }}} end dom_element_get_elements_by_tag_name */
PHP_METHOD(Dom_Element, getElementsByClassName)
{
dom_object *intern, *namednode;
zend_string *class_names;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "P", &class_names) == FAILURE) {
RETURN_THROWS();
}
if (ZSTR_LEN(class_names) > INT_MAX) {
zend_argument_value_error(1, "is too long");
RETURN_THROWS();
}
DOM_GET_THIS_INTERN(intern);
object_init_ex(return_value, dom_html_collection_class_entry);
namednode = Z_DOMOBJ_P(return_value);
HashTable *token_set;
ALLOC_HASHTABLE(token_set);
zend_hash_init(token_set, 0, NULL, NULL, false);
dom_ordered_set_parser(token_set, ZSTR_VAL(class_names), intern->document->quirks_mode == PHP_LIBXML_QUIRKS);
if (zend_hash_num_elements(token_set) == 0) {
php_dom_create_obj_map(intern, namednode, NULL, NULL, NULL, &php_dom_obj_map_noop);
zend_hash_destroy(token_set);
FREE_HASHTABLE(token_set);
} else {
php_dom_create_obj_map(intern, namednode, NULL, NULL, NULL, &php_dom_obj_map_by_class_name);
dom_nnodemap_object *map = namednode->ptr;
map->array = token_set;
map->release_array = true;
}
}
/* should_free_result must be initialized to false */
static const xmlChar *dom_get_attribute_ns(dom_object *intern, xmlNodePtr elemp, const char *uri, size_t uri_len, const char *name, bool *should_free_result)
{

View File

@@ -24,6 +24,7 @@
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "obj_map.h"
#include "token_list.h"
static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap)
{
@@ -40,6 +41,30 @@ static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap)
objmap->cached_length = -1;
}
/* Tells whether "nodep" is an element whose "class" attribute contains every
 * token stored in map->array. The attribute's tokens are lowercased for the
 * comparison when the map's base object belongs to a quirks mode document.
 * Non-element nodes and elements without a "class" attribute never match. */
static bool dom_matches_class_name(const dom_nnodemap_object *map, const xmlNode *nodep)
{
	if (nodep->type != XML_ELEMENT_NODE) {
		return false;
	}

	xmlAttrPtr class_attr = xmlHasNsProp(nodep, BAD_CAST "class", NULL);
	if (class_attr == NULL) {
		return false;
	}

	bool must_free;
	xmlChar *class_value = php_libxml_attr_value(class_attr, &must_free);
	const bool lowercase = map->baseobj->document->quirks_mode == PHP_LIBXML_QUIRKS;
	const bool matched = dom_ordered_set_all_contained(map->array, (const char *) class_value, lowercase);

	/* The attribute value is only ours to free when the helper says so. */
	if (must_free) {
		xmlFree(class_value);
	}

	return matched;
}
/**************************
* === Length methods === *
**************************/
@@ -106,6 +131,24 @@ static zend_long dom_map_get_by_tag_name_length(dom_nnodemap_object *map)
return count;
}
/* Length handler of the by-class-name map: walks everything below the base
 * node in tree order and counts the nodes accepted by dom_matches_class_name().
 * Yields 0 when the base object no longer has a node. */
static zend_long dom_map_get_by_class_name_length(dom_nnodemap_object *map)
{
	xmlNodePtr root = dom_object_get_node(map->baseobj);
	if (root == NULL) {
		return 0;
	}

	zend_long matches = 0;
	for (xmlNodePtr cur = php_dom_first_child_of_container_node(root);
			cur != NULL;
			cur = php_dom_next_in_tree_order(cur, root)) {
		if (dom_matches_class_name(map, cur)) {
			matches++;
		}
	}

	return matches;
}
static zend_long dom_map_get_zero_length(dom_nnodemap_object *map)
{
return 0;
@@ -276,6 +319,10 @@ static void dom_map_collection_named_item_elements_iter(dom_nnodemap_object *map
}
}
/* Named-item iteration step that deliberately does nothing: neither "map" nor
 * "iter" is read or written. Installed in the no-op handler table. */
static void dom_map_collection_named_item_null(dom_nnodemap_object *map, php_dom_obj_map_collection_iter *iter)
{
	(void) map;
	(void) iter;
}
static void dom_map_get_by_tag_name_item(dom_nnodemap_object *map, zend_long index, zval *return_value)
{
xmlNodePtr nodep = dom_object_get_node(map->baseobj);
@@ -292,12 +339,54 @@ static void dom_map_get_by_tag_name_item(dom_nnodemap_object *map, zend_long ind
}
}
/* Item handler of the by-class-name map: stores the "index"-th (0-based)
 * matching node below the base node into return_value, or whatever
 * dom_ret_node_to_zobj() produces for a NULL node when there is no such item.
 * A found node is also recorded through dom_map_cache_obj() under "index".
 *
 * NOTE(review): this relies on dom_obj_map_get_start_point() (defined outside
 * this view) reporting either
 *   - a start node that itself is a match, plus the number of matches between
 *     that node and the wanted one (0 meaning "the start node itself"), or
 *   - no start node, plus the absolute 0-based index to seek from the top.
 * Please confirm against that helper.
 *
 * The previous do/while never stepped past a node once it matched, so every
 * pass after the first was a no-op: any lookup that had to skip more than one
 * match (e.g. item(2) without a warm cache) returned an earlier element and
 * then cached it under the requested index. */
static void dom_map_get_by_class_name_item(dom_nnodemap_object *map, zend_long index, zval *return_value)
{
	xmlNodePtr nodep = dom_object_get_node(map->baseobj);
	xmlNodePtr itemnode = NULL;
	if (nodep && index >= 0) {
		dom_node_idx_pair start_point = dom_obj_map_get_start_point(map, nodep, index);
		/* Number of matching nodes to step over before the wanted one. */
		zend_long to_skip;
		if (start_point.node) {
			if (start_point.index > 0) {
				/* Only start iteration at next point if we actually have an index to seek to.
				 * The start node accounts for one match already. */
				itemnode = php_dom_next_in_tree_order(start_point.node, nodep);
				to_skip = start_point.index - 1;
			} else {
				itemnode = start_point.node;
				to_skip = 0;
			}
		} else {
			itemnode = php_dom_first_child_of_container_node(nodep);
			to_skip = start_point.index;
		}

		while (itemnode != NULL) {
			if (dom_matches_class_name(map, itemnode)) {
				if (to_skip <= 0) {
					break;
				}
				to_skip--;
			}
			itemnode = php_dom_next_in_tree_order(itemnode, nodep);
		}
	}
	dom_ret_node_to_zobj(map, itemnode, return_value);
	if (itemnode) {
		dom_map_cache_obj(map, itemnode, index, return_value);
	}
}
/* Named-item iteration step of the by-tag-name map: delegates the search to
 * dom_get_elements_by_tag_name_ns_raw(), stores the node it returns as the new
 * candidate, and sets iter->next to one past the position reported back
 * through iter->cur. */
static void dom_map_collection_named_item_by_tag_name_iter(dom_nnodemap_object *map, php_dom_obj_map_collection_iter *iter)
{
	xmlNodePtr match = dom_get_elements_by_tag_name_ns_raw(
		iter->basep, iter->candidate,
		map->ns, map->local, map->local_lower,
		&iter->cur, iter->next
	);

	iter->candidate = match;
	iter->next = iter->cur + 1;
}
/* Named-item iteration step of the by-class-name map: moves iter->candidate to
 * the next node (in tree order below iter->basep) accepted by
 * dom_matches_class_name(), or to NULL when the subtree is exhausted.
 * Without a previous candidate the search starts at the first child of the base. */
static void dom_map_collection_named_item_by_class_name_iter(dom_nnodemap_object *map, php_dom_obj_map_collection_iter *iter)
{
	xmlNodePtr root = iter->basep;
	xmlNodePtr cur;

	if (iter->candidate) {
		cur = php_dom_next_in_tree_order(iter->candidate, root);
	} else {
		cur = php_dom_first_child_of_container_node(root);
	}

	for (; cur != NULL; cur = php_dom_next_in_tree_order(cur, root)) {
		if (dom_matches_class_name(map, cur)) {
			break;
		}
	}

	iter->candidate = cur;
}
static void dom_map_get_null_item(dom_nnodemap_object *map, zend_long index, zval *return_value)
{
RETURN_NULL();
@@ -478,6 +567,16 @@ const php_dom_obj_map_handler php_dom_obj_map_by_tag_name = {
.nameless = true,
};
/* Handler table for collections created by getElementsByClassName().
 * Length, item lookup and named-item iteration all walk the subtree of the
 * base node and filter it with dom_matches_class_name(); the namespaced
 * named-item handlers are the shared *_null implementations defined elsewhere
 * in this file. */
const php_dom_obj_map_handler php_dom_obj_map_by_class_name = {
.length = dom_map_get_by_class_name_length,
.get_item = dom_map_get_by_class_name_item,
.get_ns_named_item = dom_map_get_ns_named_item_null,
.has_ns_named_item = dom_map_has_ns_named_item_null,
.collection_named_item_iter = dom_map_collection_named_item_by_class_name_iter,
/* The item handler records its results via dom_map_cache_obj(). */
.use_cache = true,
.nameless = true,
};
const php_dom_obj_map_handler php_dom_obj_map_child_nodes = {
.length = dom_map_get_nodes_length,
.get_item = dom_map_get_nodes_item,
@@ -533,7 +632,7 @@ const php_dom_obj_map_handler php_dom_obj_map_noop = {
.get_item = dom_map_get_null_item,
.get_ns_named_item = dom_map_get_ns_named_item_null,
.has_ns_named_item = dom_map_has_ns_named_item_null,
.collection_named_item_iter = NULL,
.collection_named_item_iter = dom_map_collection_named_item_null,
.use_cache = false,
.nameless = true,
};

View File

@@ -63,6 +63,7 @@ zend_long php_dom_get_nodelist_length(dom_object *obj);
extern const php_dom_obj_map_handler php_dom_obj_map_attributes;
extern const php_dom_obj_map_handler php_dom_obj_map_by_tag_name;
extern const php_dom_obj_map_handler php_dom_obj_map_by_class_name;
extern const php_dom_obj_map_handler php_dom_obj_map_child_elements;
extern const php_dom_obj_map_handler php_dom_obj_map_child_nodes;
extern const php_dom_obj_map_handler php_dom_obj_map_nodeset;

View File

@@ -1659,6 +1659,7 @@ namespace Dom
public function getElementsByTagName(string $qualifiedName): HTMLCollection {}
public function getElementsByTagNameNS(?string $namespace, string $localName): HTMLCollection {}
public function getElementsByClassName(string $classNames): HTMLCollection {}
public function insertAdjacentElement(AdjacentPosition $where, Element $element): ?Element {}
public function insertAdjacentText(AdjacentPosition $where, string $data): void {}
@@ -1986,6 +1987,8 @@ namespace Dom
public function getElementsByTagName(string $qualifiedName): HTMLCollection {}
/** @implementation-alias Dom\Element::getElementsByTagNameNS */
public function getElementsByTagNameNS(?string $namespace, string $localName): HTMLCollection {}
/** @implementation-alias Dom\Element::getElementsByClassName */
public function getElementsByClassName(string $classNames): HTMLCollection {}
public function createElement(string $localName): Element {}
public function createElementNS(?string $namespace, string $qualifiedName): Element {}

View File

@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 2119512797f6d51d9835660cd0eccd3ba83417a9 */
* Stub hash: 757889c0ca89cc8e9905ba465e0621fe89b6e716 */
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_dom_import_simplexml, 0, 1, DOMAttr|DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
@@ -775,6 +775,10 @@ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_Element_getElementsByTa
ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_Element_getElementsByClassName, 0, 1, Dom\\HTMLCollection, 0)
ZEND_ARG_TYPE_INFO(0, classNames, IS_STRING, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_Element_insertAdjacentElement, 0, 2, Dom\\Element, 1)
ZEND_ARG_OBJ_INFO(0, where, Dom\\AdjacentPosition, 0)
ZEND_ARG_OBJ_INFO(0, element, Dom\\Element, 0)
@@ -906,6 +910,8 @@ ZEND_END_ARG_INFO()
#define arginfo_class_Dom_Document_getElementsByTagNameNS arginfo_class_Dom_Element_getElementsByTagNameNS
#define arginfo_class_Dom_Document_getElementsByClassName arginfo_class_Dom_Element_getElementsByClassName
ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_class_Dom_Document_createElement, 0, 1, Dom\\Element, 0)
ZEND_ARG_TYPE_INFO(0, localName, IS_STRING, 0)
ZEND_END_ARG_INFO()
@@ -1278,6 +1284,7 @@ ZEND_METHOD(Dom_Element, setAttributeNodeNS);
ZEND_METHOD(Dom_Element, removeAttributeNode);
ZEND_METHOD(Dom_Element, getElementsByTagName);
ZEND_METHOD(Dom_Element, getElementsByTagNameNS);
ZEND_METHOD(Dom_Element, getElementsByClassName);
ZEND_METHOD(Dom_Element, insertAdjacentElement);
ZEND_METHOD(Dom_Element, insertAdjacentText);
ZEND_METHOD(Dom_Element, insertAdjacentHTML);
@@ -1653,6 +1660,7 @@ static const zend_function_entry class_Dom_Element_methods[] = {
ZEND_ME(Dom_Element, removeAttributeNode, arginfo_class_Dom_Element_removeAttributeNode, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Element, getElementsByTagName, arginfo_class_Dom_Element_getElementsByTagName, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Element, getElementsByTagNameNS, arginfo_class_Dom_Element_getElementsByTagNameNS, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Element, getElementsByClassName, arginfo_class_Dom_Element_getElementsByClassName, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Element, insertAdjacentElement, arginfo_class_Dom_Element_insertAdjacentElement, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Element, insertAdjacentText, arginfo_class_Dom_Element_insertAdjacentText, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Element, insertAdjacentHTML, arginfo_class_Dom_Element_insertAdjacentHTML, ZEND_ACC_PUBLIC)
@@ -1721,6 +1729,7 @@ static const zend_function_entry class_Dom_DocumentFragment_methods[] = {
static const zend_function_entry class_Dom_Document_methods[] = {
ZEND_RAW_FENTRY("getElementsByTagName", zim_Dom_Element_getElementsByTagName, arginfo_class_Dom_Document_getElementsByTagName, ZEND_ACC_PUBLIC, NULL, NULL)
ZEND_RAW_FENTRY("getElementsByTagNameNS", zim_Dom_Element_getElementsByTagNameNS, arginfo_class_Dom_Document_getElementsByTagNameNS, ZEND_ACC_PUBLIC, NULL, NULL)
ZEND_RAW_FENTRY("getElementsByClassName", zim_Dom_Element_getElementsByClassName, arginfo_class_Dom_Document_getElementsByClassName, ZEND_ACC_PUBLIC, NULL, NULL)
ZEND_ME(Dom_Document, createElement, arginfo_class_Dom_Document_createElement, ZEND_ACC_PUBLIC)
ZEND_ME(Dom_Document, createElementNS, arginfo_class_Dom_Document_createElementNS, ZEND_ACC_PUBLIC)
ZEND_RAW_FENTRY("createDocumentFragment", zim_DOMDocument_createDocumentFragment, arginfo_class_Dom_Document_createDocumentFragment, ZEND_ACC_PUBLIC, NULL, NULL)

View File

@@ -0,0 +1,32 @@
--TEST--
Dom\Element::getElementsByClassName() empty class names
--EXTENSIONS--
dom
--FILE--
<?php
// Fixture: a <div> carrying class names, with a child element whose id is "child".
$dom = Dom\HTMLDocument::createFromString(<<<HTML
<div class=" foo bar ">
<p id="child"></p>
</div>
HTML, LIBXML_NOERROR);
// An empty argument must produce an empty collection: the count is 0 and iteration never runs.
$collection = $dom->documentElement->getElementsByClassName("");
var_dump($collection->count());
foreach ($collection as $node) {
throw new Error("unreachable");
}
// Arguments made up only of whitespace / control characters must match nothing either.
var_dump($dom->getElementsByClassName(" ")->count());
var_dump($dom->getElementsByClassName("\t")->count());
var_dump($dom->getElementsByClassName("\t\n\f\v")->count());
// namedItem() on such a collection yields NULL even though an element with id "child" exists.
var_dump($dom->getElementsByClassName("\t\n\f\v")->namedItem("child"));
?>
--EXPECT--
int(0)
int(0)
int(0)
int(0)
NULL

View File

@@ -0,0 +1,55 @@
--TEST--
Dom\Element::getElementsByClassName() non quirks mode
--EXTENSIONS--
dom
--FILE--
<?php
// Fixture with a doctype (the non-quirks case of this test): two <div>s holding
// <p> elements whose class names differ only in letter case.
$dom = Dom\HTMLDocument::createFromString(<<<HTML
<!DOCTYPE html>
<div id="container">
<p class="Bar">1</p>
<p class="bar">2</p>
<p class="Bar Foo">3</p>
<p class="Bar foo">4</p>
<p class="foo bar">5</p>
<p class="foo bar" name="here">6</p>
</div>
<div>
<p class="Bar">7</p>
<p class="bar">8</p>
<p class="Bar Foo">9</p>
<p class="Bar foo">10</p>
<p class="foo bar">11</p>
</div>
HTML);
// The requested class names are separated by a mix of spaces and a newline.
// Matching is case-sensitive here: only elements with both lowercase "foo" and "bar" count.
$collection = $dom->getElementsByClassName("foo \n bar");
echo "There are {$collection->length} items in the document in total that have both \"foo\" and \"bar\"\n";
// Calling the method on an element restricts the search to that element's descendants.
$collection = $dom->getElementById('container')->getElementsByClassName("foo \n bar");
echo "There are {$collection->length} items in #container in total that have both \"foo\" and \"bar\"\n";
// Iteration and item($key) must hand out the very same objects.
foreach ($collection as $key => $node) {
echo "--- Key $key ---\n";
var_dump($node->tagName, $node->textContent);
var_dump($node === $collection->item($key));
}
// namedItem() resolves the element carrying name="here" within the collection.
var_dump($collection->namedItem("here")->textContent);
?>
--EXPECT--
There are 3 items in the document in total that have both "foo" and "bar"
There are 2 items in #container in total that have both "foo" and "bar"
--- Key 0 ---
string(1) "P"
string(1) "5"
bool(true)
--- Key 1 ---
string(1) "P"
string(1) "6"
bool(true)
string(1) "6"

View File

@@ -0,0 +1,69 @@
--TEST--
Dom\Element::getElementsByClassName() quirks mode
--EXTENSIONS--
dom
--FILE--
<?php
// Fixture without a doctype: this is the quirks mode case of the test.
$dom = Dom\HTMLDocument::createFromString(<<<HTML
<div class=" foo bar ">
<main class="bar">
<p name="here">1</p>
<p class="bar" name="here">2</p>
<p name="here">3</p>
<p class="bAR" name="here">4</p>
</main>
<b class="foo bars"></b>
</div>
HTML, LIBXML_NOERROR);
// In quirks mode class names compare case-insensitively: "Bar" matches "bar" and "bAR",
// but not the longer token "bars".
$collection = $dom->documentElement->getElementsByClassName("Bar");
echo "There are {$dom->getElementsByClassName("foo \n bar")->count()} items in the document in total that have both \"foo\" and \"bar\"\n";
echo "There are {$collection->count()} \"Bar\" items\n";
// Iteration and item($key) must hand out the very same objects.
foreach ($collection as $key => $node) {
echo "--- Key $key ---\n";
var_dump($node->tagName, $node->textContent);
var_dump($node === $collection->item($key));
}
echo "--- named item \"here\" ---\n";
// Of the elements named "here", only those that are part of the collection qualify;
// the first such element is the <p> with text "2".
var_dump($collection->namedItem("here")->textContent);
?>
--EXPECT--
There are 1 items in the document in total that have both "foo" and "bar"
There are 4 "Bar" items
--- Key 0 ---
string(3) "DIV"
string(56) "
1
2
3
4
"
bool(true)
--- Key 1 ---
string(4) "MAIN"
string(45) "
1
2
3
4
"
bool(true)
--- Key 2 ---
string(1) "P"
string(1) "2"
bool(true)
--- Key 3 ---
string(1) "P"
string(1) "4"
bool(true)
--- named item "here" ---
string(1) "2"

View File

@@ -51,7 +51,7 @@ static zend_always_inline void dom_add_token(HashTable *ht, zend_string *token)
/* https://dom.spec.whatwg.org/#concept-ordered-set-parser
* and https://infra.spec.whatwg.org/#split-on-ascii-whitespace */
static void dom_ordered_set_parser(HashTable *token_set, const char *position)
void dom_ordered_set_parser(HashTable *token_set, const char *position, bool to_lowercase)
{
/* Adapted steps from "split on ASCII whitespace" such that the loop directly appends to the token set. */
@@ -72,6 +72,9 @@ static void dom_ordered_set_parser(HashTable *token_set, const char *position)
/* 4.2. Append token to tokens. */
zend_string *token = zend_string_init(start, length, false);
if (to_lowercase) {
zend_str_tolower(ZSTR_VAL(token), length);
}
dom_add_token(token_set, token);
zend_string_release_ex(token, false);
@@ -83,6 +86,53 @@ static void dom_ordered_set_parser(HashTable *token_set, const char *position)
* => That's the token set. */
}
/* This returns true if all tokens in "token_set" are found in "value". */
bool dom_ordered_set_all_contained(HashTable *token_set, const char *value, bool to_lowercase)
{
/* This code is conceptually close to dom_ordered_set_parser(),
* but without building a hash table.
* Since the storage of the token set maps a value on itself,
* we can reuse that storage as a "seen" flag by setting it to NULL. */
zval *zv;
uint32_t still_needed = zend_hash_num_elements(token_set);
value += strspn(value, ascii_whitespace);
while (*value != '\0' && still_needed > 0) {
const char *start = value;
value += strcspn(value, ascii_whitespace);
size_t length = value - start;
if (to_lowercase) {
ALLOCA_FLAG(use_heap)
char *lc_str = zend_str_tolower_copy(do_alloca(length + 1, use_heap), start, length);
zv = zend_hash_str_find(token_set, lc_str, length);
free_alloca(lc_str, use_heap);
} else {
zv = zend_hash_str_find(token_set, start, length);
}
if (zv) {
if (Z_STR_P(zv)) {
still_needed--;
Z_STR_P(zv) = NULL;
}
}
value += strspn(value, ascii_whitespace);
}
/* Restore "seen" flag. */
zend_string *k;
ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(token_set, k, zv) {
if (!Z_STR_P(zv)) {
Z_STR_P(zv) = k;
}
} ZEND_HASH_FOREACH_END();
return still_needed == 0;
}
/* https://dom.spec.whatwg.org/#concept-ordered-set-serializer */
static char *dom_ordered_set_serializer(HashTable *token_set)
{
@@ -166,7 +216,7 @@ static void dom_token_list_update_set(dom_token_list_object *intern, HashTable *
xmlChar *value = dom_token_list_get_class_value(attr, &should_free);
if (value != NULL) {
/* 2. Otherwise, parse the token set. */
dom_ordered_set_parser(token_set, (const char *) value);
dom_ordered_set_parser(token_set, (const char *) value, false);
intern->cached_string = estrdup((const char *) value);
} else {
intern->cached_string = NULL;

View File

@@ -35,6 +35,8 @@ static inline dom_token_list_object *php_dom_token_list_from_dom_obj(dom_object
return (dom_token_list_object *)((char *) obj - XtOffsetOf(dom_token_list_object, dom));
}
void dom_ordered_set_parser(HashTable *token_set, const char *position, bool to_lowercase);
bool dom_ordered_set_all_contained(HashTable *token_set, const char *value, bool to_lowercase);
void dom_token_list_ctor(dom_token_list_object *intern, dom_object *element_obj);
void dom_token_list_free_obj(zend_object *object);
zval *dom_token_list_read_dimension(zend_object *object, zval *offset, int type, zval *rv);