1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Optimize size of ext/dom (#12830)

* Remove unused upper case tag static data

* Shrink size of static binary search tree

This also makes it more efficient on the data cache.

* Update patches
This commit is contained in:
Niels Dossche
2023-11-29 21:27:00 +00:00
committed by GitHub
parent 37a1e191dd
commit f3ece813ff
6 changed files with 4741 additions and 4729 deletions

View File

@@ -19,12 +19,12 @@ extern "C" {
/*
 * Entry type of the static binary search tree ("sbst"), as shown in this
 * diff hunk.
 *
 * NOTE(review): the +/- diff markers are missing from this view, so every
 * changed member is listed twice. Presumably the first declaration of each
 * duplicated member is the removed one and the second is its replacement
 * (the commit is titled "Shrink size of static binary search tree") --
 * confirm against the real header before relying on this.
 */
typedef struct {
lxb_char_t key;
/* Presumably the old layout: pointer to the value plus a size_t length. */
void *value;
size_t value_len;
/* Presumably the new layout: value stored inline in a 6-element array. */
lxb_char_t value[6];
unsigned char value_len;
/* Presumably the old link fields, each a size_t. */
size_t left;
size_t right;
size_t next;
/* Presumably the new link fields, narrowed to unsigned short. */
unsigned short left;
unsigned short right;
unsigned short next;
}
lexbor_sbst_entry_static_t;

File diff suppressed because it is too large Load Diff

View File

@@ -1815,7 +1815,7 @@ lxb_html_tokenizer_state_char_ref_named(lxb_html_tokenizer_t *tkz,
goto done;
}
if (entry->value != NULL) {
if (entry->value[0] != 0) {
tkz->entity_end = (tkz->pos + (data - begin)) - tkz->start;
tkz->entity_match = entry;
}

View File

@@ -224,6 +224,7 @@ static const lxb_tag_data_t lxb_tag_res_data_default[LXB_TAG__LAST_ENTRY] =
{{.u.short_str = "xmp", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
};
#if 0
static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY] =
{
{{.u.short_str = "#UNDEF", .length = 6, .next = NULL}, LXB_TAG__UNDEF, 1, true},
@@ -423,6 +424,7 @@ static const lxb_tag_data_t lxb_tag_res_data_upper_default[LXB_TAG__LAST_ENTRY]
{{.u.short_str = "WBR", .length = 3, .next = NULL}, LXB_TAG_WBR, 1, true},
{{.u.short_str = "XMP", .length = 3, .next = NULL}, LXB_TAG_XMP, 1, true}
};
#endif
static const lexbor_shs_entry_t lxb_tag_res_shs_data_default[] =
{

View File

@@ -92,6 +92,7 @@ lxb_tag_data_by_name(lexbor_hash_t *hash, const lxb_char_t *name, size_t len)
lexbor_hash_search_lower, name, len);
}
#if 0
const lxb_tag_data_t *
lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
const lxb_char_t *name, size_t len)
@@ -114,6 +115,7 @@ lxb_tag_data_by_name_upper(lexbor_hash_t *hash,
return (const lxb_tag_data_t *) lexbor_hash_search(hash,
lexbor_hash_search_upper, name, len);
}
#endif
/*
* No inline functions for ABI.

View File

@@ -17,15 +17,23 @@ This contains the following patch files in mailbox format.
A PHP-specific patch that modifies the utilities and data structures so that smaller lookup tables can be generated.
This patch won't be upstreamed because it breaks the generality of those data structures, i.e. it only works
because we only use them for character encoding.
* 0001-Remove-unused-upper-case-tag-static-data.patch
A PHP-specific patch to remove the unused upper-case tag static data. This shrinks the static data size.
* 0001-Shrink-size-of-static-binary-search-tree.patch
A PHP-specific patch to shrink the size of the static binary search tree for entities.
This shrinks the static data size and reduces data cache pressure.
**Note**: for this patch, the utilities that generate the tables are also patched.
Make sure to apply on a fresh Lexbor clone and run (in `lexbor/utils/encoding`): `python3 single-byte.py` and `python3 multi-byte.py` to generate the tables.
Also run `python3 tokenizer_entities_bst.py` to generate the static binary search tree for entities.
## How to apply
* cd into `ext/dom/lexbor_bridge/lexbor`
* cd into `ext/dom/lexbor/lexbor`
* `git am -3 ../patches/0001-Expose-line-and-column-information-for-use-in-PHP.patch`
* `git am -3 ../patches/0001-Track-implied-added-nodes-for-options-use-in-PHP.patch`
* `git am -3 ../patches/0001-Patch-out-CSS-parser.patch`
* `git am -3 ../patches/0001-Patch-utilities-and-data-structure-to-be-able-to-gen.patch`
* `git reset HEAD~4` # 4 is the number of commits created by the above commands
* `git am -3 ../patches/0001-Remove-unused-upper-case-tag-static-data.patch`
* `git am -3 ../patches/0001-Shrink-size-of-static-binary-search-tree.patch`
* `git reset HEAD~6` # 6 is the number of commits created by the above commands