mirror of
https://github.com/php/php-src.git
synced 2026-04-12 10:33:11 +02:00
Optimize get_browser() using prefix and contains checks
Avoid expensive regex compilations by checking for prefix and contained segments beforehand.
This commit is contained in:
@@ -27,6 +27,8 @@
|
||||
#include "zend_ini_scanner.h"
|
||||
#include "zend_globals.h"
|
||||
|
||||
#define BROWSCAP_NUM_CONTAINS 5
|
||||
|
||||
typedef struct {
|
||||
zend_string *key;
|
||||
zend_string *value;
|
||||
@@ -34,9 +36,13 @@ typedef struct {
|
||||
|
||||
typedef struct {
|
||||
zend_string *pattern;
|
||||
zend_string *regex;
|
||||
zend_string *parent;
|
||||
uint32_t kv_start;
|
||||
uint32_t kv_end;
|
||||
/* We ensure that the length fits in 16 bits, so this is fine */
|
||||
uint16_t contains_start[BROWSCAP_NUM_CONTAINS];
|
||||
uint8_t contains_len[BROWSCAP_NUM_CONTAINS];
|
||||
uint8_t prefix_len;
|
||||
} browscap_entry;
|
||||
|
||||
typedef struct {
|
||||
@@ -67,7 +73,9 @@ static void browscap_entry_dtor(zval *zvalue)
|
||||
{
|
||||
browscap_entry *entry = Z_PTR_P(zvalue);
|
||||
zend_string_release(entry->pattern);
|
||||
zend_string_release(entry->regex);
|
||||
if (entry->parent) {
|
||||
zend_string_release(entry->parent);
|
||||
}
|
||||
efree(entry);
|
||||
}
|
||||
|
||||
@@ -75,10 +83,55 @@ static void browscap_entry_dtor_persistent(zval *zvalue)
|
||||
{
|
||||
browscap_entry *entry = Z_PTR_P(zvalue);
|
||||
zend_string_release(entry->pattern);
|
||||
zend_string_release(entry->regex);
|
||||
if (entry->parent) {
|
||||
zend_string_release(entry->parent);
|
||||
}
|
||||
pefree(entry, 1);
|
||||
}
|
||||
|
||||
static inline zend_bool is_placeholder(char c) {
|
||||
return c == '?' || c == '*';
|
||||
}
|
||||
|
||||
/* Length of prefix not containing any wildcards */
|
||||
static uint8_t browscap_compute_prefix_len(zend_string *pattern) {
|
||||
size_t i;
|
||||
for (i = 0; i < ZSTR_LEN(pattern); i++) {
|
||||
if (is_placeholder(ZSTR_VAL(pattern)[i])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return MIN(i, UINT8_MAX);
|
||||
}
|
||||
|
||||
static size_t browscap_compute_contains(
|
||||
zend_string *pattern, size_t start_pos,
|
||||
uint16_t *contains_start, uint8_t *contains_len) {
|
||||
size_t i = start_pos;
|
||||
/* Find first non-placeholder character after prefix */
|
||||
for (; i < ZSTR_LEN(pattern); i++) {
|
||||
if (!is_placeholder(ZSTR_VAL(pattern)[i])) {
|
||||
/* Skip the case of a single non-placeholder character.
|
||||
* Let's try to find something longer instead. */
|
||||
if (i + 1 < ZSTR_LEN(pattern) &&
|
||||
!is_placeholder(ZSTR_VAL(pattern)[i + 1])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
*contains_start = i;
|
||||
|
||||
/* Find first placeholder character after that */
|
||||
for (; i < ZSTR_LEN(pattern); i++) {
|
||||
if (is_placeholder(ZSTR_VAL(pattern)[i])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
*contains_len = MIN(i - *contains_start, UINT8_MAX);
|
||||
return i;
|
||||
}
|
||||
|
||||
/* Length of regex, including escapes, anchors, etc. */
|
||||
static size_t browscap_compute_regex_len(zend_string *pattern) {
|
||||
size_t i, len = ZSTR_LEN(pattern);
|
||||
for (i = 0; i < ZSTR_LEN(pattern); i++) {
|
||||
@@ -227,12 +280,17 @@ static HashTable *browscap_entry_to_array(browser_data *bdata, browscap_entry *e
|
||||
ALLOC_HASHTABLE(ht);
|
||||
zend_hash_init(ht, 8, NULL, ZVAL_PTR_DTOR, 0);
|
||||
|
||||
ZVAL_STR_COPY(&tmp, entry->regex);
|
||||
ZVAL_STR(&tmp, browscap_convert_pattern(entry->pattern, 0));
|
||||
zend_hash_str_add(ht, "browser_name_regex", sizeof("browser_name_regex")-1, &tmp);
|
||||
|
||||
ZVAL_STR_COPY(&tmp, entry->pattern);
|
||||
zend_hash_str_add(ht, "browser_name_pattern", sizeof("browser_name_pattern")-1, &tmp);
|
||||
|
||||
if (entry->parent) {
|
||||
ZVAL_STR_COPY(&tmp, entry->parent);
|
||||
zend_hash_str_add(ht, "parent", sizeof("parent")-1, &tmp);
|
||||
}
|
||||
|
||||
for (i = entry->kv_start; i < entry->kv_end; i++) {
|
||||
ZVAL_STR_COPY(&tmp, bdata->kv[i].value);
|
||||
zend_hash_add(ht, bdata->kv[i].key, &tmp);
|
||||
@@ -256,17 +314,6 @@ static void php_browscap_parser_cb(zval *arg1, zval *arg2, zval *arg3, int callb
|
||||
if (ctx->current_entry != NULL && arg2) {
|
||||
zend_string *new_key, *new_value;
|
||||
|
||||
/* parent entry can not be same as current section -> causes infinite loop! */
|
||||
if (!strcasecmp(Z_STRVAL_P(arg1), "parent") &&
|
||||
ctx->current_section_name != NULL &&
|
||||
!strcasecmp(ZSTR_VAL(ctx->current_section_name), Z_STRVAL_P(arg2))
|
||||
) {
|
||||
zend_error(E_CORE_ERROR, "Invalid browscap ini file: "
|
||||
"'Parent' value cannot be same as the section name: %s "
|
||||
"(in file %s)", ZSTR_VAL(ctx->current_section_name), INI_STR("browscap"));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Set proper value for true/false settings */
|
||||
if ((Z_STRLEN_P(arg2) == 2 && !strncasecmp(Z_STRVAL_P(arg2), "on", sizeof("on") - 1)) ||
|
||||
(Z_STRLEN_P(arg2) == 3 && !strncasecmp(Z_STRVAL_P(arg2), "yes", sizeof("yes") - 1)) ||
|
||||
@@ -284,28 +331,61 @@ static void php_browscap_parser_cb(zval *arg1, zval *arg2, zval *arg3, int callb
|
||||
new_value = browscap_intern_str(ctx, Z_STR_P(arg2));
|
||||
}
|
||||
|
||||
new_key = browscap_intern_str_ci(ctx, Z_STR_P(arg1), persistent);
|
||||
browscap_add_kv(bdata, new_key, new_value, persistent);
|
||||
ctx->current_entry->kv_end = bdata->kv_used;
|
||||
}
|
||||
break;
|
||||
case ZEND_INI_PARSER_SECTION: {
|
||||
zval processed;
|
||||
zval unprocessed;
|
||||
if (!strcasecmp(Z_STRVAL_P(arg1), "parent")) {
|
||||
/* parent entry can not be same as current section -> causes infinite loop! */
|
||||
if (ctx->current_section_name != NULL &&
|
||||
!strcasecmp(ZSTR_VAL(ctx->current_section_name), Z_STRVAL_P(arg2))
|
||||
) {
|
||||
zend_error(E_CORE_ERROR, "Invalid browscap ini file: "
|
||||
"'Parent' value cannot be same as the section name: %s "
|
||||
"(in file %s)", ZSTR_VAL(ctx->current_section_name), INI_STR("browscap"));
|
||||
return;
|
||||
}
|
||||
|
||||
ctx->current_entry = pemalloc(sizeof(browscap_entry), persistent);
|
||||
zend_hash_update_ptr(bdata->htab, Z_STR_P(arg1), ctx->current_entry);
|
||||
|
||||
if (ctx->current_section_name) {
|
||||
zend_string_release(ctx->current_section_name);
|
||||
if (ctx->current_entry->parent) {
|
||||
zend_string_release(ctx->current_entry->parent);
|
||||
}
|
||||
ctx->current_entry->parent = new_value;
|
||||
} else {
|
||||
new_key = browscap_intern_str_ci(ctx, Z_STR_P(arg1), persistent);
|
||||
browscap_add_kv(bdata, new_key, new_value, persistent);
|
||||
ctx->current_entry->kv_end = bdata->kv_used;
|
||||
}
|
||||
ctx->current_section_name = zend_string_copy(Z_STR_P(arg1));
|
||||
|
||||
ctx->current_entry->regex = browscap_convert_pattern(Z_STR_P(arg1), persistent);
|
||||
ctx->current_entry->pattern = zend_string_copy(Z_STR_P(arg1));
|
||||
ctx->current_entry->kv_end = ctx->current_entry->kv_start = bdata->kv_used;
|
||||
}
|
||||
break;
|
||||
case ZEND_INI_PARSER_SECTION:
|
||||
{
|
||||
browscap_entry *entry;
|
||||
zend_string *pattern = Z_STR_P(arg1);
|
||||
size_t pos;
|
||||
int i;
|
||||
|
||||
if (ZSTR_LEN(pattern) > UINT16_MAX) {
|
||||
php_error_docref(NULL, E_WARNING,
|
||||
"Skipping excessively long pattern of length %zd", ZSTR_LEN(pattern));
|
||||
break;
|
||||
}
|
||||
|
||||
entry = ctx->current_entry
|
||||
= pemalloc(sizeof(browscap_entry), persistent);
|
||||
zend_hash_update_ptr(bdata->htab, pattern, entry);
|
||||
|
||||
if (ctx->current_section_name) {
|
||||
zend_string_release(ctx->current_section_name);
|
||||
}
|
||||
ctx->current_section_name = zend_string_copy(pattern);
|
||||
|
||||
entry->pattern = zend_string_copy(pattern);
|
||||
entry->kv_end = entry->kv_start = bdata->kv_used;
|
||||
entry->parent = NULL;
|
||||
|
||||
pos = entry->prefix_len = browscap_compute_prefix_len(pattern);
|
||||
for (i = 0; i < BROWSCAP_NUM_CONTAINS; i++) {
|
||||
pos = browscap_compute_contains(pattern, pos,
|
||||
&entry->contains_start[i], &entry->contains_len[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* }}} */
|
||||
@@ -455,32 +535,79 @@ PHP_MSHUTDOWN_FUNCTION(browscap) /* {{{ */
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
static inline size_t browscap_get_minimum_length(browscap_entry *entry) {
|
||||
size_t len = entry->prefix_len;
|
||||
int i;
|
||||
for (i = 0; i < BROWSCAP_NUM_CONTAINS; i++) {
|
||||
len += entry->contains_len[i];
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static int browser_reg_compare(
|
||||
zval *entry_zv, int num_args, va_list args, zend_hash_key *key) /* {{{ */
|
||||
{
|
||||
browscap_entry *entry = Z_PTR_P(entry_zv);
|
||||
char *lookup_browser_name = va_arg(args, char *);
|
||||
int lookup_browser_length = va_arg(args, int);
|
||||
zend_string *agent_name = va_arg(args, zend_string *);
|
||||
browscap_entry **found_entry_ptr = va_arg(args, browscap_entry **);
|
||||
browscap_entry *found_entry = *found_entry_ptr;
|
||||
ALLOCA_FLAG(use_heap);
|
||||
zend_string *pattern_lc, *regex;
|
||||
const char *cur;
|
||||
int i;
|
||||
|
||||
pcre *re;
|
||||
int re_options;
|
||||
pcre_extra *re_extra;
|
||||
|
||||
/* See if we have an exact match, if so, we're done... */
|
||||
if (found_entry) {
|
||||
if (!strcasecmp(ZSTR_VAL(found_entry->pattern), lookup_browser_name)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
re = pcre_get_compiled_regex(entry->regex, &re_extra, &re_options);
|
||||
if (re == NULL) {
|
||||
/* Agent name too short */
|
||||
if (ZSTR_LEN(agent_name) < browscap_get_minimum_length(entry)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pcre_exec(re, re_extra, lookup_browser_name, lookup_browser_length, 0, re_options, NULL, 0) == 0) {
|
||||
/* Quickly discard patterns where the prefix doesn't match. */
|
||||
if (zend_binary_strcasecmp(
|
||||
ZSTR_VAL(agent_name), entry->prefix_len,
|
||||
ZSTR_VAL(entry->pattern), entry->prefix_len) != 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Lowercase the pattern, the agent name is already lowercase */
|
||||
ZSTR_ALLOCA_ALLOC(pattern_lc, ZSTR_LEN(entry->pattern), use_heap);
|
||||
zend_str_tolower_copy(ZSTR_VAL(pattern_lc), ZSTR_VAL(entry->pattern), ZSTR_LEN(entry->pattern));
|
||||
|
||||
/* Check if the agent contains the "contains" portions */
|
||||
cur = ZSTR_VAL(agent_name) + entry->prefix_len;
|
||||
for (i = 0; i < BROWSCAP_NUM_CONTAINS; i++) {
|
||||
if (entry->contains_len[i] != 0) {
|
||||
cur = zend_memnstr(cur,
|
||||
ZSTR_VAL(pattern_lc) + entry->contains_start[i],
|
||||
entry->contains_len[i],
|
||||
ZSTR_VAL(agent_name) + ZSTR_LEN(agent_name));
|
||||
if (!cur) {
|
||||
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
|
||||
return 0;
|
||||
}
|
||||
cur += entry->contains_len[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* See if we have an exact match, if so, we're done... */
|
||||
if (zend_string_equals(agent_name, pattern_lc)) {
|
||||
*found_entry_ptr = entry;
|
||||
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
|
||||
return ZEND_HASH_APPLY_STOP;
|
||||
}
|
||||
|
||||
regex = browscap_convert_pattern(entry->pattern, 0);
|
||||
re = pcre_get_compiled_regex(regex, &re_extra, &re_options);
|
||||
if (re == NULL) {
|
||||
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
|
||||
zend_string_release(regex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pcre_exec(re, re_extra, ZSTR_VAL(agent_name), ZSTR_LEN(agent_name), 0, re_options, NULL, 0) == 0) {
|
||||
/* If we've found a possible browser, we need to do a comparison of the
|
||||
number of characters changed in the user agent being checked versus
|
||||
the previous match found and the current match. */
|
||||
@@ -523,6 +650,8 @@ static int browser_reg_compare(
|
||||
}
|
||||
}
|
||||
|
||||
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
|
||||
zend_string_release(regex);
|
||||
return 0;
|
||||
}
|
||||
/* }}} */
|
||||
@@ -537,12 +666,8 @@ static void browscap_zval_copy_ctor(zval *p) /* {{{ */
|
||||
Get information about the capabilities of a browser. If browser_name is omitted or null, HTTP_USER_AGENT is used. Returns an object by default; if return_array is true, returns an array. */
|
||||
PHP_FUNCTION(get_browser)
|
||||
{
|
||||
char *agent_name = NULL;
|
||||
size_t agent_name_len = 0;
|
||||
zend_string *agent_name = NULL, *lookup_browser_name;
|
||||
zend_bool return_array = 0;
|
||||
zval *agent, *parent_agent_name_zv, *http_user_agent;
|
||||
zend_string *parent_agent_name;
|
||||
char *lookup_browser_name;
|
||||
browser_data *bdata;
|
||||
browscap_entry *found_entry = NULL;
|
||||
HashTable *agent_ht;
|
||||
@@ -562,27 +687,29 @@ PHP_FUNCTION(get_browser)
|
||||
bdata = &global_bdata;
|
||||
}
|
||||
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!b", &agent_name, &agent_name_len, &return_array) == FAILURE) {
|
||||
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S!b", &agent_name, &return_array) == FAILURE) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (agent_name == NULL) {
|
||||
if ((Z_TYPE(PG(http_globals)[TRACK_VARS_SERVER]) == IS_ARRAY || zend_is_auto_global_str(ZEND_STRL("_SERVER"))) &&
|
||||
(http_user_agent = zend_hash_str_find(Z_ARRVAL_P(&PG(http_globals)[TRACK_VARS_SERVER]), "HTTP_USER_AGENT", sizeof("HTTP_USER_AGENT")-1)) == NULL
|
||||
) {
|
||||
zval *http_user_agent = NULL;
|
||||
if (Z_TYPE(PG(http_globals)[TRACK_VARS_SERVER]) == IS_ARRAY
|
||||
|| zend_is_auto_global_str(ZEND_STRL("_SERVER"))) {
|
||||
http_user_agent = zend_hash_str_find(
|
||||
Z_ARRVAL_P(&PG(http_globals)[TRACK_VARS_SERVER]),
|
||||
"HTTP_USER_AGENT", sizeof("HTTP_USER_AGENT")-1);
|
||||
}
|
||||
if (http_user_agent == NULL) {
|
||||
php_error_docref(NULL, E_WARNING, "HTTP_USER_AGENT variable is not set, cannot determine user agent name");
|
||||
RETURN_FALSE;
|
||||
}
|
||||
agent_name = Z_STRVAL_P(http_user_agent);
|
||||
agent_name_len = Z_STRLEN_P(http_user_agent);
|
||||
agent_name = Z_STR_P(http_user_agent);
|
||||
}
|
||||
|
||||
lookup_browser_name = estrndup(agent_name, agent_name_len);
|
||||
php_strtolower(lookup_browser_name, agent_name_len);
|
||||
|
||||
found_entry = zend_hash_str_find_ptr(bdata->htab, lookup_browser_name, agent_name_len);
|
||||
lookup_browser_name = zend_string_tolower(agent_name);
|
||||
found_entry = zend_hash_find_ptr(bdata->htab, lookup_browser_name);
|
||||
if (found_entry == NULL) {
|
||||
zend_hash_apply_with_arguments(bdata->htab, browser_reg_compare, 3, lookup_browser_name, agent_name_len, &found_entry);
|
||||
zend_hash_apply_with_arguments(bdata->htab, browser_reg_compare, 2, lookup_browser_name, &found_entry);
|
||||
|
||||
if (found_entry == NULL) {
|
||||
found_entry = zend_hash_str_find_ptr(bdata->htab,
|
||||
@@ -602,11 +729,9 @@ PHP_FUNCTION(get_browser)
|
||||
object_and_properties_init(return_value, zend_standard_class_def, agent_ht);
|
||||
}
|
||||
|
||||
parent_agent_name_zv = zend_hash_str_find(agent_ht, "parent", sizeof("parent")-1);
|
||||
parent_agent_name = parent_agent_name_zv ? Z_STR_P(parent_agent_name_zv) : NULL;
|
||||
|
||||
while (parent_agent_name) {
|
||||
if ((found_entry = zend_hash_find_ptr(bdata->htab, parent_agent_name)) == NULL) {
|
||||
while (found_entry->parent) {
|
||||
found_entry = zend_hash_find_ptr(bdata->htab, found_entry->parent);
|
||||
if (found_entry == NULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -617,14 +742,11 @@ PHP_FUNCTION(get_browser)
|
||||
zend_hash_merge(Z_OBJPROP_P(return_value), agent_ht, (copy_ctor_func_t) browscap_zval_copy_ctor, 0);
|
||||
}
|
||||
|
||||
parent_agent_name_zv = zend_hash_str_find(agent_ht, "parent", sizeof("parent")-1);
|
||||
parent_agent_name = parent_agent_name_zv ? Z_STR_P(parent_agent_name_zv) : NULL;
|
||||
|
||||
zend_hash_destroy(agent_ht);
|
||||
efree(agent_ht);
|
||||
}
|
||||
|
||||
efree(lookup_browser_name);
|
||||
zend_string_release(lookup_browser_name);
|
||||
}
|
||||
/* }}} */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user