mirror of
https://github.com/php/php-src.git
synced 2026-04-13 02:52:48 +02:00
This submission introduces two features.
The first one is support for Perl-style matching regexp delimiters, i.e.
using <[{( and )}]> to delimit the regular expressions.
The second one is a new 'F' modifier that allows you to specify a function name
in the replacement argument to preg_replace(). This function will be called
when the replacement needs to be made. It is passed an array of full matched
pattern and captured subpatterns and it is expected to return a string that
will be used for replacement. 'e' and 'F' modifiers cannot be used together.
@- Implemented support for Perl-style matching regexp delimiters in PCRE.
@ You can use <{[( and )]}> to delimit your expressions now. (Andrei)
@- Introduced new 'F' modifier in PCRE that lets you specify a function
@ name in the replacement argument to preg_replace() that will be called
@ at run-time to provide the replacement string. It is passed an array of
@ matched pattern and subpatterns. (Andrei)
This commit is contained in:
@@ -41,6 +41,7 @@
|
||||
#define PREG_SPLIT_NO_EMPTY (1<<0)
|
||||
|
||||
#define PREG_REPLACE_EVAL (1<<0)
|
||||
#define PREG_REPLACE_FUNC (1<<1)
|
||||
|
||||
#ifdef ZTS
|
||||
int pcre_globals_id;
|
||||
@@ -147,6 +148,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
|
||||
const char *error;
|
||||
int erroffset;
|
||||
char delimiter;
|
||||
char start_delimiter;
|
||||
char end_delimiter;
|
||||
char *p, *pp;
|
||||
char *pattern;
|
||||
int regex_len;
|
||||
@@ -192,20 +195,47 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
|
||||
zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* We need to iterate through the pattern, searching for the ending delimiter,
|
||||
but skipping the backslashed delimiters. If the ending delimiter is not
|
||||
found, display a warning. */
|
||||
pp = p;
|
||||
while (*pp != 0) {
|
||||
if (*pp == '\\' && pp[1] != 0) pp++;
|
||||
else if (*pp == delimiter)
|
||||
break;
|
||||
pp++;
|
||||
}
|
||||
if (*pp == 0) {
|
||||
zend_error(E_WARNING, "No ending delimiter found");
|
||||
return NULL;
|
||||
|
||||
start_delimiter = delimiter;
|
||||
if ((pp = strchr("([{< )]}> )]}>", delimiter)))
|
||||
delimiter = pp[5];
|
||||
end_delimiter = delimiter;
|
||||
|
||||
if (start_delimiter == end_delimiter) {
|
||||
/* We need to iterate through the pattern, searching for the ending delimiter,
|
||||
but skipping the backslashed delimiters. If the ending delimiter is not
|
||||
found, display a warning. */
|
||||
pp = p;
|
||||
while (*pp != 0) {
|
||||
if (*pp == '\\' && pp[1] != 0) pp++;
|
||||
else if (*pp == delimiter)
|
||||
break;
|
||||
pp++;
|
||||
}
|
||||
if (*pp == 0) {
|
||||
zend_error(E_WARNING, "No ending delimiter '%c' found", delimiter);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
/* We iterate through the pattern, searching for the matching ending
|
||||
* delimiter. For each matching starting delimiter, we increment nesting
|
||||
* level, and decrement it for each matching ending delimiter. If we
|
||||
* reach the end of the pattern without matching, display a warning.
|
||||
*/
|
||||
int brackets = 1; /* brackets nesting level */
|
||||
pp = p;
|
||||
while (*pp != 0) {
|
||||
if (*pp == '\\' && pp[1] != 0) pp++;
|
||||
else if (*pp == end_delimiter && --brackets <= 0)
|
||||
break;
|
||||
else if (*pp == start_delimiter)
|
||||
brackets++;
|
||||
pp++;
|
||||
}
|
||||
if (*pp == 0) {
|
||||
zend_error(E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Make a copy of the actual pattern. */
|
||||
@@ -235,7 +265,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
|
||||
case 'X': coptions |= PCRE_EXTRA; break;
|
||||
|
||||
/* Custom preg options */
|
||||
case 'e': poptions |= PREG_REPLACE_EVAL; break;
|
||||
case 'e': poptions |= PREG_REPLACE_EVAL; break;
|
||||
case 'F': poptions |= PREG_REPLACE_FUNC; break;
|
||||
|
||||
case ' ':
|
||||
case '\n':
|
||||
@@ -247,6 +278,12 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if ((poptions & PREG_REPLACE_EVAL) && (poptions & PREG_REPLACE_FUNC)) {
|
||||
zend_error(E_WARNING, "'e' and 'F' modifiers cannot be used together");
|
||||
efree(pattern);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if HAVE_SETLOCALE
|
||||
if (strcmp(locale, "C"))
|
||||
@@ -526,6 +563,40 @@ static inline int preg_get_backref(const char *walk, int *backref)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int preg_do_repl_func(char *function_name, char *subject, int *offsets, int count, char **result)
|
||||
{
|
||||
zval *retval_ptr; /* Function return value */
|
||||
zval function; /* Function to call */
|
||||
zval *function_ptr = &function; /* Pointer to function to call */
|
||||
zval **args[0]; /* Argument to pass to function */
|
||||
zval *subpats; /* Captured subpatterns */
|
||||
int result_len; /* Return value length */
|
||||
int i;
|
||||
CLS_FETCH();
|
||||
|
||||
ZVAL_STRING(function_ptr, function_name, 0);
|
||||
|
||||
MAKE_STD_ZVAL(subpats);
|
||||
array_init(subpats);
|
||||
for (i = 0; i < count; i++)
|
||||
add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
|
||||
args[0] = &subpats;
|
||||
|
||||
if (call_user_function_ex(CG(function_table), NULL, function_ptr, &retval_ptr, 1, args, 0, NULL) == SUCCESS && retval_ptr) {
|
||||
convert_to_string_ex(&retval_ptr);
|
||||
*result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
|
||||
result_len = Z_STRLEN_P(retval_ptr);
|
||||
zval_ptr_dtor(&retval_ptr);
|
||||
} else {
|
||||
php_error(E_WARNING, "Unable to call custom replacement function %s()", function_name);
|
||||
*result = empty_string;
|
||||
result_len = 0;
|
||||
}
|
||||
zval_dtor(subpats);
|
||||
FREE_ZVAL(subpats);
|
||||
|
||||
return result_len;
|
||||
}
|
||||
|
||||
static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
|
||||
int *offsets, int count, char **result)
|
||||
@@ -630,10 +701,13 @@ char *php_pcre_replace(char *regex, int regex_len,
|
||||
int size_offsets; /* Size of the offsets array */
|
||||
int new_len; /* Length of needed storage */
|
||||
int alloc_len; /* Actual allocated length */
|
||||
int eval_result_len=0; /* Length of the eval'ed string */
|
||||
int eval_result_len=0; /* Length of the eval'ed or
|
||||
function-returned string */
|
||||
int match_len; /* Length of the current match */
|
||||
int backref; /* Backreference number */
|
||||
int eval; /* If the replacement string should be eval'ed */
|
||||
int use_func; /* If the matches should be run through
|
||||
a function to get the replacement string */
|
||||
int start_offset; /* Where the new search starts */
|
||||
int g_notempty = 0; /* If the match should not be empty */
|
||||
char *result, /* Result of replacement */
|
||||
@@ -643,7 +717,7 @@ char *php_pcre_replace(char *regex, int regex_len,
|
||||
*match, /* The current match */
|
||||
*piece, /* The current piece of subject */
|
||||
*replace_end, /* End of replacement string */
|
||||
*eval_result, /* Result of eval */
|
||||
*eval_result, /* Result of eval or custom function */
|
||||
walk_last; /* Last walked character */
|
||||
|
||||
/* Compile regex or get it from cache. */
|
||||
@@ -670,6 +744,7 @@ char *php_pcre_replace(char *regex, int regex_len,
|
||||
start_offset = 0;
|
||||
replace_end = replace + replace_len;
|
||||
eval = preg_options & PREG_REPLACE_EVAL;
|
||||
use_func = preg_options & PREG_REPLACE_FUNC;
|
||||
|
||||
while (1) {
|
||||
/* Execute the regular expression. */
|
||||
@@ -695,6 +770,11 @@ char *php_pcre_replace(char *regex, int regex_len,
|
||||
eval_result_len = preg_do_eval(replace, replace_len, subject,
|
||||
offsets, count, &eval_result);
|
||||
new_len += eval_result_len;
|
||||
} else if (use_func) {
|
||||
/* Use custom function to get replacement string and its length. */
|
||||
eval_result_len = preg_do_repl_func(replace, subject, offsets,
|
||||
count, &eval_result);
|
||||
new_len += eval_result_len;
|
||||
} else { /* do regular substitution */
|
||||
walk = replace;
|
||||
walk_last = 0;
|
||||
@@ -726,11 +806,12 @@ char *php_pcre_replace(char *regex, int regex_len,
|
||||
/* copy replacement and backrefs */
|
||||
walkbuf = result + *result_len;
|
||||
|
||||
/* If evaluating, copy result to the buffer and clean up */
|
||||
if (eval) {
|
||||
/* If evaluating or using custom function, copy result to the buffer
|
||||
* and clean up. */
|
||||
if (eval || use_func) {
|
||||
memcpy(walkbuf, eval_result, eval_result_len);
|
||||
*result_len += eval_result_len;
|
||||
efree(eval_result);
|
||||
STR_FREE(eval_result);
|
||||
} else { /* do regular backreference copying */
|
||||
walk = replace;
|
||||
walk_last = 0;
|
||||
|
||||
Reference in New Issue
Block a user