1
0
mirror of https://github.com/php/php-src.git synced 2026-04-13 02:52:48 +02:00

This submission introduces two features.

The first one is support for Perl-style matching regexp delimiters, i.e.
using <[{( and )}]> to delimit the regular expressions.

The second one is a new 'F' modifier that allows you to specify a function name
in the replacement argument to preg_replace(). This function will be called
whenever a replacement needs to be made. It is passed an array containing the
full matched pattern followed by the captured subpatterns, and it is expected
to return a string that will be used as the replacement. The 'e' and 'F'
modifiers cannot be used together.

@- Implemented support for Perl-style matching regexp delimiters in PCRE.
@  You can use <{[( and )]}> to delimit your expressions now. (Andrei)

@- Introduced new 'F' modifier in PCRE that lets you specify a function
@  name in the replacement argument to preg_replace() that will be called
@  at run-time to provide the replacement string. It is passed an array of
@  the full matched pattern and the captured subpatterns. (Andrei)
This commit is contained in:
Andrei Zmievski
2000-11-23 17:25:13 +00:00
parent f03b949134
commit 0af5c36d94

View File

@@ -41,6 +41,7 @@
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_REPLACE_EVAL (1<<0)
#define PREG_REPLACE_FUNC (1<<1)
#ifdef ZTS
int pcre_globals_id;
@@ -147,6 +148,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
const char *error;
int erroffset;
char delimiter;
char start_delimiter;
char end_delimiter;
char *p, *pp;
char *pattern;
int regex_len;
@@ -192,20 +195,47 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
zend_error(E_WARNING, "Delimiter must not be alphanumeric or backslash");
return NULL;
}
/* We need to iterate through the pattern, searching for the ending delimiter,
but skipping the backslashed delimiters. If the ending delimiter is not
found, display a warning. */
pp = p;
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
else if (*pp == delimiter)
break;
pp++;
}
if (*pp == 0) {
zend_error(E_WARNING, "No ending delimiter found");
return NULL;
start_delimiter = delimiter;
if ((pp = strchr("([{< )]}> )]}>", delimiter)))
delimiter = pp[5];
end_delimiter = delimiter;
if (start_delimiter == end_delimiter) {
/* We need to iterate through the pattern, searching for the ending delimiter,
but skipping the backslashed delimiters. If the ending delimiter is not
found, display a warning. */
pp = p;
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
else if (*pp == delimiter)
break;
pp++;
}
if (*pp == 0) {
zend_error(E_WARNING, "No ending delimiter '%c' found", delimiter);
return NULL;
}
} else {
/* We iterate through the pattern, searching for the matching ending
* delimiter. For each matching starting delimiter, we increment nesting
* level, and decrement it for each matching ending delimiter. If we
* reach the end of the pattern without matching, display a warning.
*/
int brackets = 1; /* brackets nesting level */
pp = p;
while (*pp != 0) {
if (*pp == '\\' && pp[1] != 0) pp++;
else if (*pp == end_delimiter && --brackets <= 0)
break;
else if (*pp == start_delimiter)
brackets++;
pp++;
}
if (*pp == 0) {
zend_error(E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
return NULL;
}
}
/* Make a copy of the actual pattern. */
@@ -235,7 +265,8 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
case 'X': coptions |= PCRE_EXTRA; break;
/* Custom preg options */
case 'e': poptions |= PREG_REPLACE_EVAL; break;
case 'e': poptions |= PREG_REPLACE_EVAL; break;
case 'F': poptions |= PREG_REPLACE_FUNC; break;
case ' ':
case '\n':
@@ -247,6 +278,12 @@ static pcre* pcre_get_compiled_regex(char *regex, pcre_extra *extra, int *preg_o
return NULL;
}
}
if ((poptions & PREG_REPLACE_EVAL) && (poptions & PREG_REPLACE_FUNC)) {
zend_error(E_WARNING, "'e' and 'F' modifiers cannot be used together");
efree(pattern);
return NULL;
}
#if HAVE_SETLOCALE
if (strcmp(locale, "C"))
@@ -526,6 +563,40 @@ static inline int preg_get_backref(const char *walk, int *backref)
return 1;
}
/*
 * Obtain the replacement string for a single match by calling a user-supplied
 * function.
 *
 * function_name  name of the function to call
 * subject        string the match was found in
 * offsets        pairs of offsets into subject; pair i is stored at
 *                offsets[2*i] (start) and offsets[2*i + 1] (end)
 * count          number of offset pairs to pass on
 * result         out: receives the replacement string
 *
 * The function is invoked with exactly one argument: an array holding, in
 * order, the substring of subject described by each offset pair.
 *
 * On success the return value is converted to a string, a copy made with
 * estrndup() is stored in *result and its length is returned. If the call
 * fails or yields no return value, a warning is raised, *result is set to
 * empty_string and 0 is returned.
 */
static int preg_do_repl_func(char *function_name, char *subject, int *offsets, int count, char **result)
{
	zval		*retval_ptr = NULL;	/* Function return value */
	zval		 function;		/* Function to call */
	zval		*function_ptr = &function;	/* Pointer to function to call */
	zval	   **args[1];			/* Argument to pass to function; must hold
						   one slot, since args[0] is written below
						   and one parameter is passed to the call */
	zval		*subpats;		/* Captured subpatterns */
	int		 result_len;		/* Return value length */
	int		 i;
	CLS_FETCH();

	/* Wrap the function name in a zval so it can be handed to the call API.
	   NOTE(review): the trailing 0 presumably means "do not duplicate". */
	ZVAL_STRING(function_ptr, function_name, 0);

	/* Build the array of matched substrings, one entry per offset pair. */
	MAKE_STD_ZVAL(subpats);
	array_init(subpats);
	for (i = 0; i < count; i++) {
		add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
	}
	args[0] = &subpats;

	if (call_user_function_ex(CG(function_table), NULL, function_ptr, &retval_ptr, 1, args, 0, NULL) == SUCCESS && retval_ptr) {
		/* Whatever the function returned is used in its string form. */
		convert_to_string_ex(&retval_ptr);
		*result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
		result_len = Z_STRLEN_P(retval_ptr);
		zval_ptr_dtor(&retval_ptr);
	} else {
		php_error(E_WARNING, "Unable to call custom replacement function %s()", function_name);
		/* Fall back to an empty replacement so the caller still gets a
		   usable string/length pair. */
		*result = empty_string;
		result_len = 0;
	}

	/* Release the argument array and its container. */
	zval_dtor(subpats);
	FREE_ZVAL(subpats);

	return result_len;
}
static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
int *offsets, int count, char **result)
@@ -630,10 +701,13 @@ char *php_pcre_replace(char *regex, int regex_len,
int size_offsets; /* Size of the offsets array */
int new_len; /* Length of needed storage */
int alloc_len; /* Actual allocated length */
int eval_result_len=0; /* Length of the eval'ed string */
int eval_result_len=0; /* Length of the eval'ed or
function-returned string */
int match_len; /* Length of the current match */
int backref; /* Backreference number */
int eval; /* If the replacement string should be eval'ed */
int use_func; /* If the matches should be run through
a function to get the replacement string */
int start_offset; /* Where the new search starts */
int g_notempty = 0; /* If the match should not be empty */
char *result, /* Result of replacement */
@@ -643,7 +717,7 @@ char *php_pcre_replace(char *regex, int regex_len,
*match, /* The current match */
*piece, /* The current piece of subject */
*replace_end, /* End of replacement string */
*eval_result, /* Result of eval */
*eval_result, /* Result of eval or custom function */
walk_last; /* Last walked character */
/* Compile regex or get it from cache. */
@@ -670,6 +744,7 @@ char *php_pcre_replace(char *regex, int regex_len,
start_offset = 0;
replace_end = replace + replace_len;
eval = preg_options & PREG_REPLACE_EVAL;
use_func = preg_options & PREG_REPLACE_FUNC;
while (1) {
/* Execute the regular expression. */
@@ -695,6 +770,11 @@ char *php_pcre_replace(char *regex, int regex_len,
eval_result_len = preg_do_eval(replace, replace_len, subject,
offsets, count, &eval_result);
new_len += eval_result_len;
} else if (use_func) {
/* Use custom function to get replacement string and its length. */
eval_result_len = preg_do_repl_func(replace, subject, offsets,
count, &eval_result);
new_len += eval_result_len;
} else { /* do regular substitution */
walk = replace;
walk_last = 0;
@@ -726,11 +806,12 @@ char *php_pcre_replace(char *regex, int regex_len,
/* copy replacement and backrefs */
walkbuf = result + *result_len;
/* If evaluating, copy result to the buffer and clean up */
if (eval) {
/* If evaluating or using custom function, copy result to the buffer
* and clean up. */
if (eval || use_func) {
memcpy(walkbuf, eval_result, eval_result_len);
*result_len += eval_result_len;
efree(eval_result);
STR_FREE(eval_result);
} else { /* do regular backreference copying */
walk = replace;
walk_last = 0;