mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
ext/pcre: Add "/r" modifier (#13583)
Adds support for the "caseless restricted" matching introduced in PCRE2 10.43, exposed through the "r" modifier. This corresponds to `PCRE2_EXTRA_CASELESS_RESTRICT` in PCRE2. It is an "extra" option, which means it cannot be passed as a pcre2_compile() function parameter; instead, it is passed in a pcre2_set_compile_extra_options() call. Previously, these extra options were set in php_pcre_init_pcre2(); after this change, they can be customized by adding bits to `eoptions` in pcre_get_compiled_regex_cache_ex(). The tests for this change are ported from the upstream test suite[^1]. [^1]: https://github.com/PCRE2Project/pcre2/commit/c13d54f6581#diff-8c8312e4eb2d35bb16485404b7b5cc0eaef0bca1aa95ff5febf6a1890048305c
This commit is contained in:
committed by
GitHub
parent
353d4ce075
commit
7b23470666
@@ -210,6 +210,10 @@ PHP 8.4 UPGRADE NOTES
|
||||
As a consequence, LoongArch JIT support has been added, spaces
|
||||
are now allowed between braces in Perl-compatible items, and
|
||||
variable-length lookbehind assertions are now supported.
|
||||
. Added support for the "r" (PCRE2_EXTRA_CASELESS_RESTRICT) modifier, as well
|
||||
as the (?r) mode modifier. When enabled along with the case-insensitive
|
||||
modifier ("i"), ASCII characters no longer case-match non-ASCII
|
||||
characters.
|
||||
|
||||
- PDO:
|
||||
. Added support for driver-specific subclasses.
|
||||
|
||||
@@ -185,6 +185,9 @@ PHP 8.4 INTERNALS UPGRADE NOTES
|
||||
When flags should be ignored, pass 0 to the flags argument.
|
||||
- php_pcre_match_impl() and pcre_get_compiled_regex_cache_ex() now use
|
||||
proper boolean argument types instead of integer types.
|
||||
- pcre_get_compiled_regex_cache_ex() now provides an option to collect extra
|
||||
options (from modifiers used in the expression, for example), and calls
|
||||
pcre2_set_compile_extra_options() with those options.
|
||||
|
||||
========================
|
||||
4. OpCode changes
|
||||
|
||||
@@ -592,6 +592,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
|
||||
#else
|
||||
uint32_t coptions = 0;
|
||||
#endif
|
||||
uint32_t eoptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
|
||||
PCRE2_UCHAR error[128];
|
||||
PCRE2_SIZE erroffset;
|
||||
int errnumber;
|
||||
@@ -722,6 +723,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
|
||||
/* PCRE specific options */
|
||||
case 'A': coptions |= PCRE2_ANCHORED; break;
|
||||
case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
|
||||
case 'r': eoptions |= PCRE2_EXTRA_CASELESS_RESTRICT; break;
|
||||
case 'S': /* Pass. */ break;
|
||||
case 'X': /* Pass. */ break;
|
||||
case 'U': coptions |= PCRE2_UNGREEDY; break;
|
||||
@@ -776,6 +778,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, bo
|
||||
}
|
||||
pcre2_set_character_tables(cctx, tables);
|
||||
|
||||
pcre2_set_compile_extra_options(cctx, eoptions);
|
||||
|
||||
/* Compile pattern and display a warning if compilation failed. */
|
||||
re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
|
||||
|
||||
|
||||
101
ext/pcre/tests/preg_match_caseless_restrict.phpt
Normal file
101
ext/pcre/tests/preg_match_caseless_restrict.phpt
Normal file
@@ -0,0 +1,101 @@
|
||||
--TEST--
|
||||
testing /r modifier in preg_* functions
|
||||
--FILE--
|
||||
<?php
|
||||
echo "SK substitute matching" . PHP_EOL;
|
||||
var_dump(preg_match('/AskZ/iur', 'AskZ')); // match
|
||||
var_dump(preg_match('/AskZ/iur', 'aSKz')); // match
|
||||
var_dump(preg_match('/AskZ/iur', "A\u{17f}kZ")); // no match
|
||||
var_dump(preg_match('/AskZ/iur', "As\u{212a}Z")); // no match
|
||||
var_dump(preg_match('/AskZ/iu', 'AskZ')); // match
|
||||
var_dump(preg_match('/AskZ/iu', 'aSKz')); // match
|
||||
var_dump(preg_match('/AskZ/iu', "A\u{17f}kZ")); // match
|
||||
var_dump(preg_match('/AskZ/iu', "As\u{212a}Z")); // match
|
||||
|
||||
echo "K substitute matching" . PHP_EOL;
|
||||
var_dump(preg_match('/k/iu', "\u{212A}"));
|
||||
var_dump(preg_match('/k/iur', "\u{212A}"));
|
||||
|
||||
echo "non-ASCII in expressions" . PHP_EOL;
|
||||
var_dump(preg_match('/A\x{17f}\x{212a}Z/iu', 'AskZ')); // match
|
||||
var_dump(preg_match('/A\x{17f}\x{212a}Z/iur', 'AskZ')); // no match
|
||||
|
||||
echo "Character sets" . PHP_EOL;
|
||||
var_dump(preg_match('/[AskZ]+/iur', 'AskZ')); // match
|
||||
var_dump(preg_match('/[AskZ]+/iur', 'aSKz')); // match
|
||||
var_dump(preg_match('/[AskZ]+/iur', "A\u{17f}kZ")); // match
|
||||
var_dump(preg_match('/[AskZ]+/iur', "As\u{212a}Z")); // match
|
||||
var_dump(preg_match('/[AskZ]+/iu', 'AskZ')); // match
|
||||
var_dump(preg_match('/[AskZ]+/iu', 'aSKz')); // match
|
||||
var_dump(preg_match('/[AskZ]+/iu', "A\u{17f}kZ")); // match
|
||||
var_dump(preg_match('/[AskZ]+/iu', "As\u{212a}Z")); // match
|
||||
|
||||
echo "non-ASCII in character sets" . PHP_EOL;
|
||||
var_dump(preg_match('/[\x{17f}\x{212a}]+/iur', 'AskZ')); // no match
|
||||
var_dump(preg_match('/[\x{17f}\x{212a}]+/iu', 'AskZ')); // match
|
||||
|
||||
echo "Meta characters and negate character sets". PHP_EOL;
|
||||
var_dump(preg_match('/[^s]+/iur', "A\u{17f}Z")); // match
|
||||
var_dump(preg_match('/[^s]+/iu', "A\u{17f}Z")); // match
|
||||
var_dump(preg_match('/[^s]+/iu', "A\u{17f}Z")); // match
|
||||
var_dump(preg_match('/[^k]+/iur', "A\u{212a}Z")); // match
|
||||
var_dump(preg_match('/[^k]+/iu', "A\u{212a}Z")); // match
|
||||
var_dump(preg_match('/[^sk]+/iur', "A\u{17f}\u{212a}Z")); // match
|
||||
var_dump(preg_match('/[^sk]+/iu', "A\u{17f}\u{212a}Z")); // match
|
||||
var_dump(preg_match('/[^\x{17f}]+/iur', "AsSZ")); // match
|
||||
var_dump(preg_match('/[^\x{17f}]+/iu', "AsSZ")); // match
|
||||
|
||||
echo "Modifier used within the expression" . PHP_EOL;
|
||||
var_dump(preg_match('/s(?r)s(?-r)s(?r:s)s/iu', "\u{17f}S\u{17f}S\u{17f}")); // match
|
||||
var_dump(preg_match('/s(?r)s(?-r)s(?r:s)s/iu', "\u{17f}\u{17f}\u{17f}S\u{17f}")); // no match
|
||||
var_dump(preg_match('/s(?r)s(?-r)s(?r:s)s/iu', "\u{17f}S\u{17f}\u{17f}\u{17f}")); // no match
|
||||
var_dump(preg_match('/k(?^i)k/iur', "K\u{212a}")); // match
|
||||
var_dump(preg_match('/k(?^i)k/iur', "\u{212a}\u{212a}")); // no match
|
||||
|
||||
echo "Done";
|
||||
?>
|
||||
--EXPECT--
|
||||
SK substitute matching
|
||||
int(1)
|
||||
int(1)
|
||||
int(0)
|
||||
int(0)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
K substitute matching
|
||||
int(1)
|
||||
int(0)
|
||||
non-ASCII in expressions
|
||||
int(1)
|
||||
int(0)
|
||||
Character sets
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
non-ASCII in character sets
|
||||
int(0)
|
||||
int(1)
|
||||
Meta characters and negate character sets
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
int(1)
|
||||
Modifier used within the expression
|
||||
int(1)
|
||||
int(0)
|
||||
int(0)
|
||||
int(1)
|
||||
int(0)
|
||||
Done
|
||||
Reference in New Issue
Block a user