mirror of
https://github.com/php/php-src.git
synced 2026-04-30 03:33:17 +02:00
MFB: upgrade to PCRE 7.3
This commit is contained in:
+1
-1
@@ -5,7 +5,7 @@ ARG_WITH("pcre-regex", "Perl Compatible Regular Expressions", "yes");
|
||||
|
||||
if (PHP_PCRE_REGEX == "yes") {
|
||||
EXTENSION("pcre", "php_pcre.c", PHP_PCRE_REGEX_SHARED,
|
||||
"-DNO_RECURSE -Iext/pcre/pcrelib");
|
||||
"-DNO_RECURSE -DHAVE_CONFIG_H -Iext/pcre/pcrelib");
|
||||
ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucp_searchfuncs.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
|
||||
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
|
||||
|
||||
|
||||
+1
-1
@@ -11,7 +11,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,
|
||||
|
||||
if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
if test "$PHP_PCRE_REGEX" = "yes"; then
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_newline.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-I@ext_srcdir@/pcrelib)
|
||||
PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_newline.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
|
||||
PHP_ADD_INCLUDE(pcrelib)
|
||||
|
||||
+166
-1
@@ -1,7 +1,172 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 7.2 19-June-07
|
||||
Version 7.3 28-Aug-07
|
||||
---------------------
|
||||
|
||||
1. In the rejigging of the build system that eventually resulted in 7.1, the
|
||||
line "#include <pcre.h>" was included in pcre_internal.h. The use of angle
|
||||
brackets there is not right, since it causes compilers to look for an
|
||||
installed pcre.h, not the version that is in the source that is being
|
||||
compiled (which of course may be different). I have changed it back to:
|
||||
|
||||
#include "pcre.h"
|
||||
|
||||
I have a vague recollection that the change was concerned with compiling in
|
||||
different directories, but in the new build system, that is taken care of
|
||||
by the VPATH setting in the Makefile.
|
||||
|
||||
2. The pattern .*$ when run in not-DOTALL UTF-8 mode with newline=any failed
|
||||
when the subject happened to end in the byte 0x85 (e.g. if the last
|
||||
character was \x{1ec5}). *Character* 0x85 is one of the "any" newline
|
||||
characters but of course it shouldn't be taken as a newline when it is part
|
||||
of another character. The bug was that, for an unlimited repeat of . in
|
||||
not-DOTALL UTF-8 mode, PCRE was advancing by bytes rather than by
|
||||
characters when looking for a newline.
|
||||
|
||||
3. A small performance improvement in the DOTALL UTF-8 mode .* case.
|
||||
|
||||
4. Debugging: adjusted the names of opcodes for different kinds of parentheses
|
||||
in debug output.
|
||||
|
||||
5. Arrange to use "%I64d" instead of "%lld" and "%I64u" instead of "%llu" for
|
||||
long printing in the pcrecpp unittest when running under MinGW.
|
||||
|
||||
6. ESC_K was left out of the EBCDIC table.
|
||||
|
||||
7. Change 7.0/38 introduced a new limit on the number of nested non-capturing
|
||||
parentheses; I made it 1000, which seemed large enough. Unfortunately, the
|
||||
limit also applies to "virtual nesting" when a pattern is recursive, and in
|
||||
this case 1000 isn't so big. I have been able to remove this limit at the
|
||||
expense of backing off one optimization in certain circumstances. Normally,
|
||||
when pcre_exec() would call its internal match() function recursively and
|
||||
immediately return the result unconditionally, it uses a "tail recursion"
|
||||
feature to save stack. However, when a subpattern that can match an empty
|
||||
string has an unlimited repetition quantifier, it no longer makes this
|
||||
optimization. That gives it a stack frame in which to save the data for
|
||||
checking that an empty string has been matched. Previously this was taken
|
||||
from the 1000-entry workspace that had been reserved. So now there is no
|
||||
explicit limit, but more stack is used.
|
||||
|
||||
8. Applied Daniel's patches to solve problems with the import/export magic
|
||||
syntax that is required for Windows, and which was going wrong for the
|
||||
pcreposix and pcrecpp parts of the library. These were overlooked when this
|
||||
problem was solved for the main library.
|
||||
|
||||
9. There were some crude static tests to avoid integer overflow when computing
|
||||
the size of patterns that contain repeated groups with explicit upper
|
||||
limits. As the maximum quantifier is 65535, the maximum group length was
|
||||
set at 30,000 so that the product of these two numbers did not overflow a
|
||||
32-bit integer. However, it turns out that people want to use groups that
|
||||
are longer than 30,000 bytes (though not repeat them that many times).
|
||||
Change 7.0/17 (the refactoring of the way the pattern size is computed) has
|
||||
made it possible to implement the integer overflow checks in a much more
|
||||
dynamic way, which I have now done. The artificial limitation on group
|
||||
length has been removed - we now have only the limit on the total length of
|
||||
the compiled pattern, which depends on the LINK_SIZE setting.
|
||||
|
||||
10. Fixed a bug in the documentation for get/copy named substring when
|
||||
duplicate names are permitted. If none of the named substrings are set, the
|
||||
functions return PCRE_ERROR_NOSUBSTRING (7); the doc said they returned an
|
||||
empty string.
|
||||
|
||||
11. Because Perl interprets \Q...\E at a high level, and ignores orphan \E
|
||||
instances, patterns such as [\Q\E] or [\E] or even [^\E] cause an error,
|
||||
because the ] is interpreted as the first data character and the
|
||||
terminating ] is not found. PCRE has been made compatible with Perl in this
|
||||
regard. Previously, it interpreted [\Q\E] as an empty class, and [\E] could
|
||||
cause memory overwriting.
|
||||
|
||||
10. Like Perl, PCRE automatically breaks an unlimited repeat after an empty
|
||||
string has been matched (to stop an infinite loop). It was not recognizing
|
||||
a conditional subpattern that could match an empty string if that
|
||||
subpattern was within another subpattern. For example, it looped when
|
||||
trying to match (((?(1)X|))*) but it was OK with ((?(1)X|)*) where the
|
||||
condition was not nested. This bug has been fixed.
|
||||
|
||||
12. A pattern like \X?\d or \P{L}?\d in non-UTF-8 mode could cause a backtrack
|
||||
past the start of the subject in the presence of bytes with the top bit
|
||||
set, for example "\x8aBCD".
|
||||
|
||||
13. Added Perl 5.10 experimental backtracking controls (*FAIL), (*F), (*PRUNE),
|
||||
(*SKIP), (*THEN), (*COMMIT), and (*ACCEPT).
|
||||
|
||||
14. Optimized (?!) to (*FAIL).
|
||||
|
||||
15. Updated the test for a valid UTF-8 string to conform to the later RFC 3629.
|
||||
This restricts code points to be within the range 0 to 0x10FFFF, excluding
|
||||
the surrogate range 0xD800 to 0xDFFF. Previously, PCRE allowed the
|
||||
full range 0 to 0x7FFFFFFF, as defined by RFC 2279. Internally, it still
|
||||
does: it's just the validity check that is more restrictive.
|
||||
|
||||
16. Inserted checks for integer overflows during escape sequence (backslash)
|
||||
processing, and also fixed erroneous offset values for syntax errors during
|
||||
backslash processing.
|
||||
|
||||
17. Fixed another case of looking too far back in non-UTF-8 mode (cf 12 above)
|
||||
for patterns like [\PPP\x8a]{1,}\x80 with the subject "A\x80".
|
||||
|
||||
18. An unterminated class in a pattern like (?1)\c[ with a "forward reference"
|
||||
caused an overrun.
|
||||
|
||||
19. A pattern like (?:[\PPa*]*){8,} which had an "extended class" (one with
|
||||
something other than just ASCII characters) inside a group that had an
|
||||
unlimited repeat caused a loop at compile time (while checking to see
|
||||
whether the group could match an empty string).
|
||||
|
||||
20. Debugging a pattern containing \p or \P could cause a crash. For example,
|
||||
[\P{Any}] did so. (Error in the code for printing property names.)
|
||||
|
||||
21. An orphan \E inside a character class could cause a crash.
|
||||
|
||||
22. A repeated capturing bracket such as (A)? could cause a wild memory
|
||||
reference during compilation.
|
||||
|
||||
23. There are several functions in pcre_compile() that scan along a compiled
|
||||
expression for various reasons (e.g. to see if it's fixed length for look
|
||||
behind). There were bugs in these functions when a repeated \p or \P was
|
||||
present in the pattern. These operators have additional parameters compared
|
||||
with \d, etc, and these were not being taken into account when moving along
|
||||
the compiled data. Specifically:
|
||||
|
||||
(a) An item such as \p{Yi}{3} in a lookbehind was not treated as fixed
|
||||
length.
|
||||
|
||||
(b) An item such as \pL+ within a repeated group could cause crashes or
|
||||
loops.
|
||||
|
||||
(c) A pattern such as \p{Yi}+(\P{Yi}+)(?1) could give an incorrect
|
||||
"reference to non-existent subpattern" error.
|
||||
|
||||
(d) A pattern like (\P{Yi}{2}\277)? could loop at compile time.
|
||||
|
||||
24. A repeated \S or \W in UTF-8 mode could give wrong answers when multibyte
|
||||
characters were involved (for example /\S{2}/8g with "A\x{a3}BC").
|
||||
|
||||
25. Using pcregrep in multiline, inverted mode (-Mv) caused it to loop.
|
||||
|
||||
26. Patterns such as [\P{Yi}A] which include \p or \P and just one other
|
||||
character were causing crashes (broken optimization).
|
||||
|
||||
27. Patterns such as (\P{Yi}*\277)* (group with possible zero repeat containing
|
||||
\p or \P) caused a compile-time loop.
|
||||
|
||||
28. More problems have arisen in unanchored patterns when CRLF is a valid line
|
||||
break. For example, the unstudied pattern [\r\n]A does not match the string
|
||||
"\r\nA" because change 7.0/46 below moves the current point on by two
|
||||
characters after failing to match at the start. However, the pattern \nA
|
||||
*does* match, because it doesn't start till \n, and if [\r\n]A is studied,
|
||||
the same is true. There doesn't seem any very clean way out of this, but
|
||||
what I have chosen to do makes the common cases work: PCRE now takes note
|
||||
of whether there can be an explicit match for \r or \n anywhere in the
|
||||
pattern, and if so, 7.0/46 no longer applies. As part of this change,
|
||||
there's a new PCRE_INFO_HASCRORLF option for finding out whether a compiled
|
||||
pattern has explicit CR or LF references.
|
||||
|
||||
29. Added (*CR) etc for changing newline setting at start of pattern.
|
||||
|
||||
|
||||
Version 7.2 19-Jun-07
|
||||
---------------------
|
||||
|
||||
1. If the fr_FR locale cannot be found for test 3, try the "french" locale,
|
||||
|
||||
@@ -109,15 +109,15 @@ variable length. The first byte in an item is an opcode, and the length of the
|
||||
item is either implicit in the opcode or contained in the data bytes that
|
||||
follow it.
|
||||
|
||||
In many cases below "two-byte" data values are specified. This is in fact just
|
||||
a default when the number is an offset within the compiled pattern. PCRE can be
|
||||
In many cases below LINK_SIZE data values are specified for offsets within the
|
||||
compiled pattern. The default value for LINK_SIZE is 2, but PCRE can be
|
||||
compiled to use 3-byte or 4-byte values for these offsets (impairing the
|
||||
performance). This is necessary only when patterns whose compiled length is
|
||||
greater than 64K are going to be processed. In this description, we assume the
|
||||
"normal" compilation options. "Two-byte" data values that are counts (e.g. for
|
||||
quantifiers) are always just two bytes.
|
||||
"normal" compilation options. Data values that are counts (e.g. for
|
||||
quantifiers) are always just two bytes long.
|
||||
|
||||
A list of all the opcodes follows:
|
||||
A list of the opcodes follows:
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
@@ -149,6 +149,13 @@ These items are all just one byte long
|
||||
OP_EXTUNI match an extended Unicode character
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ACCEPT )
|
||||
OP_COMMIT )
|
||||
OP_FAIL ) These are Perl 5.10's "backtracking
|
||||
OP_PRUNE ) control verbs".
|
||||
OP_SKIP )
|
||||
OP_THEN )
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
@@ -404,4 +411,4 @@ at compile time, and so does not cause anything to be put into the compiled
|
||||
data.
|
||||
|
||||
Philip Hazel
|
||||
June 2007
|
||||
August 2007
|
||||
|
||||
@@ -2,6 +2,30 @@ News about PCRE releases
|
||||
------------------------
|
||||
|
||||
|
||||
Release 7.3 28-Aug-07
|
||||
---------------------
|
||||
|
||||
Most changes are bug fixes. Some that are not:
|
||||
|
||||
1. There is some support for Perl 5.10's experimental "backtracking control
|
||||
verbs" such as (*PRUNE).
|
||||
|
||||
2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more
|
||||
restrictive in the strings it accepts.
|
||||
|
||||
3. Checking for potential integer overflow has been made more dynamic, and as a
|
||||
consequence there is no longer a hard limit on the size of a subpattern that
|
||||
has a limited repeat count.
|
||||
|
||||
4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec()
|
||||
no longer advance by two characters instead of one when an unanchored match
|
||||
fails at CRLF if there are explicit CR or LF matches within the pattern.
|
||||
This gets rid of some anomalous effects that previously occurred.
|
||||
|
||||
5. Some PCRE-specific settings for varying the newline options at the start of
|
||||
a pattern have been added.
|
||||
|
||||
|
||||
Release 7.2 19-Jun-07
|
||||
---------------------
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ This document contains the following sections:
|
||||
Generic instructions for the PCRE C library
|
||||
The C++ wrapper functions
|
||||
Building for virtual Pascal
|
||||
Stack size in Windows environments
|
||||
Comments about Win32 builds
|
||||
Building under Windows with BCC5.5
|
||||
Building PCRE on OpenVMS
|
||||
@@ -14,7 +15,7 @@ This document contains the following sections:
|
||||
|
||||
GENERAL
|
||||
|
||||
I (Philip Hazel) have no knowledge of Windows or VMS sytems and how their
|
||||
I (Philip Hazel) have no experience of Windows or VMS systems and how their
|
||||
libraries work. The items in the PCRE distribution and Makefile that relate to
|
||||
anything other than Unix-like systems are untested by me.
|
||||
|
||||
@@ -38,79 +39,97 @@ GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
|
||||
|
||||
The following are generic comments about building the PCRE C library "by hand".
|
||||
|
||||
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||
settings that it contains to whatever is appropriate for your environment.
|
||||
In particular, if you want to force a specific value for newline, you can
|
||||
define the NEWLINE macro.
|
||||
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
|
||||
settings that it contains to whatever is appropriate for your environment.
|
||||
In particular, if you want to force a specific value for newline, you can
|
||||
define the NEWLINE macro.
|
||||
|
||||
An alternative approach is not to edit config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need.
|
||||
An alternative approach is not to edit config.h, but to use -D on the
|
||||
compiler command line to make any changes that you need.
|
||||
|
||||
NOTE: There have been occasions when the way in which certain parameters in
|
||||
config.h are used has changed between releases. (In the configure/make
|
||||
world, this is handled automatically.) When upgrading to a new release, you
|
||||
are strongly advised to review config.h.generic before re-using what you
|
||||
had previously.
|
||||
NOTE: There have been occasions when the way in which certain parameters
|
||||
in config.h are used has changed between releases. (In the configure/make
|
||||
world, this is handled automatically.) When upgrading to a new release,
|
||||
you are strongly advised to review config.h.generic before re-using what
|
||||
you had previously.
|
||||
|
||||
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||
(2) Copy or rename the file pcre.h.generic as pcre.h.
|
||||
|
||||
(3) EITHER:
|
||||
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||
(3) EITHER:
|
||||
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
|
||||
|
||||
OR:
|
||||
Compile dftables.c as a stand-alone program, and then run it with the
|
||||
single argument "pcre_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on
|
||||
a system that uses EBCDIC code.
|
||||
OR:
|
||||
Compile dftables.c as a stand-alone program, and then run it with the
|
||||
single argument "pcre_chartables.c". This generates a set of standard
|
||||
character tables and writes them to that file. The tables are generated
|
||||
using the default C locale for your system. If you want to use a locale
|
||||
that is specified by LC_xxx environment variables, add the -L option to
|
||||
the dftables command. You must use this method if you are building on
|
||||
a system that uses EBCDIC code.
|
||||
|
||||
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||
specify alternative tables at run time.
|
||||
The tables in pcre_chartables.c are defaults. The caller of PCRE can
|
||||
specify alternative tables at run time.
|
||||
|
||||
(4) Compile the following source files:
|
||||
(4) Ensure that you have the following header files:
|
||||
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_info.c
|
||||
pcre_maketables.c
|
||||
pcre_newline.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucp_searchfuncs.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
pcre_internal.h
|
||||
ucp.h
|
||||
ucpinternal.h
|
||||
ucptable.h
|
||||
|
||||
Now link them all together into an object library in whichever form your
|
||||
system keeps such libraries. This is the basic PCRE C library. If your
|
||||
system has static and shared libraries, you may have to do this once for
|
||||
each type.
|
||||
(5) Also ensure that you have the following file, which is #included as source
|
||||
when building a debugging version of PCRE and is also used by pcretest.
|
||||
|
||||
(5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix
|
||||
library.
|
||||
pcre_printint.src
|
||||
|
||||
(6) Compile the test program pcretest.c. This needs the functions in the
|
||||
pcre and pcreposix libraries when linking.
|
||||
(6) Compile the following source files:
|
||||
|
||||
(7) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. Note that the
|
||||
supplied files are in Unix format, with just LF characters as line
|
||||
terminators. You may need to edit them to change this if your system uses a
|
||||
different convention.
|
||||
pcre_chartables.c
|
||||
pcre_compile.c
|
||||
pcre_config.c
|
||||
pcre_dfa_exec.c
|
||||
pcre_exec.c
|
||||
pcre_fullinfo.c
|
||||
pcre_get.c
|
||||
pcre_globals.c
|
||||
pcre_info.c
|
||||
pcre_maketables.c
|
||||
pcre_newline.c
|
||||
pcre_ord2utf8.c
|
||||
pcre_refcount.c
|
||||
pcre_study.c
|
||||
pcre_tables.c
|
||||
pcre_try_flipped.c
|
||||
pcre_ucp_searchfuncs.c
|
||||
pcre_valid_utf8.c
|
||||
pcre_version.c
|
||||
pcre_xclass.c
|
||||
|
||||
(8) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic PCRE library (it does not need the pcreposix library).
|
||||
Make sure that you include -I. in the compiler command (or equivalent for
|
||||
an unusual compiler) so that all included PCRE header files are first
|
||||
sought in the current directory. Otherwise you run the risk of picking up
|
||||
a previously-installed file from somewhere else.
|
||||
|
||||
(7) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE C library. If
|
||||
your system has static and shared libraries, you may have to do this once
|
||||
for each type.
|
||||
|
||||
(8) Similarly, compile pcreposix.c and link the result (on its own) as the
|
||||
pcreposix library.
|
||||
|
||||
(9) Compile the test program pcretest.c. This needs the functions in the
|
||||
pcre and pcreposix libraries when linking. It also needs the
|
||||
pcre_printint.src source file, which it #includes.
|
||||
|
||||
(10) Run pcretest on the testinput files in the testdata directory, and check
|
||||
that the output matches the corresponding testoutput files. Note that the
|
||||
supplied files are in Unix format, with just LF characters as line
|
||||
terminators. You may need to edit them to change this if your system uses
|
||||
a different convention.
|
||||
|
||||
(11) If you want to use the pcregrep command, compile and link pcregrep.c; it
|
||||
uses only the basic PCRE library (it does not need the pcreposix library).
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
@@ -131,6 +150,18 @@ additional files. The following files in the distribution are for building PCRE
|
||||
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
||||
The default processor stack size of 1Mb in some Windows environments is too
|
||||
small for matching patterns that need much recursion. In particular, test 2 may
|
||||
fail because of this. Normally, running out of stack causes a crash, but there
|
||||
have been cases where the test program has just died silently. See your linker
|
||||
documentation for how to increase stack size if you experience problems. The
|
||||
Linux default of 8Mb is a reasonable choice for the stack, though even that can
|
||||
be too small for some pattern/subject combinations. There is more about stack
|
||||
usage in the "pcrestack" documentation.
|
||||
|
||||
|
||||
COMMENTS ABOUT WIN32 BUILDS
|
||||
|
||||
There are two ways of building PCRE using the "configure, make, make install"
|
||||
@@ -284,5 +315,5 @@ $! Locale could not be set to fr
|
||||
$!
|
||||
=========================
|
||||
|
||||
Last Updated: 13 June 2007
|
||||
Last Updated: 01 August 2007
|
||||
****
|
||||
|
||||
@@ -175,13 +175,6 @@ them both to 0; an emulation function will be used. */
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_DUPLENGTH
|
||||
#define MAX_DUPLENGTH 30000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
@@ -224,13 +217,13 @@ them both to 0; an emulation function will be used. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.2"
|
||||
#define PACKAGE_STRING "PCRE 7.3"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.2"
|
||||
#define PACKAGE_VERSION "7.3"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
@@ -272,7 +265,7 @@ them both to 0; an emulation function will be used. */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "7.2"
|
||||
#define VERSION "7.3"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
|
||||
@@ -43,6 +43,10 @@ character tables for PCRE. The tables are built according to the current
|
||||
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||
make use of its code from here in order to be consistent. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
@@ -99,12 +103,15 @@ fprintf(f,
|
||||
"tables are passed to PCRE by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256.\n\n");
|
||||
fprintf(f,
|
||||
"The following #include is present because without it gcc 4.x may remove\n"
|
||||
"The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n"
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include <config.h>\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre_internal.h\"\n\n");
|
||||
fprintf(f,
|
||||
"const unsigned char _pcre_default_tables[] = {\n\n"
|
||||
|
||||
+1399
-839
File diff suppressed because it is too large
Load Diff
+26
-10
@@ -42,19 +42,25 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 2
|
||||
#define PCRE_MINOR 3
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2007-06-19
|
||||
#define PCRE_DATE 2007-08-28
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change an existing definition of PCRE_EXP_DECL. */
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
@@ -63,9 +69,18 @@ don't change an existing definition of PCRE_EXP_DECL. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -132,7 +147,7 @@ extern "C" {
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22)
|
||||
#define PCRE_ERROR_NOTUSED (-22)
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
@@ -152,6 +167,7 @@ extern "C" {
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
@@ -14,12 +14,16 @@ example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #include is present because without it gcc 4.x may remove the
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
||||
+330
-80
@@ -42,11 +42,14 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting internal functions that are not used by other modules. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#define NLBLOCK cd /* Block containing newline information */
|
||||
#define PSSTART start_pattern /* Field containing processed string start */
|
||||
#define PSEND end_pattern /* Field containing processed string end */
|
||||
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@@ -62,6 +65,13 @@ used by pcretest. DEBUG is not defined when building a production library. */
|
||||
|
||||
#define SETBIT(a,b) a[b/8] |= (1 << (b%8))
|
||||
|
||||
/* Maximum length value to check against when making sure that the integer that
|
||||
holds the compiled pattern length does not overflow. We make it a bit less than
|
||||
INT_MAX to allow for adding in group terminating bytes, so that we don't have
|
||||
to check them every time. */
|
||||
|
||||
#define OFLOW_MAX (INT_MAX - 20)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Code parameters and static tables *
|
||||
@@ -120,7 +130,7 @@ static const short int escapes[] = {
|
||||
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
|
||||
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,
|
||||
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,
|
||||
/* D0 */ '}', 0, -ESC_K, 0, 0, 0, 0, -ESC_P,
|
||||
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0,
|
||||
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X,
|
||||
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,
|
||||
@@ -130,6 +140,27 @@ static const short int escapes[] = {
|
||||
#endif
|
||||
|
||||
|
||||
/* Table of special "verbs" like (*PRUNE) */
|
||||
|
||||
typedef struct verbitem {
|
||||
const char *name;
|
||||
int len;
|
||||
int op;
|
||||
} verbitem;
|
||||
|
||||
static verbitem verbs[] = {
|
||||
{ "ACCEPT", 6, OP_ACCEPT },
|
||||
{ "COMMIT", 6, OP_COMMIT },
|
||||
{ "F", 1, OP_FAIL },
|
||||
{ "FAIL", 4, OP_FAIL },
|
||||
{ "PRUNE", 5, OP_PRUNE },
|
||||
{ "SKIP", 4, OP_SKIP },
|
||||
{ "THEN", 4, OP_THEN }
|
||||
};
|
||||
|
||||
static int verbcount = sizeof(verbs)/sizeof(verbitem);
|
||||
|
||||
|
||||
/* Tables of names of POSIX character classes and their lengths. The list is
|
||||
terminated by a zero length entry. The first three must be alpha, lower, upper,
|
||||
as this is assumed for handling case independence. */
|
||||
@@ -203,7 +234,7 @@ static const char *error_texts[] = {
|
||||
"missing ) after comment",
|
||||
"parentheses nested too deeply", /** DEAD **/
|
||||
/* 20 */
|
||||
"regular expression too large",
|
||||
"regular expression is too large",
|
||||
"failed to get memory",
|
||||
"unmatched parentheses",
|
||||
"internal error: code overflow",
|
||||
@@ -239,7 +270,7 @@ static const char *error_texts[] = {
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)",
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")",
|
||||
/* 50 */
|
||||
"repeated subpattern is too long",
|
||||
"repeated subpattern is too long", /** DEAD **/
|
||||
"octal value is greater than \\377 (not in UTF-8 mode)",
|
||||
"internal error: overran compiling workspace",
|
||||
"internal error: previously-checked referenced subpattern not found",
|
||||
@@ -248,7 +279,11 @@ static const char *error_texts[] = {
|
||||
"repeating a DEFINE group is not allowed",
|
||||
"inconsistent NEWLINE options",
|
||||
"\\g is not followed by a braced name or an optionally braced non-zero number",
|
||||
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
|
||||
"(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number",
|
||||
"(*VERB) with an argument is not supported",
|
||||
/* 60 */
|
||||
"(*VERB) not recognized",
|
||||
"number is too big"
|
||||
};
|
||||
|
||||
|
||||
@@ -405,7 +440,7 @@ Arguments:
|
||||
|
||||
Returns: zero or positive => a data character
|
||||
negative => a special escape sequence
|
||||
on error, errorptr is set
|
||||
on error, errorcodeptr is set
|
||||
*/
|
||||
|
||||
static int
|
||||
@@ -490,10 +525,16 @@ else
|
||||
while ((digitab[ptr[1]] & ctype_digit) != 0)
|
||||
c = c * 10 + *(++ptr) - '0';
|
||||
|
||||
if (c < 0)
|
||||
{
|
||||
*errorcodeptr = ERR61;
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == 0 || (braced && *(++ptr) != '}'))
|
||||
{
|
||||
*errorcodeptr = ERR57;
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (negated)
|
||||
@@ -501,7 +542,7 @@ else
|
||||
if (c > bracount)
|
||||
{
|
||||
*errorcodeptr = ERR15;
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
c = bracount - (c - 1);
|
||||
}
|
||||
@@ -530,6 +571,11 @@ else
|
||||
c -= '0';
|
||||
while ((digitab[ptr[1]] & ctype_digit) != 0)
|
||||
c = c * 10 + *(++ptr) - '0';
|
||||
if (c < 0)
|
||||
{
|
||||
*errorcodeptr = ERR61;
|
||||
break;
|
||||
}
|
||||
if (c < 10 || c <= bracount)
|
||||
{
|
||||
c = -(ESC_REF + c);
|
||||
@@ -625,7 +671,7 @@ else
|
||||
if (c == 0)
|
||||
{
|
||||
*errorcodeptr = ERR2;
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifndef EBCDIC /* ASCII coding */
|
||||
@@ -701,7 +747,7 @@ if (c == '{')
|
||||
*negptr = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
for (i = 0; i < sizeof(name) - 1; i++)
|
||||
for (i = 0; i < (int)sizeof(name) - 1; i++)
|
||||
{
|
||||
c = *(++ptr);
|
||||
if (c == 0) goto ERROR_RETURN;
|
||||
@@ -904,6 +950,7 @@ for (; *ptr != 0; ptr++)
|
||||
{
|
||||
while (*(++ptr) != ']')
|
||||
{
|
||||
if (*ptr == 0) return -1;
|
||||
if (*ptr == '\\')
|
||||
{
|
||||
if (*(++ptr) == 0) return -1;
|
||||
@@ -931,7 +978,7 @@ for (; *ptr != 0; ptr++)
|
||||
/* An opening parens must now be a real metacharacter */
|
||||
|
||||
if (*ptr != '(') continue;
|
||||
if (ptr[1] != '?')
|
||||
if (ptr[1] != '?' && ptr[1] != '*')
|
||||
{
|
||||
count++;
|
||||
if (name == NULL && count == lorn) return count;
|
||||
@@ -1059,7 +1106,6 @@ for (;;)
|
||||
{
|
||||
int d;
|
||||
register int op = *cc;
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case OP_CBRA:
|
||||
@@ -1148,6 +1194,7 @@ for (;;)
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
branchlength += GET2(cc,1);
|
||||
if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
|
||||
cc += 4;
|
||||
break;
|
||||
|
||||
@@ -1256,13 +1303,42 @@ for (;;)
|
||||
code += _pcre_OP_lengths[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += _pcre_OP_lengths[c];
|
||||
|
||||
/* In UTF-8 mode, opcodes that are followed by a character may be followed by
|
||||
a multi-byte character. The length in the table is a minimum, so we have to
|
||||
arrange to skip the extra bytes. */
|
||||
|
||||
else
|
||||
{
|
||||
code += _pcre_OP_lengths[c];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
@@ -1320,14 +1396,42 @@ for (;;)
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
|
||||
/* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes
|
||||
that are followed by a character may be followed by a multi-byte character.
|
||||
The length in the table is a minimum, so we have to arrange to skip the extra
|
||||
bytes. */
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEPOSUPTO:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += _pcre_OP_lengths[c];
|
||||
|
||||
/* In UTF-8 mode, opcodes that are followed by a character may be followed
|
||||
by a multi-byte character. The length in the table is a minimum, so we have
|
||||
to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) switch(c)
|
||||
{
|
||||
@@ -1399,7 +1503,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
|
||||
/* For other groups, scan the branches. */
|
||||
|
||||
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
|
||||
if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
|
||||
{
|
||||
BOOL empty_branch;
|
||||
if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
|
||||
@@ -1423,11 +1527,15 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
|
||||
switch (c)
|
||||
{
|
||||
/* Check for quantifiers after a class */
|
||||
/* Check for quantifiers after a class. XCLASS is used for classes that
|
||||
cannot be represented just by a bit map. This includes negated single
|
||||
high-valued characters. The length in _pcre_OP_lengths[] is zero; the
|
||||
actual length is stored in the compiled code, so we must update "code"
|
||||
here. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
case OP_XCLASS:
|
||||
ccode = code + GET(code, 1);
|
||||
ccode = code += GET(code, 1);
|
||||
goto CHECK_CLASS_REPEAT;
|
||||
#endif
|
||||
|
||||
@@ -1489,6 +1597,26 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
case OP_TYPEEXACT:
|
||||
return FALSE;
|
||||
|
||||
/* These are going to continue, as they may be empty, but we have to
|
||||
fudge the length for the \p and \P cases. */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
/* Same for these */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
/* End of branch */
|
||||
|
||||
case OP_KET:
|
||||
@@ -1651,6 +1779,7 @@ adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,
|
||||
uschar *save_hwm)
|
||||
{
|
||||
uschar *ptr = group;
|
||||
|
||||
while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
|
||||
{
|
||||
int offset;
|
||||
@@ -2255,6 +2384,15 @@ for (;; ptr++)
|
||||
*/
|
||||
|
||||
if (code < last_code) code = last_code;
|
||||
|
||||
/* Paranoid check for integer overflow */
|
||||
|
||||
if (OFLOW_MAX - *lengthptr < code - last_code)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
*lengthptr += code - last_code;
|
||||
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
|
||||
|
||||
@@ -2367,6 +2505,11 @@ for (;; ptr++)
|
||||
*ptrptr = ptr;
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (OFLOW_MAX - *lengthptr < code - last_code)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
goto FAILED;
|
||||
}
|
||||
*lengthptr += code - last_code; /* To include callout length */
|
||||
DPRINTF((">> end branch\n"));
|
||||
}
|
||||
@@ -2429,16 +2572,23 @@ for (;; ptr++)
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* If the first character is '^', set the negation flag and skip it. */
|
||||
/* If the first character is '^', set the negation flag and skip it. Also,
|
||||
if the first few characters (either before or after ^) are \Q\E or \E we
|
||||
skip them too. This makes for compatibility with Perl. */
|
||||
|
||||
if ((c = *(++ptr)) == '^')
|
||||
negate_class = FALSE;
|
||||
for (;;)
|
||||
{
|
||||
negate_class = TRUE;
|
||||
c = *(++ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
negate_class = FALSE;
|
||||
if (c == '\\')
|
||||
{
|
||||
if (ptr[1] == 'E') ptr++;
|
||||
else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
|
||||
else break;
|
||||
}
|
||||
else if (!negate_class && c == '^')
|
||||
negate_class = TRUE;
|
||||
else break;
|
||||
}
|
||||
|
||||
/* Keep a count of chars with values < 256 so that we can optimize the case
|
||||
@@ -2579,7 +2729,7 @@ for (;; ptr++)
|
||||
of the specials, which just set a flag. The sequence \b is a special
|
||||
case. Inside a class (and only there) it is treated as backspace.
|
||||
Elsewhere it marks a word boundary. Other escapes have preset maps ready
|
||||
to or into the one we are building. We assume they have more than one
|
||||
to 'or' into the one we are building. We assume they have more than one
|
||||
character in them, so set class_charcount bigger than one. */
|
||||
|
||||
if (c == '\\')
|
||||
@@ -2599,6 +2749,7 @@ for (;; ptr++)
|
||||
else inescq = TRUE;
|
||||
continue;
|
||||
}
|
||||
else if (-c == ESC_E) continue; /* Ignore orphan \E */
|
||||
|
||||
if (c < 0)
|
||||
{
|
||||
@@ -3045,12 +3196,26 @@ for (;; ptr++)
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* Remember whether \r or \n are in this class */
|
||||
|
||||
if (negate_class)
|
||||
{
|
||||
if ((classbits[1] & 0x24) != 0x24) cd->external_options |= PCRE_HASCRORLF;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((classbits[1] & 0x24) != 0) cd->external_options |= PCRE_HASCRORLF;
|
||||
}
|
||||
|
||||
/* If class_charcount is 1, we saw precisely one character whose value is
|
||||
less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we
|
||||
can optimize the negative case only if there were no characters >= 128
|
||||
because OP_NOT and the related opcodes like OP_NOTSTAR operate on
|
||||
single-bytes only. This is an historical hangover. Maybe one day we can
|
||||
tidy these opcodes to handle multi-byte characters.
|
||||
less than 256. As long as there were no characters >= 128 and there was no
|
||||
use of \p or \P, in other words, no use of any XCLASS features, we can
|
||||
optimize.
|
||||
|
||||
In UTF-8 mode, we can optimize the negative case only if there were no
|
||||
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
|
||||
operate on single-bytes only. This is an historical hangover. Maybe one day
|
||||
we can tidy these opcodes to handle multi-byte characters.
|
||||
|
||||
The optimization throws away the bit map. We turn the item into a
|
||||
1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note
|
||||
@@ -3060,10 +3225,8 @@ for (;; ptr++)
|
||||
reqbyte, save the previous value for reinstating. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (class_charcount == 1 &&
|
||||
(!utf8 ||
|
||||
(!class_utf8 && (!negate_class || class_lastchar < 128))))
|
||||
|
||||
if (class_charcount == 1 && !class_utf8 &&
|
||||
(!utf8 || !negate_class || class_lastchar < 128))
|
||||
#else
|
||||
if (class_charcount == 1)
|
||||
#endif
|
||||
@@ -3521,14 +3684,6 @@ for (;; ptr++)
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* This is a paranoid check to stop integer overflow later on */
|
||||
|
||||
if (len > MAX_DUPLENGTH)
|
||||
{
|
||||
*errorcodeptr = ERR50;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* If the maximum repeat count is unlimited, find the end of the bracket
|
||||
by scanning through from the start, and compute the offset back to it
|
||||
from the current code pointer. There may be an OP_OPT setting following
|
||||
@@ -3617,10 +3772,21 @@ for (;; ptr++)
|
||||
if (repeat_min > 1)
|
||||
{
|
||||
/* In the pre-compile phase, we don't actually do the replication. We
|
||||
just adjust the length as if we had. */
|
||||
just adjust the length as if we had. Do some paranoid checks for
|
||||
potential integer overflow. */
|
||||
|
||||
if (lengthptr != NULL)
|
||||
*lengthptr += (repeat_min - 1)*length_prevgroup;
|
||||
{
|
||||
int delta = (repeat_min - 1)*length_prevgroup;
|
||||
if ((double)(repeat_min - 1)*(double)length_prevgroup >
|
||||
(double)INT_MAX ||
|
||||
OFLOW_MAX - *lengthptr < delta)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
goto FAILED;
|
||||
}
|
||||
*lengthptr += delta;
|
||||
}
|
||||
|
||||
/* This is compiling for real */
|
||||
|
||||
@@ -3658,11 +3824,23 @@ for (;; ptr++)
|
||||
/* In the pre-compile phase, we don't actually do the replication. We
|
||||
just adjust the length as if we had. For each repetition we must add 1
|
||||
to the length for BRAZERO and for all but the last repetition we must
|
||||
add 2 + 2*LINKSIZE to allow for the nesting that occurs. */
|
||||
add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
|
||||
paranoid checks to avoid integer overflow. */
|
||||
|
||||
if (lengthptr != NULL && repeat_max > 0)
|
||||
*lengthptr += repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
|
||||
2 - 2*LINK_SIZE; /* Last one doesn't nest */
|
||||
{
|
||||
int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
|
||||
2 - 2*LINK_SIZE; /* Last one doesn't nest */
|
||||
if ((double)repeat_max *
|
||||
(double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
|
||||
> (double)INT_MAX ||
|
||||
OFLOW_MAX - *lengthptr < delta)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
goto FAILED;
|
||||
}
|
||||
*lengthptr += delta;
|
||||
}
|
||||
|
||||
/* This is compiling for real */
|
||||
|
||||
@@ -3814,9 +3992,7 @@ for (;; ptr++)
|
||||
/* ===================================================================*/
|
||||
/* Start of nested parenthesized sub-expression, or comment or lookahead or
|
||||
lookbehind or option setting or condition or all the other extended
|
||||
parenthesis forms. First deal with the specials; all are introduced by ?,
|
||||
and the appearance of any of them means that this is not a capturing
|
||||
group. */
|
||||
parenthesis forms. */
|
||||
|
||||
case '(':
|
||||
newoptions = options;
|
||||
@@ -3825,7 +4001,44 @@ for (;; ptr++)
|
||||
save_hwm = cd->hwm;
|
||||
reset_bracount = FALSE;
|
||||
|
||||
if (*(++ptr) == '?')
|
||||
/* First deal with various "verbs" that can be introduced by '*'. */
|
||||
|
||||
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
|
||||
{
|
||||
int i, namelen;
|
||||
const uschar *name = ++ptr;
|
||||
previous = NULL;
|
||||
while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
|
||||
if (*ptr == ':')
|
||||
{
|
||||
*errorcodeptr = ERR59; /* Not supported */
|
||||
goto FAILED;
|
||||
}
|
||||
if (*ptr != ')')
|
||||
{
|
||||
*errorcodeptr = ERR60;
|
||||
goto FAILED;
|
||||
}
|
||||
namelen = ptr - name;
|
||||
for (i = 0; i < verbcount; i++)
|
||||
{
|
||||
if (namelen == verbs[i].len &&
|
||||
strncmp((char *)name, verbs[i].name, namelen) == 0)
|
||||
{
|
||||
*code = verbs[i].op;
|
||||
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i < verbcount) continue;
|
||||
*errorcodeptr = ERR60;
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* Deal with the extended parentheses; all are introduced by '?', and the
|
||||
appearance of any of them means that this is not a capturing group. */
|
||||
|
||||
else if (*ptr == '?')
|
||||
{
|
||||
int i, set, unset, namelen;
|
||||
int *optset;
|
||||
@@ -4067,8 +4280,14 @@ for (;; ptr++)
|
||||
|
||||
/* ------------------------------------------------------------ */
|
||||
case '!': /* Negative lookahead */
|
||||
bravalue = OP_ASSERT_NOT;
|
||||
ptr++;
|
||||
if (*ptr == ')') /* Optimize (?!) */
|
||||
{
|
||||
*code++ = OP_FAIL;
|
||||
previous = NULL;
|
||||
continue;
|
||||
}
|
||||
bravalue = OP_ASSERT_NOT;
|
||||
break;
|
||||
|
||||
|
||||
@@ -4617,23 +4836,29 @@ for (;; ptr++)
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
/* In the pre-compile phase, update the length by the length of the nested
|
||||
group, less the brackets at either end. Then reduce the compiled code to
|
||||
just the brackets so that it doesn't use much memory if it is duplicated by
|
||||
a quantifier. */
|
||||
/* In the pre-compile phase, update the length by the length of the group,
|
||||
less the brackets at either end. Then reduce the compiled code to just a
|
||||
set of non-capturing brackets so that it doesn't use much memory if it is
|
||||
duplicated by a quantifier. */
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
goto FAILED;
|
||||
}
|
||||
*lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
|
||||
code++;
|
||||
*code++ = OP_BRA;
|
||||
PUTINC(code, 0, 1 + LINK_SIZE);
|
||||
*code++ = OP_KET;
|
||||
PUTINC(code, 0, 1 + LINK_SIZE);
|
||||
break; /* No need to waste time with special character handling */
|
||||
}
|
||||
|
||||
/* Otherwise update the main code pointer to the end of the group. */
|
||||
|
||||
else code = tempcode;
|
||||
code = tempcode;
|
||||
|
||||
/* For a DEFINE group, required and first character settings are not
|
||||
relevant. */
|
||||
@@ -4837,6 +5062,11 @@ for (;; ptr++)
|
||||
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;
|
||||
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
|
||||
|
||||
/* Remember if \r or \n were seen */
|
||||
|
||||
if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
|
||||
cd->external_options |= PCRE_HASCRORLF;
|
||||
|
||||
/* Set the first and required bytes appropriately. If no previous first
|
||||
byte, set it from this character, but revert to none on a zero repeat.
|
||||
Otherwise, leave the firstbyte value alone, and don't change it on a zero
|
||||
@@ -5119,7 +5349,15 @@ for (;;)
|
||||
*ptrptr = ptr;
|
||||
*firstbyteptr = firstbyte;
|
||||
*reqbyteptr = reqbyte;
|
||||
if (lengthptr != NULL) *lengthptr += length;
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
if (OFLOW_MAX - *lengthptr < length)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
return FALSE;
|
||||
}
|
||||
*lengthptr += length;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@@ -5428,6 +5666,7 @@ real_pcre *re;
|
||||
int length = 1; /* For final END opcode */
|
||||
int firstbyte, reqbyte, newline;
|
||||
int errorcode = 0;
|
||||
int skipatstart = 0;
|
||||
#ifdef SUPPORT_UTF8
|
||||
BOOL utf8;
|
||||
#endif
|
||||
@@ -5506,13 +5745,33 @@ cd->fcc = tables + fcc_offset;
|
||||
cd->cbits = tables + cbits_offset;
|
||||
cd->ctypes = tables + ctypes_offset;
|
||||
|
||||
/* Check for newline settings at the start of the pattern, and remember the
|
||||
offset for later. */
|
||||
|
||||
if (ptr[0] == '(' && ptr[1] == '*')
|
||||
{
|
||||
int newnl = 0;
|
||||
if (strncmp((char *)(ptr+2), "CR)", 3) == 0)
|
||||
{ skipatstart = 5; newnl = PCRE_NEWLINE_CR; }
|
||||
else if (strncmp((char *)(ptr+2), "LF)", 3) == 0)
|
||||
{ skipatstart = 5; newnl = PCRE_NEWLINE_LF; }
|
||||
else if (strncmp((char *)(ptr+2), "CRLF)", 5) == 0)
|
||||
{ skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
|
||||
else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0)
|
||||
{ skipatstart = 6; newnl = PCRE_NEWLINE_ANY; }
|
||||
else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8) == 0)
|
||||
{ skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; }
|
||||
if (skipatstart > 0)
|
||||
options = (options & ~PCRE_NEWLINE_BITS) | newnl;
|
||||
}
|
||||
|
||||
/* Handle different types of newline. The three bits give seven cases. The
|
||||
current code allows for fixed one- or two-byte sequences, plus "any" and
|
||||
"anycrlf". */
|
||||
|
||||
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
|
||||
switch (options & PCRE_NEWLINE_BITS)
|
||||
{
|
||||
case 0: newline = NEWLINE; break; /* Compile-time default */
|
||||
case 0: newline = NEWLINE; break; /* Build-time default */
|
||||
case PCRE_NEWLINE_CR: newline = '\r'; break;
|
||||
case PCRE_NEWLINE_LF: newline = '\n'; break;
|
||||
case PCRE_NEWLINE_CR+
|
||||
@@ -5584,6 +5843,7 @@ been put into the cd block so that they can be changed if an option setting is
|
||||
found within the regex right at the beginning. Bringing initial option settings
|
||||
outside can help speed up starting point checks. */
|
||||
|
||||
ptr += skipatstart;
|
||||
code = cworkspace;
|
||||
*code = OP_BRA;
|
||||
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
|
||||
@@ -5647,12 +5907,13 @@ cd->start_code = codestart;
|
||||
cd->hwm = cworkspace;
|
||||
cd->req_varyopt = 0;
|
||||
cd->nopartial = FALSE;
|
||||
cd->had_accept = FALSE;
|
||||
|
||||
/* Set up a starting, non-extracting bracket, then compile the expression. On
|
||||
error, errorcode will be set non-zero, so we don't need to look at the result
|
||||
of the function here. */
|
||||
|
||||
ptr = (const uschar *)pattern;
|
||||
ptr = (const uschar *)pattern + skipatstart;
|
||||
code = (uschar *)codestart;
|
||||
*code = OP_BRA;
|
||||
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
|
||||
@@ -5661,6 +5922,7 @@ re->top_bracket = cd->bracount;
|
||||
re->top_backref = cd->top_backref;
|
||||
|
||||
if (cd->nopartial) re->options |= PCRE_NOPARTIAL;
|
||||
if (cd->had_accept) reqbyte = -1; /* Must disable after (*ACCEPT) */
|
||||
|
||||
/* If not reached end of pattern on success, there's an excess bracket. */
|
||||
|
||||
@@ -5759,19 +6021,7 @@ case when building a production library. */
|
||||
printf("Length = %d top_bracket = %d top_backref = %d\n",
|
||||
length, re->top_bracket, re->top_backref);
|
||||
|
||||
if (re->options != 0)
|
||||
{
|
||||
printf("%s%s%s%s%s%s%s%s%s\n",
|
||||
((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",
|
||||
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
|
||||
((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
|
||||
((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
|
||||
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
|
||||
((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
|
||||
((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
|
||||
((re->options & PCRE_EXTRA) != 0)? "extra " : "",
|
||||
((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
|
||||
}
|
||||
printf("Options=%08x\n", re->options);
|
||||
|
||||
if ((re->options & PCRE_FIRSTSET) != 0)
|
||||
{
|
||||
|
||||
@@ -41,6 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
+180
-102
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
|
||||
possible. There are also some static supporting functions. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#define NLBLOCK md /* Block containing newline information */
|
||||
#define PSSTART start_subject /* Field containing processed string start */
|
||||
#define PSEND end_subject /* Field containing processed string end */
|
||||
@@ -53,16 +57,10 @@ possible. There are also some static supporting functions. */
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
/* The chain of eptrblocks for tail recursions uses memory in stack workspace,
|
||||
obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
|
||||
|
||||
#define EPTR_WORK_SIZE (1000)
|
||||
|
||||
/* Flag bits for the match() function */
|
||||
|
||||
#define match_condassert 0x01 /* Called to check a condition assertion */
|
||||
#define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
|
||||
#define match_tail_recursed 0x04 /* Tail recursive call */
|
||||
|
||||
/* Non-error returns from the match() function. Error returns are externally
|
||||
defined PCRE_ERROR_xxx codes, which are all negative. */
|
||||
@@ -70,6 +68,14 @@ defined PCRE_ERROR_xxx codes, which are all negative. */
|
||||
#define MATCH_MATCH 1
|
||||
#define MATCH_NOMATCH 0
|
||||
|
||||
/* Special internal returns from the match() function. Make them sufficiently
|
||||
negative to avoid the external error codes. */
|
||||
|
||||
#define MATCH_COMMIT (-999)
|
||||
#define MATCH_PRUNE (-998)
|
||||
#define MATCH_SKIP (-997)
|
||||
#define MATCH_THEN (-996)
|
||||
|
||||
/* Maximum number of ints of offset to save on the stack for recursive calls.
|
||||
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
|
||||
because the offset vector is always a multiple of 3 long. */
|
||||
@@ -205,15 +211,15 @@ variable instead of being passed in the frame.
|
||||
****************************************************************************
|
||||
***************************************************************************/
|
||||
|
||||
|
||||
/* Numbers for RMATCH calls */
|
||||
/* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
|
||||
below must be updated in sync. */
|
||||
|
||||
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
||||
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
|
||||
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
|
||||
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
|
||||
RM41, RM42, RM43, RM44, RM45, RM46, RM47 };
|
||||
|
||||
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
|
||||
RM51, RM52, RM53, RM54 };
|
||||
|
||||
/* These versions of the macros use the stack, as normal. There are debugging
|
||||
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
||||
@@ -384,7 +390,6 @@ Arguments:
|
||||
match_condassert - this is an assertion condition
|
||||
match_cbegroup - this is the start of an unlimited repeat
|
||||
group that can match an empty string
|
||||
match_tail_recursed - this is a tail_recursed group
|
||||
rdepth the recursion depth
|
||||
|
||||
Returns: MATCH_MATCH if matched ) these values are >= 0
|
||||
@@ -586,22 +591,16 @@ original_ims = ims; /* Save for resetting on ')' */
|
||||
string, the match_cbegroup flag is set. When this is the case, add the current
|
||||
subject pointer to the chain of such remembered pointers, to be checked when we
|
||||
hit the closing ket, in order to break infinite loops that match no characters.
|
||||
When match() is called in other circumstances, don't add to the chain. If this
|
||||
is a tail recursion, use a block from the workspace, as the one on the stack is
|
||||
already used. */
|
||||
When match() is called in other circumstances, don't add to the chain. The
|
||||
match_cbegroup flag must NOT be used with tail recursion, because the memory
|
||||
block that is used is on the stack, so a new one may be required for each
|
||||
match(). */
|
||||
|
||||
if ((flags & match_cbegroup) != 0)
|
||||
{
|
||||
eptrblock *p;
|
||||
if ((flags & match_tail_recursed) != 0)
|
||||
{
|
||||
if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
|
||||
p = md->eptrchain + md->eptrn++;
|
||||
}
|
||||
else p = &newptrb;
|
||||
p->epb_saved_eptr = eptr;
|
||||
p->epb_prev = eptrb;
|
||||
eptrb = p;
|
||||
newptrb.epb_saved_eptr = eptr;
|
||||
newptrb.epb_prev = eptrb;
|
||||
eptrb = &newptrb;
|
||||
}
|
||||
|
||||
/* Now start processing the opcodes. */
|
||||
@@ -621,6 +620,34 @@ for (;;)
|
||||
|
||||
switch(op)
|
||||
{
|
||||
case OP_FAIL:
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
case OP_PRUNE:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM51);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RRETURN(MATCH_PRUNE);
|
||||
|
||||
case OP_COMMIT:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM52);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RRETURN(MATCH_COMMIT);
|
||||
|
||||
case OP_SKIP:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM53);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
md->start_match_ptr = eptr; /* Pass back current position */
|
||||
RRETURN(MATCH_SKIP);
|
||||
|
||||
case OP_THEN:
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM54);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
RRETURN(MATCH_THEN);
|
||||
|
||||
/* Handle a capturing bracket. If there is space in the offset vector, save
|
||||
the current subject position in the working slot at the top of the vector.
|
||||
We mustn't change the current values of the data slot, because they may be
|
||||
@@ -662,7 +689,7 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
|
||||
ims, eptrb, flags, RM1);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
md->capture_last = save_capture_last;
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
@@ -677,15 +704,22 @@ for (;;)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
||||
/* Insufficient room for saving captured contents. Treat as a non-capturing
|
||||
bracket. */
|
||||
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
|
||||
as a non-capturing bracket. */
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
|
||||
DPRINTF(("insufficient capture room: treat as non-capturing\n"));
|
||||
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
|
||||
/* Non-capturing bracket. Loop for all the alternatives. When we get to the
|
||||
final alternative within the brackets, we would return the result of a
|
||||
recursive call to match() whatever happened. We can reduce stack usage by
|
||||
turning this into a tail recursion. */
|
||||
turning this into a tail recursion, except in the case when match_cbegroup
|
||||
is set.*/
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
@@ -693,12 +727,20 @@ for (;;)
|
||||
flags = (op >= OP_SBRA)? match_cbegroup : 0;
|
||||
for (;;)
|
||||
{
|
||||
if (ecode[GET(ecode, 1)] != OP_ALT)
|
||||
if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
|
||||
{
|
||||
ecode += _pcre_OP_lengths[*ecode];
|
||||
flags |= match_tail_recursed;
|
||||
DPRINTF(("bracket 0 tail recursion\n"));
|
||||
goto TAIL_RECURSE;
|
||||
if (flags == 0) /* Not a possibly empty group */
|
||||
{
|
||||
ecode += _pcre_OP_lengths[*ecode];
|
||||
DPRINTF(("bracket 0 tail recursion\n"));
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
|
||||
/* Possibly empty group; can't use tail recursion. */
|
||||
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
|
||||
eptrb, flags, RM48);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
|
||||
/* For non-final alternatives, continue the loop for a NOMATCH result;
|
||||
@@ -706,7 +748,7 @@ for (;;)
|
||||
|
||||
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
|
||||
eptrb, flags, RM2);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
/* Control never reaches here. */
|
||||
@@ -754,7 +796,7 @@ for (;;)
|
||||
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
|
||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||
}
|
||||
else if (rrc != MATCH_NOMATCH)
|
||||
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
||||
{
|
||||
RRETURN(rrc); /* Need braces because of following else */
|
||||
}
|
||||
@@ -766,25 +808,36 @@ for (;;)
|
||||
}
|
||||
|
||||
/* We are now at the branch that is to be obeyed. As there is only one,
|
||||
we can use tail recursion to avoid using another stack frame. If the second
|
||||
alternative doesn't exist, we can just plough on. */
|
||||
we can use tail recursion to avoid using another stack frame, except when
|
||||
match_cbegroup is required for an unlimited repeat of a possibly empty
|
||||
group. If the second alternative doesn't exist, we can just plough on. */
|
||||
|
||||
if (condition || *ecode == OP_ALT)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
|
||||
goto TAIL_RECURSE;
|
||||
if (op == OP_SCOND) /* Possibly empty group */
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
else /* Group must match something */
|
||||
{
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
else /* Condition false & no 2nd alternative */
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
/* End of the pattern. If we are in a top-level recursion, we should
|
||||
restore the offsets appropriately and continue from after the call. */
|
||||
/* End of the pattern, either real or forced. If we are in a top-level
|
||||
recursion, we should restore the offsets appropriately and continue from
|
||||
after the call. */
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_END:
|
||||
if (md->recursive != NULL && md->recursive->group_num == 0)
|
||||
{
|
||||
@@ -805,7 +858,7 @@ for (;;)
|
||||
if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
|
||||
md->end_match_ptr = eptr; /* Record where we ended */
|
||||
md->end_offset_top = offset_top; /* and how many extracts were taken */
|
||||
md->start_match_ptr = mstart; /* and the start (\K can modify) */
|
||||
md->start_match_ptr = mstart; /* and the start (\K can modify) */
|
||||
RRETURN(MATCH_MATCH);
|
||||
|
||||
/* Change option settings */
|
||||
@@ -829,7 +882,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
|
||||
RM4);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
@@ -856,7 +909,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
|
||||
RM5);
|
||||
if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
@@ -880,7 +933,7 @@ for (;;)
|
||||
{
|
||||
eptr--;
|
||||
if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
|
||||
BACKCHAR(eptr)
|
||||
BACKCHAR(eptr);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -993,7 +1046,7 @@ for (;;)
|
||||
(pcre_free)(new_recursive.offset_save);
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
else if (rrc != MATCH_NOMATCH)
|
||||
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
||||
{
|
||||
DPRINTF(("Recursion gave error %d\n", rrc));
|
||||
RRETURN(rrc);
|
||||
@@ -1027,10 +1080,9 @@ for (;;)
|
||||
|
||||
do
|
||||
{
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
|
||||
eptrb, 0, RM7);
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
|
||||
if (rrc == MATCH_MATCH) break;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
ecode += GET(ecode,1);
|
||||
}
|
||||
while (*ecode == OP_ALT);
|
||||
@@ -1073,11 +1125,10 @@ for (;;)
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
|
||||
RM8);
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode = prev;
|
||||
flags = match_tail_recursed;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
@@ -1085,7 +1136,7 @@ for (;;)
|
||||
RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_tail_recursed;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never gets here */
|
||||
@@ -1216,17 +1267,21 @@ for (;;)
|
||||
|
||||
/* The repeating kets try the rest of the pattern or restart from the
|
||||
preceding bracket, in the appropriate order. In the second case, we can use
|
||||
tail recursion to avoid using another stack frame. */
|
||||
tail recursion to avoid using another stack frame, unless we have an
|
||||
unlimited repeat of a group that can match an empty string. */
|
||||
|
||||
flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
|
||||
|
||||
if (*ecode == OP_KETRMIN)
|
||||
{
|
||||
RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
|
||||
RM12);
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (flags != 0) /* Could match an empty string */
|
||||
{
|
||||
RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
ecode = prev;
|
||||
flags |= match_tail_recursed;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
else /* OP_KETRMAX */
|
||||
@@ -1234,7 +1289,7 @@ for (;;)
|
||||
RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
ecode += 1 + LINK_SIZE;
|
||||
flags = match_tail_recursed;
|
||||
flags = 0;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
/* Control never gets here */
|
||||
@@ -2033,7 +2088,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
||||
BACKCHAR(eptr)
|
||||
if (utf8) BACKCHAR(eptr);
|
||||
}
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -3038,9 +3093,9 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject ||
|
||||
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
|
||||
(*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -3058,9 +3113,9 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject ||
|
||||
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
|
||||
(*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -3702,7 +3757,7 @@ for (;;)
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (eptr-- == pp) break; /* Stop if tried at original pos */
|
||||
BACKCHAR(eptr);
|
||||
if (utf8) BACKCHAR(eptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3741,9 +3796,9 @@ for (;;)
|
||||
for (;;) /* Move back over one extended */
|
||||
{
|
||||
int len = 1;
|
||||
BACKCHAR(eptr);
|
||||
if (!utf8) c = *eptr; else
|
||||
{
|
||||
BACKCHAR(eptr);
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
@@ -3764,11 +3819,6 @@ for (;;)
|
||||
switch(ctype)
|
||||
{
|
||||
case OP_ANY:
|
||||
|
||||
/* Special code is required for UTF8, but when the maximum is
|
||||
unlimited we don't need it, so we repeat the non-UTF8 code. This is
|
||||
probably worth it, because .* is quite a common idiom. */
|
||||
|
||||
if (max < INT_MAX)
|
||||
{
|
||||
if ((ims & PCRE_DOTALL) == 0)
|
||||
@@ -3801,15 +3851,12 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
|
||||
eptr++;
|
||||
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
c = max - min;
|
||||
if (c > (unsigned int)(md->end_subject - eptr))
|
||||
c = md->end_subject - eptr;
|
||||
eptr += c;
|
||||
eptr = md->end_subject;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -3990,7 +4037,7 @@ for (;;)
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
/* Not UTF-8 mode */
|
||||
{
|
||||
@@ -4181,7 +4228,8 @@ switch (frame->Xwhere)
|
||||
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
|
||||
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
|
||||
LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
|
||||
LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
|
||||
LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)
|
||||
LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)
|
||||
default:
|
||||
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
|
||||
return PCRE_ERROR_INTERNAL;
|
||||
@@ -4298,7 +4346,6 @@ const uschar *start_bits = NULL;
|
||||
USPTR start_match = (USPTR)subject + start_offset;
|
||||
USPTR end_subject;
|
||||
USPTR req_byte_ptr = start_match - 1;
|
||||
eptrblock eptrchain[EPTR_WORK_SIZE];
|
||||
|
||||
pcre_study_data internal_study;
|
||||
const pcre_study_data *study;
|
||||
@@ -4384,7 +4431,6 @@ md->partial = (options & PCRE_PARTIAL) != 0;
|
||||
md->hitend = FALSE;
|
||||
|
||||
md->recursive = NULL; /* No recursion at top level */
|
||||
md->eptrchain = eptrchain; /* Make workspace generally available */
|
||||
|
||||
md->lcc = tables + lcc_offset;
|
||||
md->ctypes = tables + ctypes_offset;
|
||||
@@ -4540,6 +4586,7 @@ the loop runs just once. */
|
||||
for(;;)
|
||||
{
|
||||
USPTR save_end_subject = end_subject;
|
||||
USPTR new_start_match;
|
||||
|
||||
/* Reset the maximum number of extractions we might see. */
|
||||
|
||||
@@ -4680,15 +4727,48 @@ for(;;)
|
||||
|
||||
/* OK, we can now run the match. */
|
||||
|
||||
md->start_match_ptr = start_match; /* Insurance */
|
||||
md->start_match_ptr = start_match;
|
||||
md->match_call_count = 0;
|
||||
md->eptrn = 0; /* Next free eptrchain slot */
|
||||
rc = match(start_match, md->start_code, start_match, 2, md,
|
||||
ims, NULL, 0, 0);
|
||||
rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
|
||||
|
||||
/* Any return other than MATCH_NOMATCH breaks the loop. */
|
||||
switch(rc)
|
||||
{
|
||||
/* NOMATCH and PRUNE advance by one character. THEN at this level acts
|
||||
exactly like PRUNE. */
|
||||
|
||||
if (rc != MATCH_NOMATCH) break;
|
||||
case MATCH_NOMATCH:
|
||||
case MATCH_PRUNE:
|
||||
case MATCH_THEN:
|
||||
new_start_match = start_match + 1;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
|
||||
new_start_match++;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* SKIP passes back the next starting point explicitly. */
|
||||
|
||||
case MATCH_SKIP:
|
||||
new_start_match = md->start_match_ptr;
|
||||
break;
|
||||
|
||||
/* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
|
||||
|
||||
case MATCH_COMMIT:
|
||||
rc = MATCH_NOMATCH;
|
||||
goto ENDLOOP;
|
||||
|
||||
/* Any other return is some kind of error. */
|
||||
|
||||
default:
|
||||
goto ENDLOOP;
|
||||
}
|
||||
|
||||
/* Control reaches here for the various types of "no match at this point"
|
||||
result. Reset the code to MATCH_NOMATCH for subsequent checking. */
|
||||
|
||||
rc = MATCH_NOMATCH;
|
||||
|
||||
/* If PCRE_FIRSTLINE is set, the match must happen before or at the first
|
||||
newline in the subject (though it may continue over the newline). Therefore,
|
||||
@@ -4696,30 +4776,26 @@ for(;;)
|
||||
|
||||
if (firstline && IS_NEWLINE(start_match)) break;
|
||||
|
||||
/* Advance the match position by one character. */
|
||||
/* Advance to new matching position */
|
||||
|
||||
start_match++;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
|
||||
start_match++;
|
||||
#endif
|
||||
start_match = new_start_match;
|
||||
|
||||
/* Break the loop if the pattern is anchored or if we have passed the end of
|
||||
the subject. */
|
||||
|
||||
if (anchored || start_match > end_subject) break;
|
||||
|
||||
/* If we have just passed a CR and the newline option is CRLF or ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
character. */
|
||||
/* If we have just passed a CR and we are now at a LF, and the pattern does
|
||||
not contain any explicit matches for \r or \n, and the newline option is CRLF
|
||||
or ANY or ANYCRLF, advance the match position by one more character. */
|
||||
|
||||
if (start_match[-1] == '\r' &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2) &&
|
||||
start_match < end_subject &&
|
||||
*start_match == '\n')
|
||||
start_match < end_subject &&
|
||||
*start_match == '\n' &&
|
||||
(re->options & PCRE_HASCRORLF) == 0 &&
|
||||
(md->nltype == NLTYPE_ANY ||
|
||||
md->nltype == NLTYPE_ANYCRLF ||
|
||||
md->nllen == 2))
|
||||
start_match++;
|
||||
|
||||
} /* End of for(;;) "bumpalong" loop */
|
||||
@@ -4729,7 +4805,7 @@ for(;;)
|
||||
/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
|
||||
conditions is true:
|
||||
|
||||
(1) The pattern is anchored;
|
||||
(1) The pattern is anchored or the match was failed by (*COMMIT);
|
||||
|
||||
(2) We are past the end of the subject;
|
||||
|
||||
@@ -4744,6 +4820,8 @@ processing, copy those that we can. In this case there need not be overflow if
|
||||
certain parts of the pattern were not used, even though there are more
|
||||
capturing parentheses than vector slots. */
|
||||
|
||||
ENDLOOP:
|
||||
|
||||
if (rc == MATCH_MATCH)
|
||||
{
|
||||
if (using_temporary_offsets)
|
||||
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@@ -148,6 +152,10 @@ switch (what)
|
||||
*((int *)where) = (re->options & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->options & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
|
||||
@@ -43,6 +43,10 @@ from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -46,6 +46,10 @@ indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
|
||||
@@ -43,6 +43,10 @@ information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -67,10 +67,6 @@ be absolutely sure we get our version. */
|
||||
#endif
|
||||
|
||||
|
||||
/* Get the definitions provided by running "configure" */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* Standard C headers plus the external interface definition. The only time
|
||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
|
||||
@@ -112,7 +108,7 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef _WIN32
|
||||
# ifdef DLL_EXPORT
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCRE_EXP_DEFN __declspec(dllexport)
|
||||
# define PCRE_EXP_DATA_DEFN __declspec(dllexport)
|
||||
@@ -121,7 +117,6 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
# define PCRE_EXP_DEFN
|
||||
# define PCRE_EXP_DATA_DEFN
|
||||
# endif
|
||||
#
|
||||
# else
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
@@ -234,7 +229,7 @@ must begin with PCRE_. */
|
||||
/* Include the public PCRE header and the definitions of UCP character property
|
||||
values. */
|
||||
|
||||
#include <pcre.h>
|
||||
#include "pcre.h"
|
||||
#include "ucp.h"
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
@@ -363,7 +358,9 @@ capturing parenthesis numbers in back references. */
|
||||
|
||||
/* When UTF-8 encoding is being used, a character is no longer just a single
|
||||
byte. The macros for character handling generate simple sequences when used in
|
||||
byte-mode, and more complicated ones for UTF-8 characters. */
|
||||
byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should
|
||||
never be called in byte mode. To make sure it can never even appear when UTF-8
|
||||
support is omitted, we don't even define it. */
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
@@ -371,7 +368,7 @@ byte-mode, and more complicated ones for UTF-8 characters. */
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define BACKCHAR(eptr)
|
||||
/* #define BACKCHAR(eptr) */
|
||||
|
||||
#else /* SUPPORT_UTF8 */
|
||||
|
||||
@@ -464,9 +461,10 @@ if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
}
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. Called only in UTF-8 mode. */
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
|
||||
|
||||
#endif
|
||||
|
||||
@@ -494,6 +492,7 @@ bits. */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_JCHANGED 0x08000000 /* j option changes within regex */
|
||||
#define PCRE_HASCRORLF 0x04000000 /* explicit \r or \n in pattern */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
@@ -610,14 +609,9 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
|
||||
To keep stored, compiled patterns compatible, new opcodes should be added
|
||||
immediately before OP_BRA, where (since release 7.0) a gap is left for this
|
||||
purpose.
|
||||
|
||||
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
|
||||
that follow must also be updated to match. There is also a table called
|
||||
"coptable" in pcre_dfa_exec.c that must be updated. */
|
||||
@@ -744,7 +738,7 @@ enum {
|
||||
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 92 Atomic group */
|
||||
OP_BRA, /* 83 Start of non-capturing bracket */
|
||||
OP_BRA, /* 93 Start of non-capturing bracket */
|
||||
OP_CBRA, /* 94 Start of capturing bracket */
|
||||
OP_COND, /* 95 Conditional group */
|
||||
|
||||
@@ -760,7 +754,19 @@ enum {
|
||||
OP_DEF, /* 101 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 102 These two must remain together and in this */
|
||||
OP_BRAMINZERO /* 103 order. */
|
||||
OP_BRAMINZERO, /* 103 order. */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_PRUNE, /* 104 */
|
||||
OP_SKIP, /* 105 */
|
||||
OP_THEN, /* 106 */
|
||||
OP_COMMIT, /* 107 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 108 */
|
||||
OP_ACCEPT /* 109 */
|
||||
};
|
||||
|
||||
|
||||
@@ -783,8 +789,9 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
"class", "nclass", "xclass", "Ref", "Recurse", "Callout", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
||||
"AssertB", "AssertB not", "Reverse", \
|
||||
"Once", "Bra 0", "Bra", "Cond", "SBra 0", "SBra", "SCond", \
|
||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero"
|
||||
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
|
||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
|
||||
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
@@ -848,6 +855,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
3, /* RREF */ \
|
||||
1, /* DEF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
|
||||
1, 1 /* FAIL, ACCEPT */
|
||||
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
@@ -862,7 +871,8 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 };
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@@ -931,6 +941,7 @@ typedef struct compile_data {
|
||||
int external_options; /* External (initial) options */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL nopartial; /* Set TRUE if partial won't work */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
uschar nl[4]; /* Newline string when fixed length */
|
||||
|
||||
@@ -45,7 +45,10 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
#include "pcre_internal.h"
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
# endif
|
||||
# include "pcre_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -47,6 +47,10 @@ and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@@ -124,12 +128,16 @@ _pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else /* no UTF-8 support */
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
|
||||
@@ -41,6 +41,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ get_ucpname(int ptype, int pvalue)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
int i;
|
||||
for (i = _pcre_utt_size; i >= 0; i--)
|
||||
for (i = _pcre_utt_size - 1; i >= 0; i--)
|
||||
{
|
||||
if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
|
||||
}
|
||||
|
||||
@@ -43,6 +43,11 @@ auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -44,6 +44,10 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,10 @@ see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -41,6 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "ucp.h" /* Category definitions */
|
||||
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
strings. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
@@ -55,6 +59,13 @@ that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying
|
||||
an invalid string are then undefined.
|
||||
|
||||
Originally, this function checked according to RFC 2279, allowing for values in
|
||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd800 to 0xdfff is excluded.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
@@ -81,31 +92,48 @@ for (p = string; length-- > 0; p++)
|
||||
register int c = *p;
|
||||
if (c < 128) continue;
|
||||
if (c < 0xc0) return p - string;
|
||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab) return p - string;
|
||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab || ab > 3) return p - string;
|
||||
length -= ab;
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
|
||||
/* Check for overlong sequences for each different length */
|
||||
/* Check for overlong sequences for each different length, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* Check for xx00 000x */
|
||||
/* Check for xx00 000x (overlong sequence) */
|
||||
|
||||
case 1:
|
||||
if ((c & 0x3e) == 0) return p - string;
|
||||
continue; /* We know there aren't any more bytes to check */
|
||||
|
||||
/* Check for 1110 0000, xx0x xxxx */
|
||||
/* Check for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 101x xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
|
||||
if ((c == 0xe0 && (*p & 0x20) == 0) ||
|
||||
(c == 0xed && *p >= 0xa0))
|
||||
return p - string;
|
||||
break;
|
||||
|
||||
/* Check for 1111 0000, xx00 xxxx */
|
||||
/* Check for 1111 0000, xx00 xxxx (overlong sequence) or
|
||||
greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
|
||||
if ((c == 0xf0 && (*p & 0x30) == 0) ||
|
||||
(c > 0xf4 ) ||
|
||||
(c == 0xf4 && *p > 0x8f))
|
||||
return p - string;
|
||||
break;
|
||||
|
||||
#if 0
|
||||
/* These cases can no longer occur, as we restrict to a maximum of four
|
||||
bytes nowadays. Leave the code here in case we ever want to add an option
|
||||
for longer sequences. */
|
||||
|
||||
/* Check for 1111 1000, xx00 0xxx */
|
||||
case 4:
|
||||
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
|
||||
@@ -116,6 +144,8 @@ for (p = string; length-- > 0; p++)
|
||||
if (c == 0xfe || c == 0xff ||
|
||||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
|
||||
break;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,10 @@ class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -11,15 +11,12 @@ Compile thuswise:
|
||||
-R/usr/local/lib -lpcre
|
||||
|
||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||
library files for PCRE are installed on your system. Only some operating
|
||||
library files for PCRE are installed on your system. You don't need -I and -L
|
||||
if PCRE is installed in the standard system libraries. Only some operating
|
||||
systems (e.g. Solaris) use the -R option.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre.h>
|
||||
|
||||
+35
-11
@@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include <config.h>
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
@@ -50,8 +50,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
# include <unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <pcre.h>
|
||||
@@ -855,7 +856,7 @@ while (ptr < endptr)
|
||||
|
||||
t = end_of_line(t, endptr, &endlinelength);
|
||||
linelength = t - ptr - endlinelength;
|
||||
length = multiline? endptr - ptr : linelength;
|
||||
length = multiline? (size_t)(endptr - ptr) : linelength;
|
||||
|
||||
/* Extra processing for Jeffrey Friedl's debugging. */
|
||||
|
||||
@@ -1063,18 +1064,23 @@ while (ptr < endptr)
|
||||
|
||||
/* In multiline mode, we want to print to the end of the line in which
|
||||
the end of the matched string is found, so we adjust linelength and the
|
||||
line number appropriately. Because the PCRE_FIRSTLINE option is set, the
|
||||
start of the match will always be before the first newline sequence. */
|
||||
line number appropriately, but only when there actually was a match
|
||||
(invert not set). Because the PCRE_FIRSTLINE option is set, the start of
|
||||
the match will always be before the first newline sequence. */
|
||||
|
||||
if (multiline)
|
||||
{
|
||||
int ellength;
|
||||
char *endmatch = ptr + offsets[1];
|
||||
t = ptr;
|
||||
while (t < endmatch)
|
||||
char *endmatch = ptr;
|
||||
if (!invert)
|
||||
{
|
||||
t = end_of_line(t, endptr, &ellength);
|
||||
if (t <= endmatch) linenumber++; else break;
|
||||
endmatch += offsets[1];
|
||||
t = ptr;
|
||||
while (t < endmatch)
|
||||
{
|
||||
t = end_of_line(t, endptr, &ellength);
|
||||
if (t <= endmatch) linenumber++; else break;
|
||||
}
|
||||
}
|
||||
endmatch = end_of_line(endmatch, endptr, &ellength);
|
||||
linelength = endmatch - ptr - ellength;
|
||||
@@ -1123,6 +1129,24 @@ while (ptr < endptr)
|
||||
lastmatchnumber = linenumber + 1;
|
||||
}
|
||||
|
||||
/* For a match in multiline inverted mode (which of course did not cause
|
||||
anything to be printed), we have to move on to the end of the match before
|
||||
proceeding. */
|
||||
|
||||
if (multiline && invert && match)
|
||||
{
|
||||
int ellength;
|
||||
char *endmatch = ptr + offsets[1];
|
||||
t = ptr;
|
||||
while (t < endmatch)
|
||||
{
|
||||
t = end_of_line(t, endptr, &ellength);
|
||||
if (t <= endmatch) linenumber++; else break;
|
||||
}
|
||||
endmatch = end_of_line(endmatch, endptr, &ellength);
|
||||
linelength = endmatch - ptr - ellength;
|
||||
}
|
||||
|
||||
/* Advance to after the newline and increment the line number. */
|
||||
|
||||
ptr += linelength + endlinelength;
|
||||
@@ -1625,7 +1649,7 @@ for (i = 1; i < argc; i++)
|
||||
else /* Special case xxx=data */
|
||||
{
|
||||
int oplen = equals - op->long_name;
|
||||
int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
|
||||
int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
|
||||
if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
|
||||
{
|
||||
option_data = arg + arglen;
|
||||
|
||||
@@ -42,9 +42,24 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
|
||||
compiling these functions. This must come before including pcreposix.h, where
|
||||
they are set for an application (using these functions) if they have not
|
||||
previously been set. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCREPOSIX_EXP_DEFN __declspec(dllexport)
|
||||
#endif
|
||||
|
||||
#include <pcre.h>
|
||||
#include "pcre_internal.h"
|
||||
#include "pcreposix.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
|
||||
|
||||
@@ -109,7 +124,8 @@ static const int eint[] = {
|
||||
REG_BADPAT, /* repeating a DEFINE group is not allowed */
|
||||
REG_INVARG, /* inconsistent NEWLINE options */
|
||||
REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */
|
||||
REG_BADPAT /* (?+ or (?- must be followed by a non-zero number */
|
||||
REG_BADPAT, /* (?+ or (?- must be followed by a non-zero number */
|
||||
REG_BADPAT /* number is too big */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
||||
@@ -107,13 +107,12 @@ typedef struct {
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export settings are needed. */
|
||||
export settings are needed, and are set in pcreposix.c before including this
|
||||
file. */
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifndef PCREPOSIX_STATIC
|
||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
|
||||
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
|
||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
|
||||
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
Vendored
+2
@@ -383,3 +383,5 @@ AB.VE
|
||||
AB.VE the turtle
|
||||
PUT NEW DATA ABOVE THIS LINE.
|
||||
---------------------------- Test 49 ------------------------------
|
||||
---------------------------- Test 50 ------------------------------
|
||||
over the lazy dog.
|
||||
|
||||
Vendored
+9
@@ -4021,4 +4021,13 @@
|
||||
/(.*(.)?)*/
|
||||
abcd
|
||||
|
||||
/( (A | (?(1)0|) )* )/x
|
||||
abcd
|
||||
|
||||
/( ( (?(1)0|) )* )/x
|
||||
abcd
|
||||
|
||||
/( (?(1)0|)* )/x
|
||||
abcd
|
||||
|
||||
/ End of testinput1 /
|
||||
|
||||
+20
@@ -101,4 +101,24 @@ are all themselves checked in other tests. --/
|
||||
|
||||
/[\x{105}-\x{109}]/8iBM
|
||||
|
||||
/( ( (?(1)0|) )* )/xBM
|
||||
|
||||
/( (?(1)0|)* )/xBM
|
||||
|
||||
/[a]/BM
|
||||
|
||||
/[a]/8BM
|
||||
|
||||
/[\xaa]/BM
|
||||
|
||||
/[\xaa]/8BM
|
||||
|
||||
/[^a]/BM
|
||||
|
||||
/[^a]/8BM
|
||||
|
||||
/[^\xaa]/BM
|
||||
|
||||
/[^\xaa]/8BM
|
||||
|
||||
/ End of testinput10 /
|
||||
|
||||
Vendored
+138
@@ -2326,4 +2326,142 @@ a random value. /Ix
|
||||
|
||||
/\V+\v\V+\w/BZ
|
||||
|
||||
/\( (?: [^()]* | (?R) )* \)/x
|
||||
(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0(0
(0(00)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)0)
0)0)0)0)
|
||||
|
||||
/[\E]AAA/
|
||||
|
||||
/[\Q\E]AAA/
|
||||
|
||||
/[^\E]AAA/
|
||||
|
||||
/[^\Q\E]AAA/
|
||||
|
||||
/[\E^]AAA/
|
||||
|
||||
/[\Q\E^]AAA/
|
||||
|
||||
/A(*PRUNE)B(*SKIP)C(*THEN)D(*COMMIT)E(*F)F(*FAIL)G(?!)H(*ACCEPT)I/BZ
|
||||
|
||||
/^a+(*FAIL)/
|
||||
aaaaaa
|
||||
|
||||
/a+b?c+(*FAIL)/
|
||||
aaabccc
|
||||
|
||||
/a+b?(*PRUNE)c+(*FAIL)/
|
||||
aaabccc
|
||||
|
||||
/a+b?(*COMMIT)c+(*FAIL)/
|
||||
aaabccc
|
||||
|
||||
/a+b?(*SKIP)c+(*FAIL)/
|
||||
aaabcccaaabccc
|
||||
|
||||
/^(?:aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
|
||||
aaaxxxxxx
|
||||
aaa++++++
|
||||
bbbxxxxx
|
||||
bbb+++++
|
||||
cccxxxx
|
||||
ccc++++
|
||||
dddddddd
|
||||
|
||||
/^(aaa(*THEN)\w{6}|bbb(*THEN)\w{5}|ccc(*THEN)\w{4}|\w{3})/
|
||||
aaaxxxxxx
|
||||
aaa++++++
|
||||
bbbxxxxx
|
||||
bbb+++++
|
||||
cccxxxx
|
||||
ccc++++
|
||||
dddddddd
|
||||
|
||||
/a+b?(*THEN)c+(*FAIL)/
|
||||
aaabccc
|
||||
|
||||
/(A (A|B(*ACCEPT)|C) D)(E)/x
|
||||
ABX
|
||||
AADE
|
||||
ACDE
|
||||
** Failers
|
||||
AD
|
||||
|
||||
/^a+(*FAIL)/C
|
||||
aaaaaa
|
||||
|
||||
/a+b?c+(*FAIL)/C
|
||||
aaabccc
|
||||
|
||||
/a+b?(*PRUNE)c+(*FAIL)/C
|
||||
aaabccc
|
||||
|
||||
/a+b?(*COMMIT)c+(*FAIL)/C
|
||||
aaabccc
|
||||
|
||||
/a+b?(*SKIP)c+(*FAIL)/C
|
||||
aaabcccaaabccc
|
||||
|
||||
/a+b?(*THEN)c+(*FAIL)/C
|
||||
aaabccc
|
||||
|
||||
/a(*PRUNE:XXX)b/
|
||||
|
||||
/a(*MARK)b/
|
||||
|
||||
/(?i:A{1,}\6666666666)/
|
||||
|
||||
/\g6666666666/
|
||||
|
||||
/[\g6666666666]/
|
||||
|
||||
/(?1)\c[/
|
||||
|
||||
/.+A/<crlf>
|
||||
\r\nA
|
||||
|
||||
/\nA/<crlf>
|
||||
\r\nA
|
||||
|
||||
/[\r\n]A/<crlf>
|
||||
\r\nA
|
||||
|
||||
/(\r|\n)A/<crlf>
|
||||
\r\nA
|
||||
|
||||
/a(*CR)b/
|
||||
|
||||
/(*CR)a.b/
|
||||
a\nb
|
||||
** Failers
|
||||
a\rb
|
||||
|
||||
/(*CR)a.b/<lf>
|
||||
a\nb
|
||||
** Failers
|
||||
a\rb
|
||||
|
||||
/(*LF)a.b/<CRLF>
|
||||
a\rb
|
||||
** Failers
|
||||
a\nb
|
||||
|
||||
/(*CRLF)a.b/
|
||||
a\rb
|
||||
a\nb
|
||||
** Failers
|
||||
a\r\nb
|
||||
|
||||
/(*ANYCRLF)a.b/<CR>
|
||||
** Failers
|
||||
a\rb
|
||||
a\nb
|
||||
a\r\nb
|
||||
|
||||
/(*ANY)a.b/<cr>
|
||||
** Failers
|
||||
a\rb
|
||||
a\nb
|
||||
a\r\nb
|
||||
a\x85b
|
||||
|
||||
/ End of testinput2 /
|
||||
|
||||
Vendored
+12
@@ -523,4 +523,16 @@
|
||||
/a*\x{100}*\w/8
|
||||
a
|
||||
|
||||
/\S\S/8g
|
||||
A\x{a3}BC
|
||||
|
||||
/\S{2}/8g
|
||||
A\x{a3}BC
|
||||
|
||||
/\W\W/8g
|
||||
+\x{a3}==
|
||||
|
||||
/\W{2}/8g
|
||||
+\x{a3}==
|
||||
|
||||
/ End of testinput4 /
|
||||
|
||||
Vendored
+24
@@ -238,6 +238,10 @@ can't tell the difference.) --/
|
||||
\xf9\x87\x80\x80\x80
|
||||
\xfc\x84\x80\x80\x80\x80
|
||||
\xfd\x83\x80\x80\x80\x80
|
||||
\?\xf8\x88\x80\x80\x80
|
||||
\?\xf9\x87\x80\x80\x80
|
||||
\?\xfc\x84\x80\x80\x80\x80
|
||||
\?\xfd\x83\x80\x80\x80\x80
|
||||
|
||||
/\x{100}abc(xyz(?1))/8DZ
|
||||
|
||||
@@ -393,4 +397,24 @@ can't tell the difference.) --/
|
||||
|
||||
/[\V]/8BZ
|
||||
|
||||
/.*$/8<any>
|
||||
\x{1ec5}
|
||||
|
||||
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
|
||||
|
||||
/X/8
|
||||
\x{0}\x{d7ff}\x{e000}\x{10ffff}
|
||||
\x{d800}
|
||||
\x{d800}\?
|
||||
\x{da00}
|
||||
\x{da00}\?
|
||||
\x{dfff}
|
||||
\x{dfff}\?
|
||||
\x{110000}
|
||||
\x{110000}\?
|
||||
\x{2000000}
|
||||
\x{2000000}\?
|
||||
\x{7fffffff}
|
||||
\x{7fffffff}\?
|
||||
|
||||
/ End of testinput5 /
|
||||
|
||||
Vendored
+56
-2
@@ -61,7 +61,7 @@
|
||||
\x{09f}
|
||||
|
||||
/^\p{Cs}/8
|
||||
\x{dfff}
|
||||
\?\x{dfff}
|
||||
** Failers
|
||||
\x{09f}
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
a
|
||||
** Failers
|
||||
Z
|
||||
\x{dfff}
|
||||
\x{e000}
|
||||
|
||||
/^\p{Lm}/8
|
||||
\x{2b0}
|
||||
@@ -778,4 +778,58 @@ was broken in all cases./
|
||||
123abcdefg
|
||||
123abc\xc4\xc5zz
|
||||
|
||||
/\X{1,3}\d/
|
||||
\x8aBCD
|
||||
|
||||
/\X?\d/
|
||||
\x8aBCD
|
||||
|
||||
/\P{L}?\d/
|
||||
\x8aBCD
|
||||
|
||||
/[\PPP\x8a]{1,}\x80/
|
||||
A\x80
|
||||
|
||||
/(?:[\PPa*]*){8,}/
|
||||
|
||||
/[\P{Any}]/BZ
|
||||
|
||||
/[\P{Any}\E]/BZ
|
||||
|
||||
/(\P{Yi}+\277)/
|
||||
|
||||
/(\P{Yi}+\277)?/
|
||||
|
||||
/(?<=\P{Yi}{3}A)X/
|
||||
|
||||
/\p{Yi}+(\P{Yi}+)(?1)/
|
||||
|
||||
/(\P{Yi}{2}\277)?/
|
||||
|
||||
/[\P{Yi}A]/
|
||||
|
||||
/[\P{Yi}\P{Yi}\P{Yi}A]/
|
||||
|
||||
/[^\P{Yi}A]/
|
||||
|
||||
/[^\P{Yi}\P{Yi}\P{Yi}A]/
|
||||
|
||||
/(\P{Yi}*\277)*/
|
||||
|
||||
/(\P{Yi}*?\277)*/
|
||||
|
||||
/(\p{Yi}*+\277)*/
|
||||
|
||||
/(\P{Yi}?\277)*/
|
||||
|
||||
/(\P{Yi}??\277)*/
|
||||
|
||||
/(\p{Yi}?+\277)*/
|
||||
|
||||
/(\P{Yi}{0,3}\277)*/
|
||||
|
||||
/(\P{Yi}{0,3}?\277)*/
|
||||
|
||||
/(\p{Yi}{0,3}+\277)*/
|
||||
|
||||
/ End of testinput6 /
|
||||
|
||||
Vendored
+12
@@ -4298,4 +4298,16 @@
|
||||
>XY\x0aZ\x0aA\x0bNN\x0c
|
||||
>\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c
|
||||
|
||||
/.+A/<crlf>
|
||||
\r\nA
|
||||
|
||||
/\nA/<crlf>
|
||||
\r\nA
|
||||
|
||||
/[\r\n]A/<crlf>
|
||||
\r\nA
|
||||
|
||||
/(\r|\n)A/<crlf>
|
||||
\r\nA
|
||||
|
||||
/ End of testinput7 /
|
||||
|
||||
Vendored
+2
-2
@@ -148,7 +148,7 @@
|
||||
\x{09f}
|
||||
|
||||
/^\p{Cs}/8
|
||||
\x{dfff}
|
||||
\?\x{dfff}
|
||||
** Failers
|
||||
\x{09f}
|
||||
|
||||
@@ -156,7 +156,7 @@
|
||||
a
|
||||
** Failers
|
||||
Z
|
||||
\x{dfff}
|
||||
\x{e000}
|
||||
|
||||
/^\p{Lm}/8
|
||||
\x{2b0}
|
||||
|
||||
+17
@@ -6576,4 +6576,21 @@ No match
|
||||
0: abcd
|
||||
1:
|
||||
|
||||
/( (A | (?(1)0|) )* )/x
|
||||
abcd
|
||||
0:
|
||||
1:
|
||||
2:
|
||||
|
||||
/( ( (?(1)0|) )* )/x
|
||||
abcd
|
||||
0:
|
||||
1:
|
||||
2:
|
||||
|
||||
/( (?(1)0|)* )/x
|
||||
abcd
|
||||
0:
|
||||
1:
|
||||
|
||||
/ End of testinput1 /
|
||||
|
||||
+172
-66
@@ -6,8 +6,8 @@ are all themselves checked in other tests. --/
|
||||
/((?i)b)/BM
|
||||
Memory allocation (code space): 21
|
||||
------------------------------------------------------------------
|
||||
0 17 Bra 0
|
||||
3 9 Bra 1
|
||||
0 17 Bra
|
||||
3 9 CBra 1
|
||||
8 01 Opt
|
||||
10 NC b
|
||||
12 9 Ket
|
||||
@@ -19,8 +19,8 @@ Memory allocation (code space): 21
|
||||
/(?s)(.*X|^B)/BM
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
3 9 Bra 1
|
||||
0 21 Bra
|
||||
3 9 CBra 1
|
||||
8 Any*
|
||||
10 X
|
||||
12 6 Alt
|
||||
@@ -34,8 +34,8 @@ Memory allocation (code space): 25
|
||||
/(?s:.*X|^B)/BM
|
||||
Memory allocation (code space): 29
|
||||
------------------------------------------------------------------
|
||||
0 25 Bra 0
|
||||
3 9 Bra 0
|
||||
0 25 Bra
|
||||
3 9 Bra
|
||||
6 04 Opt
|
||||
8 Any*
|
||||
10 X
|
||||
@@ -52,7 +52,7 @@ Memory allocation (code space): 29
|
||||
/^[[:alnum:]]/BM
|
||||
Memory allocation (code space): 41
|
||||
------------------------------------------------------------------
|
||||
0 37 Bra 0
|
||||
0 37 Bra
|
||||
3 ^
|
||||
4 [0-9A-Za-z]
|
||||
37 37 Ket
|
||||
@@ -62,7 +62,7 @@ Memory allocation (code space): 41
|
||||
/#/IxMD
|
||||
Memory allocation (code space): 7
|
||||
------------------------------------------------------------------
|
||||
0 3 Bra 0
|
||||
0 3 Bra
|
||||
3 3 Ket
|
||||
6 End
|
||||
------------------------------------------------------------------
|
||||
@@ -74,7 +74,7 @@ No need char
|
||||
/a#/IxMD
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra 0
|
||||
0 5 Bra
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
@@ -87,7 +87,7 @@ No need char
|
||||
/x?+/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra 0
|
||||
0 5 Bra
|
||||
3 x?+
|
||||
5 5 Ket
|
||||
8 End
|
||||
@@ -96,7 +96,7 @@ Memory allocation (code space): 9
|
||||
/x++/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra 0
|
||||
0 5 Bra
|
||||
3 x++
|
||||
5 5 Ket
|
||||
8 End
|
||||
@@ -105,7 +105,7 @@ Memory allocation (code space): 9
|
||||
/x{1,3}+/BM
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra 0
|
||||
0 15 Bra
|
||||
3 9 Once
|
||||
6 x
|
||||
8 x{0,2}
|
||||
@@ -117,10 +117,10 @@ Memory allocation (code space): 19
|
||||
/(x)*+/BM
|
||||
Memory allocation (code space): 24
|
||||
------------------------------------------------------------------
|
||||
0 20 Bra 0
|
||||
0 20 Bra
|
||||
3 14 Once
|
||||
6 Brazero
|
||||
7 7 Bra 1
|
||||
7 7 CBra 1
|
||||
12 x
|
||||
14 7 KetRmax
|
||||
17 14 Ket
|
||||
@@ -131,19 +131,19 @@ Memory allocation (code space): 24
|
||||
/^((a+)(?U)([ab]+)(?-U)([bc]+)(\w*))/BM
|
||||
Memory allocation (code space): 120
|
||||
------------------------------------------------------------------
|
||||
0 116 Bra 0
|
||||
0 116 Bra
|
||||
3 ^
|
||||
4 109 Bra 1
|
||||
9 7 Bra 2
|
||||
4 109 CBra 1
|
||||
9 7 CBra 2
|
||||
14 a+
|
||||
16 7 Ket
|
||||
19 39 Bra 3
|
||||
19 39 CBra 3
|
||||
24 [ab]+?
|
||||
58 39 Ket
|
||||
61 39 Bra 4
|
||||
61 39 CBra 4
|
||||
66 [bc]+
|
||||
100 39 Ket
|
||||
103 7 Bra 5
|
||||
103 7 CBra 5
|
||||
108 \w*
|
||||
110 7 Ket
|
||||
113 109 Ket
|
||||
@@ -154,7 +154,7 @@ Memory allocation (code space): 120
|
||||
|8J\$WE\<\.rX\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
|
||||
Memory allocation (code space): 826
|
||||
------------------------------------------------------------------
|
||||
0 822 Bra 0
|
||||
0 822 Bra
|
||||
3 8J$WE<.rX+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
821 \b
|
||||
822 822 Ket
|
||||
@@ -164,7 +164,7 @@ Memory allocation (code space): 826
|
||||
|\$\<\.X\+ix\[d1b\!H\#\?vV0vrK\:ZH1\=2M\>iV\;\?aPhFB\<\*vW\@QW\@sO9\}cfZA\-i\'w\%hKd6gt1UJP\,15_\#QY\$M\^Mss_U\/\]\&LK9\[5vQub\^w\[KDD\<EjmhUZ\?\.akp2dF\>qmj\;2\}YWFdYx\.Ap\]hjCPTP\(n28k\+3\;o\&WXqs\/gOXdr\$\:r\'do0\;b4c\(f_Gr\=\"\\4\)\[01T7ajQJvL\$W\~mL_sS\/4h\:x\*\[ZN\=KLs\&L5zX\/\/\>it\,o\:aU\(\;Z\>pW\&T7oP\'2K\^E\:x9\'c\[\%z\-\,64JQ5AeH_G\#KijUKghQw\^\\vea3a\?kka_G\$8\#\`\*kynsxzBLru\'\]k_\[7FrVx\}\^\=\$blx\>s\-N\%j\;D\*aZDnsw\:YKZ\%Q\.Kne9\#hP\?\+b3\(SOvL\,\^\;\&u5\@\?5C5Bhb\=m\-vEh_L15Jl\]U\)0RP6\{q\%L\^_z5E\'Dw6X\b|BM
|
||||
Memory allocation (code space): 816
|
||||
------------------------------------------------------------------
|
||||
0 812 Bra 0
|
||||
0 812 Bra
|
||||
3 $<.X+ix[d1b!H#?vV0vrK:ZH1=2M>iV;?aPhFB<*vW@QW@sO9}cfZA-i'w%hKd6gt1UJP,15_#QY$M^Mss_U/]&LK9[5vQub^w[KDD<EjmhUZ?.akp2dF>qmj;2}YWFdYx.Ap]hjCPTP(n28k+3;o&WXqs/gOXdr$:r'do0;b4c(f_Gr="\4)[01T7ajQJvL$W~mL_sS/4h:x*[ZN=KLs&L5zX//>it,o:aU(;Z>pW&T7oP'2K^E:x9'c[%z-,64JQ5AeH_G#KijUKghQw^\vea3a?kka_G$8#`*kynsxzBLru']k_[7FrVx}^=$blx>s-N%j;D*aZDnsw:YKZ%Q.Kne9#hP?+b3(SOvL,^;&u5@?5C5Bhb=m-vEh_L15Jl]U)0RP6{q%L^_z5E'Dw6X
|
||||
811 \b
|
||||
812 812 Ket
|
||||
@@ -174,8 +174,8 @@ Memory allocation (code space): 816
|
||||
/(a(?1)b)/BM
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra 0
|
||||
3 18 Bra 1
|
||||
0 24 Bra
|
||||
3 18 CBra 1
|
||||
8 a
|
||||
10 6 Once
|
||||
13 3 Recurse
|
||||
@@ -189,8 +189,8 @@ Memory allocation (code space): 28
|
||||
/(a(?1)+b)/BM
|
||||
Memory allocation (code space): 28
|
||||
------------------------------------------------------------------
|
||||
0 24 Bra 0
|
||||
3 18 Bra 1
|
||||
0 24 Bra
|
||||
3 18 CBra 1
|
||||
8 a
|
||||
10 6 Once
|
||||
13 3 Recurse
|
||||
@@ -204,15 +204,15 @@ Memory allocation (code space): 28
|
||||
/a(?P<name1>b|c)d(?P<longername2>e)/BM
|
||||
Memory allocation (code space): 42
|
||||
------------------------------------------------------------------
|
||||
0 32 Bra 0
|
||||
0 32 Bra
|
||||
3 a
|
||||
5 7 Bra 1
|
||||
5 7 CBra 1
|
||||
10 b
|
||||
12 5 Alt
|
||||
15 c
|
||||
17 12 Ket
|
||||
20 d
|
||||
22 7 Bra 2
|
||||
22 7 CBra 2
|
||||
27 e
|
||||
29 7 Ket
|
||||
32 32 Ket
|
||||
@@ -222,17 +222,17 @@ Memory allocation (code space): 42
|
||||
/(?:a(?P<c>c(?P<d>d)))(?P<a>a)/BM
|
||||
Memory allocation (code space): 54
|
||||
------------------------------------------------------------------
|
||||
0 41 Bra 0
|
||||
3 25 Bra 0
|
||||
0 41 Bra
|
||||
3 25 Bra
|
||||
6 a
|
||||
8 17 Bra 1
|
||||
8 17 CBra 1
|
||||
13 c
|
||||
15 7 Bra 2
|
||||
15 7 CBra 2
|
||||
20 d
|
||||
22 7 Ket
|
||||
25 17 Ket
|
||||
28 25 Ket
|
||||
31 7 Bra 3
|
||||
31 7 CBra 3
|
||||
36 a
|
||||
38 7 Ket
|
||||
41 41 Ket
|
||||
@@ -242,8 +242,8 @@ Memory allocation (code space): 54
|
||||
/(?P<a>a)...(?P=a)bbb(?P>a)d/BM
|
||||
Memory allocation (code space): 43
|
||||
------------------------------------------------------------------
|
||||
0 36 Bra 0
|
||||
3 7 Bra 1
|
||||
0 36 Bra
|
||||
3 7 CBra 1
|
||||
8 a
|
||||
10 7 Ket
|
||||
13 Any
|
||||
@@ -262,7 +262,7 @@ Memory allocation (code space): 43
|
||||
/abc(?C255)de(?C)f/BM
|
||||
Memory allocation (code space): 31
|
||||
------------------------------------------------------------------
|
||||
0 27 Bra 0
|
||||
0 27 Bra
|
||||
3 abc
|
||||
9 Callout 255 10 1
|
||||
15 de
|
||||
@@ -275,7 +275,7 @@ Memory allocation (code space): 31
|
||||
/abcde/CBM
|
||||
Memory allocation (code space): 53
|
||||
------------------------------------------------------------------
|
||||
0 49 Bra 0
|
||||
0 49 Bra
|
||||
3 Callout 255 0 1
|
||||
9 a
|
||||
11 Callout 255 1 1
|
||||
@@ -294,7 +294,7 @@ Memory allocation (code space): 53
|
||||
/\x{100}/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
0 6 Bra
|
||||
3 \x{100}
|
||||
6 6 Ket
|
||||
9 End
|
||||
@@ -303,7 +303,7 @@ Memory allocation (code space): 10
|
||||
/\x{1000}/8BM
|
||||
Memory allocation (code space): 11
|
||||
------------------------------------------------------------------
|
||||
0 7 Bra 0
|
||||
0 7 Bra
|
||||
3 \x{1000}
|
||||
7 7 Ket
|
||||
10 End
|
||||
@@ -312,7 +312,7 @@ Memory allocation (code space): 11
|
||||
/\x{10000}/8BM
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra 0
|
||||
0 8 Bra
|
||||
3 \x{10000}
|
||||
8 8 Ket
|
||||
11 End
|
||||
@@ -321,7 +321,7 @@ Memory allocation (code space): 12
|
||||
/\x{100000}/8BM
|
||||
Memory allocation (code space): 12
|
||||
------------------------------------------------------------------
|
||||
0 8 Bra 0
|
||||
0 8 Bra
|
||||
3 \x{100000}
|
||||
8 8 Ket
|
||||
11 End
|
||||
@@ -330,7 +330,7 @@ Memory allocation (code space): 12
|
||||
/\x{1000000}/8BM
|
||||
Memory allocation (code space): 13
|
||||
------------------------------------------------------------------
|
||||
0 9 Bra 0
|
||||
0 9 Bra
|
||||
3 \x{1000000}
|
||||
9 9 Ket
|
||||
12 End
|
||||
@@ -339,7 +339,7 @@ Memory allocation (code space): 13
|
||||
/\x{4000000}/8BM
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra 0
|
||||
0 10 Bra
|
||||
3 \x{4000000}
|
||||
10 10 Ket
|
||||
13 End
|
||||
@@ -348,7 +348,7 @@ Memory allocation (code space): 14
|
||||
/\x{7fffFFFF}/8BM
|
||||
Memory allocation (code space): 14
|
||||
------------------------------------------------------------------
|
||||
0 10 Bra 0
|
||||
0 10 Bra
|
||||
3 \x{7fffffff}
|
||||
10 10 Ket
|
||||
13 End
|
||||
@@ -357,7 +357,7 @@ Memory allocation (code space): 14
|
||||
/[\x{ff}]/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
0 6 Bra
|
||||
3 \x{ff}
|
||||
6 6 Ket
|
||||
9 End
|
||||
@@ -366,7 +366,7 @@ Memory allocation (code space): 10
|
||||
/[\x{100}]/8BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\x{100}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -375,7 +375,7 @@ Memory allocation (code space): 15
|
||||
/\x80/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
0 6 Bra
|
||||
3 \x{80}
|
||||
6 6 Ket
|
||||
9 End
|
||||
@@ -384,7 +384,7 @@ Memory allocation (code space): 10
|
||||
/\xff/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra 0
|
||||
0 6 Bra
|
||||
3 \x{ff}
|
||||
6 6 Ket
|
||||
9 End
|
||||
@@ -393,7 +393,7 @@ Memory allocation (code space): 10
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/D8M
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
0 14 Bra
|
||||
3 A\x{2262}\x{391}.
|
||||
14 14 Ket
|
||||
17 End
|
||||
@@ -406,7 +406,7 @@ Need char = '.'
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/D8M
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra 0
|
||||
0 15 Bra
|
||||
3 \x{d55c}\x{ad6d}\x{c5b4}
|
||||
15 15 Ket
|
||||
18 End
|
||||
@@ -419,7 +419,7 @@ Need char = 180
|
||||
/\x{65e5}\x{672c}\x{8a9e}/D8M
|
||||
Memory allocation (code space): 19
|
||||
------------------------------------------------------------------
|
||||
0 15 Bra 0
|
||||
0 15 Bra
|
||||
3 \x{65e5}\x{672c}\x{8a9e}
|
||||
15 15 Ket
|
||||
18 End
|
||||
@@ -432,7 +432,7 @@ Need char = 158
|
||||
/[\x{100}]/8BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\x{100}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -441,7 +441,7 @@ Memory allocation (code space): 15
|
||||
/[Z\x{100}]/8BM
|
||||
Memory allocation (code space): 47
|
||||
------------------------------------------------------------------
|
||||
0 43 Bra 0
|
||||
0 43 Bra
|
||||
3 [Z\x{100}]
|
||||
43 43 Ket
|
||||
46 End
|
||||
@@ -450,7 +450,7 @@ Memory allocation (code space): 47
|
||||
/^[\x{100}\E-\Q\E\x{150}]/B8M
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
0 14 Bra
|
||||
3 ^
|
||||
4 [\x{100}-\x{150}]
|
||||
14 14 Ket
|
||||
@@ -460,7 +460,7 @@ Memory allocation (code space): 18
|
||||
/^[\QĀ\E-\QŐ\E]/B8M
|
||||
Memory allocation (code space): 18
|
||||
------------------------------------------------------------------
|
||||
0 14 Bra 0
|
||||
0 14 Bra
|
||||
3 ^
|
||||
4 [\x{100}-\x{150}]
|
||||
14 14 Ket
|
||||
@@ -473,7 +473,7 @@ Failed: missing terminating ] for character class at offset 15
|
||||
/[\p{L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -482,7 +482,7 @@ Memory allocation (code space): 15
|
||||
/[\p{^L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -491,7 +491,7 @@ Memory allocation (code space): 15
|
||||
/[\P{L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\P{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -500,7 +500,7 @@ Memory allocation (code space): 15
|
||||
/[\P{^L}]/BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\p{L}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -509,7 +509,7 @@ Memory allocation (code space): 15
|
||||
/[abc\p{L}\x{0660}]/8BM
|
||||
Memory allocation (code space): 50
|
||||
------------------------------------------------------------------
|
||||
0 46 Bra 0
|
||||
0 46 Bra
|
||||
3 [a-c\p{L}\x{660}]
|
||||
46 46 Ket
|
||||
49 End
|
||||
@@ -518,7 +518,7 @@ Memory allocation (code space): 50
|
||||
/[\p{Nd}]/8BM
|
||||
Memory allocation (code space): 15
|
||||
------------------------------------------------------------------
|
||||
0 11 Bra 0
|
||||
0 11 Bra
|
||||
3 [\p{Nd}]
|
||||
11 11 Ket
|
||||
14 End
|
||||
@@ -527,7 +527,7 @@ Memory allocation (code space): 15
|
||||
/[\p{Nd}+-]+/8BM
|
||||
Memory allocation (code space): 48
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra 0
|
||||
0 44 Bra
|
||||
3 [+\-\p{Nd}]+
|
||||
44 44 Ket
|
||||
47 End
|
||||
@@ -536,7 +536,7 @@ Memory allocation (code space): 48
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iBM
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
0 21 Bra
|
||||
3 NC A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
@@ -545,7 +545,7 @@ Memory allocation (code space): 25
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8BM
|
||||
Memory allocation (code space): 25
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra 0
|
||||
0 21 Bra
|
||||
3 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
21 21 Ket
|
||||
24 End
|
||||
@@ -554,10 +554,116 @@ Memory allocation (code space): 25
|
||||
/[\x{105}-\x{109}]/8iBM
|
||||
Memory allocation (code space): 17
|
||||
------------------------------------------------------------------
|
||||
0 13 Bra 0
|
||||
0 13 Bra
|
||||
3 [\x{104}-\x{109}]
|
||||
13 13 Ket
|
||||
16 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( ( (?(1)0|) )* )/xBM
|
||||
Memory allocation (code space): 38
|
||||
------------------------------------------------------------------
|
||||
0 34 Bra
|
||||
3 28 CBra 1
|
||||
8 Brazero
|
||||
9 19 SCBra 2
|
||||
14 8 Cond
|
||||
17 1 Cond ref
|
||||
20 0
|
||||
22 3 Alt
|
||||
25 11 Ket
|
||||
28 19 KetRmax
|
||||
31 28 Ket
|
||||
34 34 Ket
|
||||
37 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/( (?(1)0|)* )/xBM
|
||||
Memory allocation (code space): 30
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
3 20 CBra 1
|
||||
8 Brazero
|
||||
9 8 SCond
|
||||
12 1 Cond ref
|
||||
15 0
|
||||
17 3 Alt
|
||||
20 11 KetRmax
|
||||
23 20 Ket
|
||||
26 26 Ket
|
||||
29 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[a]/8BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 a
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 \xaa
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\xaa]/8BM
|
||||
Memory allocation (code space): 10
|
||||
------------------------------------------------------------------
|
||||
0 6 Bra
|
||||
3 \x{aa}
|
||||
6 6 Ket
|
||||
9 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 [^a]
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^a]/8BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 [^a]
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/BM
|
||||
Memory allocation (code space): 9
|
||||
------------------------------------------------------------------
|
||||
0 5 Bra
|
||||
3 [^\xaa]
|
||||
5 5 Ket
|
||||
8 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[^\xaa]/8BM
|
||||
Memory allocation (code space): 40
|
||||
------------------------------------------------------------------
|
||||
0 36 Bra
|
||||
3 [\x00-\xa9\xab-\xff] (neg)
|
||||
36 36 Ket
|
||||
39 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/ End of testinput10 /
|
||||
|
||||
+674
-280
File diff suppressed because it is too large
Load Diff
+1
-1
@@ -148,7 +148,7 @@ No match
|
||||
|
||||
/[[:alpha:]][[:lower:]][[:upper:]]/DZLfr_FR
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff]
|
||||
[a-z\xb5\xdf-\xf6\xf8-\xff]
|
||||
[A-Z\xc0-\xd6\xd8-\xde]
|
||||
|
||||
+20
@@ -918,4 +918,24 @@ No match
|
||||
a
|
||||
0: a
|
||||
|
||||
/\S\S/8g
|
||||
A\x{a3}BC
|
||||
0: A\x{a3}
|
||||
0: BC
|
||||
|
||||
/\S{2}/8g
|
||||
A\x{a3}BC
|
||||
0: A\x{a3}
|
||||
0: BC
|
||||
|
||||
/\W\W/8g
|
||||
+\x{a3}==
|
||||
0: +\x{a3}
|
||||
0: ==
|
||||
|
||||
/\W{2}/8g
|
||||
+\x{a3}==
|
||||
0: +\x{a3}
|
||||
0: ==
|
||||
|
||||
/ End of testinput4 /
|
||||
|
||||
+145
-96
@@ -1,6 +1,6 @@
|
||||
/\x{100}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}
|
||||
Ket
|
||||
End
|
||||
@@ -12,7 +12,7 @@ Need char = 128
|
||||
|
||||
/\x{1000}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{1000}
|
||||
Ket
|
||||
End
|
||||
@@ -24,7 +24,7 @@ Need char = 128
|
||||
|
||||
/\x{10000}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{10000}
|
||||
Ket
|
||||
End
|
||||
@@ -36,7 +36,7 @@ Need char = 128
|
||||
|
||||
/\x{100000}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100000}
|
||||
Ket
|
||||
End
|
||||
@@ -48,7 +48,7 @@ Need char = 128
|
||||
|
||||
/\x{1000000}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{1000000}
|
||||
Ket
|
||||
End
|
||||
@@ -60,7 +60,7 @@ Need char = 128
|
||||
|
||||
/\x{4000000}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{4000000}
|
||||
Ket
|
||||
End
|
||||
@@ -72,7 +72,7 @@ Need char = 128
|
||||
|
||||
/\x{7fffFFFF}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{7fffffff}
|
||||
Ket
|
||||
End
|
||||
@@ -84,7 +84,7 @@ Need char = 191
|
||||
|
||||
/[\x{ff}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{ff}
|
||||
Ket
|
||||
End
|
||||
@@ -96,7 +96,7 @@ Need char = 191
|
||||
|
||||
/[\x{100}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x{100}]
|
||||
Ket
|
||||
End
|
||||
@@ -118,7 +118,7 @@ Failed: character value in \x{...} sequence is too large at offset 12
|
||||
|
||||
/\x80/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{80}
|
||||
Ket
|
||||
End
|
||||
@@ -130,7 +130,7 @@ Need char = 128
|
||||
|
||||
/\xff/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{ff}
|
||||
Ket
|
||||
End
|
||||
@@ -142,7 +142,7 @@ Need char = 191
|
||||
|
||||
/\x{0041}\x{2262}\x{0391}\x{002e}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
A\x{2262}\x{391}.
|
||||
Ket
|
||||
End
|
||||
@@ -156,7 +156,7 @@ Need char = '.'
|
||||
|
||||
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{d55c}\x{ad6d}\x{c5b4}
|
||||
Ket
|
||||
End
|
||||
@@ -170,7 +170,7 @@ Need char = 180
|
||||
|
||||
/\x{65e5}\x{672c}\x{8a9e}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{65e5}\x{672c}\x{8a9e}
|
||||
Ket
|
||||
End
|
||||
@@ -184,7 +184,7 @@ Need char = 158
|
||||
|
||||
/\x{80}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{80}
|
||||
Ket
|
||||
End
|
||||
@@ -196,7 +196,7 @@ Need char = 128
|
||||
|
||||
/\x{084}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{84}
|
||||
Ket
|
||||
End
|
||||
@@ -208,7 +208,7 @@ Need char = 132
|
||||
|
||||
/\x{104}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{104}
|
||||
Ket
|
||||
End
|
||||
@@ -220,7 +220,7 @@ Need char = 132
|
||||
|
||||
/\x{861}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{861}
|
||||
Ket
|
||||
End
|
||||
@@ -232,7 +232,7 @@ Need char = 161
|
||||
|
||||
/\x{212ab}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{212ab}
|
||||
Ket
|
||||
End
|
||||
@@ -244,7 +244,7 @@ Need char = 171
|
||||
|
||||
/.{3,5}X/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
Any{3}
|
||||
Any{0,2}
|
||||
X
|
||||
@@ -262,7 +262,7 @@ Need char = 'X'
|
||||
|
||||
/.{3,5}?/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
Any{3}
|
||||
Any{0,2}?
|
||||
Ket
|
||||
@@ -334,7 +334,7 @@ can't tell the difference.) --/
|
||||
|
||||
/^[ab]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
^
|
||||
[ab]
|
||||
Ket
|
||||
@@ -357,13 +357,14 @@ No match
|
||||
|
||||
/^[^ab]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
^
|
||||
[\x00-`c-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: anchored utf8
|
||||
No first char
|
||||
No need char
|
||||
@@ -380,12 +381,13 @@ No match
|
||||
|
||||
/[^ab\xC0-\xF0]/8SDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x00-`c-\xbf\xf1-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: utf8
|
||||
No first char
|
||||
No need char
|
||||
@@ -416,7 +418,7 @@ No match
|
||||
|
||||
/Ā{3,4}/8SDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}{3}
|
||||
\x{100}?
|
||||
Ket
|
||||
@@ -433,8 +435,8 @@ Study returned NULL
|
||||
|
||||
/(\x{100}+|x)/8SDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra 1
|
||||
Bra
|
||||
CBra 1
|
||||
\x{100}+
|
||||
Alt
|
||||
x
|
||||
@@ -451,8 +453,8 @@ Starting byte set: x \xc4
|
||||
|
||||
/(\x{100}*a|x)/8SDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra 1
|
||||
Bra
|
||||
CBra 1
|
||||
\x{100}*+
|
||||
a
|
||||
Alt
|
||||
@@ -470,8 +472,8 @@ Starting byte set: a x \xc4
|
||||
|
||||
/(\x{100}{0,2}a|x)/8SDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra 1
|
||||
Bra
|
||||
CBra 1
|
||||
\x{100}{0,2}
|
||||
a
|
||||
Alt
|
||||
@@ -489,8 +491,8 @@ Starting byte set: a x \xc4
|
||||
|
||||
/(\x{100}{1,2}a|x)/8SDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra 1
|
||||
Bra
|
||||
CBra 1
|
||||
\x{100}
|
||||
\x{100}{0,1}
|
||||
a
|
||||
@@ -533,7 +535,7 @@ No match
|
||||
|
||||
/\x{100}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}
|
||||
Ket
|
||||
End
|
||||
@@ -545,7 +547,7 @@ Need char = 128
|
||||
|
||||
/\x{100}*/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*
|
||||
Ket
|
||||
End
|
||||
@@ -558,7 +560,7 @@ No need char
|
||||
|
||||
/a\x{100}*/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
a
|
||||
\x{100}*
|
||||
Ket
|
||||
@@ -572,7 +574,7 @@ No need char
|
||||
|
||||
/ab\x{100}*/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
ab
|
||||
\x{100}*
|
||||
Ket
|
||||
@@ -586,7 +588,7 @@ Need char = 'b'
|
||||
|
||||
/a\x{100}\x{101}*/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
a\x{100}
|
||||
\x{101}*
|
||||
Ket
|
||||
@@ -600,7 +602,7 @@ Need char = 128
|
||||
|
||||
/a\x{100}\x{101}+/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
a\x{100}
|
||||
\x{101}+
|
||||
Ket
|
||||
@@ -614,7 +616,7 @@ Need char = 129
|
||||
|
||||
/\x{100}*A/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*+
|
||||
A
|
||||
Ket
|
||||
@@ -630,7 +632,7 @@ Need char = 'A'
|
||||
|
||||
/\x{100}*\d(?R)/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*+
|
||||
\d
|
||||
Once
|
||||
@@ -647,31 +649,33 @@ No need char
|
||||
|
||||
/[^\x{c4}]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[^\xc4]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[^\x{c4}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x00-\xc3\xc5-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: utf8
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[\x{100}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x{100}]
|
||||
Ket
|
||||
End
|
||||
@@ -691,7 +695,7 @@ No match
|
||||
|
||||
/[Z\x{100}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[Z\x{100}]
|
||||
Ket
|
||||
End
|
||||
@@ -726,7 +730,7 @@ No match
|
||||
|
||||
/[z-\x{100}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[z-\x{100}]
|
||||
Ket
|
||||
End
|
||||
@@ -738,7 +742,7 @@ No need char
|
||||
|
||||
/[z\Qa-d]Ā\E]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\-\]adz\x{100}]
|
||||
Ket
|
||||
End
|
||||
@@ -754,7 +758,7 @@ No need char
|
||||
|
||||
/[\xFF]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\xff
|
||||
Ket
|
||||
End
|
||||
@@ -768,7 +772,7 @@ No need char
|
||||
|
||||
/[\xff]/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{ff}
|
||||
Ket
|
||||
End
|
||||
@@ -782,24 +786,26 @@ Need char = 191
|
||||
|
||||
/[^\xFF]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[^\xff]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
No options
|
||||
No first char
|
||||
No need char
|
||||
|
||||
/[^\xff]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x00-\xfe] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Contains explicit CR or LF match
|
||||
Options: utf8
|
||||
No first char
|
||||
No need char
|
||||
@@ -839,7 +845,7 @@ Failed: invalid UTF-8 string at offset 1
|
||||
|
||||
/ÃÃÃxxx/8?DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\X{c0}\X{c0}\X{c0}xxx
|
||||
Ket
|
||||
End
|
||||
@@ -887,19 +893,27 @@ No match
|
||||
\xf1\x8f\x80\x80
|
||||
No match
|
||||
\xf8\x88\x80\x80\x80
|
||||
No match
|
||||
Error -10
|
||||
\xf9\x87\x80\x80\x80
|
||||
No match
|
||||
Error -10
|
||||
\xfc\x84\x80\x80\x80\x80
|
||||
No match
|
||||
Error -10
|
||||
\xfd\x83\x80\x80\x80\x80
|
||||
Error -10
|
||||
\?\xf8\x88\x80\x80\x80
|
||||
No match
|
||||
\?\xf9\x87\x80\x80\x80
|
||||
No match
|
||||
\?\xfc\x84\x80\x80\x80\x80
|
||||
No match
|
||||
\?\xfd\x83\x80\x80\x80\x80
|
||||
No match
|
||||
|
||||
/\x{100}abc(xyz(?1))/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}abc
|
||||
Bra 1
|
||||
CBra 1
|
||||
xyz
|
||||
Once
|
||||
Recurse
|
||||
@@ -915,10 +929,10 @@ Need char = 'z'
|
||||
|
||||
/[^\x{100}]abc(xyz(?1))/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[^\x{100}]
|
||||
abc
|
||||
Bra 1
|
||||
CBra 1
|
||||
xyz
|
||||
Once
|
||||
Recurse
|
||||
@@ -928,16 +942,17 @@ Need char = 'z'
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 1
|
||||
Contains explicit CR or LF match
|
||||
Options: utf8
|
||||
No first char
|
||||
Need char = 'z'
|
||||
|
||||
/[ab\x{100}]abc(xyz(?1))/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[ab\x{100}]
|
||||
abc
|
||||
Bra 1
|
||||
CBra 1
|
||||
xyz
|
||||
Once
|
||||
Recurse
|
||||
@@ -953,11 +968,11 @@ Need char = 'z'
|
||||
|
||||
/(\x{100}(b(?2)c))?/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
Brazero
|
||||
Bra 1
|
||||
CBra 1
|
||||
\x{100}
|
||||
Bra 2
|
||||
CBra 2
|
||||
b
|
||||
Once
|
||||
Recurse
|
||||
@@ -975,12 +990,12 @@ No need char
|
||||
|
||||
/(\x{100}(b(?2)c)){0,2}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
Brazero
|
||||
Bra 0
|
||||
Bra 1
|
||||
Bra
|
||||
CBra 1
|
||||
\x{100}
|
||||
Bra 2
|
||||
CBra 2
|
||||
b
|
||||
Once
|
||||
Recurse
|
||||
@@ -989,9 +1004,9 @@ No need char
|
||||
Ket
|
||||
Ket
|
||||
Brazero
|
||||
Bra 1
|
||||
CBra 1
|
||||
\x{100}
|
||||
Bra 2
|
||||
CBra 2
|
||||
b
|
||||
Once
|
||||
Recurse
|
||||
@@ -1010,11 +1025,11 @@ No need char
|
||||
|
||||
/(\x{100}(b(?1)c))?/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
Brazero
|
||||
Bra 1
|
||||
CBra 1
|
||||
\x{100}
|
||||
Bra 2
|
||||
CBra 2
|
||||
b
|
||||
Once
|
||||
Recurse
|
||||
@@ -1032,12 +1047,12 @@ No need char
|
||||
|
||||
/(\x{100}(b(?1)c)){0,2}/DZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
Brazero
|
||||
Bra 0
|
||||
Bra 1
|
||||
Bra
|
||||
CBra 1
|
||||
\x{100}
|
||||
Bra 2
|
||||
CBra 2
|
||||
b
|
||||
Once
|
||||
Recurse
|
||||
@@ -1046,9 +1061,9 @@ No need char
|
||||
Ket
|
||||
Ket
|
||||
Brazero
|
||||
Bra 1
|
||||
CBra 1
|
||||
\x{100}
|
||||
Bra 2
|
||||
CBra 2
|
||||
b
|
||||
Once
|
||||
Recurse
|
||||
@@ -1081,7 +1096,7 @@ No need char
|
||||
|
||||
/^\ሴ/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
^
|
||||
\x{1234}
|
||||
Ket
|
||||
@@ -1107,7 +1122,7 @@ Need char = 191
|
||||
|
||||
/\x{100}*\d/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*+
|
||||
\d
|
||||
Ket
|
||||
@@ -1121,7 +1136,7 @@ No need char
|
||||
|
||||
/\x{100}*\s/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*+
|
||||
\s
|
||||
Ket
|
||||
@@ -1135,7 +1150,7 @@ No need char
|
||||
|
||||
/\x{100}*\w/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*+
|
||||
\w
|
||||
Ket
|
||||
@@ -1149,7 +1164,7 @@ No need char
|
||||
|
||||
/\x{100}*\D/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*
|
||||
\D
|
||||
Ket
|
||||
@@ -1163,7 +1178,7 @@ No need char
|
||||
|
||||
/\x{100}*\S/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*
|
||||
\S
|
||||
Ket
|
||||
@@ -1177,7 +1192,7 @@ No need char
|
||||
|
||||
/\x{100}*\W/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}*
|
||||
\W
|
||||
Ket
|
||||
@@ -1191,7 +1206,7 @@ No need char
|
||||
|
||||
/\x{100}+\x{200}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}++
|
||||
\x{200}
|
||||
Ket
|
||||
@@ -1205,7 +1220,7 @@ Need char = 128
|
||||
|
||||
/\x{100}+X/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
\x{100}++
|
||||
X
|
||||
Ket
|
||||
@@ -1219,7 +1234,7 @@ Need char = 'X'
|
||||
|
||||
/X+\x{200}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
X++
|
||||
\x{200}
|
||||
Ket
|
||||
@@ -1256,7 +1271,7 @@ Matched, but too many substrings
|
||||
|
||||
/^[\x{100}\E-\Q\E\x{150}]/BZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
^
|
||||
[\x{100}-\x{150}]
|
||||
Ket
|
||||
@@ -1265,7 +1280,7 @@ Matched, but too many substrings
|
||||
|
||||
/^[\QĀ\E-\QŐ\E]/BZ8
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
^
|
||||
[\x{100}-\x{150}]
|
||||
Ket
|
||||
@@ -1431,7 +1446,7 @@ No match
|
||||
|
||||
/[\h]/8BZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||||
Ket
|
||||
End
|
||||
@@ -1441,7 +1456,7 @@ No match
|
||||
|
||||
/[\h]{3,}/8BZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]{3,}
|
||||
Ket
|
||||
End
|
||||
@@ -1451,7 +1466,7 @@ No match
|
||||
|
||||
/[\v]/8BZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x0a-\x0d\x85\x{2028}-\x{2029}]
|
||||
Ket
|
||||
End
|
||||
@@ -1459,7 +1474,7 @@ No match
|
||||
|
||||
/[\H]/8BZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{7fffffff}]
|
||||
Ket
|
||||
End
|
||||
@@ -1467,10 +1482,44 @@ No match
|
||||
|
||||
/[\V]/8BZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{2029}-\x{7fffffff}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.*$/8<any>
|
||||
\x{1ec5}
|
||||
0: \x{1ec5}
|
||||
|
||||
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
|
||||
|
||||
/X/8
|
||||
\x{0}\x{d7ff}\x{e000}\x{10ffff}
|
||||
No match
|
||||
\x{d800}
|
||||
Error -10
|
||||
\x{d800}\?
|
||||
No match
|
||||
\x{da00}
|
||||
Error -10
|
||||
\x{da00}\?
|
||||
No match
|
||||
\x{dfff}
|
||||
Error -10
|
||||
\x{dfff}\?
|
||||
No match
|
||||
\x{110000}
|
||||
Error -10
|
||||
\x{110000}\?
|
||||
No match
|
||||
\x{2000000}
|
||||
Error -10
|
||||
\x{2000000}\?
|
||||
No match
|
||||
\x{7fffffff}
|
||||
Error -10
|
||||
\x{7fffffff}\?
|
||||
No match
|
||||
|
||||
/ End of testinput5 /
|
||||
|
||||
+86
-16
@@ -99,7 +99,7 @@ No match
|
||||
No match
|
||||
|
||||
/^\p{Cs}/8
|
||||
\x{dfff}
|
||||
\?\x{dfff}
|
||||
0: \x{dfff}
|
||||
** Failers
|
||||
No match
|
||||
@@ -113,7 +113,7 @@ No match
|
||||
No match
|
||||
Z
|
||||
No match
|
||||
\x{dfff}
|
||||
\x{e000}
|
||||
No match
|
||||
|
||||
/^\p{Lm}/8
|
||||
@@ -550,7 +550,7 @@ No match
|
||||
|
||||
/[\p{L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\p{L}]
|
||||
Ket
|
||||
End
|
||||
@@ -562,7 +562,7 @@ No need char
|
||||
|
||||
/[\p{^L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\P{L}]
|
||||
Ket
|
||||
End
|
||||
@@ -574,7 +574,7 @@ No need char
|
||||
|
||||
/[\P{L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\P{L}]
|
||||
Ket
|
||||
End
|
||||
@@ -586,7 +586,7 @@ No need char
|
||||
|
||||
/[\P{^L}]/DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\p{L}]
|
||||
Ket
|
||||
End
|
||||
@@ -598,7 +598,7 @@ No need char
|
||||
|
||||
/[abc\p{L}\x{0660}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[a-c\p{L}\x{660}]
|
||||
Ket
|
||||
End
|
||||
@@ -610,7 +610,7 @@ No need char
|
||||
|
||||
/[\p{Nd}]/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\p{Nd}]
|
||||
Ket
|
||||
End
|
||||
@@ -624,7 +624,7 @@ No need char
|
||||
|
||||
/[\p{Nd}+-]+/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[+\-\p{Nd}]+
|
||||
Ket
|
||||
End
|
||||
@@ -779,7 +779,7 @@ No match
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
NC A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
@@ -791,7 +791,7 @@ No need char
|
||||
|
||||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
@@ -803,7 +803,7 @@ Need char = 176
|
||||
|
||||
/AB\x{1fb0}/8DZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
AB\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
@@ -815,7 +815,7 @@ Need char = 176
|
||||
|
||||
/AB\x{1fb0}/8DZi
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
NC AB\x{1fb0}
|
||||
Ket
|
||||
End
|
||||
@@ -857,7 +857,7 @@ Need char = 'B' (caseless)
|
||||
|
||||
/[\x{105}-\x{109}]/8iDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[\x{104}-\x{109}]
|
||||
Ket
|
||||
End
|
||||
@@ -881,7 +881,7 @@ No match
|
||||
|
||||
/[z-\x{100}]/8iDZ
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[Z\x{39c}\x{178}z-\x{101}]
|
||||
Ket
|
||||
End
|
||||
@@ -919,7 +919,7 @@ No match
|
||||
|
||||
/[z-\x{100}]/8DZi
|
||||
------------------------------------------------------------------
|
||||
Bra 0
|
||||
Bra
|
||||
[Z\x{39c}\x{178}z-\x{101}]
|
||||
Ket
|
||||
End
|
||||
@@ -1452,4 +1452,74 @@ was broken in all cases./
|
||||
123abc\xc4\xc5zz
|
||||
0: abc\xc4
|
||||
|
||||
/\X{1,3}\d/
|
||||
\x8aBCD
|
||||
No match
|
||||
|
||||
/\X?\d/
|
||||
\x8aBCD
|
||||
No match
|
||||
|
||||
/\P{L}?\d/
|
||||
\x8aBCD
|
||||
No match
|
||||
|
||||
/[\PPP\x8a]{1,}\x80/
|
||||
A\x80
|
||||
0: A\x80
|
||||
|
||||
/(?:[\PPa*]*){8,}/
|
||||
|
||||
/[\P{Any}]/BZ
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\P{Any}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[\P{Any}\E]/BZ
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\P{Any}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(\P{Yi}+\277)/
|
||||
|
||||
/(\P{Yi}+\277)?/
|
||||
|
||||
/(?<=\P{Yi}{3}A)X/
|
||||
|
||||
/\p{Yi}+(\P{Yi}+)(?1)/
|
||||
|
||||
/(\P{Yi}{2}\277)?/
|
||||
|
||||
/[\P{Yi}A]/
|
||||
|
||||
/[\P{Yi}\P{Yi}\P{Yi}A]/
|
||||
|
||||
/[^\P{Yi}A]/
|
||||
|
||||
/[^\P{Yi}\P{Yi}\P{Yi}A]/
|
||||
|
||||
/(\P{Yi}*\277)*/
|
||||
|
||||
/(\P{Yi}*?\277)*/
|
||||
|
||||
/(\p{Yi}*+\277)*/
|
||||
|
||||
/(\P{Yi}?\277)*/
|
||||
|
||||
/(\P{Yi}??\277)*/
|
||||
|
||||
/(\p{Yi}?+\277)*/
|
||||
|
||||
/(\P{Yi}{0,3}\277)*/
|
||||
|
||||
/(\P{Yi}{0,3}?\277)*/
|
||||
|
||||
/(\p{Yi}{0,3}+\277)*/
|
||||
|
||||
/ End of testinput6 /
|
||||
|
||||
+16
@@ -7072,4 +7072,20 @@ No match
|
||||
>\x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c
|
||||
0: \x0a\x0dX\x0aY\x0a\x0bZZZ\x0aAAA\x0bNNN\x0c
|
||||
|
||||
/.+A/<crlf>
|
||||
\r\nA
|
||||
No match
|
||||
|
||||
/\nA/<crlf>
|
||||
\r\nA
|
||||
0: \x0aA
|
||||
|
||||
/[\r\n]A/<crlf>
|
||||
\r\nA
|
||||
0: \x0aA
|
||||
|
||||
/(\r|\n)A/<crlf>
|
||||
\r\nA
|
||||
0: \x0aA
|
||||
|
||||
/ End of testinput7 /
|
||||
|
||||
+2
-2
@@ -271,7 +271,7 @@ No match
|
||||
No match
|
||||
|
||||
/^\p{Cs}/8
|
||||
\x{dfff}
|
||||
\?\x{dfff}
|
||||
0: \x{dfff}
|
||||
** Failers
|
||||
No match
|
||||
@@ -285,7 +285,7 @@ No match
|
||||
No match
|
||||
Z
|
||||
No match
|
||||
\x{dfff}
|
||||
\x{e000}
|
||||
No match
|
||||
|
||||
/^\p{Lm}/8
|
||||
|
||||
@@ -23,10 +23,6 @@
|
||||
* - should use fcall info cache (enhancement)
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "php.h"
|
||||
#include "php_ini.h"
|
||||
#include "php_globals.h"
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
--TEST--
|
||||
Bug #42298 (pcre gives bogus results with /u)
|
||||
--FILE--
|
||||
<?php
|
||||
$str = "A\xc2\xa3BC";
|
||||
preg_match_all('/\S\S/u', $str, $m); var_dump($m);
|
||||
preg_match_all('/\S{2}/u', $str, $m); var_dump($m);
|
||||
|
||||
$str = "A\xe2\x82\xac ";
|
||||
preg_match_all('/\W\W/u', $str, $m); var_dump($m);
|
||||
preg_match_all('/\W{2}/u', $str, $m); var_dump($m);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
array(1) {
|
||||
[0]=>
|
||||
array(2) {
|
||||
[0]=>
|
||||
string(3) "A£"
|
||||
[1]=>
|
||||
string(2) "BC"
|
||||
}
|
||||
}
|
||||
array(1) {
|
||||
[0]=>
|
||||
array(2) {
|
||||
[0]=>
|
||||
string(3) "A£"
|
||||
[1]=>
|
||||
string(2) "BC"
|
||||
}
|
||||
}
|
||||
array(1) {
|
||||
[0]=>
|
||||
array(1) {
|
||||
[0]=>
|
||||
string(4) "€ "
|
||||
}
|
||||
}
|
||||
array(1) {
|
||||
[0]=>
|
||||
array(1) {
|
||||
[0]=>
|
||||
string(4) "€ "
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user