upgrade pcre to version 7.0

2026-03-29 19:52:20 +02:00 · 2007-02-09 19:48:47 +00:00
parent e6d69595af
commit b3e66c616d
65 changed files with 10215 additions and 4653 deletions
--- a/1
+++ b/1
@@ -2,6 +2,7 @@ PHP                                                                        NEWS
 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
 ?? ??? 2007, PHP 5.2.2
 - Upgraded SQLite 3 to version 3.3.12 (Ilia)
+- Upgraded PCRE to version 7.0 (Nuno)
 - Add --ri switch to CLI which allows to check extension information. (Marcus)
 - Fixed bug #40410 (ext/posix does not compile on MacOS 10.3.9). (Tony)
 - Fixed bug #39836 (SplObjectStorage empty after unserialize). (Marcus)
--- a/ext/pcre/config.w32
+++ b/ext/pcre/config.w32
@@ -5,8 +5,8 @@ ARG_WITH("pcre-regex", "Perl Compatible Regular Expressions", "yes");

 if (PHP_PCRE_REGEX == "yes") {
 	EXTENSION("pcre", "php_pcre.c",	PHP_PCRE_REGEX_SHARED,
-		"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DNO_RECURSE -Iext/pcre/pcrelib");
-	ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucp_searchfuncs.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
+		"-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DEBCDIC=0 -DNO_RECURSE -Iext/pcre/pcrelib");
+	ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucp_searchfuncs.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_info.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_try_flipped.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c", "pcre");
 	ADD_DEF_FILE("ext\\pcre\\php_pcre.def");

 	AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
--- a/ext/pcre/config0.m4
+++ b/ext/pcre/config0.m4
@@ -13,7 +13,7 @@ PHP_ARG_WITH(pcre-regex,for PCRE support,

 if test "$PHP_PCRE_REGEX" != "no"; then
  if test "$PHP_PCRE_REGEX" = "yes"; then
-    PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -I@ext_srcdir@/pcrelib)
+    PHP_NEW_EXTENSION(pcre, pcrelib/pcre_chartables.c pcrelib/pcre_ucp_searchfuncs.c pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c pcrelib/pcre_info.c pcrelib/pcre_maketables.c pcrelib/pcre_newline.c pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c pcrelib/pcre_tables.c pcrelib/pcre_try_flipped.c pcrelib/pcre_valid_utf8.c pcrelib/pcre_version.c pcrelib/pcre_xclass.c php_pcre.c, $ext_shared,,-DEXPORT= -DNEWLINE=10 -DSUPPORT_UTF8 -DSUPPORT_UCP -DLINK_SIZE=2 -DPOSIX_MALLOC_THRESHOLD=10 -DMATCH_LIMIT=10000000 -DMATCH_LIMIT_RECURSION=10000000 -DMAX_NAME_SIZE=32 -DMAX_NAME_COUNT=10000 -DMAX_DUPLENGTH=30000 -DEBCDIC=0 -I@ext_srcdir@/pcrelib)
    PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
    PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
    AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
--- a/ext/pcre/pcrelib/COPYING
+++ b/ext/pcre/pcrelib/COPYING
@@ -4,7 +4,7 @@ PCRE LICENCE
 PCRE is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.

-Release 6 of PCRE is distributed under the terms of the "BSD" licence, as
+Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
 specified below. The documentation for PCRE, supplied in the "doc"
 directory, is distributed under the same terms as the software itself.

--- a/ext/pcre/pcrelib/ChangeLog
+++ b/ext/pcre/pcrelib/ChangeLog
@@ -1,6 +1,279 @@
 ChangeLog for PCRE
 ------------------

+Version 7.0 19-Dec-06
+---------------------
+
+ 1. Fixed a signed/unsigned compiler warning in pcre_compile.c, shown up by
+    moving to gcc 4.1.1.
+
+ 2. The -S option for pcretest uses setrlimit(); I had omitted to #include
+    sys/time.h, which is documented as needed for this function. It doesn't
+    seem to matter on Linux, but it showed up on some releases of OS X.
+
+ 3. It seems that there are systems where bytes whose values are greater than
+    127 match isprint() in the "C" locale. The "C" locale should be the
+    default when a C program starts up. In most systems, only ASCII printing
+    characters match isprint(). This difference caused the output from pcretest
+    to vary, making some of the tests fail. I have changed pcretest so that:
+
+    (a) When it is outputting text in the compiled version of a pattern, bytes
+        other than 32-126 are always shown as hex escapes.
+
+    (b) When it is outputting text that is a matched part of a subject string,
+        it does the same, unless a different locale has been set for the match
+        (using the /L modifier). In this case, it uses isprint() to decide.
+
+ 4. Fixed a major bug that caused incorrect computation of the amount of memory
+    required for a compiled pattern when options that changed within the
+    pattern affected the logic of the preliminary scan that determines the
+    length. The relevant options are -x, and -i in UTF-8 mode. The result was
+    that the computed length was too small. The symptoms of this bug were
+    either the PCRE error "internal error: code overflow" from pcre_compile(),
+    or a glibc crash with a message such as "pcretest: free(): invalid next
+    size (fast)". Examples of patterns that provoked this bug (shown in
+    pcretest format) are:
+
+      /(?-x: )/x
+      /(?x)(?-x: \s*#\s*)/
+      /((?i)[\x{c0}])/8
+      /(?i:[\x{c0}])/8
+
+    HOWEVER: Change 17 below makes this fix obsolete as the memory computation
+    is now done differently.
+
+ 5. Applied patches from Google to: (a) add a QuoteMeta function to the C++
+    wrapper classes; (b) implement a new function in the C++ scanner that is
+    more efficient than the old way of doing things because it avoids levels of
+    recursion in the regex matching; (c) add a paragraph to the documentation
+    for the FullMatch() function.
+
+ 6. The escape sequence \n was being treated as whatever was defined as
+    "newline". Not only was this contrary to the documentation, which states
+    that \n is character 10 (hex 0A), but it also went horribly wrong when
+    "newline" was defined as CRLF. This has been fixed.
+
+ 7. In pcre_dfa_exec.c the value of an unsigned integer (the variable called c)
+    was being set to -1 for the "end of line" case (supposedly a value that no
+    character can have). Though this value is never used (the check for end of
+    line is "zero bytes in current character"), it caused compiler complaints.
+    I've changed it to 0xffffffff.
+
+ 8. In pcre_version.c, the version string was being built by a sequence of
+    C macros that, in the event of PCRE_PRERELEASE being defined as an empty
+    string (as it is for production releases) called a macro with an empty
+    argument. The C standard says the result of this is undefined. The gcc
+    compiler treats it as an empty string (which was what was wanted) but it is
+    reported that Visual C gives an error. The source has been hacked around to
+    avoid this problem.
+
+ 9. On the advice of a Windows user, included <io.h> and <fcntl.h> in Windows
+    builds of pcretest, and changed the call to _setmode() to use _O_BINARY
+    instead of 0x8000. Made all the #ifdefs test both _WIN32 and WIN32 (not all
+    of them did).
+
+10. Originally, pcretest opened its input and output without "b"; then I was
+    told that "b" was needed in some environments, so it was added for release
+    5.0 to both the input and output. (It makes no difference on Unix-like
+    systems.) Later I was told that it is wrong for the input on Windows. I've
+    now abstracted the modes into two macros, to make it easier to fiddle with
+    them, and removed "b" from the input mode under Windows.
+
+11. Added pkgconfig support for the C++ wrapper library, libpcrecpp.
+
+12. Added -help and --help to pcretest as an official way of being reminded
+    of the options.
+
+13. Removed some redundant semicolons after macro calls in pcrecpparg.h.in
+    and pcrecpp.cc because they annoy compilers at high warning levels.
+
+14. A bit of tidying/refactoring in pcre_exec.c in the main bumpalong loop.
+
+15. Fixed an occurrence of == in configure.ac that should have been = (shell
+    scripts are not C programs :-) and which was not noticed because it works
+    on Linux.
+
+16. pcretest is supposed to handle any length of pattern and data line (as one
+    line or as a continued sequence of lines) by extending its input buffer if
+    necessary. This feature was broken for very long pattern lines, leading to
+    a string of junk being passed to pcre_compile() if the pattern was longer
+    than about 50K.
+
+17. I have done a major re-factoring of the way pcre_compile() computes the
+    amount of memory needed for a compiled pattern. Previously, there was code
+    that made a preliminary scan of the pattern in order to do this. That was
+    OK when PCRE was new, but as the facilities have expanded, it has become
+    harder and harder to keep it in step with the real compile phase, and there
+    have been a number of bugs (see for example, 4 above). I have now found a
+    cunning way of running the real compile function in a "fake" mode that
+    enables it to compute how much memory it would need, while actually only
+    ever using a few hundred bytes of working memory and without too many
+    tests of the mode. This should make future maintenance and development
+    easier. A side effect of this work is that the limit of 200 on the nesting
+    depth of parentheses has been removed (though this was never a serious
+    limitation, I suspect). However, there is a downside: pcre_compile() now
+    runs more slowly than before (30% or more, depending on the pattern). I
+    hope this isn't a big issue. There is no effect on runtime performance.
+
+18. Fixed a minor bug in pcretest: if a pattern line was not terminated by a
+    newline (only possible for the last line of a file) and it was a
+    pattern that set a locale (followed by /Lsomething), pcretest crashed.
+
+19. Added additional timing features to pcretest. (1) The -tm option now times
+    matching only, not compiling. (2) Both -t and -tm can be followed, as a
+    separate command line item, by a number that specifies the number of
+    repeats to use when timing. The default is 50000; this gives better
+    precision, but takes uncomfortably long for very large patterns.
+
+20. Extended pcre_study() to be more clever in cases where a branch of a
+    subpattern has no definite first character. For example, (a*|b*)[cd] would
+    previously give no result from pcre_study(). Now it recognizes that the
+    first character must be a, b, c, or d.
+
+21. There was an incorrect error "recursive call could loop indefinitely" if
+    a subpattern (or the entire pattern) that was being tested for matching an
+    empty string contained only one non-empty item after a nested subpattern.
+    For example, the pattern (?>\x{100}*)\d(?R) provoked this error
+    incorrectly, because the \d was being skipped in the check.
+
+22. The pcretest program now has a new pattern option /B and a command line
+    option -b, which is equivalent to adding /B to every pattern. This causes
+    it to show the compiled bytecode, without the additional information that
+    -d shows. The effect of -d is now the same as -b with -i (and similarly, /D
+    is the same as /B/I).
+
+23. A new optimization is now able automatically to treat some sequences such
+    as a*b as a*+b. More specifically, if something simple (such as a character
+    or a simple class like \d) has an unlimited quantifier, and is followed by
+    something that cannot possibly match the quantified thing, the quantifier
+    is automatically "possessified".
+
+24. A recursive reference to a subpattern whose number was greater than 39
+    went wrong under certain circumstances in UTF-8 mode. This bug could also
+    have affected the operation of pcre_study().
+
+25. Realized that a little bit of performance could be had by replacing
+    (c & 0xc0) == 0xc0 with c >= 0xc0 when processing UTF-8 characters.
+
+26. Timing data from pcretest is now shown to 4 decimal places instead of 3.
+
+27. Possessive quantifiers such as a++ were previously implemented by turning
+    them into atomic groups such as (?>a+). Now they have their own opcodes,
+    which improves performance. This includes the automatically created ones
+    from 23 above.
+
+28. A pattern such as (?=(\w+))\1: which simulates an atomic group using a
+    lookahead was broken if it was not anchored. PCRE was mistakenly expecting
+    the first matched character to be a colon. This applied both to named and
+    numbered groups.
+
+29. The ucpinternal.h header file was missing its idempotency #ifdef.
+
+30. I was sent a "project" file called libpcre.a.dev which I understand makes
+    building PCRE on Windows easier, so I have included it in the distribution.
+
+31. There is now a check in pcretest against a ridiculously large number being
+    returned by pcre_exec() or pcre_dfa_exec(). If this happens in a /g or /G
+    loop, the loop is abandoned.
+
+32. Forward references to subpatterns in conditions such as (?(2)...) where
+    subpattern 2 is defined later cause pcre_compile() to search forwards in
+    the pattern for the relevant set of parentheses. This search went wrong
+    when there were unescaped parentheses in a character class, parentheses
+    escaped with \Q...\E, or parentheses in a #-comment in /x mode.
+
+33. "Subroutine" calls and backreferences were previously restricted to
+    referencing subpatterns earlier in the regex. This restriction has now
+    been removed.
+
+34. Added a number of extra features that are going to be in Perl 5.10. On the
+    whole, these are just syntactic alternatives for features that PCRE had
+    previously implemented using the Python syntax or my own invention. The
+    other formats are all retained for compatibility.
+
+    (a) Named groups can now be defined as (?<name>...) or (?'name'...) as well
+        as (?P<name>...). The new forms, as well as being in Perl 5.10, are
+        also .NET compatible.
+
+    (b) A recursion or subroutine call to a named group can now be defined as
+        (?&name) as well as (?P>name).
+
+    (c) A backreference to a named group can now be defined as \k<name> or
+        \k'name' as well as (?P=name). The new forms, as well as being in Perl
+        5.10, are also .NET compatible.
+
+    (d) A conditional reference to a named group can now use the syntax
+        (?(<name>) or (?('name') as well as (?(name).
+
+    (e) A "conditional group" of the form (?(DEFINE)...) can be used to define
+        groups (named and numbered) that are never evaluated inline, but can be
+        called as "subroutines" from elsewhere. In effect, the DEFINE condition
+        is always false. There may be only one alternative in such a group.
+
+    (f) A test for recursion can be given as (?(R1)... or (?(R&name)... as well
+        as the simple (?(R). The condition is true only if the most recent
+        recursion is that of the given number or name. It does not search out
+        through the entire recursion stack.
+
+    (g) The escape \gN or \g{N} has been added, where N is a positive or
+        negative number, specifying an absolute or relative reference.
+
+35. Tidied to get rid of some further signed/unsigned compiler warnings and
+    some "unreachable code" warnings.
+
+36. Updated the Unicode property tables to Unicode version 5.0.0. Amongst other
+    things, this adds five new scripts.
+
+37. Perl ignores orphaned \E escapes completely. PCRE now does the same.
+    There were also incompatibilities regarding the handling of \Q..\E inside
+    character classes, for example with patterns like [\Qa\E-\Qz\E] where the
+    hyphen was adjacent to \Q or \E. I hope I've cleared all this up now.
+
+38. Like Perl, PCRE detects when an indefinitely repeated parenthesized group
+    matches an empty string, and forcibly breaks the loop. There were bugs in
+    this code in non-simple cases. For a pattern such as  ^(a()*)*  matched
+    against  aaaa  the result was just "a" rather than "aaaa", for example. Two
+    separate and independent bugs (that affected different cases) have been
+    fixed.
+
+39. Refactored the code to abolish the use of different opcodes for small
+    capturing bracket numbers. This is a tidy that I avoided doing when I
+    removed the limit on the number of capturing brackets for 3.5 back in 2001.
+    The new approach is not only tidier, it makes it possible to reduce the
+    memory needed to fix the previous bug (38).
+
+40. Implemented PCRE_NEWLINE_ANY to recognize any of the Unicode newline
+    sequences (http://unicode.org/unicode/reports/tr18/) as "newline" when
+    processing dot, circumflex, or dollar metacharacters, or #-comments in /x
+    mode.
+
+41. Add \R to match any Unicode newline sequence, as suggested in the Unicode
+    report.
+
+42. Applied patch, originally from Ari Pollak, modified by Google, to allow
+    copy construction and assignment in the C++ wrapper.
+
+43. Updated pcregrep to support "--newline=any". In the process, I fixed a
+    couple of bugs that could have given wrong results in the "--newline=crlf"
+    case.
+
+44. Added a number of casts and did some reorganization of signed/unsigned int
+    variables following suggestions from Dair Grant. Also renamed the variable
+    "this" as "item" because it is a C++ keyword.
+
+45. Arranged for dftables to add
+
+      #include "pcre_internal.h"
+
+    to pcre_chartables.c because without it, gcc 4.x may remove the array
+    definition from the final binary if PCRE is built into a static library and
+    dead code stripping is activated.
+
+46. For an unanchored pattern, if a match attempt fails at the start of a
+    newline sequence, and the newline setting is CRLF or ANY, and the next two
+    characters are CRLF, advance by two characters instead of one.
+
+
 Version 6.7 04-Jul-06
 ---------------------

--- a/ext/pcre/pcrelib/LICENCE
+++ b/ext/pcre/pcrelib/LICENCE
@@ -4,7 +4,7 @@ PCRE LICENCE
 PCRE is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.

-Release 6 of PCRE is distributed under the terms of the "BSD" licence, as
+Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
 specified below. The documentation for PCRE, supplied in the "doc"
 directory, is distributed under the same terms as the software itself.

--- a/ext/pcre/pcrelib/NEWS
+++ b/ext/pcre/pcrelib/NEWS
@@ -1,6 +1,36 @@
 News about PCRE releases
 ------------------------

+Release 7.0 23-Nov-06
+---------------------
+
+This release has a new major number because there have been some internal
+upheavals to facilitate the addition of new optimizations and other facilities,
+and to make subsequent maintenance and extension easier. Compilation is likely
+to be a bit slower, but there should be no major effect on runtime performance.
+Previously compiled patterns are NOT upwards compatible with this release. If
+you have saved compiled patterns from a previous release, you will have to
+re-compile them. Important changes that are visible to users are:
+
+1. The Unicode property tables have been updated to Unicode 5.0.0, which adds
+   some more scripts.
+
+2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline
+   sequence as a newline.
+
+3. The \R escape matches a single Unicode newline sequence as a single unit.
+
+4. New features that will appear in Perl 5.10 are now in PCRE. These include
+   alternative Perl syntax for named parentheses, and Perl syntax for
+   recursion.
+
+5. The C++ wrapper interface has been extended by the addition of a
+   QuoteMeta function and the ability to allow copy construction and
+   assignment.
+
+For a complete list of changes, see the ChangeLog file.
+
+
 Release 6.7 04-Jul-06
 ---------------------

--- a/ext/pcre/pcrelib/NON-UNIX-USE
+++ b/ext/pcre/pcrelib/NON-UNIX-USE
@@ -22,7 +22,7 @@ The following are generic comments about building PCRE. The interspersed
 indented commands are suggestions from Mark Tetrode as to which commands you
 might use on a Windows system to build a static library.

-(1) Copy or rename the file config.in as config.h, and change the macros that
+(1) Copy or rename the file config.h.in as config.h, and change the macros that
 define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0.
 Unfortunately, because of the way Unix autoconf works, the default setting has
 to be 0. You may also want to make changes to other macros in config.h. In
@@ -31,7 +31,7 @@ the NEWLINE macro. The default is to use '\n', thereby using whatever value
 your compiler gives to '\n'.

  rem Mark Tetrode's commands
-  copy config.in config.h
+  copy config.h.in config.h
  rem Use write, because notepad cannot handle UNIX files. Change values.
  write config.h

@@ -56,6 +56,7 @@ character tables and writes them to that file.
  pcre_globals.c
  pcre_info.c
  pcre_maketables.c
+  pcre_newline.c
  pcre_ord2utf8.c
  pcre_refcount.c
  pcre_study.c
@@ -93,10 +94,10 @@ pcre and pcreposix libraries when linking.
  cl /F0x400000 pcretest.c pcre.lib pcreposix.lib

 (6) Run pcretest on the testinput files in the testdata directory, and check
-that the output matches the corresponding testoutput files. You must use the
-i option when checking testinput2. Note that the supplied files are in Unix
-format, with just LF characters as line terminators. You may need to edit them
-to change this if your system uses a different convention.
+that the output matches the corresponding testoutput files. Note that the
+supplied files are in Unix format, with just LF characters as line terminators.
+You may need to edit them to change this if your system uses a different
+convention.

  rem Mark Tetrode's commands
  pcretest testdata\testinput1 testdata\myoutput1
@@ -135,6 +136,17 @@ If you have a system without "configure" but where you can use a Makefile, edit
 Makefile.in to create Makefile, substituting suitable values for the variables
 at the head of the file.

+Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
+
+  Some of the core BCC libraries have a version of PCRE from 1998 built in,
+  which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
+  version mismatch. I'm including an easy workaround below, if you'd like to
+  include it in the non-unix instructions:
+
+  When linking a project with BCC5.5, pcre.lib must be included before any of
+  the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
+  line.
+
 Some help in building a Win32 DLL of PCRE in GnuWin32 environments was
 contributed by Paul Sokolovsky. These environments are Mingw32
 (http://www.xraylith.wisc.edu/~khan/software/gnu-win32/) and CygWin
--- a/ext/pcre/pcrelib/README
+++ b/ext/pcre/pcrelib/README
@@ -118,13 +118,13 @@ library. You can read more about them in the pcrebuild man page.
  property table); only the basic two-letter properties such as Lu are
  supported.

-. You can build PCRE to recognize either CR or LF or the sequence CRLF as
-  indicating the end of a line. Whatever you specify at build time is the
-  default; the caller of PCRE can change the selection at run time. The default
-  newline indicator is a single LF character (the Unix standard). You can
-  specify the default newline indicator by adding --newline-is-cr or
-  --newline-is-lf or --newline-is-crlf to the "configure" command,
-  respectively.
+. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
+  of the Unicode newline sequences as indicating the end of a line. Whatever
+  you specify at build time is the default; the caller of PCRE can change the
+  selection at run time. The default newline indicator is a single LF character
+  (the Unix standard). You can specify the default newline indicator by adding
+  --newline-is-cr or --newline-is-lf or --newline-is-crlf or --newline-is-any
+  to the "configure" command, respectively.

 . When called via the POSIX interface, PCRE uses malloc() to get additional
  storage for processing capturing parentheses if there are more than 10 of
@@ -283,7 +283,7 @@ to the values of CC and CFLAGS.
 Using HP's ANSI C++ compiler (aCC)
 ----------------------------------

-Unless C++ support is disabled by specifiying the "--disable-cpp" option of the
+Unless C++ support is disabled by specifying the "--disable-cpp" option of the
 "configure" script, you *must* include the "-AA" option in the CXXFLAGS
 environment variable in order for the C++ components to compile correctly.

@@ -305,8 +305,8 @@ PCRE in the same way as for Unix systems.

 PCRE has been compiled on Windows systems and on Macintoshes, but I don't know
 the details because I don't use those systems. It should be straightforward to
-build PCRE on any system that has a Standard C compiler, because it uses only
-Standard C functions.
+build PCRE on any system that has a Standard C compiler and library, because it
+uses only Standard C functions.


 Testing PCRE
@@ -325,15 +325,15 @@ NON-UNIX-USE.
 The RunTest script runs the pcretest test program (which is documented in its
 own man page) on each of the testinput files (in the testdata directory) in
 turn, and compares the output with the contents of the corresponding testoutput
-file. A file called testtry is used to hold the main output from pcretest
+files. A file called testtry is used to hold the main output from pcretest
 (testsavedregex is also used as a working file). To run pcretest on just one of
 the test files, give its number as an argument to RunTest, for example:

  RunTest 2

-The first file can also be fed directly into the perltest script to check that
-Perl gives the same results. The only difference you should see is in the first
-few lines, where the Perl version is given instead of the PCRE version.
+The first test file can also be fed directly into the perltest script to check
+that Perl gives the same results. The only difference you should see is in the
+first few lines, where the Perl version is given instead of the PCRE version.

 The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(),
 pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
@@ -442,6 +442,7 @@ The distribution should contain the following files:
  pcre_globals.c        )   and some internal functions that they use
  pcre_info.c           )
  pcre_maketables.c     )
+  pcre_newline.c        )
  pcre_ord2utf8.c       )
  pcre_refcount.c       )
  pcre_study.c          )
@@ -525,4 +526,4 @@ The distribution should contain the following files:
 Philip Hazel
 Email local part: ph10
 Email domain: cam.ac.uk
-June 2006
+November 2006
--- a/ext/pcre/pcrelib/dftables.c
+++ b/ext/pcre/pcrelib/dftables.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -86,7 +86,16 @@ fprintf(f,
 fprintf(f,
  "This file contains the default tables for characters with codes less than\n"
  "128 (ASCII characters). These tables are used when no external tables are\n"
-  "passed to PCRE. */\n\n"
+  "passed to PCRE.\n\n");
+fprintf(f,
+  "The following #include is present because without it gcc 4.x may remove\n"
+  "the array definition from the final binary if PCRE is built into a static\n"
+  "library and dead code stripping is activated. This leads to link errors.\n"
+  "Pulling in the header ensures that the array gets flagged as \"someone\n"
+  "outside this compilation unit might reference this\" and so it will always\n"
+  "be supplied to the linker. */\n\n"
+  "#include \"pcre_internal.h\"\n\n");
+fprintf(f,
  "const unsigned char _pcre_default_tables[] = {\n\n"
  "/* This table is a lower casing table. */\n\n");

--- a/ext/pcre/pcrelib/doc/Tech.Notes
+++ b/ext/pcre/pcrelib/doc/Tech.Notes
@@ -16,10 +16,11 @@ not operate by backtracking, as the original Henry Spencer code and current
 Perl code does, but instead checked all possibilities simultaneously by keeping
 a list of current states and checking all of them as it advanced through the
 subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
-algorithm". When the pattern was all used up, all remaining states were
-possible matches, and the one matching the longest subset of the subject string
-was chosen. This did not necessarily maximize the individual wild portions of
-the pattern, as is expected in Unix and Perl-style regular expressions.
+algorithm", though it was not a traditional Finite State Machine (FSM). When
+the pattern was all used up, all remaining states were possible matches, and
+the one matching the longest subset of the subject string was chosen. This did
+not necessarily maximize the individual wild portions of the pattern, as is
+expected in Unix and Perl-style regular expressions.

 Historical note 2
 -----------------
@@ -41,14 +42,38 @@ unrelated to those mentioned above), I tried at first to invent an algorithm
 that used an amount of store bounded by a multiple of the number of characters
 in the pattern, to save on compiling time. However, because of the greater
 complexity in Perl regular expressions, I couldn't do this. In any case, a
-first pass through the pattern is needed, for a number of reasons. PCRE works
-by running a very degenerate first pass to calculate a maximum store size, and
-then a second pass to do the real compile - which may use a bit less than the
-predicted amount of store. The idea is that this is going to turn out faster
-because the first pass is degenerate and the second pass can just store stuff
-straight into the vector, which it knows is big enough. It does make the
-compiling functions bigger, of course, but they have become quite big anyway to
-handle all the Perl stuff.
+first pass through the pattern is helpful for other reasons. 
+
+Computing the memory requirement: how it was
+--------------------------------------------
+
+Up to and including release 6.7, PCRE worked by running a very degenerate first
+pass to calculate a maximum store size, and then a second pass to do the real
+compile - which might use a bit less than the predicted amount of memory. The
+idea was that this would turn out faster than the Henry Spencer code because
+the first pass is degenerate and the second pass can just store stuff straight
+into the vector, which it knows is big enough.
+
+Computing the memory requirement: how it is
+-------------------------------------------
+
+By the time I was working on a potential 6.8 release, the degenerate first pass
+had become very complicated and hard to maintain. Indeed one of the early
+things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
+I had a flash of inspiration as to how I could run the real compile function in
+a "fake" mode that enables it to compute how much memory it would need, while
+actually only ever using a few hundred bytes of working memory, and without too
+many tests of the mode that might slow it down. So I re-factored the compiling
+functions to work this way. This got rid of about 600 lines of source. It
+should make future maintenance and development easier. As this was such a major 
+change, I never released 6.8, instead upping the number to 7.0 (other quite 
+major changes are also present in the 7.0 release).
+
+A side effect of this work is that the previous limit of 200 on the nesting
+depth of parentheses was removed. However, there is a downside: pcre_compile()
+runs more slowly than before (30% or more, depending on the pattern) because it
+is doing a full analysis of the pattern. My hope is that this is not a big
+issue.

 Traditional matching function
 -----------------------------
@@ -70,6 +95,12 @@ intreprets the same compiled pattern data as pcre_exec(); however, not all the
 facilities are available, and those that are do not always work in quite the 
 same way. See the user documentation for details.

+The algorithm that is used for pcre_dfa_exec() is not a traditional FSM, 
+because it may have a number of states active at one time. More work would be 
+needed at compile time to produce a traditional FSM where only one state is 
+ever active at once. I believe some other regex matchers work this way.
+
+
 Format of compiled patterns
 ---------------------------

@@ -79,10 +110,12 @@ item is either implicit in the opcode or contained in the data bytes that
 follow it. 

 In many cases below "two-byte" data values are specified. This is in fact just
-a default. PCRE can be compiled to use 3-byte or 4-byte values (impairing the
+a default when the number is an offset within the compiled pattern. PCRE can be
+compiled to use 3-byte or 4-byte values for these offsets (impairing the
 performance). This is necessary only when patterns whose compiled length is
-greater than 64K are going to be processed. In this description, we assume the 
-"normal" compilation options.
+greater than 64K are going to be processed. In this description, we assume the
+"normal" compilation options. "Two-byte" data values that are counts (e.g. for 
+quantifiers) are always just two bytes.

 A list of all the opcodes follows:

@@ -109,6 +142,7 @@ These items are all just one byte long
  OP_EOD                 match end of data: \z
  OP_DOLL                $ (end of data, or before \n in multiline)
  OP_EXTUNI              match an extended Unicode character 
+  OP_ANYNL               match any Unicode newline sequence 
  

 Repeating single characters
@@ -119,23 +153,28 @@ following opcodes:

  OP_STAR
  OP_MINSTAR
+  OP_POSSTAR 
  OP_PLUS
  OP_MINPLUS
+  OP_POSPLUS 
  OP_QUERY
  OP_MINQUERY
+  OP_POSQUERY 

 In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable.
-Those with "MIN" in their name are the minimizing versions. Each is followed by
-the character that is to be repeated. Other repeats make use of
+Those with "MIN" in their name are the minimizing versions. Those with "POS" in 
+their name are possessive versions. Each is followed by the character that is
+to be repeated. Other repeats make use of

  OP_UPTO
  OP_MINUPTO
+  OP_POSUPTO 
  OP_EXACT

 which are followed by a two-byte count (most significant first) and the
 repeated character. OP_UPTO matches from 0 to the given number. A repeat with a
 non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an
-OP_UPTO (or OP_MINUPTO).
+OP_UPTO (or OP_MINUPTO or OP_POSUPTO).


 Repeating character types
@@ -147,12 +186,16 @@ byte. The opcodes are:

  OP_TYPESTAR
  OP_TYPEMINSTAR
+  OP_TYPEPOSSTAR 
  OP_TYPEPLUS
  OP_TYPEMINPLUS
+  OP_TYPEPOSPLUS 
  OP_TYPEQUERY
  OP_TYPEMINQUERY
+  OP_TYPEPOSQUERY 
  OP_TYPEUPTO
  OP_TYPEMINUPTO
+  OP_TYPEPOSUPTO 
  OP_TYPEEXACT


@@ -216,9 +259,10 @@ OP_REF is followed by two bytes containing the reference number.
 Repeating character classes and back references
 -----------------------------------------------

-Single-character classes are handled specially (see above). This applies to
-OP_CLASS and OP_REF. In both cases, the repeat information follows the base
-item. The matching code looks at the following opcode to see if it is one of
+Single-character classes are handled specially (see above). This section
+applies to OP_CLASS and OP_REF. In both cases, the repeat information follows
+the base item. The matching code looks at the following opcode to see if it is
+one of

  OP_CRSTAR
  OP_CRMINSTAR
@@ -230,7 +274,9 @@ item. The matching code looks at the following opcode to see if it is one of
  OP_CRMINRANGE

 All but the last two are just single-byte items. The others are followed by
-four bytes of data, comprising the minimum and maximum repeat counts.
+four bytes of data, comprising the minimum and maximum repeat counts. There are 
+no special possessive opcodes for these repeats; a possessive repeat is 
+compiled into an atomic group.


 Brackets and alternation
@@ -239,29 +285,25 @@ Brackets and alternation
 A pair of non-capturing (round) brackets is wrapped round each expression at
 compile time, so alternation always happens in the context of brackets.

-Non-capturing brackets use the opcode OP_BRA, while capturing brackets use
-OP_BRA+1, OP_BRA+2, etc. [Note for North Americans: "bracket" to some English
-speakers, including myself, can be round, square, curly, or pointy. Hence this
-usage.]
+[Note for North Americans: "bracket" to some English speakers, including
+myself, can be round, square, curly, or pointy. Hence this usage.]

-Originally PCRE was limited to 99 capturing brackets (so as not to use up all
-the opcodes). From release 3.5, there is no limit. What happens is that the
-first ones, up to EXTRACT_BASIC_MAX are handled with separate opcodes, as
-above. If there are more, the opcode is set to EXTRACT_BASIC_MAX+1, and the
-first operation in the bracket is OP_BRANUMBER, followed by a 2-byte bracket
-number. This opcode is ignored while matching, but is fished out when handling
-the bracket itself. (They could have all been done like this, but I was making
-minimal changes.)
+Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
+capturing brackets and it used a different opcode for each one. From release
+3.5, the limit was removed by putting the bracket number into the data for
+higher-numbered brackets. From release 7.0 all capturing brackets are handled
+this way, using the single opcode OP_CBRA.

 A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
 next alternative OP_ALT or, if there aren't any branches, to the matching
 OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
-the next one, or to the OP_KET opcode.
+the next one, or to the OP_KET opcode. For capturing brackets, the bracket 
+number immediately follows the offset, always as a 2-byte item.

 OP_KET is used for subpatterns that do not repeat indefinitely, while
 OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
 maximally respectively. All three are followed by LINK_SIZE bytes giving (as a
-positive number) the offset back to the matching OP_BRA opcode.
+positive number) the offset back to the matching bracket opcode.

 If a subpattern is quantified such that it is permitted to match zero times, it
 is preceded by one of OP_BRAZERO or OP_BRAMINZERO. These are single-byte
@@ -276,7 +318,14 @@ as appropriate.
 A subpattern with a bounded maximum repetition is replicated in a nested
 fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
 before each replication after the minimum, so that, for example, (abc){2,5} is
-compiled as (abc)(abc)((abc)((abc)(abc)?)?)?.
+compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group 
+has the same number.
+
+When a repeated subpattern has an unbounded upper limit, it is checked to see 
+whether it could match an empty string. If this is the case, the opcode in the 
+final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
+that it needs to check for matching an empty string when it hits OP_KETRMIN or
+OP_KETRMAX, and if so, to break the loop.


 Assertions
@@ -292,22 +341,27 @@ each alternative of a lookbehind assertion, allowing them to have different
 fixed lengths.


-Once-only subpatterns
---------------------
+Once-only (atomic) subpatterns
+------------------------------

 These are also just like other subpatterns, but they start with the opcode
-OP_ONCE.
+OP_ONCE. The check for matching an empty string in an unbounded repeat is 
+handled entirely at runtime, so there is just this one opcode.


 Conditional subpatterns
 -----------------------

-These are like other subpatterns, but they start with the opcode OP_COND. If
+These are like other subpatterns, but they start with the opcode OP_COND, or
+OP_SCOND for one that might match an empty string in an unbounded repeat. If
 the condition is a back reference, this is stored at the start of the
 subpattern using the opcode OP_CREF followed by two bytes containing the
-reference number. If the condition is "in recursion" (coded as "(?(R)"), the
-same scheme is used, with a "reference number" of 0xffff. Otherwise, a
-conditional subpattern always starts with one of the assertions.
+reference number. If the condition is "in recursion" (coded as "(?(R)"), or "in
+recursion of group x" (coded as "(?(Rx)"), the group number is stored at the
+start of the subpattern using the opcode OP_RREF, with a value of zero meaning
+"the whole pattern". For a DEFINE condition, just the single byte OP_DEF is
+used (it has no associated data). Otherwise, a conditional subpattern always
+starts with one of the assertions.


 Recursion
@@ -345,4 +399,4 @@ at compile time, and so does not cause anything to be put into the compiled
 data.

 Philip Hazel
-June 2006
+November 2006
--- a/ext/pcre/pcrelib/doc/pcre.txt
+++ b/ext/pcre/pcrelib/doc/pcre.txt
--- a/ext/pcre/pcrelib/pcre.h
+++ b/ext/pcre/pcrelib/pcre.h
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.

-           Copyright (c) 1997-2005 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.

 #ifndef _PCRE_H
 #define _PCRE_H
- 
+
 #include "php_compat.h"

 /* The current PCRE version information. */
@@ -54,10 +54,10 @@ and libpcre.pc. The values are not put into configure.ac and substituted here
 cannot run ./configure. As it now stands, this file need not be edited in that
 circumstance. */

-#define PCRE_MAJOR          6
-#define PCRE_MINOR          7
+#define PCRE_MAJOR          7
+#define PCRE_MINOR          0
 #define PCRE_PRERELEASE
-#define PCRE_DATE           04-Jul-2006
+#define PCRE_DATE           18-Dec-2006

 /* Win32 uses DLL by default; it needs special stuff for exported functions
 when building PCRE. */
@@ -120,6 +120,7 @@ extern "C" {
 #define PCRE_NEWLINE_CR         0x00100000
 #define PCRE_NEWLINE_LF         0x00200000
 #define PCRE_NEWLINE_CRLF       0x00300000
+#define PCRE_NEWLINE_ANY        0x00400000

 /* Exec-time and get/set-time error codes */

@@ -127,7 +128,8 @@ extern "C" {
 #define PCRE_ERROR_NULL            (-2)
 #define PCRE_ERROR_BADOPTION       (-3)
 #define PCRE_ERROR_BADMAGIC        (-4)
-#define PCRE_ERROR_UNKNOWN_NODE    (-5)
+#define PCRE_ERROR_UNKNOWN_OPCODE  (-5)
+#define PCRE_ERROR_UNKNOWN_NODE    (-5)  /* For backward compatibility */
 #define PCRE_ERROR_NOMEMORY        (-6)
 #define PCRE_ERROR_NOSUBSTRING     (-7)
 #define PCRE_ERROR_MATCHLIMIT      (-8)
@@ -144,6 +146,8 @@ extern "C" {
 #define PCRE_ERROR_DFA_WSSIZE     (-19)
 #define PCRE_ERROR_DFA_RECURSE    (-20)
 #define PCRE_ERROR_RECURSIONLIMIT (-21)
+#define PCRE_ERROR_NULLWSLIMIT    (-22)
+#define PCRE_ERROR_BADNEWLINE     (-23)

 /* Request types for pcre_fullinfo() */

--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
--- a/ext/pcre/pcrelib/pcre_config.c
+++ b/ext/pcre/pcrelib/pcre_config.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
--- a/ext/pcre/pcrelib/pcre_fullinfo.c
+++ b/ext/pcre/pcrelib/pcre_fullinfo.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcre_get.c
+++ b/ext/pcre/pcrelib/pcre_get.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -436,7 +436,6 @@ pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
 int n = get_first_set(code, stringname, ovector);
 if (n <= 0) return n;
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
-
 }


--- a/ext/pcre/pcrelib/pcre_globals.c
+++ b/ext/pcre/pcrelib/pcre_globals.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -51,6 +51,18 @@ differently, and global variables are not used (see pcre.in). */


 #ifndef VPCOMPAT
+
+/**************************************************************************
+This code used to be here for use when compiling as a C++ library. However,
+according to Dair Grant it is not needed: "
+
+ Including 'extern "C"' in the declaration generates an "initialized and
+ declared `extern'" warning from gcc 4.0.1. Since we include pcre_internal.h,
+ which includes pcre.h, which declares these prototypes within an extern "C" {}
+ block, we shouldn't need the prefix here."
+
+So, from Release 7.0 I have cut this out.
+
 #ifdef __cplusplus
 extern "C" void *(*pcre_malloc)(size_t) = malloc;
 extern "C" void  (*pcre_free)(void *) = free;
@@ -58,12 +70,13 @@ extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;
 extern "C" void  (*pcre_stack_free)(void *) = free;
 extern "C" int   (*pcre_callout)(pcre_callout_block *) = NULL;
 #else
+**************************************************************************/
+
 void *(*pcre_malloc)(size_t) = malloc;
 void  (*pcre_free)(void *) = free;
 void *(*pcre_stack_malloc)(size_t) = malloc;
 void  (*pcre_stack_free)(void *) = free;
 int   (*pcre_callout)(pcre_callout_block *) = NULL;
 #endif
-#endif

 /* End of pcre_globals.c */
--- a/ext/pcre/pcrelib/pcre_info.c
+++ b/ext/pcre/pcrelib/pcre_info.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcre_internal.h
+++ b/ext/pcre/pcrelib/pcre_internal.h
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -54,12 +54,16 @@ functions whose names all begin with "_pcre_". */
 /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
 inline, and there are *still* stupid compilers about that don't like indented
 pre-processor statements, or at least there were when I first wrote this. After
-all, it had only been about 10 years then... */
+all, it had only been about 10 years then...

+It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
+be absolutely sure we get our version. */
+
+#undef DPRINTF
 #ifdef DEBUG
 #define DPRINTF(p) printf p
 #else
-#define DPRINTF(p) /*nothing*/
+#define DPRINTF(p) /* Nothing */
 #endif


@@ -118,13 +122,48 @@ Unix, where it is defined in sys/types, so use "uschar" instead. */

 typedef unsigned char uschar;

-/* PCRE is able to support 3 different kinds of newline (CR, LF, CRLF). The
-following macro is used to package up testing for newlines. NLBLOCK is defined
-in the various modules to indicate in which datablock the parameters exist. */
+/* This is an unsigned int value that no character can ever have. UTF-8
+characters only go up to 0x7fffffff (though Unicode doesn't go beyond
+0x0010ffff). */
+
+#define NOTACHAR 0xffffffff
+
+/* PCRE is able to support several different kinds of newline (CR, LF, CRLF,
+and "all" at present). The following macros are used to package up testing for
+newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to
+indicate in which datablock the parameters exist, and what the start/end of
+string field names are. */
+
+#define NLTYPE_FIXED   0     /* Newline is a fixed length string */
+#define NLTYPE_ANY     1     /* Newline is any Unicode line ending */
+
+/* This macro checks for a newline at the given position */

 #define IS_NEWLINE(p) \
-  ((p)[0] == NLBLOCK->nl[0] && \
-  (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]))
+  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
+    ((p) < NLBLOCK->PSEND && \
+     _pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \
+    ) \
+    : \
+    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
+     (p)[0] == NLBLOCK->nl[0] && \
+     (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
+    ) \
+  )
+
+/* This macro checks for a newline immediately preceding the given position */
+
+#define WAS_NEWLINE(p) \
+  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
+    ((p) > NLBLOCK->PSSTART && \
+     _pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \
+    ) \
+    : \
+    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
+     (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
+     (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
+    ) \
+  )

 /* When PCRE is compiled as a C++ library, the subject pointer can be replaced
 with a custom type. This makes it possible, for example, to allow pcre_exec()
@@ -282,7 +321,7 @@ we know we are in UTF-8 mode. */

 #define GETCHAR(c, eptr) \
  c = *eptr; \
-  if ((c & 0xc0) == 0xc0) \
+  if (c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
@@ -300,7 +339,7 @@ pointer. */

 #define GETCHARTEST(c, eptr) \
  c = *eptr; \
-  if (utf8 && (c & 0xc0) == 0xc0) \
+  if (utf8 && c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
@@ -318,7 +357,7 @@ know we are in UTF-8 mode. */

 #define GETCHARINC(c, eptr) \
  c = *eptr++; \
-  if ((c & 0xc0) == 0xc0) \
+  if (c >= 0xc0) \
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
@@ -334,7 +373,7 @@ know we are in UTF-8 mode. */

 #define GETCHARINCTEST(c, eptr) \
  c = *eptr++; \
-  if (utf8 && (c & 0xc0) == 0xc0) \
+  if (utf8 && c >= 0xc0) \
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
@@ -351,7 +390,7 @@ if there are extra bytes. This is called when we know we are in UTF-8 mode. */

 #define GETCHARLEN(c, eptr, len) \
  c = *eptr; \
-  if ((c & 0xc0) == 0xc0) \
+  if (c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
@@ -404,20 +443,21 @@ bits. */
 /* Masks for identifying the public options that are permitted at compile
 time, run time, or study time, respectively. */

+#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY)
+
 #define PUBLIC_OPTIONS \
  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
-   PCRE_DUPNAMES|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
+   PCRE_DUPNAMES|PCRE_NEWLINE_BITS)

 #define PUBLIC_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)
+   PCRE_PARTIAL|PCRE_NEWLINE_BITS)

 #define PUBLIC_DFA_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_CR| \
-   PCRE_NEWLINE_LF)
+   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS)

 #define PUBLIC_STUDY_OPTIONS 0   /* None defined */

@@ -449,9 +489,7 @@ typedef int BOOL;
 #define FALSE   0
 #define TRUE    1

-/* Escape items that are just an encoding of a particular data value. Note that
-ESC_n is defined as yet another macro, which is set in config.h to either \n
-(the default) or \r (which some people want). */
+/* Escape items that are just an encoding of a particular data value. */

 #ifndef ESC_e
 #define ESC_e 27
@@ -462,7 +500,7 @@ ESC_n is defined as yet another macro, which is set in config.h to either \n
 #endif

 #ifndef ESC_n
-#define ESC_n NEWLINE
+#define ESC_n '\n'
 #endif

 #ifndef ESC_r
@@ -501,21 +539,28 @@ value such as \n. They must have non-zero values, as check_escape() returns
 their negation. Also, they must appear in the same order as in the opcode
 definitions below, up to ESC_z. There's a dummy for OP_ANY because it
 corresponds to "." rather than an escape sequence. The final one must be
-ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two
-tests in the code for an escape greater than ESC_b and less than ESC_Z to
-detect the types that may be repeated. These are the types that consume
-characters. If any new escapes are put in between that don't consume a
+ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
+There are two tests in the code for an escape greater than ESC_b and less than
+ESC_Z to detect the types that may be repeated. These are the types that
+consume characters. If any new escapes are put in between that don't consume a
 character, that code will have to change. */

 enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
-       ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E,
-       ESC_Q, ESC_REF };
+       ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,
+       ESC_E, ESC_Q, ESC_k, ESC_REF };
+

 /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
 that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
 OP_EOD must correspond in order to the list of escapes immediately above.
-Note that whenever this list is updated, the two macro definitions that follow
-must also be updated to match. */
+
+To keep stored, compiled patterns compatible, new opcodes should be added
+immediately before OP_BRA, where (since release 7.0) a gap is left for this
+purpose.
+
+*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
+that follow must also be updated to match. There is also a table called
+"coptable" in pcre_dfa_exec.c that must be updated. */

 enum {
  OP_END,            /* 0 End of pattern */
@@ -536,111 +581,123 @@ enum {
  OP_ANYBYTE,        /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
  OP_NOTPROP,        /* 13 \P (not Unicode property) */
  OP_PROP,           /* 14 \p (Unicode property) */
-  OP_EXTUNI,         /* 15 \X (extended Unicode sequence */
-  OP_EODN,           /* 16 End of data or \n at end of data: \Z. */
-  OP_EOD,            /* 17 End of data: \z */
+  OP_ANYNL,          /* 15 \R (any newline sequence) */
+  OP_EXTUNI,         /* 16 \X (extended Unicode sequence) */
+  OP_EODN,           /* 17 End of data or \n at end of data: \Z. */
+  OP_EOD,            /* 18 End of data: \z */

-  OP_OPT,            /* 18 Set runtime options */
-  OP_CIRC,           /* 19 Start of line - varies with multiline switch */
-  OP_DOLL,           /* 20 End of line - varies with multiline switch */
-  OP_CHAR,           /* 21 Match one character, casefully */
-  OP_CHARNC,         /* 22 Match one character, caselessly */
-  OP_NOT,            /* 23 Match one character, not the following one */
+  OP_OPT,            /* 19 Set runtime options */
+  OP_CIRC,           /* 20 Start of line - varies with multiline switch */
+  OP_DOLL,           /* 21 End of line - varies with multiline switch */
+  OP_CHAR,           /* 22 Match one character, casefully */
+  OP_CHARNC,         /* 23 Match one character, caselessly */
+  OP_NOT,            /* 24 Match one character, not the following one */

-  OP_STAR,           /* 24 The maximizing and minimizing versions of */
-  OP_MINSTAR,        /* 25 all these opcodes must come in pairs, with */
-  OP_PLUS,           /* 26 the minimizing one second. */
-  OP_MINPLUS,        /* 27 This first set applies to single characters */
-  OP_QUERY,          /* 28 */
-  OP_MINQUERY,       /* 29 */
-  OP_UPTO,           /* 30 From 0 to n matches */
-  OP_MINUPTO,        /* 31 */
-  OP_EXACT,          /* 32 Exactly n matches */
+  OP_STAR,           /* 25 The maximizing and minimizing versions of */
+  OP_MINSTAR,        /* 26 these six opcodes must come in pairs, with */
+  OP_PLUS,           /* 27 the minimizing one second. */
+  OP_MINPLUS,        /* 28 This first set applies to single characters.*/
+  OP_QUERY,          /* 29 */
+  OP_MINQUERY,       /* 30 */

-  OP_NOTSTAR,        /* 33 The maximizing and minimizing versions of */
-  OP_NOTMINSTAR,     /* 34 all these opcodes must come in pairs, with */
-  OP_NOTPLUS,        /* 35 the minimizing one second. */
-  OP_NOTMINPLUS,     /* 36 This set applies to "not" single characters */
-  OP_NOTQUERY,       /* 37 */
-  OP_NOTMINQUERY,    /* 38 */
-  OP_NOTUPTO,        /* 39 From 0 to n matches */
-  OP_NOTMINUPTO,     /* 40 */
-  OP_NOTEXACT,       /* 41 Exactly n matches */
+  OP_UPTO,           /* 31 From 0 to n matches */
+  OP_MINUPTO,        /* 32 */
+  OP_EXACT,          /* 33 Exactly n matches */

-  OP_TYPESTAR,       /* 42 The maximizing and minimizing versions of */
-  OP_TYPEMINSTAR,    /* 43 all these opcodes must come in pairs, with */
-  OP_TYPEPLUS,       /* 44 the minimizing one second. These codes must */
-  OP_TYPEMINPLUS,    /* 45 be in exactly the same order as those above. */
-  OP_TYPEQUERY,      /* 46 This set applies to character types such as \d */
-  OP_TYPEMINQUERY,   /* 47 */
-  OP_TYPEUPTO,       /* 48 From 0 to n matches */
-  OP_TYPEMINUPTO,    /* 49 */
-  OP_TYPEEXACT,      /* 50 Exactly n matches */
+  OP_POSSTAR,        /* 34 Possessified star */
+  OP_POSPLUS,        /* 35 Possessified plus */
+  OP_POSQUERY,       /* 36 Possessified query */
+  OP_POSUPTO,        /* 37 Possessified upto */

-  OP_CRSTAR,         /* 51 The maximizing and minimizing versions of */
-  OP_CRMINSTAR,      /* 52 all these opcodes must come in pairs, with */
-  OP_CRPLUS,         /* 53 the minimizing one second. These codes must */
-  OP_CRMINPLUS,      /* 54 be in exactly the same order as those above. */
-  OP_CRQUERY,        /* 55 These are for character classes and back refs */
-  OP_CRMINQUERY,     /* 56 */
-  OP_CRRANGE,        /* 57 These are different to the three sets above. */
-  OP_CRMINRANGE,     /* 58 */
+  OP_NOTSTAR,        /* 38 The maximizing and minimizing versions of */
+  OP_NOTMINSTAR,     /* 39 these six opcodes must come in pairs, with */
+  OP_NOTPLUS,        /* 40 the minimizing one second. They must be in */
+  OP_NOTMINPLUS,     /* 41 exactly the same order as those above. */
+  OP_NOTQUERY,       /* 42 This set applies to "not" single characters. */
+  OP_NOTMINQUERY,    /* 43 */

-  OP_CLASS,          /* 59 Match a character class, chars < 256 only */
-  OP_NCLASS,         /* 60 Same, but the bitmap was created from a negative
+  OP_NOTUPTO,        /* 44 From 0 to n matches */
+  OP_NOTMINUPTO,     /* 45 */
+  OP_NOTEXACT,       /* 46 Exactly n matches */
+
+  OP_NOTPOSSTAR,     /* 47 Possessified versions */
+  OP_NOTPOSPLUS,     /* 48 */
+  OP_NOTPOSQUERY,    /* 49 */
+  OP_NOTPOSUPTO,     /* 50 */
+
+  OP_TYPESTAR,       /* 51 The maximizing and minimizing versions of */
+  OP_TYPEMINSTAR,    /* 52 these six opcodes must come in pairs, with */
+  OP_TYPEPLUS,       /* 53 the minimizing one second. These codes must */
+  OP_TYPEMINPLUS,    /* 54 be in exactly the same order as those above. */
+  OP_TYPEQUERY,      /* 55 This set applies to character types such as \d */
+  OP_TYPEMINQUERY,   /* 56 */
+
+  OP_TYPEUPTO,       /* 57 From 0 to n matches */
+  OP_TYPEMINUPTO,    /* 58 */
+  OP_TYPEEXACT,      /* 59 Exactly n matches */
+
+  OP_TYPEPOSSTAR,    /* 60 Possessified versions */
+  OP_TYPEPOSPLUS,    /* 61 */
+  OP_TYPEPOSQUERY,   /* 62 */
+  OP_TYPEPOSUPTO,    /* 63 */
+
+  OP_CRSTAR,         /* 64 The maximizing and minimizing versions of */
+  OP_CRMINSTAR,      /* 65 all these opcodes must come in pairs, with */
+  OP_CRPLUS,         /* 66 the minimizing one second. These codes must */
+  OP_CRMINPLUS,      /* 67 be in exactly the same order as those above. */
+  OP_CRQUERY,        /* 68 These are for character classes and back refs */
+  OP_CRMINQUERY,     /* 69 */
+  OP_CRRANGE,        /* 70 These are different to the three sets above. */
+  OP_CRMINRANGE,     /* 71 */
+
+  OP_CLASS,          /* 72 Match a character class, chars < 256 only */
+  OP_NCLASS,         /* 73 Same, but the bitmap was created from a negative
                           class - the difference is relevant only when a UTF-8
                           character > 255 is encountered. */

-  OP_XCLASS,         /* 61 Extended class for handling UTF-8 chars within the
+  OP_XCLASS,         /* 74 Extended class for handling UTF-8 chars within the
                           class. This does both positive and negative. */

-  OP_REF,            /* 62 Match a back reference */
-  OP_RECURSE,        /* 63 Match a numbered subpattern (possibly recursive) */
-  OP_CALLOUT,        /* 64 Call out to external function if provided */
+  OP_REF,            /* 75 Match a back reference */
+  OP_RECURSE,        /* 76 Match a numbered subpattern (possibly recursive) */
+  OP_CALLOUT,        /* 77 Call out to external function if provided */

-  OP_ALT,            /* 65 Start of alternation */
-  OP_KET,            /* 66 End of group that doesn't have an unbounded repeat */
-  OP_KETRMAX,        /* 67 These two must remain together and in this */
-  OP_KETRMIN,        /* 68 order. They are for groups the repeat for ever. */
+  OP_ALT,            /* 78 Start of alternation */
+  OP_KET,            /* 79 End of group that doesn't have an unbounded repeat */
+  OP_KETRMAX,        /* 80 These two must remain together and in this */
+  OP_KETRMIN,        /* 81 order. They are for groups that repeat for ever. */

-  /* The assertions must come before ONCE and COND */
+  /* The assertions must come before BRA, CBRA, ONCE, and COND.*/

-  OP_ASSERT,         /* 69 Positive lookahead */
-  OP_ASSERT_NOT,     /* 70 Negative lookahead */
-  OP_ASSERTBACK,     /* 71 Positive lookbehind */
-  OP_ASSERTBACK_NOT, /* 72 Negative lookbehind */
-  OP_REVERSE,        /* 73 Move pointer back - used in lookbehind assertions */
+  OP_ASSERT,         /* 82 Positive lookahead */
+  OP_ASSERT_NOT,     /* 83 Negative lookahead */
+  OP_ASSERTBACK,     /* 84 Positive lookbehind */
+  OP_ASSERTBACK_NOT, /* 85 Negative lookbehind */
+  OP_REVERSE,        /* 86 Move pointer back - used in lookbehind assertions */

-  /* ONCE and COND must come after the assertions, with ONCE first, as there's
-  a test for >= ONCE for a subpattern that isn't an assertion. */
+  /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
+  as there's a test for >= ONCE for a subpattern that isn't an assertion. */

-  OP_ONCE,           /* 74 Once matched, don't back up into the subpattern */
-  OP_COND,           /* 75 Conditional group */
-  OP_CREF,           /* 76 Used to hold an extraction string number (cond ref) */
+  OP_ONCE,           /* 87 Atomic group */
+  OP_BRA,            /* 88 Start of non-capturing bracket */
+  OP_CBRA,           /* 89 Start of capturing bracket */
+  OP_COND,           /* 90 Conditional group */

-  OP_BRAZERO,        /* 77 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 78 order. */
+  /* These three must follow the previous three, in the same order. There's a
+  check for >= SBRA to distinguish the two sets. */

-  OP_BRANUMBER,      /* 79 Used for extracting brackets whose number is greater
-                           than can fit into an opcode. */
+  OP_SBRA,           /* 91 Start of non-capturing bracket, check empty  */
+  OP_SCBRA,          /* 92 Start of capturing bracket, check empty */
+  OP_SCOND,          /* 93 Conditional group, check empty */

-  OP_BRA             /* 80 This and greater values are used for brackets that
-                           extract substrings up to EXTRACT_BASIC_MAX. After
-                           that, use is made of OP_BRANUMBER. */
+  OP_CREF,           /* 94 Used to hold a capture number as condition */
+  OP_RREF,           /* 95 Used to hold a recursion number as condition */
+  OP_DEF,            /* 96 The DEFINE condition */
+
+  OP_BRAZERO,        /* 97 These two must remain together and in this */
+  OP_BRAMINZERO      /* 98 order. */
 };

-/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
-study.c that all opcodes are less than 128 in value. This makes handling UTF-8
-character sequences easier. */
-
-/* The highest extraction number before we have to start using additional
-bytes. (Originally PCRE didn't have support for extraction counts highter than
-this number.) The value is limited by the number of opcodes left after OP_BRA,
-i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
-opcodes. */
-
-#define EXTRACT_BASIC_MAX  100
-

 /* This macro defines textual names for all the opcodes. These are used only
 for debugging. The macro is referenced only in pcre_printint.c. */
@@ -648,17 +705,21 @@ for debugging. The macro is referenced only in pcre_printint.c. */
 #define OP_NAME_LIST \
  "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d",                \
  "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
-  "notprop", "prop", "extuni",                                    \
+  "notprop", "prop", "anynl", "extuni",                           \
  "\\Z", "\\z",                                                   \
  "Opt", "^", "$", "char", "charnc", "not",                       \
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
+  "*+","++", "?+", "{",                                           \
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
+  "*+","++", "?+", "{",                                           \
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
+  "*+","++", "?+", "{",                                           \
  "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
  "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
-  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
-  "Brazero", "Braminzero", "Branumber", "Bra"
+  "AssertB", "AssertB not", "Reverse",                            \
+  "Once", "Bra 0", "Bra", "Cond", "SBra 0", "SBra", "SCond",      \
+  "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero"


 /* This macro defines the length of fixed length operations in the compiled
@@ -674,7 +735,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  1,                             /* End                                    */ \
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
  1, 1,                          /* Any, Anybyte                           */ \
-  3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \
+  3, 3, 1, 1,                    /* NOTPROP, PROP, ANYNL, EXTUNI           */ \
  1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
  2,                             /* Char  - the minimum length             */ \
  2,                             /* Charnc  - the minimum length           */ \
@@ -682,12 +743,15 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  /* Positive single-char repeats                            ** These are  */ \
  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \
  4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \
+  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
  /* Negative single-char repeats - only for chars < 256                   */ \
  2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
  4, 4, 4,                       /* NOT upto, minupto, exact               */ \
+  2, 2, 2, 4,                    /* Possessive *, +, ?, upto               */ \
  /* Positive type repeats                                                 */ \
  2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \
  4, 4, 4,                       /* Type upto, minupto, exact              */ \
+  2, 2, 2, 4,                    /* Possessive *+, ++, ?+, upto+           */ \
  /* Character class & ref repeats                                         */ \
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
  5, 5,                          /* CRRANGE, CRMINRANGE                    */ \
@@ -706,17 +770,22 @@ in UTF-8 mode. The code that uses this table must know about such things. */
  1+LINK_SIZE,                   /* Assert behind                          */ \
  1+LINK_SIZE,                   /* Assert behind not                      */ \
  1+LINK_SIZE,                   /* Reverse                                */ \
-  1+LINK_SIZE,                   /* Once                                   */ \
+  1+LINK_SIZE,                   /* ONCE                                   */ \
+  1+LINK_SIZE,                   /* BRA                                    */ \
+  3+LINK_SIZE,                   /* CBRA                                   */ \
  1+LINK_SIZE,                   /* COND                                   */ \
+  1+LINK_SIZE,                   /* SBRA                                   */ \
+  3+LINK_SIZE,                   /* SCBRA                                  */ \
+  1+LINK_SIZE,                   /* SCOND                                  */ \
  3,                             /* CREF                                   */ \
+  3,                             /* RREF                                   */ \
+  1,                             /* DEF                                    */ \
  1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
-  3,                             /* BRANUMBER                              */ \
-  1+LINK_SIZE                    /* BRA                                    */ \


-/* A magic value for OP_CREF to indicate the "in recursion" condition. */
+/* A magic value for OP_RREF to indicate the "any recursion" condition. */

-#define CREF_RECURSE  0xffff
+#define RREF_ANY  0xffff

 /* Error code numbers. They are given names so that they can more easily be
 tracked. */
@@ -726,7 +795,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
-       ERR50, ERR51 };
+       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 };

 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
@@ -781,17 +850,23 @@ typedef struct compile_data {
  const uschar *fcc;            /* Points to case-flipping table */
  const uschar *cbits;          /* Points to character type table */
  const uschar *ctypes;         /* Points to table of type maps */
+  const uschar *start_workspace;/* The start of working space */
  const uschar *start_code;     /* The start of the compiled code */
  const uschar *start_pattern;  /* The start of the pattern */
+  const uschar *end_pattern;    /* The end of the pattern */
+  uschar *hwm;                  /* High watermark of workspace */
  uschar *name_table;           /* The name/number table */
  int  names_found;             /* Number of entries so far */
  int  name_entry_size;         /* Size of each entry */
+  int  bracount;                /* Count of capturing parens */
  int  top_backref;             /* Maximum back reference */
  unsigned int backref_map;     /* Bitmap of low back refs */
+  int  external_options;        /* External (initial) options */
  int  req_varyopt;             /* "After variable item" flag for reqbyte */
  BOOL nopartial;               /* Set TRUE if partial won't work */
-  int  nllen;                   /* 1 or 2 for newline string length */
-  uschar nl[4];                 /* Newline string */
+  int  nltype;                  /* Newline type */
+  int  nllen;                   /* Newline string length */
+  uschar nl[4];                 /* Newline string when fixed length */
 } compile_data;

 /* Structure for maintaining a chain of pointers to the currently incomplete
@@ -824,6 +899,16 @@ This isn't used for a "normal" compilation of pcre. */

 struct heapframe;

+/* Structure for building a chain of data for holding the values of the subject
+pointer at the start of each subpattern, so as to detect when an empty string
+has been matched by a subpattern - to break infinite loops. */
+
+typedef struct eptrblock {
+  struct eptrblock *epb_prev;
+  USPTR epb_saved_eptr;
+} eptrblock;
+
+
 /* Structure for passing "static" information around between the functions
 doing traditional NFA matching, so that they are thread-safe. */

@@ -834,8 +919,9 @@ typedef struct match_data {
  int   *offset_vector;         /* Offset vector */
  int    offset_end;            /* One past the end */
  int    offset_max;            /* The maximum usable for return data */
-  int    nllen;                 /* 1 or 2 for newline string length */
-  uschar nl[4];                 /* Newline string */
+  int    nltype;                /* Newline type */
+  int    nllen;                 /* Newline string length */
+  uschar nl[4];                 /* Newline string when fixed */
  const uschar *lcc;            /* Points to lower casing table */
  const uschar *ctypes;         /* Points to table of type maps */
  BOOL   offset_overflow;       /* Set if too many extractions */
@@ -854,6 +940,8 @@ typedef struct match_data {
  int    end_offset_top;        /* Highwater mark at end of match */
  int    capture_last;          /* Most recent capture number */
  int    start_offset;          /* The start offset value */
+  eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */
+  int    eptrn;                 /* Next free eptrblock */
  recursion_info *recursive;    /* Linked list of recursion data */
  void  *callout_data;          /* To pass back to callouts */
  struct heapframe *thisframe;  /* Used only when compiling for no recursion */
@@ -869,8 +957,9 @@ typedef struct dfa_match_data {
  const uschar *tables;         /* Character tables */
  int   moptions;               /* Match options */
  int   poptions;               /* Pattern options */
-  int    nllen;                 /* 1 or 2 for newline string length */
-  uschar nl[4];                 /* Newline string */
+  int    nltype;                /* Newline type */
+  int    nllen;                 /* Newline string length */
+  uschar nl[4];                 /* Newline string when fixed */
  void  *callout_data;          /* To pass back to callouts */
 } dfa_match_data;

@@ -941,13 +1030,17 @@ extern const uschar _pcre_OP_lengths[];
 one of the exported public functions. They have to be "external" in the C
 sense, but are not part of the PCRE public API. */

-extern int         _pcre_ord2utf8(int, uschar *);
-extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
-                     const pcre_study_data *, pcre_study_data *);
-extern int         _pcre_ucp_findprop(const unsigned int, int *, int *);
-extern int         _pcre_ucp_othercase(const int);
-extern int         _pcre_valid_utf8(const uschar *, int);
-extern BOOL        _pcre_xclass(int, const uschar *);
+extern BOOL         _pcre_is_newline(const uschar *, const uschar *, int *,
+                      BOOL);
+extern int          _pcre_ord2utf8(int, uschar *);
+extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
+                      const pcre_study_data *, pcre_study_data *);
+extern int          _pcre_ucp_findprop(const unsigned int, int *, int *);
+extern unsigned int _pcre_ucp_othercase(const unsigned int);
+extern int          _pcre_valid_utf8(const uschar *, int);
+extern BOOL         _pcre_was_newline(const uschar *, const uschar *, int *,
+                      BOOL);
+extern BOOL         _pcre_xclass(int, const uschar *);

 #endif

--- a/ext/pcre/pcrelib/pcre_maketables.c
+++ b/ext/pcre/pcrelib/pcre_maketables.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -130,7 +130,7 @@ for (i = 0; i < 256; i++)
  meta-character, which in this sense is any character that terminates a run
  of data characters. */

-  if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta;
+  if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
  *p++ = x;
  }

--- a/ext/pcre/pcrelib/pcre_newline.c
+++ b/ext/pcre/pcrelib/pcre_newline.c
@@ -0,0 +1,135 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2006 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains internal functions for testing newlines when more than
+one kind of newline is to be recognized. When a newline is found, its length is
+returned. In principle, we could implement several newline "types", each
+referring to a different set of newline characters. At present, PCRE supports
+only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
+so for now the type isn't passed into the functions. It can easily be added
+later if required. The full list of Unicode newline characters is taken from
+http://unicode.org/unicode/reports/tr18/. */
+
+
+#include "pcre_internal.h"
+
+
+
+/*************************************************
+*      Check for newline at given position       *
+*************************************************/
+
+/* Tests whether the character at ptr is one of the recognized newlines. The
+caller guarantees that ptr is less than the end of the subject string.
+
+Arguments:
+  ptr          pointer to possible newline
+  endptr       pointer to the end of the string
+  lenptr       where to return the newline's length in bytes
+  utf8         TRUE if in utf8 mode
+
+Returns:       TRUE with *lenptr set, or FALSE with *lenptr left untouched
+*/
+
+BOOL
+_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
+  BOOL utf8)
+{
+int c;
+if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
+switch(c)
+  {
+  case 0x000a:                                       /* LF */
+  case 0x000b:                                       /* VT */
+  case 0x000c: *lenptr = 1; return TRUE;             /* FF */
+  case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
+               return TRUE;                          /* CR, or CRLF as 2 */
+  case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;   /* NEL */
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 3; return TRUE;             /* PS */
+  default: return FALSE;                             /* not a newline */
+  }
+}
+
+
+
+/*************************************************
+*     Check for newline at previous position     *
+*************************************************/
+
+/* Tests whether the character that ends immediately before ptr is one of the
+recognized newlines. The caller guarantees that ptr is beyond the string start.
+
+Arguments:
+  ptr          pointer just past the possible newline
+  startptr     pointer to the start of the string
+  lenptr       where to return the newline's length in bytes
+  utf8         TRUE if in utf8 mode
+
+Returns:       TRUE with *lenptr set, or FALSE with *lenptr left untouched
+*/
+
+BOOL
+_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
+  BOOL utf8)
+{
+int c;
+ptr--;                        /* step back onto the preceding byte */
+if (utf8)
+  {
+  BACKCHAR(ptr);              /* presumably rewinds to the character's first
+                                 byte; macro is defined elsewhere - confirm */
+  GETCHAR(c, ptr);
+  }
+else c = *ptr;
+switch(c)
+  {
+  case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
+               return TRUE;                         /* LF, or CRLF as 2 */
+  case 0x000b:                                      /* VT */
+  case 0x000c:                                      /* FF */
+  case 0x000d: *lenptr = 1; return TRUE;            /* CR */
+  case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;  /* NEL */
+  case 0x2028:                                      /* LS */
+  case 0x2029: *lenptr = 3; return TRUE;            /* PS */
+  default: return FALSE;                            /* not a newline */
+  }
+}
+
+/* End of pcre_newline.c */
--- a/ext/pcre/pcrelib/pcre_ord2utf8.c
+++ b/ext/pcre/pcrelib/pcre_ord2utf8.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcre_printint.src
+++ b/ext/pcre/pcrelib/pcre_printint.src
@@ -49,9 +49,19 @@ local functions. This source file is used in two places:
 compiled regex for debugging purposes. */


+/* Macro that decides whether a character should be output as a literal or in
+hexadecimal. We don't use isprint() because that can vary from system to system
+(even without the use of locales) and we want the output always to be the same,
+for testing purposes. This macro is used in pcretest as well as in this file. */
+
+#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
+
+/* The table of operator names. */
+
 static const char *OP_names[] = { OP_NAME_LIST };


+
 /*************************************************
 *       Print single- or multi-byte character    *
 *************************************************/
@@ -63,7 +73,7 @@ int c = *ptr;

 if (!utf8 || (c & 0xc0) != 0xc0)
  {
-  if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
+  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
  return 0;
  }
 else
@@ -160,16 +170,6 @@ for(;;)

  fprintf(f, "%3d ", (int)(code - codestart));

-  if (*code >= OP_BRA)
-    {
-    if (*code - OP_BRA > EXTRACT_BASIC_MAX)
-      fprintf(f, "%3d Bra extra\n", GET(code, 1));
-    else
-      fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
-    code += _pcre_OP_lengths[OP_BRA];
-    continue;
-    }
-
  switch(*code)
    {
    case OP_END:
@@ -203,6 +203,14 @@ for(;;)
    fprintf(f, "\n");
    continue;

+    case OP_CBRA:
+    case OP_SCBRA:
+    fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
+      GET2(code, 1+LINK_SIZE));
+    break;
+
+    case OP_BRA:
+    case OP_SBRA:
    case OP_KETRMAX:
    case OP_KETRMIN:
    case OP_ALT:
@@ -213,33 +221,45 @@ for(;;)
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_COND:
+    case OP_SCOND:
    case OP_REVERSE:
    fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
    break;

-    case OP_BRANUMBER:
-    printf("%3d %s", GET2(code, 1), OP_names[*code]);
+    case OP_CREF:
+    fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
    break;

-    case OP_CREF:
-    if (GET2(code, 1) == CREF_RECURSE)
-      fprintf(f, "    Cond recurse");
+    case OP_RREF:
+    c = GET2(code, 1);
+    if (c == RREF_ANY)
+      fprintf(f, "    Cond recurse any");
    else
-      fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
+      fprintf(f, "    Cond recurse %d", c);
+    break;
+
+    case OP_DEF:
+    fprintf(f, "    Cond def");
    break;

    case OP_STAR:
    case OP_MINSTAR:
+    case OP_POSSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
+    case OP_POSPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
+    case OP_POSQUERY:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
+    case OP_TYPEPOSSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
+    case OP_TYPEPOSPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
+    case OP_TYPEPOSQUERY:
    fprintf(f, "    ");
    if (*code >= OP_TYPESTAR)
      {
@@ -257,17 +277,20 @@ for(;;)
    case OP_EXACT:
    case OP_UPTO:
    case OP_MINUPTO:
+    case OP_POSUPTO:
    fprintf(f, "    ");
    extra = print_char(f, code+3, utf8);
    fprintf(f, "{");
-    if (*code != OP_EXACT) fprintf(f, ",");
+    if (*code != OP_EXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_MINUPTO) fprintf(f, "?");
+      else if (*code == OP_POSUPTO) fprintf(f, "+");
    break;

    case OP_TYPEEXACT:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
+    case OP_TYPEPOSUPTO:
    fprintf(f, "    %s", OP_names[code[3]]);
    if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
      {
@@ -278,20 +301,26 @@ for(;;)
    if (*code != OP_TYPEEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
+      else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
    break;

    case OP_NOT:
-    if (isprint(c = code[1])) fprintf(f, "    [^%c]", c);
+    c = code[1];
+    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
      else fprintf(f, "    [^\\x%02x]", c);
    break;

    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
+    case OP_NOTPOSSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
+    case OP_NOTPOSPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
-    if (isprint(c = code[1])) fprintf(f, "    [^%c]", c);
+    case OP_NOTPOSQUERY:
+    c = code[1];
+    if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
      else fprintf(f, "    [^\\x%02x]", c);
    fprintf(f, "%s", OP_names[*code]);
    break;
@@ -299,11 +328,14 @@ for(;;)
    case OP_NOTEXACT:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
-    if (isprint(c = code[3])) fprintf(f, "    [^%c]{", c);
+    case OP_NOTPOSUPTO:
+    c = code[3];
+    if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
      else fprintf(f, "    [^\\x%02x]{", c);
    if (*code != OP_NOTEXACT) fprintf(f, "0,");
    fprintf(f, "%d}", GET2(code,1));
    if (*code == OP_NOTMINUPTO) fprintf(f, "?");
+      else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
    break;

    case OP_RECURSE:
@@ -363,12 +395,14 @@ for(;;)
            for (j = i+1; j < 256; j++)
              if ((ccode[j/8] & (1 << (j&7))) == 0) break;
            if (i == '-' || i == ']') fprintf(f, "\\");
-            if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
+            if (PRINTABLE(i)) fprintf(f, "%c", i);
+              else fprintf(f, "\\x%02x", i);
            if (--j > i)
              {
              if (j != i + 1) fprintf(f, "-");
              if (j == '-' || j == ']') fprintf(f, "\\");
-              if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
+              if (PRINTABLE(j)) fprintf(f, "%c", j);
+                else fprintf(f, "\\x%02x", j);
              }
            i = j;
            }
--- a/ext/pcre/pcrelib/pcre_refcount.c
+++ b/ext/pcre/pcrelib/pcre_refcount.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcre_scanner.cc
+++ b/ext/pcre/pcrelib/pcre_scanner.cc
@@ -43,6 +43,7 @@ Scanner::Scanner()
    input_(data_),
    skip_(NULL),
    should_skip_(false),
+    skip_repeat_(false),
    save_comments_(false),
    comments_(NULL),
    comments_offset_(0) {
@@ -53,6 +54,7 @@ Scanner::Scanner(const string& in)
    input_(data_),
    skip_(NULL),
    should_skip_(false),
+    skip_repeat_(false),
    save_comments_(false),
    comments_(NULL),
    comments_offset_(0) {
@@ -63,15 +65,31 @@ Scanner::~Scanner() {
  delete comments_;
 }

+void Scanner::SetSkipExpression(const char* re) {
+  delete skip_;
+  if (re != NULL) {
+    skip_ = new RE(re);
+    should_skip_ = true;
+    skip_repeat_ = true;
+    ConsumeSkip();
+  } else {
+    skip_ = NULL;
+    should_skip_ = false;
+    skip_repeat_ = false;
+  }
+}
+
 void Scanner::Skip(const char* re) {
  delete skip_;
  if (re != NULL) {
    skip_ = new RE(re);
    should_skip_ = true;
+    skip_repeat_ = false;
    ConsumeSkip();
  } else {
    skip_ = NULL;
    should_skip_ = false;
+    skip_repeat_ = false;
  }
 }

@@ -118,19 +136,22 @@ bool Scanner::Consume(const RE& re,

 // helper function to consume *skip_ and honour save_comments_
 void Scanner::ConsumeSkip() {
+  const char* start_data = input_.data();
+  while (skip_->Consume(&input_)) {
+    if (!skip_repeat_) {
+      // Only one skip allowed.
+      break;
+    }
+  }
  if (save_comments_) {
-    if (NULL == comments_) {
+    if (comments_ == NULL) {
      comments_ = new vector<StringPiece>;
    }
-    const char *start_data = input_.data();
-    skip_->Consume(&input_);
    // already pointing one past end, so no need to +1
    int length = input_.data() - start_data;
    if (length > 0) {
      comments_->push_back(StringPiece(start_data, length));
    }
-  } else {
-    skip_->Consume(&input_);
  }
 }

--- a/ext/pcre/pcrelib/pcre_scanner.h
+++ b/ext/pcre/pcrelib/pcre_scanner.h
@@ -36,7 +36,7 @@
 //      Scanner scanner(input);
 //      string var;
 //      int number;
-//      scanner.Skip("\\s+");           // Skip any white space we encounter
+//      scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
 //      while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
 //        ...;
 //      }
@@ -90,10 +90,16 @@ class Scanner {
  // skipped.  For example, a programming language scanner would use
  // a skip RE that matches white space and comments.
  //
-  //    scanner.Skip("(\\s|//.*|/[*](.|\n)*?[*]/)*");
+  //    scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
+  //
+  // Skipping repeats as long as it succeeds.  We used to let people do
+  // this by writing "(...)*" in the regular expression, but that added
+  // up to lots of recursive calls within the pcre library, so now we
+  // control repetition explicitly via the function call API.
  //
  // You can pass NULL for "re" if you do not want any data to be skipped.
-  void Skip(const char* re);
+  void Skip(const char* re);   // DEPRECATED; does *not* repeat
+  void SetSkipExpression(const char* re);

  // Temporarily pause "skip"ing. This
  //   Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
@@ -109,12 +115,13 @@ class Scanner {
  /***** Special wrappers around SetSkip() for some common idioms *****/

  // Arranges to skip whitespace, C comments, C++ comments.
-  // The overall RE is a repeated disjunction of the following REs:
+  // The overall RE is a disjunction of the following REs:
  //    \\s                     whitespace
  //    //.*\n                  C++ comment
  //    /[*](.|\n)*?[*]/        C comment (x*? means minimal repetitions of x)
+  // We get repetition via the semantics of SetSkipExpression, not by using *
  void SkipCXXComments() {
-    Skip("((\\s|//.*\n|/[*](.|\n)*?[*]/)*)");
+    SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
  }

  void set_save_comments(bool comments) {
@@ -143,6 +150,7 @@ class Scanner {
  StringPiece   input_;         // Unprocessed input
  RE*           skip_;          // If non-NULL, RE for skipping input
  bool          should_skip_;   // If true, use skip_
+  bool          skip_repeat_;   // If true, repeat skip_ as long as it works
  bool          save_comments_; // If true, aggregate the skip expression

  // the skipped comments
--- a/ext/pcre/pcrelib/pcre_scanner_unittest.cc
+++ b/ext/pcre/pcrelib/pcre_scanner_unittest.cc
@@ -33,10 +33,13 @@
 // functionality.

 #include <stdio.h>
+#include <string>
 #include <vector>
 #include <pcre_stringpiece.h>
 #include <pcre_scanner.h>

+#define FLAGS_unittest_stack_size   49152
+
 // Dies with a fatal error if the two values are not equal.
 #define CHECK_EQ(a, b)  do {                                    \
  if ( (a) != (b) ) {                                           \
@@ -116,8 +119,31 @@ static void TestScanner() {
  comments.resize(0);
 }

+static void TestBigComment() {
+  string input;
+  for (int i = 0; i < 1024; ++i) {
+    char buf[1024];
+    snprintf(buf, sizeof(buf), "    # Comment %d\n", i);
+    input += buf;
+  }
+  input += "name = value;\n";
+
+  Scanner s(input.c_str());
+  s.SetSkipExpression("\\s+|#.*\n");
+
+  string name;
+  string value;
+  s.Consume("(\\w+) = (\\w+);", &name, &value);
+  CHECK_EQ(name, "name");
+  CHECK_EQ(value, "value");
+}
+
+// TODO: also test scanner and big-comment in a thread with a
+//       small stack size
+
 int main(int argc, char** argv) {
  TestScanner();
+  TestBigComment();

  // Done
  printf("OK\n");
--- a/ext/pcre/pcrelib/pcre_study.c
+++ b/ext/pcre/pcrelib/pcre_study.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -45,6 +45,11 @@ supporting functions. */
 #include "pcre_internal.h"


+/* Returns from set_start_bits() */
+
+enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
+
+
 /*************************************************
 *      Set a bit and maybe its alternate case    *
 *************************************************/
@@ -72,12 +77,16 @@ if (caseless && (cd->ctypes[c] & ctype_letter) != 0)


 /*************************************************
-*          Create bitmap of starting chars       *
+*          Create bitmap of starting bytes       *
 *************************************************/

-/* This function scans a compiled unanchored expression and attempts to build a
-bitmap of the set of initial characters. If it can't, it returns FALSE. As time
-goes by, we may be able to get more clever at doing this.
+/* This function scans a compiled unanchored expression recursively and
+attempts to build a bitmap of the set of possible starting bytes. As time goes
+by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
+useful for parenthesized groups in patterns such as (a*)b where the group
+provides some optional starting bytes but scanning must continue at the outer
+level to find at least one mandatory byte. At the outermost level, this
+function fails unless the result is SSB_DONE.

 Arguments:
  code         points to an expression
@@ -86,14 +95,17 @@ Arguments:
  utf8         TRUE if in UTF-8 mode
  cd           the block with char table pointers

-Returns:       TRUE if table built, FALSE otherwise
+Returns:       SSB_FAIL     => Failed to find any starting bytes
+               SSB_DONE     => Found mandatory starting bytes
+               SSB_CONTINUE => Found optional starting bytes
 */

-static BOOL
+static int
 set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
  BOOL utf8, compile_data *cd)
 {
 register int c;
+int yield = SSB_DONE;

 #if 0
 /* ========================================================================= */
@@ -114,25 +126,55 @@ volatile int dummy;

 do
  {
-  const uschar *tcode = code + 1 + LINK_SIZE;
+  const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
  BOOL try_next = TRUE;

-  while (try_next)
+  while (try_next)    /* Loop for items in this branch */
    {
-    /* If a branch starts with a bracket or a positive lookahead assertion,
-    recurse to set bits from within them. That's all for this branch. */
-
-    if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
+    int rc;
+    switch(*tcode)
      {
-      if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
-        return FALSE;
-      try_next = FALSE;
-      }
+      /* Fail if we reach something we don't understand */

-    else switch(*tcode)
-      {
      default:
-      return FALSE;
+      return SSB_FAIL;
+
+      /* If we hit a bracket or a positive lookahead assertion, recurse to set
+      bits from within the subpattern. If it can't find anything, we have to
+      give up. If it finds some mandatory character(s), we are done for this
+      branch. Otherwise, carry on scanning after the subpattern. */
+
+      case OP_BRA:
+      case OP_SBRA:
+      case OP_CBRA:
+      case OP_SCBRA:
+      case OP_ONCE:
+      case OP_ASSERT:
+      rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
+      if (rc == SSB_FAIL) return SSB_FAIL;
+      if (rc == SSB_DONE) try_next = FALSE; else
+        {
+        do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
+        tcode += 1 + LINK_SIZE;
+        }
+      break;
+
+      /* If we hit ALT or KET, it means we haven't found anything mandatory in
+      this branch, though we might have found something optional. For ALT, we
+      continue with the next alternative, but we have to arrange that the final
+      result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
+      return SSB_CONTINUE: if this is the top level, that indicates failure,
+      but after a nested subpattern, it causes scanning to continue. */
+
+      case OP_ALT:
+      yield = SSB_CONTINUE;
+      try_next = FALSE;
+      break;
+
+      case OP_KET:
+      case OP_KETRMAX:
+      case OP_KETRMIN:
+      return SSB_CONTINUE;

      /* Skip over callout */

@@ -140,19 +182,13 @@ do
      tcode += 2 + 2*LINK_SIZE;
      break;

-      /* Skip over extended extraction bracket number */
-
-      case OP_BRANUMBER:
-      tcode += 3;
-      break;
-
      /* Skip over lookbehind and negative lookahead assertions */

      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
-      tcode += 1+LINK_SIZE;
+      tcode += 1 + LINK_SIZE;
      break;

      /* Skip over an option setting, changing the caseless flag */
@@ -166,27 +202,30 @@ do

      case OP_BRAZERO:
      case OP_BRAMINZERO:
-      if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
-        return FALSE;
+      if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
+        return SSB_FAIL;
 /* =========================================================================
      See the comment at the head of this function concerning the next line,
      which was an old fudge for the benefit of OS/2.
      dummy = 1;
  ========================================================================= */
      do tcode += GET(tcode,1); while (*tcode == OP_ALT);
-      tcode += 1+LINK_SIZE;
+      tcode += 1 + LINK_SIZE;
      break;

      /* Single-char * or ? sets the bit and tries the next item */

      case OP_STAR:
      case OP_MINSTAR:
+      case OP_POSSTAR:
      case OP_QUERY:
      case OP_MINQUERY:
+      case OP_POSQUERY:
      set_bit(start_bits, tcode[1], caseless, cd);
      tcode += 2;
 #ifdef SUPPORT_UTF8
-      if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
+      if (utf8 && tcode[-1] >= 0xc0)
+        tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
 #endif
      break;

@@ -194,10 +233,12 @@ do

      case OP_UPTO:
      case OP_MINUPTO:
+      case OP_POSUPTO:
      set_bit(start_bits, tcode[3], caseless, cd);
      tcode += 4;
 #ifdef SUPPORT_UTF8
-      if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
+      if (utf8 && tcode[-1] >= 0xc0)
+        tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
 #endif
      break;

@@ -210,6 +251,7 @@ do
      case OP_CHARNC:
      case OP_PLUS:
      case OP_MINPLUS:
+      case OP_POSPLUS:
      set_bit(start_bits, tcode[1], caseless, cd);
      try_next = FALSE;
      break;
@@ -283,16 +325,19 @@ do

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
+      case OP_TYPEPOSUPTO:
      tcode += 2;               /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
+      case OP_TYPEPOSSTAR:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
+      case OP_TYPEPOSQUERY:
      switch(tcode[1])
        {
        case OP_ANY:
-        return FALSE;
+        return SSB_FAIL;

        case OP_NOT_DIGIT:
        for (c = 0; c < 32; c++)
@@ -418,7 +463,7 @@ do
  code += GET(code, 1);   /* Advance to next branch */
  }
 while (*code == OP_ALT);
-return TRUE;
+return yield;
 }


@@ -492,8 +537,8 @@ compile_block.ctypes = tables + ctypes_offset;
 /* See if we can find a fixed set of initial characters for the pattern. */

 memset(start_bits, 0, 32 * sizeof(uschar));
-if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
-  (re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
+if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
+  (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;

 /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
 the latter, which is pointed to by the former, which may also get additional
--- a/ext/pcre/pcrelib/pcre_tables.c
+++ b/ext/pcre/pcrelib/pcre_tables.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -72,9 +72,8 @@ first byte of a character, indexed by the number of additional bytes. */
 const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
 const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

-/* Table of the number of extra characters, indexed by the first character
-masked with 0x3f. The highest number for a valid UTF-8 character is in fact
-0x3d. */
+/* Table of the number of extra bytes, indexed by the first byte masked with
+0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */

 const uschar _pcre_utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -89,6 +88,7 @@ const ucp_type_table _pcre_utt[] = {
  { "Any",                 PT_ANY,  0 },
  { "Arabic",              PT_SC,   ucp_Arabic },
  { "Armenian",            PT_SC,   ucp_Armenian },
+  { "Balinese",            PT_SC,   ucp_Balinese },
  { "Bengali",             PT_SC,   ucp_Bengali },
  { "Bopomofo",            PT_SC,   ucp_Bopomofo },
  { "Braille",             PT_SC,   ucp_Braille },
@@ -104,6 +104,7 @@ const ucp_type_table _pcre_utt[] = {
  { "Common",              PT_SC,   ucp_Common },
  { "Coptic",              PT_SC,   ucp_Coptic },
  { "Cs",                  PT_PC,   ucp_Cs },
+  { "Cuneiform",           PT_SC,   ucp_Cuneiform },
  { "Cypriot",             PT_SC,   ucp_Cypriot },
  { "Cyrillic",            PT_SC,   ucp_Cyrillic },
  { "Deseret",             PT_SC,   ucp_Deseret },
@@ -146,6 +147,7 @@ const ucp_type_table _pcre_utt[] = {
  { "N",                   PT_GC,   ucp_N },
  { "Nd",                  PT_PC,   ucp_Nd },
  { "New_Tai_Lue",         PT_SC,   ucp_New_Tai_Lue },
+  { "Nko",                 PT_SC,   ucp_Nko },
  { "Nl",                  PT_PC,   ucp_Nl },
  { "No",                  PT_PC,   ucp_No },
  { "Ogham",               PT_SC,   ucp_Ogham },
@@ -158,6 +160,8 @@ const ucp_type_table _pcre_utt[] = {
  { "Pd",                  PT_PC,   ucp_Pd },
  { "Pe",                  PT_PC,   ucp_Pe },
  { "Pf",                  PT_PC,   ucp_Pf },
+  { "Phags_Pa",            PT_SC,   ucp_Phags_Pa },
+  { "Phoenician",          PT_SC,   ucp_Phoenician },
  { "Pi",                  PT_PC,   ucp_Pi },
  { "Po",                  PT_PC,   ucp_Po },
  { "Ps",                  PT_PC,   ucp_Ps },
--- a/ext/pcre/pcrelib/pcre_try_flipped.c
+++ b/ext/pcre/pcrelib/pcre_try_flipped.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcre_ucp_searchfuncs.c
+++ b/ext/pcre/pcrelib/pcre_ucp_searchfuncs.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -131,11 +131,11 @@ letter, return the other case. Otherwise, return -1.
 Arguments:
  c           the character value

-Returns:      the other case or -1 if none
+Returns:      the other case or NOTACHAR if none
 */

-int
-_pcre_ucp_othercase(const int c)
+unsigned int
+_pcre_ucp_othercase(const unsigned int c)
 {
 int bot = 0;
 int top = sizeof(ucp_table)/sizeof(cnode);
@@ -161,14 +161,14 @@ for (;;)
    }
  }

-/* Found an entry in the table. Return -1 for a range entry. Otherwise return
-the other case if there is one, else -1. */
+/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
+return the other case if there is one, else NOTACHAR. */

-if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return -1;
+if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;

 offset = ucp_table[mid].f1 & f1_casemask;
 if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
-return (offset == 0)? -1 : c + offset;
+return (offset == 0)? NOTACHAR : c + offset;
 }


--- a/ext/pcre/pcrelib/pcre_valid_utf8.c
+++ b/ext/pcre/pcrelib/pcre_valid_utf8.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -79,7 +79,7 @@ for (p = string; length-- > 0; p++)
  register int ab;
  register int c = *p;
  if (c < 128) continue;
-  if ((c & 0xc0) != 0xc0) return p - string;
+  if (c < 0xc0) return p - string;
  ab = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
  if (length < ab) return p - string;
  length -= ab;
--- a/ext/pcre/pcrelib/pcre_version.c
+++ b/ext/pcre/pcrelib/pcre_version.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -49,16 +49,38 @@ string that identifies the PCRE version that is in use. */
 *          Return version string                 *
 *************************************************/

+/* These macros are the standard way of turning unquoted text into C strings.
+They allow macros like PCRE_MAJOR to be defined without quotes, which is
+convenient for user programs that want to test its value. */
+
 #define STRING(a)  # a
 #define XSTRING(s) STRING(s)

+/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
+production releases. Originally, it was used naively in this code:
+
+  return XSTRING(PCRE_MAJOR)
+         "." XSTRING(PCRE_MINOR)
+             XSTRING(PCRE_PRERELEASE)
+         " " XSTRING(PCRE_DATE);
+
+However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
+STRING(). The C standard states: "If (before argument substitution) any
+argument consists of no preprocessing tokens, the behavior is undefined." It
+turns out that gcc treats this case as a single empty string - which is what we
+really want - but Visual C grumbles about the lack of an argument for the
+macro. Unfortunately, both are within their rights. To cope with both ways of
+handling this, I had to resort to some messy hackery that does a test at run
+time. I could find no way of detecting that a macro is defined as an empty
+string at pre-processor time. This hack uses a standard trick for avoiding
+calling the STRING macro with an empty argument when doing the test. */
+
 PCRE_DATA_SCOPE const char *
 pcre_version(void)
 {
-return XSTRING(PCRE_MAJOR)
-       "." XSTRING(PCRE_MINOR)
-           XSTRING(PCRE_PRERELEASE)
-       " " XSTRING(PCRE_DATE);
+return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
+  XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
+  XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
 }

 /* End of pcre_version.c */
--- a/ext/pcre/pcrelib/pcre_xclass.c
+++ b/ext/pcre/pcrelib/pcre_xclass.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcrecpp.cc
+++ b/ext/pcre/pcrelib/pcrecpp.cc
@@ -61,7 +61,7 @@ static const string empty_string;
 // If the user doesn't ask for any options, we just use this one
 static RE_Options default_options;

-void RE::Init(const char* pat, const RE_Options* options) {
+void RE::Init(const string& pat, const RE_Options* options) {
  pattern_ = pat;
  if (options == NULL) {
    options_ = default_options;
@@ -78,7 +78,7 @@ void RE::Init(const char* pat, const RE_Options* options) {
    // conservative in that it may treat some "simple" patterns
    // as "complex" (e.g., if the vertical bar is in a character
    // class or is escaped).  But it seems good enough.
-    if (strchr(pat, '|') == NULL) {
+    if (strchr(pat.c_str(), '|') == NULL) {
      // Simple pattern: we can use position-based checks to perform
      // fully anchored matches
      re_full_ = re_partial_;
@@ -89,12 +89,18 @@ void RE::Init(const char* pat, const RE_Options* options) {
  }
 }

-RE::~RE() {
+void RE::Cleanup() {
  if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
  if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);
  if (error_ != &empty_string)                     delete error_;
 }

+
+RE::~RE() {
+  Cleanup();
+}
+
+
 pcre* RE::Compile(Anchor anchor) {
  // First, convert RE_Options into pcre options
  int pcre_options = 0;
@@ -424,6 +430,34 @@ bool RE::Extract(const StringPiece& rewrite,
  return Rewrite(out, rewrite, text, vec, matches);
 }

+/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
+  string result;
+
+  // Escape any ascii character not in [A-Za-z_0-9].
+  //
+  // Note that it's legal to escape a character even if it has no
+  // special meaning in a regular expression -- so this function does
+  // that.  (This also makes it identical to the perl function of the
+  // same name; see `perldoc -f quotemeta`.)
+  for (int ii = 0; ii < unquoted.size(); ++ii) {
+    // Note that using 'isalnum' here raises the benchmark time from
+    // 32ns to 58ns:
+    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+        unquoted[ii] != '_' &&
+        // If this is part of a UTF8 or Latin1 character, we need
+        // to copy this byte without escaping.  Experimentally this is
+        // what works correctly with the regexp library.
+        !(unquoted[ii] & 128)) {
+      result += '\\';
+    }
+    result += unquoted[ii];
+  }
+
+  return result;
+}
+
 /***** Actual matching and rewriting code *****/

 int RE::TryMatch(const StringPiece& text,
@@ -809,14 +843,14 @@ bool Arg::parse_float(const char* str, int n, void* dest) {
    return parse_##name##_radix(str, n, dest, 0);                       \
  }

-DEFINE_INTEGER_PARSERS(short);
-DEFINE_INTEGER_PARSERS(ushort);
-DEFINE_INTEGER_PARSERS(int);
-DEFINE_INTEGER_PARSERS(uint);
-DEFINE_INTEGER_PARSERS(long);
-DEFINE_INTEGER_PARSERS(ulong);
-DEFINE_INTEGER_PARSERS(longlong);
-DEFINE_INTEGER_PARSERS(ulonglong);
+DEFINE_INTEGER_PARSERS(short)      /*                                   */
+DEFINE_INTEGER_PARSERS(ushort)     /*                                   */
+DEFINE_INTEGER_PARSERS(int)        /* Don't use semicolons after these  */
+DEFINE_INTEGER_PARSERS(uint)       /* statements because they can cause */
+DEFINE_INTEGER_PARSERS(long)       /* compiler warnings if the checking */
+DEFINE_INTEGER_PARSERS(ulong)      /* level is turned up high enough.   */
+DEFINE_INTEGER_PARSERS(longlong)   /*                                   */
+DEFINE_INTEGER_PARSERS(ulonglong)  /*                                   */

 #undef DEFINE_INTEGER_PARSERS

--- a/ext/pcre/pcrelib/pcrecpp.h
+++ b/ext/pcre/pcrelib/pcrecpp.h
@@ -112,6 +112,12 @@
 //    T             (where "bool T::ParseFrom(const char*, int)" exists)
 //    NULL          (the corresponding matched sub-pattern is not copied)
 //
+// CAVEAT: An optional sub-pattern that does not exist in the matched
+// string is assigned the empty string.  Therefore, the following will
+// return false (because the empty string is not a valid number):
+//    int number;
+//    pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
+//
 // -----------------------------------------------------------------------
 // DO_MATCH
 //
@@ -488,8 +494,25 @@ class RE {
  // pass in a string or a "const char*" wherever an "RE" is expected.
  RE(const char* pat) { Init(pat, NULL); }
  RE(const char *pat, const RE_Options& option) { Init(pat, &option); }
-  RE(const string& pat) { Init(pat.c_str(), NULL); }
-  RE(const string& pat, const RE_Options& option) { Init(pat.c_str(), &option); }
+  RE(const string& pat) { Init(pat, NULL); }
+  RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
+
+  // Copy constructor & assignment - note that these are expensive
+  // because they recompile the expression.
+  RE(const RE& re) { Init(re.pattern_, &re.options_); }
+  const RE& operator=(const RE& re) {
+    if (this != &re) {
+      Cleanup();
+
+      // This is the code that originally came from Google
+      // Init(re.pattern_.c_str(), &re.options_);
+
+      // This is the replacement from Ari Pollak
+      Init(re.pattern_, &re.options_);
+    }
+    return *this;
+  }
+

  ~RE();

@@ -589,6 +612,15 @@ class RE {
               const StringPiece &text,
               string *out) const;

+  // Escapes all potentially meaningful regexp characters in
+  // 'unquoted'.  The returned string, used as a regular expression,
+  // will exactly match the original string.  For example,
+  //           1.5-2.0?
+  // may become:
+  //           1\.5\-2\.0\?
+  static string QuoteMeta(const StringPiece& unquoted);
+
+
  /***** Generic matching interface *****/

  // Type of match (TODO: Should be restructured as part of RE_Options)
@@ -611,7 +643,8 @@ class RE {

 private:

-  void Init(const char* pattern, const RE_Options* options);
+  void Init(const string& pattern, const RE_Options* options);
+  void Cleanup();

  // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
  // pairs of integers for the beginning and end positions of matched
@@ -655,11 +688,6 @@ class RE {
  pcre*         re_full_;       // For full matches
  pcre*         re_partial_;    // For partial matches
  const string* error_;         // Error indicator (or points to empty string)
-
-  // Don't allow the default copy or assignment constructors --
-  // they're expensive and too easy to do by accident.
-  RE(const RE&);
-  void operator=(const RE&);
 };

 }   // namespace pcrecpp
--- a/ext/pcre/pcrelib/pcrecpp_unittest.cc
+++ b/ext/pcre/pcrelib/pcrecpp_unittest.cc
@@ -1,4 +1,6 @@
-// Copyright (c) 2005, Google Inc.
+// -*- coding: utf-8 -*-
+//
+// Copyright (c) 2005 - 2006, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -445,6 +447,80 @@ static void TestRecursion() {
  CHECK(re4.FullMatch(text_bad) == false);
 }

+// A meta-quoted string, interpreted as a pattern, should always match
+// the original unquoted string.
+static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
+  string quoted = RE::QuoteMeta(unquoted);  // Escape the input
+  RE re(quoted, options);                   // Use the escaped text as a pattern
+  CHECK(re.FullMatch(unquoted));            // It must match the raw input in full
+}
+
+// A string containing meaningful regexp characters, once meta-quoted, should
+// generally not match a string that the unquoted pattern would match.
+static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
+                                  RE_Options options = RE_Options()) {
+  string quoted = RE::QuoteMeta(unquoted);
+  RE re(quoted, options);
+  CHECK(!re.FullMatch(should_not_match));  // The quoted pattern must reject this text
+}
+
+// Tests that quoted meta characters match their original strings; the cases
+// that must not match are in TestQuoteMetaSimpleNegative() below.
+static void TestQuotaMetaSimple() {  // NOTE(review): "Quota" is presumably a typo for "Quote"
+  TestQuoteMeta("foo");                  // No metacharacters at all
+  TestQuoteMeta("foo.bar");
+  TestQuoteMeta("foo\\.bar");            // Input already contains a backslash
+  TestQuoteMeta("[1-9]");
+  TestQuoteMeta("1.5-2.0?");
+  TestQuoteMeta("\\d");
+  TestQuoteMeta("Who doesn't like ice cream?");
+  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
+  TestQuoteMeta("((?!)xxx).*yyy");
+  TestQuoteMeta("([");                   // Unbalanced as a raw pattern
+}
+
+static void TestQuoteMetaSimpleNegative() {  // Quoted patterns must lose their regexp meaning
+  NegativeTestQuoteMeta("foo", "bar");
+  NegativeTestQuoteMeta("...", "bar");       // Quoted dots must not act as wildcards
+  NegativeTestQuoteMeta("\\.", ".");
+  NegativeTestQuoteMeta("\\.", "..");
+  NegativeTestQuoteMeta("(a)", "a");         // Quoted parentheses must not group
+  NegativeTestQuoteMeta("(a|b)", "a");       // Quoted '|' must not alternate
+  NegativeTestQuoteMeta("(a|b)", "(a)");
+  NegativeTestQuoteMeta("(a|b)", "a|b");
+  NegativeTestQuoteMeta("[0-9]", "0");       // Quoted brackets must not form a class
+  NegativeTestQuoteMeta("[0-9]", "0-9");
+  NegativeTestQuoteMeta("[0-9]", "[9]");
+  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
+}
+
+static void TestQuoteMetaLatin1() {  // Single byte above 0x7f, default options
+  TestQuoteMeta("3\xb2 = 9");        // 0xb2 is SUPERSCRIPT TWO in Latin-1
+}
+
+static void TestQuoteMetaUtf8() {  // Does nothing unless SUPPORT_UTF8 is defined
+#ifdef SUPPORT_UTF8
+  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());  // 2-byte char inside a word
+  TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
+  TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
+  TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
+  TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
+  TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
+  TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
+  NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
+                        "27\\\xc2\\\xb0",           // Same bytes, each preceded by a backslash
+                        pcrecpp::UTF8());
+#endif
+}
+
+static void TestQuoteMetaAll() {  // Runs every QuoteMeta test group in turn
+  printf("Testing QuoteMeta\n");
+  TestQuotaMetaSimple();          // Positive cases (name spelled "Quota" at its definition)
+  TestQuoteMetaSimpleNegative();
+  TestQuoteMetaLatin1();
+  TestQuoteMetaUtf8();
+}
+
 //
 // Options tests contributed by
 // Giuseppe Maxia, CTO, Stardata s.r.l.
@@ -667,6 +743,35 @@ static void TestOptions() {
  Test_all_options();
 }

+static void TestConstructors() {  // Exercises RE copy construction and assignment
+  printf("Testing constructors\n");
+
+  RE_Options options;
+  options.set_dotall(true);  // Presumably lets '.' match the newlines in str
+  const char *str = "HELLO\n" "cruel\n" "world";
+
+  RE orig("HELLO.*world", options);
+  CHECK(orig.FullMatch(str));
+
+  RE copy1(orig);                // Copy constructor
+  CHECK(copy1.FullMatch(str));
+
+  RE copy2("not a match");
+  CHECK(!copy2.FullMatch(str));
+  copy2 = copy1;                 // Assignment from a copy replaces the old pattern
+  CHECK(copy2.FullMatch(str));
+  copy2 = orig;                  // Assignment from the original
+  CHECK(copy2.FullMatch(str));
+
+  // Make sure when we assign to ourselves, nothing bad happens
+  orig = orig;
+  copy1 = copy1;
+  copy2 = copy2;
+  CHECK(orig.FullMatch(str));
+  CHECK(copy1.FullMatch(str));
+  CHECK(copy2.FullMatch(str));
+}
+
 int main(int argc, char** argv) {
  // Treat any flag as --help
  if (argc > 1 && argv[1][0] == '-') {
@@ -985,11 +1090,14 @@ int main(int argc, char** argv) {
  CHECK(RE("h.*o").PartialMatch("hello!"));
  CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));

+  /***** other tests *****/
+
  RadixTests();
  TestReplace();
  TestExtract();
  TestConsume();
  TestFindAndConsume();
+  TestQuoteMetaAll();
  TestMatchNumberPeculiarity();

  // Check the pattern() accessor
@@ -1109,6 +1217,9 @@ int main(int argc, char** argv) {
    VERBOSE_TEST  = true;
  TestOptions();

+  // Test the constructors
+  TestConstructors();
+
  // Done
  printf("OK\n");

--- a/ext/pcre/pcrelib/pcregrep.c
+++ b/ext/pcre/pcrelib/pcregrep.c
@@ -6,7 +6,7 @@
 its pattern matching. On a Unix or Win32 system it can recurse into
 directories.

-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.

 typedef int BOOL;

-#define VERSION "4.3 01-Jun-2006"
+#define VERSION "4.4 29-Nov-2006"
 #define MAX_PATTERN_COUNT 100

 #if BUFSIZ > 8192
@@ -65,7 +65,6 @@ typedef int BOOL;
 #define MBUFTHIRD 8192
 #endif

-
 /* Values for the "filenames" variable, which specifies options for file name
 output. The order is important; it is assumed that a file name is wanted for
 all values greater than FN_DEFAULT. */
@@ -83,6 +82,10 @@ enum { DEE_READ, DEE_SKIP };
 #define PO_LINE_MATCH     0x0002
 #define PO_FIXED_STRINGS  0x0004

+/* Line ending types */
+
+enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
+


 /*************************************************
@@ -100,8 +103,7 @@ static const char *jfriedl_prefix = "";
 static const char *jfriedl_postfix = "";
 #endif

-static int  endlinebyte = '\n';     /* Last byte of endline sequence */
-static int  endlineextra = 0;       /* Extra bytes for endline sequence */
+static int  endlinetype;

 static char *colour_string = (char *)"1;31";
 static char *colour_option = NULL;
@@ -142,6 +144,7 @@ static BOOL number = FALSE;
 static BOOL only_matching = FALSE;
 static BOOL quiet = FALSE;
 static BOOL silent = FALSE;
+static BOOL utf8 = FALSE;

 /* Structure for options and list of them */

@@ -219,6 +222,16 @@ static const char *prefix[] = {
 static const char *suffix[] = {
  "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };

+/* UTF-8 tables - used only when the newline setting is "any". */
+
+const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+
+const char utf8_table4[] = {
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+


 /*************************************************
@@ -470,6 +483,216 @@ return sys_errlist[n];



+/*************************************************
+*             Find end of line                   *
+*************************************************/
+
+/* The length of the endline sequence that is found is set via lenptr. This may
+be zero at the very end of the file if there is no line-ending sequence there.
+
+Arguments:
+  p         current position in line
+  endptr    end of available data
+  lenptr    where to put the length of the eol sequence
+
+Returns:    pointer just past the line, including its eol sequence (or endptr)
+*/
+
+static char *
+end_of_line(char *p, char *endptr, int *lenptr)
+{
+switch(endlinetype)
+  {
+  default:      /* Just in case */
+  case EL_LF:
+  while (p < endptr && *p != '\n') p++;
+  if (p < endptr)
+    {
+    *lenptr = 1;
+    return p + 1;   /* Step over the LF */
+    }
+  *lenptr = 0;      /* No LF before the end of the data */
+  return endptr;
+
+  case EL_CR:
+  while (p < endptr && *p != '\r') p++;
+  if (p < endptr)
+    {
+    *lenptr = 1;
+    return p + 1;   /* Step over the CR */
+    }
+  *lenptr = 0;      /* No CR before the end of the data */
+  return endptr;
+
+  case EL_CRLF:
+  for (;;)
+    {
+    while (p < endptr && *p != '\r') p++;
+    if (++p >= endptr)   /* Nothing follows the CR, or no CR was found */
+      {
+      *lenptr = 0;
+      return endptr;
+      }
+    if (*p == '\n')      /* A CR not followed by LF is skipped; the loop continues */
+      {
+      *lenptr = 2;
+      return p + 1;
+      }
+    }
+  break;   /* Not reached: the loop above only exits via return */
+
+  case EL_ANY:
+  while (p < endptr)
+    {
+    int extra = 0;
+    register int c = *((unsigned char *)p);
+
+    if (utf8 && c >= 0xc0)   /* Lead byte of a multibyte UTF-8 character */
+      {
+      int gcii, gcss;
+      extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
+      gcss = 6*extra;
+      c = (c & utf8_table3[extra]) << gcss;
+      for (gcii = 1; gcii <= extra; gcii++)
+        {
+        gcss -= 6;
+        c |= (p[gcii] & 0x3f) << gcss;   /* NOTE(review): p + gcii is not checked against endptr */
+        }
+      }
+
+    p += 1 + extra;   /* NOTE(review): may step past endptr on a truncated character */
+
+    switch (c)
+      {
+      case 0x0a:    /* LF */
+      case 0x0b:    /* VT */
+      case 0x0c:    /* FF */
+      *lenptr = 1;
+      return p;
+
+      case 0x0d:    /* CR */
+      if (p < endptr && *p == 0x0a)   /* CR LF counts as a single ending */
+        {
+        *lenptr = 2;
+        p++;
+        }
+      else *lenptr = 1;
+      return p;
+
+      case 0x85:    /* NEL */
+      *lenptr = utf8? 2 : 1;   /* Two bytes when decoded from UTF-8, else one */
+      return p;
+
+      case 0x2028:  /* LS */
+      case 0x2029:  /* PS */
+      *lenptr = 3;             /* Only reachable via the UTF-8 decoding above */
+      return p;
+
+      default:
+      break;
+      }
+    }   /* End of loop for ANY case */
+
+  *lenptr = 0;  /* Must have hit the end */
+  return endptr;
+  }     /* End of overall switch */
+}
+
+
+
+/*************************************************
+*         Find start of previous line            *
+*************************************************/
+
+/* This is called when looking back for before lines to print.
+
+Arguments:
+  p         start of the subsequent line
+  startptr  start of available data
+
+Returns:    pointer to the start of the previous line
+*/
+
+static char *
+previous_line(char *p, char *startptr)
+{
+switch(endlinetype)   /* Ending convention is selected by the global endlinetype */
+  {
+  default:      /* Just in case */
+  case EL_LF:
+  p--;                                          /* Step back over the LF */
+  while (p > startptr && p[-1] != '\n') p--;
+  return p;
+
+  case EL_CR:
+  p--;                                          /* Step back over the CR */
+  while (p > startptr && p[-1] != '\r') p--;    /* Lines end in CR here, not LF */
+  return p;
+
+  case EL_CRLF:
+  for (;;)
+    {
+    p -= 2;                                     /* Step back over two bytes (CR LF) */
+    while (p > startptr && p[-1] != '\n') p--;
+    if (p <= startptr + 1 || p[-2] == '\r') return p;   /* Else a lone LF: keep looking */
+    }
+  return p;   /* But control should never get here */
+
+  case EL_ANY:
+  if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;   /* Treat CR LF as one ending */
+  if (utf8) while ((*p & 0xc0) == 0x80) p--;    /* Back up over UTF-8 continuation bytes */
+
+  while (p > startptr)
+    {
+    register int c;
+    char *pp = p - 1;
+
+    if (utf8)
+      {
+      int extra = 0;
+      while ((*pp & 0xc0) == 0x80) pp--;        /* Find the first byte of the character */
+      c = *((unsigned char *)pp);
+      if (c >= 0xc0)
+        {
+        int gcii, gcss;
+        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
+        gcss = 6*extra;
+        c = (c & utf8_table3[extra]) << gcss;
+        for (gcii = 1; gcii <= extra; gcii++)
+          {
+          gcss -= 6;
+          c |= (pp[gcii] & 0x3f) << gcss;
+          }
+        }
+      }
+    else c = *((unsigned char *)pp);
+
+    switch (c)   /* The character before p ends a line, so p starts one */
+      {
+      case 0x0a:    /* LF */
+      case 0x0b:    /* VT */
+      case 0x0c:    /* FF */
+      case 0x0d:    /* CR */
+      case 0x85:    /* NEL */
+      case 0x2028:  /* LS */
+      case 0x2029:  /* PS */
+      return p;
+
+      default:
+      break;
+      }
+
+    p = pp;  /* Back one character */
+    }        /* End of loop for ANY case */
+
+  return startptr;  /* Hit start of data */
+  }     /* End of overall switch */
+}
+
+
+
+
+
 /*************************************************
 *       Print the previous "after" lines         *
 *************************************************/
@@ -495,13 +718,13 @@ if (after_context > 0 && lastmatchnumber > 0)
  int count = 0;
  while (lastmatchrestart < endptr && count++ < after_context)
    {
+    int ellength;
    char *pp = lastmatchrestart;
    if (printname != NULL) fprintf(stdout, "%s-", printname);
    if (number) fprintf(stdout, "%d-", lastmatchnumber++);
-    while (*pp != endlinebyte) pp++;
-    fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
-      stdout);
-    lastmatchrestart = pp + 1;
+    pp = end_of_line(pp, endptr, &ellength);
+    fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
+    lastmatchrestart = pp;
    }
  hyphenpending = TRUE;
  }
@@ -558,7 +781,7 @@ way, the buffer is shifted left and re-filled. */

 while (ptr < endptr)
  {
-  int i;
+  int i, endlinelength;
  int mrc = 0;
  BOOL match = FALSE;
  char *t = ptr;
@@ -571,11 +794,10 @@ while (ptr < endptr)
  line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
  that any match is constrained to be in the first line. */

-  linelength = 0;
-  while (t < endptr && *t++ != endlinebyte) linelength++;
+  t = end_of_line(t, endptr, &endlinelength);
+  linelength = t - ptr - endlinelength;
  length = multiline? endptr - ptr : linelength;

-
  /* Extra processing for Jeffrey Friedl's debugging. */

 #ifdef JFRIEDL_DEBUG
@@ -706,13 +928,13 @@ while (ptr < endptr)

      if (after_context > 0 && lastmatchnumber > 0)
        {
+        int ellength;
        int linecount = 0;
        char *p = lastmatchrestart;

        while (p < ptr && linecount < after_context)
          {
-          while (*p != endlinebyte) p++;
-          p++;
+          p = end_of_line(p, ptr, &ellength);
          linecount++;
          }

@@ -725,10 +947,9 @@ while (ptr < endptr)
          char *pp = lastmatchrestart;
          if (printname != NULL) fprintf(stdout, "%s-", printname);
          if (number) fprintf(stdout, "%d-", lastmatchnumber++);
-          while (*pp != endlinebyte) pp++;
-          fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
-            (1 + endlineextra), stdout);
-          lastmatchrestart = pp + 1;
+          pp = end_of_line(pp, endptr, &ellength);
+          fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
+          lastmatchrestart = pp;
          }
        if (lastmatchrestart != ptr) hyphenpending = TRUE;
        }
@@ -754,8 +975,7 @@ while (ptr < endptr)
               linecount < before_context)
          {
          linecount++;
-          p--;
-          while (p > buffer && p[-1] != endlinebyte) p--;
+          p = previous_line(p, buffer);
          }

        if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
@@ -763,12 +983,13 @@ while (ptr < endptr)

        while (p < ptr)
          {
+          int ellength;
          char *pp = p;
          if (printname != NULL) fprintf(stdout, "%s-", printname);
          if (number) fprintf(stdout, "%d-", linenumber - linecount--);
-          while (*pp != endlinebyte) pp++;
-          fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
-          p = pp + 1;
+          pp = end_of_line(pp, endptr, &ellength);
+          fwrite(p, 1, pp - p, stdout);
+          p = pp;
          }
        }

@@ -788,11 +1009,16 @@ while (ptr < endptr)

      if (multiline)
        {
+        int ellength;
        char *endmatch = ptr + offsets[1];
        t = ptr;
-        while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
-        while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
-        linelength = endmatch - ptr;
+        while (t < endmatch)
+          {
+          t = end_of_line(t, endptr, &ellength);
+          if (t <= endmatch) linenumber++; else break;
+          }
+        endmatch = end_of_line(endmatch, endptr, &ellength);
+        linelength = endmatch - ptr - ellength;
        }

      /*** NOTE: Use only fwrite() to output the data line, so that binary
@@ -824,9 +1050,7 @@ while (ptr < endptr)
        fprintf(stdout, "%c[00m", 0x1b);
        fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
        }
-      else fwrite(ptr, 1, linelength, stdout);
-
-      fprintf(stdout, "\n");
+      else fwrite(ptr, 1, linelength + endlinelength, stdout);
      }

    /* End of doing what has to be done for a match */
@@ -836,13 +1060,13 @@ while (ptr < endptr)
    /* Remember where the last match happened for after_context. We remember
    where we are about to restart, and that line's number. */

-    lastmatchrestart = ptr + linelength + 1;
+    lastmatchrestart = ptr + linelength + endlinelength;
    lastmatchnumber = linenumber + 1;
    }

  /* Advance to after the newline and increment the line number. */

-  ptr += linelength + 1;
+  ptr += linelength + endlinelength;
  linenumber++;

  /* If we haven't yet reached the end of the file (the buffer is full), and
@@ -1098,7 +1322,7 @@ switch(letter)
  case 'q': quiet = TRUE; break;
  case 'r': dee_action = dee_RECURSE; break;
  case 's': silent = TRUE; break;
-  case 'u': options |= PCRE_UTF8; break;
+  case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
  case 'v': invert = TRUE; break;
  case 'w': process_options |= PO_WORD_MATCH; break;
  case 'x': process_options |= PO_LINE_MATCH; break;
@@ -1231,14 +1455,16 @@ compile_pattern(char *pattern, int options, char *filename, int count)
 {
 if ((process_options & PO_FIXED_STRINGS) != 0)
  {
+  char *eop = pattern + strlen(pattern);
  char buffer[MBUFTHIRD];
  for(;;)
    {
-    char *p = strchr(pattern, endlinebyte);
-    if (p == NULL)
+    int ellength;
+    char *p = end_of_line(pattern, eop, &ellength);
+    if (ellength == 0)
      return compile_single_pattern(pattern, options, filename, count);
-    sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);
-    pattern = p + 1;
+    sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
+    pattern = p;
    if (!compile_single_pattern(buffer, options, filename, count))
      return FALSE;
    }
@@ -1267,7 +1493,9 @@ char *patterns[MAX_PATTERN_COUNT];
 const char *locale_from = "--locale";
 const char *error;

-/* Set the default line ending value from the default in the PCRE library. */
+/* Set the default line ending value from the default in the PCRE library;
+"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
+*/

 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
 switch(i)
@@ -1275,6 +1503,7 @@ switch(i)
  default:                 newline = (char *)"lf"; break;
  case '\r':               newline = (char *)"cr"; break;
  case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
+  case -1:                 newline = (char *)"any"; break;
  }

 /* Process the options */
@@ -1565,16 +1794,22 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
  {
  pcre_options |= PCRE_NEWLINE_CR;
-  endlinebyte = '\r';
+  endlinetype = EL_CR;
  }
 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
  {
  pcre_options |= PCRE_NEWLINE_LF;
+  endlinetype = EL_LF;
  }
 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
  {
  pcre_options |= PCRE_NEWLINE_CRLF;
-  endlineextra = 1;
+  endlinetype = EL_CRLF;
+  }
+else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
+  {
+  pcre_options |= PCRE_NEWLINE_ANY;
+  endlinetype = EL_ANY;
  }
 else
  {
--- a/ext/pcre/pcrelib/pcreposix.c
+++ b/ext/pcre/pcrelib/pcreposix.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
-           Copyright (c) 1997-2007 University of Cambridge
+           Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -78,7 +78,7 @@ static const int eint[] = {
  REG_BADPAT,  /* unrecognized character after (?< */
  REG_BADPAT,  /* lookbehind assertion is not fixed length */
  REG_BADPAT,  /* malformed number or name after (?( */
-  REG_BADPAT,  /* conditional group containe more than two branches */
+  REG_BADPAT,  /* conditional group contains more than two branches */
  REG_BADPAT,  /* assertion expected after (?( */
  REG_BADPAT,  /* (?R or (?digits must be followed by ) */
  REG_ECTYPE,  /* unknown POSIX class name */
@@ -93,7 +93,7 @@ static const int eint[] = {
  REG_BADPAT,  /* closing ) for (?C expected */
  REG_BADPAT,  /* recursive call could loop indefinitely */
  REG_BADPAT,  /* unrecognized character after (?P */
-  REG_BADPAT,  /* syntax error after (?P */
+  REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */
  REG_BADPAT,  /* two named subpatterns have the same name */
  REG_BADPAT,  /* invalid UTF-8 string */
  REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */
@@ -102,7 +102,13 @@ static const int eint[] = {
  REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */
  REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */
  REG_BADPAT,  /* repeated subpattern is too long */
-  REG_BADPAT   /* octal value is greater than \377 (not in UTF-8 mode) */
+  REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */
+  REG_BADPAT,  /* internal error: overran compiling workspace */
+  REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */
+  REG_BADPAT,  /* DEFINE group contains more than one branch */
+  REG_BADPAT,  /* repeating a DEFINE group is not allowed */
+  REG_INVARG,  /* inconsistent NEWLINE options */
+  REG_BADPAT   /* \g is not followed by an (optionally braced) non-zero number */
 };

 /* Table of texts corresponding to POSIX error codes */
--- a/ext/pcre/pcrelib/pcreposix.h
+++ b/ext/pcre/pcrelib/pcreposix.h
@@ -9,7 +9,7 @@
 Compatible Regular Expression library. It defines the things POSIX says should
 be there. I hope.

-            Copyright (c) 1997-2007 University of Cambridge
+            Copyright (c) 1997-2006 University of Cambridge

 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
--- a/ext/pcre/pcrelib/pcretest.c
+++ b/ext/pcre/pcrelib/pcretest.c
@@ -44,10 +44,29 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <locale.h>
 #include <errno.h>

-#ifndef _WIN32
-#include <sys/resource.h>
+
+/* A number of things vary for Windows builds. Originally, pcretest opened its
+input and output without "b"; then I was told that "b" was needed in some
+environments, so it was added for release 5.0 to both the input and output. (It
+makes no difference on Unix-like systems.) Later I was told that it is wrong
+for the input on Windows. I've now abstracted the modes into two macros that
+are set here, to make it easier to fiddle with them, and removed "b" from the
+input mode under Windows. */
+
+#if defined(_WIN32) || defined(WIN32)
+#include <io.h>                /* For _setmode() */
+#include <fcntl.h>             /* For _O_BINARY */
+#define INPUT_MODE   "r"
+#define OUTPUT_MODE  "wb"
+
+#else
+#include <sys/time.h>          /* These two includes are needed */
+#include <sys/resource.h>      /* for setrlimit(). */
+#define INPUT_MODE   "rb"
+#define OUTPUT_MODE  "wb"
 #endif

+
 #define PCRE_SPY        /* For Win32 build, import data, not export */

 /* We include pcre_internal.h because we need the internal info for displaying
@@ -74,10 +93,18 @@ symbols to prevent clashes. */

 /* We also need the pcre_printint() function for printing out compiled
 patterns. This function is in a separate file so that it can be included in
-pcre_compile.c when that module is compiled with debugging enabled. */
+pcre_compile.c when that module is compiled with debugging enabled.
+
+The definition of the macro PRINTABLE, which determines whether to print an
+output character as-is or as a hex value when showing compiled patterns, is
+contained in this file. We use it here also, in cases when the locale has not
+been explicitly changed, so as to get consistent output from systems that
+differ in their output from isprint() even in the "C" locale. */

 #include "pcre_printint.src"

+#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
+

 /* It is possible to compile this test program without including support for
 testing the POSIX interface, though this is not available via the standard
@@ -103,6 +130,8 @@ function (define NOINFOCHECK). */
 #endif
 #endif

+/* This is the default loop count for timing. */
+
 #define LOOPREPEAT 500000

 /* Static variables */
@@ -114,6 +143,7 @@ static int callout_extra;
 static int callout_fail_count;
 static int callout_fail_id;
 static int first_callout;
+static int locale_set = 0;
 static int show_malloc;
 static int use_utf8;
 static size_t gotten_store;
@@ -157,6 +187,7 @@ uschar *here = start;
 for (;;)
  {
  int rlen = buffer_size - (here - buffer);
+
  if (rlen > 1000)
    {
    int dlen;
@@ -213,7 +244,7 @@ return NULL;  /* Control never gets here */

 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
 around with conditional compilation, just do the job by hand. It is only used
-for unpicking the -o argument, so just keep it simple.
+for unpicking arguments, so just keep it simple.

 Arguments:
  str           string to be converted
@@ -311,6 +342,8 @@ Arguments:
 Returns:     number of characters placed in the buffer
 */

+#if !defined NOUTF8
+
 static int
 ord2utf8(int cvalue, uschar *utf8bytes)
 {
@@ -327,6 +360,8 @@ for (j = i; j > 0; j--)
 return i + 1;
 }

+#endif
+


 /*************************************************
@@ -353,16 +388,19 @@ while (length-- > 0)
      {
      length -= rc - 1;
      p += rc;
-      if (c < 256 && isprint(c))
+      if (PRINTHEX(c))
        {
        if (f != NULL) fprintf(f, "%c", c);
        yield++;
        }
      else
        {
-        int n;
-        if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
-        yield += n;
+        int n = 4;
+        if (f != NULL) fprintf(f, "\\x{%02x}", c);
+        yield += (n <= 0x000000ff)? 2 :
+                 (n <= 0x00000fff)? 3 :
+                 (n <= 0x0000ffff)? 4 :
+                 (n <= 0x000fffff)? 5 : 6;
        }
      continue;
      }
@@ -371,7 +409,8 @@ while (length-- > 0)

   /* Not UTF-8, or malformed UTF-8  */

-  if (isprint(c = *(p++)))
+  c = *p++;
+  if (PRINTHEX(c))
    {
    if (f != NULL) fprintf(f, "%c", c);
    yield++;
@@ -614,7 +653,7 @@ return count;
 *************************************************/

 /* This is used both at compile and run-time to check for <xxx> escapes, where
-xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
+xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.

 Arguments:
  p           points after the leading '<'
@@ -629,12 +668,45 @@ check_newline(uschar *p, FILE *f)
 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
+if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
 fprintf(f, "Unknown newline type at: <%s\n", p);
 return 0;
 }



+/*************************************************
+*             Usage function                     *
+*************************************************/
+
+static void
+usage(void)   /* Writes the command-line option summary to stdout */
+{
+printf("Usage:     pcretest [options] [<input> [<output>]]\n");
+printf("  -b       show compiled code (bytecode)\n");
+printf("  -C       show PCRE compile-time options and exit\n");
+printf("  -d       debug: show compiled code and information (-b and -i)\n");
+#if !defined NODFA   /* Line is omitted when NODFA is defined */
+printf("  -dfa     force DFA matching for all subjects\n");
+#endif
+printf("  -help    show usage information\n");
+printf("  -i       show information about compiled patterns\n"
+       "  -m       output memory used information\n"
+       "  -o <n>   set size of offsets vector to <n>\n");
+#if !defined NOPOSIX   /* Line is omitted when NOPOSIX is defined */
+printf("  -p       use POSIX interface\n");
+#endif
+printf("  -q       quiet: do not output PCRE version number at start\n");
+printf("  -S <n>   set stack size to <n> megabytes\n");
+printf("  -s       output store (memory) used information\n"
+       "  -t       time compilation and execution\n");
+printf("  -t <n>   time compilation and execution, repeating <n> times\n");
+printf("  -tm      time execution (matching) only\n");
+printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
+}
+
+
+
 /*************************************************
 *                Main Program                    *
 *************************************************/
@@ -650,6 +722,7 @@ int options = 0;
 int study_options = 0;
 int op = 1;
 int timeit = 0;
+int timeitm = 0;
 int showinfo = 0;
 int showstore = 0;
 int quiet = 0;
@@ -681,16 +754,19 @@ buffer = (unsigned char *)malloc(buffer_size);
 dbuffer = (unsigned char *)malloc(buffer_size);
 pbuffer = (unsigned char *)malloc(buffer_size);

-/* The outfile variable is static so that new_malloc can use it. The _setmode()
-stuff is some magic that I don't understand, but which apparently does good
-things in Windows. It's related to line terminations.  */
-
-#if defined(_WIN32) || defined(WIN32)
-_setmode( _fileno( stdout ), 0x8000 );
-#endif  /* defined(_WIN32) || defined(WIN32) */
+/* The outfile variable is static so that new_malloc can use it. */

 outfile = stdout;

+/* The following  _setmode() stuff is some Windows magic that tells its runtime
+library to translate CRLF into a single LF character. At least, that's what
+I've been told: never having used Windows I take this all on trust. Originally
+it set 0x8000, but then I was advised that _O_BINARY was better. */
+
+#if defined(_WIN32) || defined(WIN32)
+_setmode( _fileno( stdout ), _O_BINARY );
+#endif
+
 /* Scan options */

 while (argc > 1 && argv[op][0] == '-')
@@ -699,8 +775,8 @@ while (argc > 1 && argv[op][0] == '-')

  if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
    showstore = 1;
-  else if (strcmp(argv[op], "-t") == 0) timeit = 1;
  else if (strcmp(argv[op], "-q") == 0) quiet = 1;
+  else if (strcmp(argv[op], "-b") == 0) debug = 1;
  else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
  else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
 #if !defined NODFA
@@ -713,11 +789,25 @@ while (argc > 1 && argv[op][0] == '-')
    op++;
    argc--;
    }
+  else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
+    {
+    int both = argv[op][2] == 0;
+    int temp;
+    if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
+                     *endptr == 0))
+      {
+      timeitm = temp;
+      op++;
+      argc--;
+      }
+    else timeitm = LOOPREPEAT;
+    if (both) timeit = timeitm;
+    }
  else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
      ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
        *endptr == 0))
    {
-#ifdef _WIN32
+#if defined(_WIN32) || defined(WIN32)
    printf("PCRE: -S not supported on this OS\n");
    exit(1);
 #else
@@ -749,7 +839,8 @@ while (argc > 1 && argv[op][0] == '-')
    printf("  %sUnicode properties support\n", rc? "" : "No ");
    (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
    printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
-      (rc == '\n')? "LF" : "CRLF");
+      (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
+      (rc == -1)? "ANY" : "???");
    (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
    printf("  Internal link size = %d\n", rc);
    (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
@@ -762,24 +853,16 @@ while (argc > 1 && argv[op][0] == '-')
    printf("  Match recursion uses %s\n", rc? "stack" : "heap");
    exit(0);
    }
+  else if (strcmp(argv[op], "-help") == 0 ||
+           strcmp(argv[op], "--help") == 0)
+    {
+    usage();
+    goto EXIT;
+    }
  else
    {
    printf("** Unknown or malformed option %s\n", argv[op]);
-    printf("Usage:   pcretest [options] [<input> [<output>]]\n");
-    printf("  -C     show PCRE compile-time options and exit\n");
-    printf("  -d     debug: show compiled code; implies -i\n");
-#if !defined NODFA
-    printf("  -dfa   force DFA matching for all subjects\n");
-#endif
-    printf("  -i     show information about compiled pattern\n"
-           "  -m     output memory used information\n"
-           "  -o <n> set size of offsets vector to <n>\n");
-#if !defined NOPOSIX
-    printf("  -p     use POSIX interface\n");
-#endif
-    printf("  -S <n> set stack size to <n> megabytes\n");
-    printf("  -s     output store (memory) used information\n"
-           "  -t     time compilation and execution\n");
+    usage();
    yield = 1;
    goto EXIT;
    }
@@ -803,7 +886,7 @@ if (offsets == NULL)

 if (argc > 1)
  {
-  infile = fopen(argv[op], "rb");
+  infile = fopen(argv[op], INPUT_MODE);
  if (infile == NULL)
    {
    printf("** Failed to open %s\n", argv[op]);
@@ -814,7 +897,7 @@ if (argc > 1)

 if (argc > 2)
  {
-  outfile = fopen(argv[op+1], "wb");
+  outfile = fopen(argv[op+1], OUTPUT_MODE);
  if (outfile == NULL)
    {
    printf("** Failed to open %s\n", argv[op+1]);
@@ -859,7 +942,7 @@ while (!done)
  int do_showinfo = showinfo;
  int do_showrest = 0;
  int do_flip = 0;
-  int erroroffset, len, delimiter;
+  int erroroffset, len, delimiter, poffset;

  use_utf8 = 0;

@@ -969,6 +1052,7 @@ while (!done)
    }

  pp = p;
+  poffset = p - buffer;

  for(;;)
    {
@@ -989,6 +1073,11 @@ while (!done)
    if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
    }

+  /* The buffer may have moved while being extended; reset the start of data
+  pointer to the correct relative point in the buffer. */
+
+  p = buffer + poffset;
+
  /* If the first character after the delimiter is backslash, make
  the pattern end with backslash. This is purely to provide a way
  of testing for the error message when a pattern ends with backslash. */
@@ -1020,6 +1109,7 @@ while (!done)

      case '+': do_showrest = 1; break;
      case 'A': options |= PCRE_ANCHORED; break;
+      case 'B': do_debug = 1; break;
      case 'C': options |= PCRE_AUTO_CALLOUT; break;
      case 'D': do_debug = do_showinfo = 1; break;
      case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
@@ -1042,14 +1132,16 @@ while (!done)

      case 'L':
      ppp = pp;
-      /* The '\r' test here is so that it works on Windows */
-      while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
+      /* The '\r' test here is so that it works on Windows. */
+      /* The 0 (NUL) test is just in case this is an unterminated line. */
+      while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
      *ppp = 0;
      if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
        {
        fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
        goto SKIP_DATA;
        }
+      locale_set = 1;
      tables = pcre_maketables();
      pp = ppp;
      break;
@@ -1116,19 +1208,19 @@ while (!done)
 #endif  /* !defined NOPOSIX */

    {
-    if (timeit)
+    if (timeit > 0)
      {
      register int i;
      clock_t time_taken;
      clock_t start_time = clock();
-      for (i = 0; i < LOOPREPEAT; i++)
+      for (i = 0; i < timeit; i++)
        {
        re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
        if (re != NULL) free(re);
        }
      time_taken = clock() - start_time;
-      fprintf(outfile, "Compile time %.3f milliseconds\n",
-        (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
+      fprintf(outfile, "Compile time %.4f milliseconds\n",
+        (((double)time_taken * 1000.0) / (double)timeit) /
          (double)CLOCKS_PER_SEC);
      }

@@ -1180,17 +1272,17 @@ while (!done)

    if (do_study)
      {
-      if (timeit)
+      if (timeit > 0)
        {
        register int i;
        clock_t time_taken;
        clock_t start_time = clock();
-        for (i = 0; i < LOOPREPEAT; i++)
+        for (i = 0; i < timeit; i++)
          extra = pcre_study(re, study_options, &error);
        time_taken = clock() - start_time;
        if (extra != NULL) free(extra);
-        fprintf(outfile, "  Study time %.3f milliseconds\n",
-          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
+        fprintf(outfile, "  Study time %.4f milliseconds\n",
+          (((double)time_taken * 1000.0) / (double)timeit) /
            (double)CLOCKS_PER_SEC);
        }
      extra = pcre_study(re, study_options, &error);
@@ -1233,6 +1325,12 @@ while (!done)

    SHOW_INFO:

+    if (do_debug)
+      {
+      fprintf(outfile, "------------------------------------------------------------------\n");
+      pcre_printint(re, outfile);
+      }
+
    if (do_showinfo)
      {
      unsigned long int get_options, all_options;
@@ -1243,12 +1341,6 @@ while (!done)
      int nameentrysize, namecount;
      const uschar *nametable;

-      if (do_debug)
-        {
-        fprintf(outfile, "------------------------------------------------------------------\n");
-        pcre_printint(re, outfile);
-        }
-
      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
      new_info(re, NULL, PCRE_INFO_SIZE, &size);
      new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
@@ -1327,7 +1419,7 @@ while (!done)
          ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
          ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");

-      switch (get_options & PCRE_NEWLINE_CRLF)
+      switch (get_options & PCRE_NEWLINE_BITS)
        {
        case PCRE_NEWLINE_CR:
        fprintf(outfile, "Forced newline sequence: CR\n");
@@ -1341,6 +1433,10 @@ while (!done)
        fprintf(outfile, "Forced newline sequence: CRLF\n");
        break;

+        case PCRE_NEWLINE_ANY:
+        fprintf(outfile, "Forced newline sequence: ANY\n");
+        break;
+
        default:
        break;
        }
@@ -1358,7 +1454,7 @@ while (!done)
        int ch = first_char & 255;
        const char *caseless = ((first_char & REQ_CASELESS) == 0)?
          "" : " (caseless)";
-        if (isprint(ch))
+        if (PRINTHEX(ch))
          fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
        else
          fprintf(outfile, "First char = %d%s\n", ch, caseless);
@@ -1373,7 +1469,7 @@ while (!done)
        int ch = need_char & 255;
        const char *caseless = ((need_char & REQ_CASELESS) == 0)?
          "" : " (caseless)";
-        if (isprint(ch))
+        if (PRINTHEX(ch))
          fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
        else
          fprintf(outfile, "Need char = %d%s\n", ch, caseless);
@@ -1409,7 +1505,7 @@ while (!done)
                  fprintf(outfile, "\n  ");
                  c = 2;
                  }
-                if (isprint(i) && i != ' ')
+                if (PRINTHEX(i) && i != ' ')
                  {
                  fprintf(outfile, "%c ", i);
                  c += 2;
@@ -1468,6 +1564,7 @@ while (!done)
                strerror(errno));
              }
            else fprintf(outfile, "Study data written to %s\n", to_file);
+
            }
          }
        fclose(f);
@@ -1866,7 +1963,7 @@ while (!done)

    for (;; gmatched++)    /* Loop for /g or /G */
      {
-      if (timeit)
+      if (timeitm > 0)
        {
        register int i;
        clock_t time_taken;
@@ -1876,7 +1973,7 @@ while (!done)
        if (all_use_dfa || use_dfa)
          {
          int workspace[1000];
-          for (i = 0; i < LOOPREPEAT; i++)
+          for (i = 0; i < timeitm; i++)
            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
              options | g_notempty, use_offsets, use_size_offsets, workspace,
              sizeof(workspace)/sizeof(int));
@@ -1884,13 +1981,13 @@ while (!done)
        else
 #endif

-        for (i = 0; i < LOOPREPEAT; i++)
+        for (i = 0; i < timeitm; i++)
          count = pcre_exec(re, extra, (char *)bptr, len,
            start_offset, options | g_notempty, use_offsets, use_size_offsets);

        time_taken = clock() - start_time;
-        fprintf(outfile, "Execute time %.3f milliseconds\n",
-          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
+        fprintf(outfile, "Execute time %.4f milliseconds\n",
+          (((double)time_taken * 1000.0) / (double)timeitm) /
            (double)CLOCKS_PER_SEC);
        }

@@ -1966,7 +2063,28 @@ while (!done)

      if (count >= 0)
        {
-        int i;
+        int i, maxcount;
+
+#if !defined NODFA
+        if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
+#endif
+          maxcount = use_size_offsets/3;
+
+        /* This is a check against a lunatic return value. */
+
+        if (count > maxcount)
+          {
+          fprintf(outfile,
+            "** PCRE error: returned count %d is too big for offset size %d\n",
+            count, use_size_offsets);
+          count = use_size_offsets/3;
+          if (do_g || do_G)
+            {
+            fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
+            do_g = do_G = FALSE;        /* Break g/G loop */
+            }
+          }
+
        for (i = 0; i < count * 2; i += 2)
          {
          if (use_offsets[i] < 0)
@@ -2165,6 +2283,7 @@ while (!done)
    {
    new_free((void *)tables);
    setlocale(LC_CTYPE, "C");
+    locale_set = 0;
    }
  }

--- a/ext/pcre/pcrelib/testdata/grepinput
+++ b/ext/pcre/pcrelib/testdata/grepinput
@@ -593,7 +593,8 @@ aaaaa2
 ffffffffff

 This is a line before the binary zero.
-This line contains a binary zero here >This is a line after the binary zero.
+This line contains a binary zero here >< for testing.
+This is a line after the binary zero.

 ABOVE the elephant 
 ABOVE
--- a/ext/pcre/pcrelib/testdata/grepinput8
+++ b/ext/pcre/pcrelib/testdata/grepinput8
@@ -0,0 +1,12 @@
+X one
+X twoX threeX four
+X five
+X six
+X sevenX eight X nine X ten
+
+Before 111
+Before 222 Before 333Match
+After 111
+After 222 After 333
+And so on and so on
+And so on and so on
--- a/ext/pcre/pcrelib/testdata/grepoutput
+++ b/ext/pcre/pcrelib/testdata/grepoutput
@@ -75,7 +75,14 @@ RC=1
 39:nineteen
 40:twenty
 41:
-42:This is the last line of this file.
+42:Here follows some CR/LF/CRLF test data.
+43:
+44:abc
+def
+45:ghi
+46:jkl
+47:
+48:This is the last line of this file.
 ---------------------------- Test 12 -----------------------------
 Pattern
 ---------------------------- Test 13 -----------------------------
@@ -157,7 +164,8 @@ eighteen
 nineteen
 twenty

-This is the last line of this file.
+Here follows some CR/LF/CRLF test data.
+
 ---------------------------- Test 25 -----------------------------
 15-
 16-complete pair
@@ -207,7 +215,8 @@ eighteen
 nineteen
 twenty

-This is the last line of this file.
+Here follows some CR/LF/CRLF test data.
+
 ---------------------------- Test 27 -----------------------------
 four
 five
@@ -227,7 +236,10 @@ eighteen
 nineteen
 twenty

-This is the last line of this file.
+Here follows some CR/LF/CRLF test data.
+
+abc
+def
 ---------------------------- Test 28 -----------------------------
 14-of lines all by themselves.
 15-
@@ -279,7 +291,12 @@ eighteen
 nineteen
 twenty

-This is the last line of this file.
+Here follows some CR/LF/CRLF test data.
+
+abc
+def
+ghi
+jkl
 ---------------------------- Test 30 -----------------------------
 ./testdata/grepinput-4-features should be added at the end, because some of the tests involve the
 ./testdata/grepinput-5-output of line numbers, and we don't want these to change.
@@ -329,6 +346,7 @@ RC=2
 RC=0
 ---------------------------- Test 36 -----------------------------
 ./testdata/grepinputx
+./testdata/grepinput8
 RC=0
 ---------------------------- Test 37 -----------------------------
 aaaaa0
@@ -342,10 +360,13 @@ pcregrep: check your regex for nested unlimited loops
 pcregrep: pcre_exec() error -8 while matching this line:
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 ---------------------------- Test 38 ------------------------------
-This line contains a binary zero here >---------------------------- Test 39 ------------------------------
+This line contains a binary zero here >< for testing.
+---------------------------- Test 39 ------------------------------
 This is a line before the binary zero.
-This line contains a binary zero here >---------------------------- Test 40 ------------------------------
-This line contains a binary zero here >This is a line after the binary zero.
+This line contains a binary zero here >< for testing.
+---------------------------- Test 40 ------------------------------
+This line contains a binary zero here >< for testing.
+This is a line after the binary zero.
 ---------------------------- Test 41 ------------------------------
 before the binary zero
 after the binary zero
@@ -378,3 +399,31 @@ ABOVE the elephant
 AB.VE
 AB.VE the turtle
 PUT NEW DATA ABOVE THIS LINE.
+---------------------------- Test 49 ------------------------------
+abc
+def
+ghi
+jkl
+---------------------------- Test 50 ------------------------------
+def
+---------------------------- Test 51 ------------------------------
+ghi
+jkl
+
+This is the last line of this file.
+---------------------------- Test 52 ------------------------------
+def
+ghi
+jkl
+
+This is the last line of this file.
+---------------------------- Test 53 ------------------------------
+ghi
+jkl
+
+This is the last line of this file.
+---------------------------- Test 54 ------------------------------
+44:abc
+45:def
+46:ghi
+47:jkl
--- a/ext/pcre/pcrelib/testdata/grepoutput8
+++ b/ext/pcre/pcrelib/testdata/grepoutput8
@@ -0,0 +1,11 @@
+---------------------------- Test U1 ------------------------------
+1:X one
+2:X two3:X three4:X four
+5:X five
+6:X six
+7:X seven8:X eight 9:X nine 10:X ten
+---------------------------- Test U2 ------------------------------
+12-Before 111
+13-Before 222 14-Before 33315:Match
+16-After 111
+17-After 222 18-After 333
--- a/ext/pcre/pcrelib/testdata/testinput1
+++ b/ext/pcre/pcrelib/testdata/testinput1
@@ -1297,8 +1297,7 @@
    abc

 /^a	b
-  
-    c/x
+      c/x
    abc

 /^(a|)\1*b/
@@ -1454,11 +1453,6 @@
 /{4,5a}bc/
    {4,5a}bc

-/^a.b/
-    a\rb
-    *** Failers
-    a\nb
-
 /abc$/
    abc
    abc\n
@@ -1500,8 +1494,8 @@
 /(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12\123/
    abcdefghijk\12S

-/ab\gdef/
-    abgdef
+/ab\hdef/
+    abhdef

 /a{0}bc/
    bc
@@ -3382,9 +3376,14 @@
    cdaccb

 /^(?:a?b?)*$/
+    \
+    a
+    ab
+    aaa   
    *** Failers
    dbcb
    a--
+    aa-- 

 /((?s)^a(.))((?m)^b$)/
    a\nb\nc\n
@@ -3884,4 +3883,139 @@
    a,b]
    [a,b,c]  

+/(?-x: )/x
+    A\x20B
+    
+"(?x)(?-x: \s*#\s*)"
+    A # B
+    ** Failers
+    #  
+
+"(?x-is)(?:(?-ixs) \s*#\s*) include"
+    A #include
+    ** Failers
+    A#include  
+    A #Include
+
+/a*b*\w/
+    aaabbbb
+    aaaa
+    a
+
+/a*b?\w/
+    aaabbbb
+    aaaa
+    a
+
+/a*b{0,4}\w/
+    aaabbbb
+    aaaa
+    a
+
+/a*b{0,}\w/
+    aaabbbb
+    aaaa
+    a
+    
+/a*\d*\w/
+    0a
+    a 
+    
+/a*b *\w/x
+    a 
+
+/a*b#comment
+  *\w/x
+    a 
+
+/a* b *\w/x
+    a 
+
+/^\w+=.*(\\\n.*)*/
+    abc=xyz\\\npqr
+
+/(?=(\w+))\1:/
+    abcd:
+
+/^(?=(\w+))\1:/
+    abcd:
+
+/^\Eabc/
+    abc
+    
+/^[\Eabc]/
+    a
+    ** Failers 
+    E 
+    
+/^[a-\Ec]/
+    b
+    ** Failers
+    -
+    E    
+
+/^[a\E\E-\Ec]/
+    b
+    ** Failers
+    -
+    E    
+
+/^[\E\Qa\E-\Qz\E]+/
+    b
+    ** Failers
+    -  
+    
+/^[a\Q]bc\E]/
+    a
+    ]
+    c
+    
+/^[a-\Q\E]/
+    a
+    -     
+
+/^(a()*)*/
+    aaaa
+
+/^(?:a(?:(?:))*)*/
+    aaaa
+
+/^(a()+)+/
+    aaaa
+
+/^(?:a(?:(?:))+)+/
+    aaaa
+
+/(a){0,3}(?(1)b|(c|))*D/
+    abbD
+    ccccD
+    D  
+
+/(a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?>a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?:a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/\Z/g
+  abc\n
+  
+/^(?s)(?>.*)(?<!\n)/
+  abc
+  abc\n  
+
+/^(?![^\n]*\n\z)/
+  abc
+  abc\n 
+  
+/\z(?<!\n)/
+  abc
+  abc\n  
+
 / End of testinput1 /
--- a/ext/pcre/pcrelib/testdata/testinput2
+++ b/ext/pcre/pcrelib/testdata/testinput2
--- a/ext/pcre/pcrelib/testdata/testinput4
+++ b/ext/pcre/pcrelib/testdata/testinput4
@@ -520,4 +520,7 @@
    abcdefg
    ab

+/a*\x{100}*\w/8
+    a 
+
 / End of testinput4 /
--- a/ext/pcre/pcrelib/testdata/testinput5
+++ b/ext/pcre/pcrelib/testdata/testinput5
@@ -270,5 +270,89 @@
 /\777/8I
  \x{1ff}
  \777 
+  
+/\x{100}*\d/8D
+
+/\x{100}*\s/8D
+
+/\x{100}*\w/8D
+
+/\x{100}*\D/8D
+
+/\x{100}*\S/8D
+
+/\x{100}*\W/8D
+
+/\x{100}+\x{200}/8D
+
+/\x{100}+X/8D
+
+/X+\x{200}/8D
+
+/()()()()()()()()()()
+ ()()()()()()()()()()
+ ()()()()()()()()()()
+ ()()()()()()()()()()
+ A (x) (?41) B/8x
+    AxxB     
+
+/^[\x{100}\E-\Q\E\x{150}]/B8
+
+/^[\QĀ\E-\QŐ\E]/B8
+
+/^[\QĀ\E-\QŐ\E/B8
+
+/^abc./mgx8<any>
+    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+
+/abc.$/mgx8<any>
+    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
+
+/^a\Rb/8
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0cb
+    a\x{85}b   
+    a\x{2028}b 
+    a\x{2029}b 
+    ** Failers
+    a\n\rb    
+
+/^a\R*b/8
+    ab
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0c\x{2028}\x{2029}b
+    a\x{85}b   
+    a\n\rb    
+    a\n\r\x{85}\x0cb 
+
+/^a\R+b/8
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0c\x{2028}\x{2029}b
+    a\x{85}b   
+    a\n\rb    
+    a\n\r\x{85}\x0cb 
+    ** Failers
+    ab  
+
+/^a\R{1,3}b/8
+    a\nb
+    a\n\rb
+    a\n\r\x{85}b
+    a\r\n\r\nb 
+    a\r\n\r\n\r\nb 
+    a\n\r\n\rb
+    a\n\n\r\nb 
+    ** Failers
+    a\n\n\n\rb
+    a\r

 / End of testinput5 /
--- a/ext/pcre/pcrelib/testdata/testinput6
+++ b/ext/pcre/pcrelib/testdata/testinput6
@@ -747,4 +747,19 @@
 /([\pL]=(abc))*X/
    L=abcX

+/The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
+will match it only with UCP support, because without that it has no notion
+of case for anything other than the ASCII letters. / 
+
+/((?i)[\x{c0}])/8
+    \x{c0}
+    \x{e0} 
+
+/(?i:[\x{c0}])/8
+    \x{c0}
+    \x{e0} 
+    
+/^\p{Balinese}\p{Cuneiform}\p{Nko}\p{Phags_Pa}\p{Phoenician}/8
+    \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
+
 / End of testinput6 /
--- a/ext/pcre/pcrelib/testdata/testinput7
+++ b/ext/pcre/pcrelib/testdata/testinput7
@@ -1775,8 +1775,7 @@
    abc

 /^a	b
-  
-    c/x
+      c/x
    abc

 /ab{1,3}bc/
@@ -1889,7 +1888,7 @@
 /{4,5a}bc/
    {4,5a}bc

-/^a.b/
+/^a.b/<lf>
    a\rb
    *** Failers
    a\nb
@@ -1932,8 +1931,8 @@
 /(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\12\123/
    abcdefghijk\12S

-/ab\gdef/
-    abgdef
+/ab\hdef/
+    abhdef

 /a{0}bc/
    bc
@@ -4067,7 +4066,7 @@
    xyz\rabc\<crlf>
    xyz\rabc\<lf>
    
-/abc$/m
+/abc$/m<lf>
    xyzabc
    xyzabc\n 
    xyzabc\npqr 
@@ -4099,7 +4098,7 @@
    ** Failers  
    xyz\rabcdef
    
-/.*/
+/.*/<lf>
    abc\ndef
    abc\rdef
    abc\r\ndef
@@ -4115,4 +4114,119 @@
    abc\rdef
    abc\r\ndef

+/^\w+=.*(\\\n.*)*/
+    abc=xyz\\\npqr
+
+/^(a()*)*/
+    aaaa
+
+/^(?:a(?:(?:))*)*/
+    aaaa
+
+/^(a()+)+/
+    aaaa
+
+/^(?:a(?:(?:))+)+/
+    aaaa
+
+/(a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?>a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?:a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/^a.b/<lf>
+    a\rb
+    a\nb\<cr> 
+    ** Failers
+    a\nb
+    a\nb\<any>
+    a\rb\<cr>   
+    a\rb\<any>   
+
+/^abc./mgx<any>
+    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+
+/abc.$/mgx<any>
+    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+
+/^a\Rb/
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0cb
+    a\x85b   
+    ** Failers
+    a\n\rb    
+
+/^a\R*b/
+    ab
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0cb
+    a\x85b   
+    a\n\rb    
+    a\n\r\x85\x0cb 
+
+/^a\R+b/
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0cb
+    a\x85b   
+    a\n\rb    
+    a\n\r\x85\x0cb 
+    ** Failers
+    ab  
+    
+/^a\R{1,3}b/
+    a\nb
+    a\n\rb
+    a\n\r\x85b
+    a\r\n\r\nb 
+    a\r\n\r\n\r\nb 
+    a\n\r\n\rb
+    a\n\n\r\nb 
+    ** Failers
+    a\n\n\n\rb
+    a\r
+
+/^a[\R]b/
+    aRb
+    ** Failers
+    a\nb  
+
+/.+foo/
+    afoo
+    ** Failers 
+    \r\nfoo 
+    \nfoo 
+
+/.+foo/<crlf>
+    afoo
+    \nfoo 
+    ** Failers 
+    \r\nfoo 
+
+/.+foo/<any>
+    afoo
+    ** Failers 
+    \nfoo 
+    \r\nfoo 
+
+/.+foo/s
+    afoo
+    \r\nfoo 
+    \nfoo 
+
 / End of testinput7 /
--- a/ext/pcre/pcrelib/testdata/testinput8
+++ b/ext/pcre/pcrelib/testdata/testinput8
@@ -537,4 +537,57 @@
 /^\x{85}$/8i
    \x{85}

+/^abc./mgx8<any>
+    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+
+/abc.$/mgx8<any>
+    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
+
+/^a\Rb/8
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0cb
+    a\x{85}b   
+    a\x{2028}b 
+    a\x{2029}b 
+    ** Failers
+    a\n\rb    
+
+/^a\R*b/8
+    ab
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0c\x{2028}\x{2029}b
+    a\x{85}b   
+    a\n\rb    
+    a\n\r\x{85}\x0cb 
+
+/^a\R+b/8
+    a\nb
+    a\rb
+    a\r\nb
+    a\x0bb
+    a\x0c\x{2028}\x{2029}b
+    a\x{85}b   
+    a\n\rb    
+    a\n\r\x{85}\x0cb 
+    ** Failers
+    ab  
+
+/^a\R{1,3}b/8
+    a\nb
+    a\n\rb
+    a\n\r\x{85}b
+    a\r\n\r\nb 
+    a\r\n\r\n\r\nb 
+    a\n\r\n\rb
+    a\n\n\r\nb 
+    ** Failers
+    a\n\n\n\rb
+    a\r
+
 / End of testinput 8 / 
--- a/ext/pcre/pcrelib/testdata/testoutput1
+++ b/ext/pcre/pcrelib/testdata/testoutput1
@@ -1817,8 +1817,7 @@ No match
 No match

 /^a	b
-  
-    c/x
+      c/x
    abc
 0: abc

@@ -2094,14 +2093,6 @@ No match
    {4,5a}bc
 0: {4,5a}bc

-/^a.b/
-    a\rb
- 0: a\x0db
-    *** Failers
-No match
-    a\nb
-No match
-
 /abc$/
    abc
 0: abc
@@ -2198,9 +2189,9 @@ No match
 10: j
 11: k

-/ab\gdef/
-    abgdef
- 0: abgdef
+/ab\hdef/
+    abhdef
+ 0: abhdef

 /a{0}bc/
    bc
@@ -5481,12 +5472,22 @@ No match
 0: b

 /^(?:a?b?)*$/
+    \
+ 0: 
+    a
+ 0: a
+    ab
+ 0: ab
+    aaa   
+ 0: aaa
    *** Failers
 No match
    dbcb
 No match
    a--
 No match
+    aa-- 
+No match

 /((?s)^a(.))((?m)^b$)/
    a\nb\nc\n
@@ -6354,4 +6355,220 @@ No match
    [a,b,c]  
 0: [a,b,c]

+/(?-x: )/x
+    A\x20B
+ 0:  
+    
+"(?x)(?-x: \s*#\s*)"
+    A # B
+ 0:  # 
+    ** Failers
+No match
+    #  
+No match
+
+"(?x-is)(?:(?-ixs) \s*#\s*) include"
+    A #include
+ 0:  #include
+    ** Failers
+No match
+    A#include  
+No match
+    A #Include
+No match
+
+/a*b*\w/
+    aaabbbb
+ 0: aaabbbb
+    aaaa
+ 0: aaaa
+    a
+ 0: a
+
+/a*b?\w/
+    aaabbbb
+ 0: aaabb
+    aaaa
+ 0: aaaa
+    a
+ 0: a
+
+/a*b{0,4}\w/
+    aaabbbb
+ 0: aaabbbb
+    aaaa
+ 0: aaaa
+    a
+ 0: a
+
+/a*b{0,}\w/
+    aaabbbb
+ 0: aaabbbb
+    aaaa
+ 0: aaaa
+    a
+ 0: a
+    
+/a*\d*\w/
+    0a
+ 0: 0a
+    a 
+ 0: a
+    
+/a*b *\w/x
+    a 
+ 0: a
+
+/a*b#comment
+  *\w/x
+    a 
+ 0: a
+
+/a* b *\w/x
+    a 
+ 0: a
+
+/^\w+=.*(\\\n.*)*/
+    abc=xyz\\\npqr
+ 0: abc=xyz\
+
+/(?=(\w+))\1:/
+    abcd:
+ 0: abcd:
+ 1: abcd
+
+/^(?=(\w+))\1:/
+    abcd:
+ 0: abcd:
+ 1: abcd
+
+/^\Eabc/
+    abc
+ 0: abc
+    
+/^[\Eabc]/
+    a
+ 0: a
+    ** Failers 
+No match
+    E 
+No match
+    
+/^[a-\Ec]/
+    b
+ 0: b
+    ** Failers
+No match
+    -
+No match
+    E    
+No match
+
+/^[a\E\E-\Ec]/
+    b
+ 0: b
+    ** Failers
+No match
+    -
+No match
+    E    
+No match
+
+/^[\E\Qa\E-\Qz\E]+/
+    b
+ 0: b
+    ** Failers
+No match
+    -  
+No match
+    
+/^[a\Q]bc\E]/
+    a
+ 0: a
+    ]
+ 0: ]
+    c
+ 0: c
+    
+/^[a-\Q\E]/
+    a
+ 0: a
+    -     
+ 0: -
+
+/^(a()*)*/
+    aaaa
+ 0: aaaa
+ 1: a
+ 2: 
+
+/^(?:a(?:(?:))*)*/
+    aaaa
+ 0: aaaa
+
+/^(a()+)+/
+    aaaa
+ 0: aaaa
+ 1: a
+ 2: 
+
+/^(?:a(?:(?:))+)+/
+    aaaa
+ 0: aaaa
+
+/(a){0,3}(?(1)b|(c|))*D/
+    abbD
+ 0: abbD
+ 1: a
+    ccccD
+ 0: ccccD
+ 1: <unset>
+ 2: 
+    D  
+ 0: D
+ 1: <unset>
+ 2: 
+
+/(a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 1: 
+
+/(?>a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?:a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/\Z/g
+  abc\n
+ 0: 
+ 0: 
+  
+/^(?s)(?>.*)(?<!\n)/
+  abc
+ 0: abc
+  abc\n  
+No match
+
+/^(?![^\n]*\n\z)/
+  abc
+ 0: 
+  abc\n 
+No match
+  
+/\z(?<!\n)/
+  abc
+ 0: 
+  abc\n  
+No match
+
 / End of testinput1 /
--- a/ext/pcre/pcrelib/testdata/testoutput2
+++ b/ext/pcre/pcrelib/testdata/testoutput2
--- a/ext/pcre/pcrelib/testdata/testoutput3
+++ b/ext/pcre/pcrelib/testdata/testoutput3
@@ -149,9 +149,9 @@ No match
 /[[:alpha:]][[:lower:]][[:upper:]]/DLfr_FR 
 ------------------------------------------------------------------
  0 102 Bra 0
-  3     [A-Za-zªµºÀ-ÖØ-öø-ÿ]
- 36     [a-zµß-öø-ÿ]
- 69     [A-ZÀ-ÖØ-Þ]
+  3     [A-Za-z\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\xff]
+ 36     [a-z\xb5\xdf-\xf6\xf8-\xff]
+ 69     [A-Z\xc0-\xd6\xd8-\xde]
 102 102 Ket
 105     End
 ------------------------------------------------------------------
--- a/ext/pcre/pcrelib/testdata/testoutput4
+++ b/ext/pcre/pcrelib/testdata/testoutput4
@@ -914,4 +914,8 @@ No match
 0: ab
 1: ab

+/a*\x{100}*\w/8
+    a 
+ 0: a
+
 / End of testinput4 /
--- a/ext/pcre/pcrelib/testdata/testoutput5
+++ b/ext/pcre/pcrelib/testdata/testoutput5
@@ -103,7 +103,7 @@ First char = 195
 Need char = 191

 /[\x{100}]/8DM
-Memory allocation (code space): 47
+Memory allocation (code space): 15
 ------------------------------------------------------------------
  0  11 Bra 0
  3     [\x{100}]
@@ -429,11 +429,11 @@ No match

 /Ā{3,4}/8SD
 ------------------------------------------------------------------
-  0  13 Bra 0
+  0  11 Bra 0
  3     \x{100}{3}
-  8     \x{100}{,1}
- 13  13 Ket
- 16     End
+  8     \x{100}?
+ 11  11 Ket
+ 14     End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
 Partial matching not supported
@@ -445,29 +445,10 @@ Study returned NULL
 0: \x{100}\x{100}\x{100}

 /(\x{100}+|x)/8SD
------------------------------------------------------------------
-  0  17 Bra 0
-  3   6 Bra 1
-  6     \x{100}+
-  9   5 Alt
- 12     x
- 14  11 Ket
- 17  17 Ket
- 20     End
------------------------------------------------------------------
-Capturing subpattern count = 1
-Partial matching not supported
-Options: utf8
-No first char
-No need char
-Starting byte set: x \xc4 
-
-/(\x{100}*a|x)/8SD
 ------------------------------------------------------------------
  0  19 Bra 0
  3   8 Bra 1
-  6     \x{100}*
-  9     a
+  8     \x{100}+
 11   5 Alt
 14     x
 16  13 Ket
@@ -479,13 +460,13 @@ Partial matching not supported
 Options: utf8
 No first char
 No need char
-Starting byte set: a x \xc4 
+Starting byte set: x \xc4 

-/(\x{100}{0,2}a|x)/8SD
+/(\x{100}*a|x)/8SD
 ------------------------------------------------------------------
  0  21 Bra 0
  3  10 Bra 1
-  6     \x{100}{,2}
+  8     \x{100}*+
 11     a
 13   5 Alt
 16     x
@@ -500,18 +481,37 @@ No first char
 No need char
 Starting byte set: a x \xc4 

+/(\x{100}{0,2}a|x)/8SD
+------------------------------------------------------------------
+  0  23 Bra 0
+  3  12 Bra 1
+  8     \x{100}{0,2}
+ 13     a
+ 15   5 Alt
+ 18     x
+ 20  17 Ket
+ 23  23 Ket
+ 26     End
+------------------------------------------------------------------
+Capturing subpattern count = 1
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+Starting byte set: a x \xc4 
+
 /(\x{100}{1,2}a|x)/8SD
 ------------------------------------------------------------------
-  0  24 Bra 0
-  3  13 Bra 1
-  6     \x{100}
-  9     \x{100}{,1}
- 14     a
- 16   5 Alt
- 19     x
- 21  18 Ket
- 24  24 Ket
- 27     End
+  0  26 Bra 0
+  3  15 Bra 1
+  8     \x{100}
+ 11     \x{100}{0,1}
+ 16     a
+ 18   5 Alt
+ 21     x
+ 23  20 Ket
+ 26  26 Ket
+ 29     End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 Partial matching not supported
@@ -628,7 +628,7 @@ Need char = 129
 /\x{100}*A/8D
 ------------------------------------------------------------------
  0   8 Bra 0
-  3     \x{100}*
+  3     \x{100}*+
  6     A
  8   8 Ket
 11     End
@@ -644,7 +644,7 @@ Need char = 'A'
 /\x{100}*\d(?R)/8D
 ------------------------------------------------------------------
  0  16 Bra 0
-  3     \x{100}*
+  3     \x{100}*+
  6     \d
  7   6 Once
 10   0 Recurse
@@ -683,7 +683,7 @@ No first char
 No need char

 /[\x{100}]/8DM
-Memory allocation (code space): 47
+Memory allocation (code space): 15
 ------------------------------------------------------------------
  0  11 Bra 0
  3     [\x{100}]
@@ -912,16 +912,16 @@ No match

 /\x{100}abc(xyz(?1))/8D
 ------------------------------------------------------------------
-  0  33 Bra 0
+  0  35 Bra 0
  3     \x{100}abc
- 12  18 Bra 1
- 15     xyz
- 21   6 Once
- 24  12 Recurse
- 27   6 Ket
- 30  18 Ket
- 33  33 Ket
- 36     End
+ 12  20 Bra 1
+ 17     xyz
+ 23   6 Once
+ 26  12 Recurse
+ 29   6 Ket
+ 32  20 Ket
+ 35  35 Ket
+ 38     End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 Options: utf8
@@ -930,17 +930,17 @@ Need char = 'z'

 /[^\x{100}]abc(xyz(?1))/8D
 ------------------------------------------------------------------
-  0  38 Bra 0
+  0  40 Bra 0
  3     [^\x{100}]
 11     abc
- 17  18 Bra 1
- 20     xyz
- 26   6 Once
- 29  17 Recurse
- 32   6 Ket
- 35  18 Ket
- 38  38 Ket
- 41     End
+ 17  20 Bra 1
+ 22     xyz
+ 28   6 Once
+ 31  17 Recurse
+ 34   6 Ket
+ 37  20 Ket
+ 40  40 Ket
+ 43     End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 Options: utf8
@@ -949,17 +949,17 @@ Need char = 'z'

 /[ab\x{100}]abc(xyz(?1))/8D
 ------------------------------------------------------------------
-  0  70 Bra 0
+  0  72 Bra 0
  3     [ab\x{100}]
 43     abc
- 49  18 Bra 1
- 52     xyz
- 58   6 Once
- 61  49 Recurse
- 64   6 Ket
- 67  18 Ket
- 70  70 Ket
- 73     End
+ 49  20 Bra 1
+ 54     xyz
+ 60   6 Once
+ 63  49 Recurse
+ 66   6 Ket
+ 69  20 Ket
+ 72  72 Ket
+ 75     End
 ------------------------------------------------------------------
 Capturing subpattern count = 1
 Options: utf8
@@ -968,20 +968,20 @@ Need char = 'z'

 /(\x{100}(b(?2)c))?/D8
 ------------------------------------------------------------------
-  0  32 Bra 0
+  0  36 Bra 0
  3     Brazero
-  4  25 Bra 1
-  7     \x{100}
- 10  16 Bra 2
- 13     b
- 15   6 Once
- 18  10 Recurse
- 21   6 Ket
- 24     c
- 26  16 Ket
- 29  25 Ket
- 32  32 Ket
- 35     End
+  4  29 Bra 1
+  9     \x{100}
+ 12  18 Bra 2
+ 17     b
+ 19   6 Once
+ 22  12 Recurse
+ 25   6 Ket
+ 28     c
+ 30  18 Ket
+ 33  29 Ket
+ 36  36 Ket
+ 39     End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
 Options: utf8
@@ -990,33 +990,33 @@ No need char

 /(\x{100}(b(?2)c)){0,2}/D8
 ------------------------------------------------------------------
-  0  67 Bra 0
+  0  75 Bra 0
  3     Brazero
-  4  60 Bra 0
-  7  25 Bra 1
- 10     \x{100}
- 13  16 Bra 2
- 16     b
- 18   6 Once
- 21  13 Recurse
- 24   6 Ket
- 27     c
- 29  16 Ket
- 32  25 Ket
- 35     Brazero
- 36  25 Bra 1
- 39     \x{100}
- 42  16 Bra 2
- 45     b
- 47   6 Once
- 50  13 Recurse
- 53   6 Ket
- 56     c
- 58  16 Ket
- 61  25 Ket
- 64  60 Ket
- 67  67 Ket
- 70     End
+  4  68 Bra 0
+  7  29 Bra 1
+ 12     \x{100}
+ 15  18 Bra 2
+ 20     b
+ 22   6 Once
+ 25  15 Recurse
+ 28   6 Ket
+ 31     c
+ 33  18 Ket
+ 36  29 Ket
+ 39     Brazero
+ 40  29 Bra 1
+ 45     \x{100}
+ 48  18 Bra 2
+ 53     b
+ 55   6 Once
+ 58  15 Recurse
+ 61   6 Ket
+ 64     c
+ 66  18 Ket
+ 69  29 Ket
+ 72  68 Ket
+ 75  75 Ket
+ 78     End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
 Options: utf8
@@ -1025,20 +1025,20 @@ No need char

 /(\x{100}(b(?1)c))?/D8
 ------------------------------------------------------------------
-  0  32 Bra 0
+  0  36 Bra 0
  3     Brazero
-  4  25 Bra 1
-  7     \x{100}
- 10  16 Bra 2
- 13     b
- 15   6 Once
- 18   4 Recurse
- 21   6 Ket
- 24     c
- 26  16 Ket
- 29  25 Ket
- 32  32 Ket
- 35     End
+  4  29 Bra 1
+  9     \x{100}
+ 12  18 Bra 2
+ 17     b
+ 19   6 Once
+ 22   4 Recurse
+ 25   6 Ket
+ 28     c
+ 30  18 Ket
+ 33  29 Ket
+ 36  36 Ket
+ 39     End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
 Options: utf8
@@ -1047,33 +1047,33 @@ No need char

 /(\x{100}(b(?1)c)){0,2}/D8
 ------------------------------------------------------------------
-  0  67 Bra 0
+  0  75 Bra 0
  3     Brazero
-  4  60 Bra 0
-  7  25 Bra 1
- 10     \x{100}
- 13  16 Bra 2
- 16     b
- 18   6 Once
- 21   7 Recurse
- 24   6 Ket
- 27     c
- 29  16 Ket
- 32  25 Ket
- 35     Brazero
- 36  25 Bra 1
- 39     \x{100}
- 42  16 Bra 2
- 45     b
- 47   6 Once
- 50   7 Recurse
- 53   6 Ket
- 56     c
- 58  16 Ket
- 61  25 Ket
- 64  60 Ket
- 67  67 Ket
- 70     End
+  4  68 Bra 0
+  7  29 Bra 1
+ 12     \x{100}
+ 15  18 Bra 2
+ 20     b
+ 22   6 Once
+ 25   7 Recurse
+ 28   6 Ket
+ 31     c
+ 33  18 Ket
+ 36  29 Ket
+ 39     Brazero
+ 40  29 Bra 1
+ 45     \x{100}
+ 48  18 Bra 2
+ 53     b
+ 55   6 Once
+ 58   7 Recurse
+ 61   6 Ket
+ 64     c
+ 66  18 Ket
+ 69  29 Ket
+ 72  68 Ket
+ 75  75 Ket
+ 78     End
 ------------------------------------------------------------------
 Capturing subpattern count = 2
 Options: utf8
@@ -1119,5 +1119,285 @@ Need char = 191
 0: \x{1ff}
  \777 
 0: \x{1ff}
+  
+/\x{100}*\d/8D
+------------------------------------------------------------------
+  0   7 Bra 0
+  3     \x{100}*+
+  6     \d
+  7   7 Ket
+ 10     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+
+/\x{100}*\s/8D
+------------------------------------------------------------------
+  0   7 Bra 0
+  3     \x{100}*+
+  6     \s
+  7   7 Ket
+ 10     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+
+/\x{100}*\w/8D
+------------------------------------------------------------------
+  0   7 Bra 0
+  3     \x{100}*+
+  6     \w
+  7   7 Ket
+ 10     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+
+/\x{100}*\D/8D
+------------------------------------------------------------------
+  0   7 Bra 0
+  3     \x{100}*
+  6     \D
+  7   7 Ket
+ 10     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+
+/\x{100}*\S/8D
+------------------------------------------------------------------
+  0   7 Bra 0
+  3     \x{100}*
+  6     \S
+  7   7 Ket
+ 10     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+
+/\x{100}*\W/8D
+------------------------------------------------------------------
+  0   7 Bra 0
+  3     \x{100}*
+  6     \W
+  7   7 Ket
+ 10     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+No first char
+No need char
+
+/\x{100}+\x{200}/8D
+------------------------------------------------------------------
+  0   9 Bra 0
+  3     \x{100}++
+  6     \x{200}
+  9   9 Ket
+ 12     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+First char = 196
+Need char = 128
+
+/\x{100}+X/8D
+------------------------------------------------------------------
+  0   8 Bra 0
+  3     \x{100}++
+  6     X
+  8   8 Ket
+ 11     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+First char = 196
+Need char = 'X'
+
+/X+\x{200}/8D
+------------------------------------------------------------------
+  0   8 Bra 0
+  3     X++
+  5     \x{200}
+  8   8 Ket
+ 11     End
+------------------------------------------------------------------
+Capturing subpattern count = 0
+Partial matching not supported
+Options: utf8
+First char = 'X'
+Need char = 128
+
+/()()()()()()()()()()
+ ()()()()()()()()()()
+ ()()()()()()()()()()
+ ()()()()()()()()()()
+ A (x) (?41) B/8x
+    AxxB     
+Matched, but too many substrings
+ 0: AxxB
+ 1: 
+ 2: 
+ 3: 
+ 4: 
+ 5: 
+ 6: 
+ 7: 
+ 8: 
+ 9: 
+10: 
+11: 
+12: 
+13: 
+14: 
+
+/^[\x{100}\E-\Q\E\x{150}]/B8
+------------------------------------------------------------------
+  0  14 Bra 0
+  3     ^
+  4     [\x{100}-\x{150}]
+ 14  14 Ket
+ 17     End
+------------------------------------------------------------------
+
+/^[\QÄ€\E-\QÅ<51>\E]/B8
+------------------------------------------------------------------
+  0  14 Bra 0
+  3     ^
+  4     [\x{100}-\x{150}]
+ 14  14 Ket
+ 17     End
+------------------------------------------------------------------
+
+/^[\QÄ€\E-\QÅ<51>\E/B8
+Failed: missing terminating ] for character class at offset 15
+
+/^abc./mgx8<any>
+    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+ 0: abc8
+ 0: abc9
+
+/abc.$/mgx8<any>
+    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+ 0: abc8
+ 0: abc9
+
+/^a\Rb/8
+    a\nb
+ 0: a\x{0a}b
+    a\rb
+ 0: a\x{0d}b
+    a\r\nb
+ 0: a\x{0d}\x{0a}b
+    a\x0bb
+ 0: a\x{0b}b
+    a\x0cb
+ 0: a\x{0c}b
+    a\x{85}b   
+ 0: a\x{85}b
+    a\x{2028}b 
+ 0: a\x{2028}b
+    a\x{2029}b 
+ 0: a\x{2029}b
+    ** Failers
+No match
+    a\n\rb    
+No match
+
+/^a\R*b/8
+    ab
+ 0: ab
+    a\nb
+ 0: a\x{0a}b
+    a\rb
+ 0: a\x{0d}b
+    a\r\nb
+ 0: a\x{0d}\x{0a}b
+    a\x0bb
+ 0: a\x{0b}b
+    a\x0c\x{2028}\x{2029}b
+ 0: a\x{0c}\x{2028}\x{2029}b
+    a\x{85}b   
+ 0: a\x{85}b
+    a\n\rb    
+ 0: a\x{0a}\x{0d}b
+    a\n\r\x{85}\x0cb 
+ 0: a\x{0a}\x{0d}\x{85}\x{0c}b
+
+/^a\R+b/8
+    a\nb
+ 0: a\x{0a}b
+    a\rb
+ 0: a\x{0d}b
+    a\r\nb
+ 0: a\x{0d}\x{0a}b
+    a\x0bb
+ 0: a\x{0b}b
+    a\x0c\x{2028}\x{2029}b
+ 0: a\x{0c}\x{2028}\x{2029}b
+    a\x{85}b   
+ 0: a\x{85}b
+    a\n\rb    
+ 0: a\x{0a}\x{0d}b
+    a\n\r\x{85}\x0cb 
+ 0: a\x{0a}\x{0d}\x{85}\x{0c}b
+    ** Failers
+No match
+    ab  
+No match
+
+/^a\R{1,3}b/8
+    a\nb
+ 0: a\x{0a}b
+    a\n\rb
+ 0: a\x{0a}\x{0d}b
+    a\n\r\x{85}b
+ 0: a\x{0a}\x{0d}\x{85}b
+    a\r\n\r\nb 
+ 0: a\x{0d}\x{0a}\x{0d}\x{0a}b
+    a\r\n\r\n\r\nb 
+ 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b
+    a\n\r\n\rb
+ 0: a\x{0a}\x{0d}\x{0a}\x{0d}b
+    a\n\n\r\nb 
+ 0: a\x{0a}\x{0a}\x{0d}\x{0a}b
+    ** Failers
+No match
+    a\n\n\n\rb
+No match
+    a\r
+No match

 / End of testinput5 /
--- a/ext/pcre/pcrelib/testdata/testoutput6
+++ b/ext/pcre/pcrelib/testdata/testoutput6
@@ -609,7 +609,7 @@ No first char
 No need char

 /[\p{Nd}]/8DM
-Memory allocation (code space): 47
+Memory allocation (code space): 15
 ------------------------------------------------------------------
  0  11 Bra 0
  3     [\p{Nd}]
@@ -1410,4 +1410,26 @@ No match
 1: L=abc
 2: abc

+/The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE
+will match it only with UCP support, because without that it has no notion
+of case for anything other than the ASCII letters. / 
+
+/((?i)[\x{c0}])/8
+    \x{c0}
+ 0: \x{c0}
+ 1: \x{c0}
+    \x{e0} 
+ 0: \x{e0}
+ 1: \x{e0}
+
+/(?i:[\x{c0}])/8
+    \x{c0}
+ 0: \x{c0}
+    \x{e0} 
+ 0: \x{e0}
+    
+/^\p{Balinese}\p{Cuneiform}\p{Nko}\p{Phags_Pa}\p{Phoenician}/8
+    \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
+ 0: \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
+
 / End of testinput6 /
--- a/ext/pcre/pcrelib/testdata/testoutput7
+++ b/ext/pcre/pcrelib/testdata/testoutput7
@@ -2735,8 +2735,7 @@ No match
 No match

 /^a	b
-  
-    c/x
+      c/x
    abc
 0: abc

@@ -2974,7 +2973,7 @@ No match
    {4,5a}bc
 0: {4,5a}bc

-/^a.b/
+/^a.b/<lf>
    a\rb
 0: a\x0db
    *** Failers
@@ -3040,9 +3039,9 @@ No match
    abcdefghijk\12S
 0: abcdefghijk\x0aS

-/ab\gdef/
-    abgdef
- 0: abgdef
+/ab\hdef/
+    abhdef
+ 0: abhdef

 /a{0}bc/
    bc
@@ -6601,7 +6600,7 @@ No match
    xyz\rabc\<lf>
 No match
    
-/abc$/m
+/abc$/m<lf>
    xyzabc
 0: abc
    xyzabc\n 
@@ -6657,7 +6656,7 @@ No match
    xyz\rabcdef
 No match
    
-/.*/
+/.*/<lf>
    abc\ndef
 0: abc
 1: ab
@@ -6729,4 +6728,228 @@ No match
    abc\r\ndef
 0: abc\x0d\x0adef

+/^\w+=.*(\\\n.*)*/
+    abc=xyz\\\npqr
+ 0: abc=xyz\\x0apqr
+ 1: abc=xyz\\x0apq
+ 2: abc=xyz\\x0ap
+ 3: abc=xyz\\x0a
+ 4: abc=xyz\
+ 5: abc=xyz
+ 6: abc=xy
+ 7: abc=x
+ 8: abc=
+
+/^(a()*)*/
+    aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+ 3: a
+ 4: 
+
+/^(?:a(?:(?:))*)*/
+    aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+ 3: a
+ 4: 
+
+/^(a()+)+/
+    aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+ 3: a
+
+/^(?:a(?:(?:))+)+/
+    aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+ 3: a
+
+/(a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?>a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/(?:a|)*\d/
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+No match
+  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa4
+
+/^a.b/<lf>
+    a\rb
+ 0: a\x0db
+    a\nb\<cr> 
+ 0: a\x0ab
+    ** Failers
+No match
+    a\nb
+No match
+    a\nb\<any>
+No match
+    a\rb\<cr>   
+No match
+    a\rb\<any>   
+No match
+
+/^abc./mgx<any>
+    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x85abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+
+/abc.$/mgx<any>
+    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x85 abc7\x{2028} abc8\x{2029} abc9
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc9
+
+/^a\Rb/
+    a\nb
+ 0: a\x0ab
+    a\rb
+ 0: a\x0db
+    a\r\nb
+ 0: a\x0d\x0ab
+    a\x0bb
+ 0: a\x0bb
+    a\x0cb
+ 0: a\x0cb
+    a\x85b   
+ 0: a\x85b
+    ** Failers
+No match
+    a\n\rb    
+No match
+
+/^a\R*b/
+    ab
+ 0: ab
+    a\nb
+ 0: a\x0ab
+    a\rb
+ 0: a\x0db
+    a\r\nb
+ 0: a\x0d\x0ab
+    a\x0bb
+ 0: a\x0bb
+    a\x0cb
+ 0: a\x0cb
+    a\x85b   
+ 0: a\x85b
+    a\n\rb    
+ 0: a\x0a\x0db
+    a\n\r\x85\x0cb 
+ 0: a\x0a\x0d\x85\x0cb
+
+/^a\R+b/
+    a\nb
+ 0: a\x0ab
+    a\rb
+ 0: a\x0db
+    a\r\nb
+ 0: a\x0d\x0ab
+    a\x0bb
+ 0: a\x0bb
+    a\x0cb
+ 0: a\x0cb
+    a\x85b   
+ 0: a\x85b
+    a\n\rb    
+ 0: a\x0a\x0db
+    a\n\r\x85\x0cb 
+ 0: a\x0a\x0d\x85\x0cb
+    ** Failers
+No match
+    ab  
+No match
+    
+/^a\R{1,3}b/
+    a\nb
+ 0: a\x0ab
+    a\n\rb
+ 0: a\x0a\x0db
+    a\n\r\x85b
+ 0: a\x0a\x0d\x85b
+    a\r\n\r\nb 
+ 0: a\x0d\x0a\x0d\x0ab
+    a\r\n\r\n\r\nb 
+ 0: a\x0d\x0a\x0d\x0a\x0d\x0ab
+    a\n\r\n\rb
+ 0: a\x0a\x0d\x0a\x0db
+    a\n\n\r\nb 
+ 0: a\x0a\x0a\x0d\x0ab
+    ** Failers
+No match
+    a\n\n\n\rb
+No match
+    a\r
+No match
+
+/^a[\R]b/
+    aRb
+ 0: aRb
+    ** Failers
+No match
+    a\nb  
+No match
+
+/.+foo/
+    afoo
+ 0: afoo
+    ** Failers 
+No match
+    \r\nfoo 
+No match
+    \nfoo 
+No match
+
+/.+foo/<crlf>
+    afoo
+ 0: afoo
+    \nfoo 
+ 0: \x0afoo
+    ** Failers 
+No match
+    \r\nfoo 
+No match
+
+/.+foo/<any>
+    afoo
+ 0: afoo
+    ** Failers 
+No match
+    \nfoo 
+No match
+    \r\nfoo 
+No match
+
+/.+foo/s
+    afoo
+ 0: afoo
+    \r\nfoo 
+ 0: \x0d\x0afoo
+    \nfoo 
+ 0: \x0afoo
+
 / End of testinput7 /
--- a/ext/pcre/pcrelib/testdata/testoutput8
+++ b/ext/pcre/pcrelib/testdata/testoutput8
@@ -1028,4 +1028,114 @@ No match
    \x{85}
 0: \x{85}

+/^abc./mgx8<any>
+    abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+ 0: abc8
+ 0: abc9
+
+/abc.$/mgx8<any>
+    abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
+ 0: abc1
+ 0: abc2
+ 0: abc3
+ 0: abc4
+ 0: abc5
+ 0: abc6
+ 0: abc7
+ 0: abc8
+ 0: abc9
+
+/^a\Rb/8
+    a\nb
+ 0: a\x{0a}b
+    a\rb
+ 0: a\x{0d}b
+    a\r\nb
+ 0: a\x{0d}\x{0a}b
+    a\x0bb
+ 0: a\x{0b}b
+    a\x0cb
+ 0: a\x{0c}b
+    a\x{85}b   
+ 0: a\x{85}b
+    a\x{2028}b 
+ 0: a\x{2028}b
+    a\x{2029}b 
+ 0: a\x{2029}b
+    ** Failers
+No match
+    a\n\rb    
+No match
+
+/^a\R*b/8
+    ab
+ 0: ab
+    a\nb
+ 0: a\x{0a}b
+    a\rb
+ 0: a\x{0d}b
+    a\r\nb
+ 0: a\x{0d}\x{0a}b
+    a\x0bb
+ 0: a\x{0b}b
+    a\x0c\x{2028}\x{2029}b
+ 0: a\x{0c}\x{2028}\x{2029}b
+    a\x{85}b   
+ 0: a\x{85}b
+    a\n\rb    
+ 0: a\x{0a}\x{0d}b
+    a\n\r\x{85}\x0cb 
+ 0: a\x{0a}\x{0d}\x{85}\x{0c}b
+
+/^a\R+b/8
+    a\nb
+ 0: a\x{0a}b
+    a\rb
+ 0: a\x{0d}b
+    a\r\nb
+ 0: a\x{0d}\x{0a}b
+    a\x0bb
+ 0: a\x{0b}b
+    a\x0c\x{2028}\x{2029}b
+ 0: a\x{0c}\x{2028}\x{2029}b
+    a\x{85}b   
+ 0: a\x{85}b
+    a\n\rb    
+ 0: a\x{0a}\x{0d}b
+    a\n\r\x{85}\x0cb 
+ 0: a\x{0a}\x{0d}\x{85}\x{0c}b
+    ** Failers
+No match
+    ab  
+No match
+
+/^a\R{1,3}b/8
+    a\nb
+ 0: a\x{0a}b
+    a\n\rb
+ 0: a\x{0a}\x{0d}b
+    a\n\r\x{85}b
+ 0: a\x{0a}\x{0d}\x{85}b
+    a\r\n\r\nb 
+ 0: a\x{0d}\x{0a}\x{0d}\x{0a}b
+    a\r\n\r\n\r\nb 
+ 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b
+    a\n\r\n\rb
+ 0: a\x{0a}\x{0d}\x{0a}\x{0d}b
+    a\n\n\r\nb 
+ 0: a\x{0a}\x{0a}\x{0d}\x{0a}b
+    ** Failers
+No match
+    a\n\n\n\rb
+No match
+    a\r
+No match
+
 / End of testinput 8 / 
--- a/ext/pcre/pcrelib/ucp.h
+++ b/ext/pcre/pcrelib/ucp.h
@@ -6,7 +6,9 @@
 #define _UCP_H

 /* This file contains definitions of the property values that are returned by
-the function _pcre_ucp_findprop(). */
+the function _pcre_ucp_findprop(). New values that are added for new releases
+of Unicode should always be at the end of each enum, for backwards
+compatibility. */

 /* These are the general character categories. */

@@ -118,7 +120,12 @@ enum {
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
-  ucp_Yi
+  ucp_Yi,
+  ucp_Balinese,      /* New for Unicode 5.0.0 */
+  ucp_Cuneiform,     /* New for Unicode 5.0.0 */
+  ucp_Nko,           /* New for Unicode 5.0.0 */
+  ucp_Phags_Pa,      /* New for Unicode 5.0.0 */
+  ucp_Phoenician     /* New for Unicode 5.0.0 */
 };

 #endif
--- a/ext/pcre/pcrelib/ucpinternal.h
+++ b/ext/pcre/pcrelib/ucpinternal.h
@@ -2,6 +2,9 @@
 *           Unicode Property Table handler       *
 *************************************************/

+#ifndef _UCPINTERNAL_H
+#define _UCPINTERNAL_H
+
 /* Internal header file defining the layout of the bits in each pair of 32-bit
 words that form a data item in the table. */

@@ -84,4 +87,6 @@ When searching the data, proceed as follows:
    (2).
 */

+#endif /* _UCPINTERNAL_H */
+
 /* End of ucpinternal.h */
--- a/ext/pcre/pcrelib/ucptable.c
+++ b/ext/pcre/pcrelib/ucptable.c
@@ -1,5 +1,6 @@
 /* This source module is automatically generated from the Unicode
-property table. See ucpinternal.h for a description of the layout. */
+property table. See ucpinternal.h for a description of the layout.
+This version was made from the Unicode 5.0.0 tables. */

 static cnode ucp_table[] = {
  { 0x09800000, 0x0000001f },
@@ -298,7 +299,7 @@ static cnode ucp_table[] = {
  { 0x2100017d, 0x24000001 },
  { 0x2100017e, 0x1400ffff },
  { 0x2100017f, 0x1400fed4 },
-  { 0x21000180, 0x14000000 },
+  { 0x21000180, 0x140000c3 },
  { 0x21000181, 0x240000d2 },
  { 0x21000182, 0x24000001 },
  { 0x21000183, 0x1400ffff },
@@ -475,13 +476,27 @@ static cnode ucp_table[] = {
  { 0x21000232, 0x24000001 },
  { 0x21000233, 0x1400ffff },
  { 0x21800234, 0x14000005 },
-  { 0x2100023a, 0x24000000 },
+  { 0x2100023a, 0x24002a2b },
  { 0x2100023b, 0x24000001 },
  { 0x2100023c, 0x1400ffff },
  { 0x2100023d, 0x2400ff5d },
-  { 0x2100023e, 0x24000000 },
+  { 0x2100023e, 0x24002a28 },
  { 0x2180023f, 0x14000001 },
-  { 0x21000241, 0x24000053 },
+  { 0x21000241, 0x24000001 },
+  { 0x21000242, 0x1400ffff },
+  { 0x21000243, 0x2400ff3d },
+  { 0x21000244, 0x24000045 },
+  { 0x21000245, 0x24000047 },
+  { 0x21000246, 0x24000001 },
+  { 0x21000247, 0x1400ffff },
+  { 0x21000248, 0x24000001 },
+  { 0x21000249, 0x1400ffff },
+  { 0x2100024a, 0x24000001 },
+  { 0x2100024b, 0x1400ffff },
+  { 0x2100024c, 0x24000001 },
+  { 0x2100024d, 0x1400ffff },
+  { 0x2100024e, 0x24000001 },
+  { 0x2100024f, 0x1400ffff },
  { 0x21800250, 0x14000002 },
  { 0x21000253, 0x1400ff2e },
  { 0x21000254, 0x1400ff32 },
@@ -499,25 +514,30 @@ static cnode ucp_table[] = {
  { 0x21800264, 0x14000003 },
  { 0x21000268, 0x1400ff2f },
  { 0x21000269, 0x1400ff2d },
-  { 0x2180026a, 0x14000004 },
+  { 0x2100026a, 0x14000000 },
+  { 0x2100026b, 0x140029f7 },
+  { 0x2180026c, 0x14000002 },
  { 0x2100026f, 0x1400ff2d },
  { 0x21800270, 0x14000001 },
  { 0x21000272, 0x1400ff2b },
  { 0x21800273, 0x14000001 },
  { 0x21000275, 0x1400ff2a },
-  { 0x21800276, 0x14000009 },
+  { 0x21800276, 0x14000006 },
+  { 0x2100027d, 0x140029e7 },
+  { 0x2180027e, 0x14000001 },
  { 0x21000280, 0x1400ff26 },
  { 0x21800281, 0x14000001 },
  { 0x21000283, 0x1400ff26 },
  { 0x21800284, 0x14000003 },
  { 0x21000288, 0x1400ff26 },
-  { 0x21000289, 0x14000000 },
+  { 0x21000289, 0x1400ffbb },
  { 0x2100028a, 0x1400ff27 },
  { 0x2100028b, 0x1400ff27 },
-  { 0x2180028c, 0x14000005 },
+  { 0x2100028c, 0x1400ffb9 },
+  { 0x2180028d, 0x14000004 },
  { 0x21000292, 0x1400ff25 },
  { 0x21000293, 0x14000000 },
-  { 0x21000294, 0x1400ffad },
+  { 0x21000294, 0x1c000000 },
  { 0x21800295, 0x1400001a },
  { 0x218002b0, 0x18000011 },
  { 0x098002c2, 0x60000003 },
@@ -532,6 +552,9 @@ static cnode ucp_table[] = {
  { 0x1b800346, 0x30000029 },
  { 0x13800374, 0x60000001 },
  { 0x1300037a, 0x18000000 },
+  { 0x1300037b, 0x14000082 },
+  { 0x1300037c, 0x14000082 },
+  { 0x1300037d, 0x14000082 },
  { 0x0900037e, 0x54000000 },
  { 0x13800384, 0x60000001 },
  { 0x13000386, 0x24000026 },
@@ -647,7 +670,9 @@ static cnode ucp_table[] = {
  { 0x130003fa, 0x24000001 },
  { 0x130003fb, 0x1400ffff },
  { 0x130003fc, 0x14000000 },
-  { 0x138003fd, 0x24000002 },
+  { 0x130003fd, 0x2400ff7e },
+  { 0x130003fe, 0x2400ff7e },
+  { 0x130003ff, 0x2400ff7e },
  { 0x0c000400, 0x24000050 },
  { 0x0c000401, 0x24000050 },
  { 0x0c000402, 0x24000050 },
@@ -835,7 +860,7 @@ static cnode ucp_table[] = {
  { 0x0c0004bd, 0x1400ffff },
  { 0x0c0004be, 0x24000001 },
  { 0x0c0004bf, 0x1400ffff },
-  { 0x0c0004c0, 0x24000000 },
+  { 0x0c0004c0, 0x2400000f },
  { 0x0c0004c1, 0x24000001 },
  { 0x0c0004c2, 0x1400ffff },
  { 0x0c0004c3, 0x24000001 },
@@ -850,6 +875,7 @@ static cnode ucp_table[] = {
  { 0x0c0004cc, 0x1400ffff },
  { 0x0c0004cd, 0x24000001 },
  { 0x0c0004ce, 0x1400ffff },
+  { 0x0c0004cf, 0x1400fff1 },
  { 0x0c0004d0, 0x24000001 },
  { 0x0c0004d1, 0x1400ffff },
  { 0x0c0004d2, 0x24000001 },
@@ -892,6 +918,12 @@ static cnode ucp_table[] = {
  { 0x0c0004f7, 0x1400ffff },
  { 0x0c0004f8, 0x24000001 },
  { 0x0c0004f9, 0x1400ffff },
+  { 0x0c0004fa, 0x24000001 },
+  { 0x0c0004fb, 0x1400ffff },
+  { 0x0c0004fc, 0x24000001 },
+  { 0x0c0004fd, 0x1400ffff },
+  { 0x0c0004fe, 0x24000001 },
+  { 0x0c0004ff, 0x1400ffff },
  { 0x0c000500, 0x24000001 },
  { 0x0c000501, 0x1400ffff },
  { 0x0c000502, 0x24000001 },
@@ -908,6 +940,10 @@ static cnode ucp_table[] = {
  { 0x0c00050d, 0x1400ffff },
  { 0x0c00050e, 0x24000001 },
  { 0x0c00050f, 0x1400ffff },
+  { 0x0c000510, 0x24000001 },
+  { 0x0c000511, 0x1400ffff },
+  { 0x0c000512, 0x24000001 },
+  { 0x0c000513, 0x1400ffff },
  { 0x01000531, 0x24000030 },
  { 0x01000532, 0x24000030 },
  { 0x01000533, 0x24000030 },
@@ -989,8 +1025,7 @@ static cnode ucp_table[] = {
  { 0x01000587, 0x14000000 },
  { 0x09000589, 0x54000000 },
  { 0x0100058a, 0x44000000 },
-  { 0x19800591, 0x30000028 },
-  { 0x198005bb, 0x30000002 },
+  { 0x19800591, 0x3000002c },
  { 0x190005be, 0x54000000 },
  { 0x190005bf, 0x30000000 },
  { 0x190005c0, 0x54000000 },
@@ -1043,6 +1078,13 @@ static cnode ucp_table[] = {
  { 0x37800780, 0x1c000025 },
  { 0x378007a6, 0x3000000a },
  { 0x370007b1, 0x1c000000 },
+  { 0x3f8007c0, 0x34000009 },
+  { 0x3f8007ca, 0x1c000020 },
+  { 0x3f8007eb, 0x30000008 },
+  { 0x3f8007f4, 0x18000001 },
+  { 0x3f0007f6, 0x68000000 },
+  { 0x3f8007f7, 0x54000002 },
+  { 0x3f0007fa, 0x18000000 },
  { 0x0e800901, 0x30000001 },
  { 0x0e000903, 0x28000000 },
  { 0x0e800904, 0x1c000035 },
@@ -1059,7 +1101,7 @@ static cnode ucp_table[] = {
  { 0x09800964, 0x54000001 },
  { 0x0e800966, 0x34000009 },
  { 0x09000970, 0x54000000 },
-  { 0x0e00097d, 0x1c000000 },
+  { 0x0e80097b, 0x1c000004 },
  { 0x02000981, 0x30000000 },
  { 0x02800982, 0x28000001 },
  { 0x02800985, 0x1c000007 },
@@ -1203,7 +1245,9 @@ static cnode ucp_table[] = {
  { 0x1c800cd5, 0x28000001 },
  { 0x1c000cde, 0x1c000000 },
  { 0x1c800ce0, 0x1c000001 },
+  { 0x1c800ce2, 0x30000001 },
  { 0x1c800ce6, 0x34000009 },
+  { 0x1c800cf1, 0x68000001 },
  { 0x24800d02, 0x28000001 },
  { 0x24800d05, 0x1c000007 },
  { 0x24800d0e, 0x1c000002 },
@@ -1452,13 +1496,33 @@ static cnode ucp_table[] = {
  { 0x05801a17, 0x30000001 },
  { 0x05801a19, 0x28000002 },
  { 0x05801a1e, 0x54000001 },
+  { 0x3d801b00, 0x30000003 },
+  { 0x3d001b04, 0x28000000 },
+  { 0x3d801b05, 0x1c00002e },
+  { 0x3d001b34, 0x30000000 },
+  { 0x3d001b35, 0x28000000 },
+  { 0x3d801b36, 0x30000004 },
+  { 0x3d001b3b, 0x28000000 },
+  { 0x3d001b3c, 0x30000000 },
+  { 0x3d801b3d, 0x28000004 },
+  { 0x3d001b42, 0x30000000 },
+  { 0x3d801b43, 0x28000001 },
+  { 0x3d801b45, 0x1c000006 },
+  { 0x3d801b50, 0x34000009 },
+  { 0x3d801b5a, 0x54000006 },
+  { 0x3d801b61, 0x68000009 },
+  { 0x3d801b6b, 0x30000008 },
+  { 0x3d801b74, 0x68000008 },
  { 0x21801d00, 0x1400002b },
  { 0x21801d2c, 0x18000035 },
  { 0x21801d62, 0x14000015 },
  { 0x0c001d78, 0x18000000 },
-  { 0x21801d79, 0x14000021 },
+  { 0x21801d79, 0x14000003 },
+  { 0x21001d7d, 0x14000ee6 },
+  { 0x21801d7e, 0x1400001c },
  { 0x21801d9b, 0x18000024 },
-  { 0x1b801dc0, 0x30000003 },
+  { 0x1b801dc0, 0x3000000a },
+  { 0x1b801dfe, 0x30000001 },
  { 0x21001e00, 0x24000001 },
  { 0x21001e01, 0x1400ffff },
  { 0x21001e02, 0x24000001 },
@@ -1967,7 +2031,7 @@ static cnode ucp_table[] = {
  { 0x1b8020dd, 0x2c000003 },
  { 0x1b0020e1, 0x30000000 },
  { 0x1b8020e2, 0x2c000002 },
-  { 0x1b8020e5, 0x30000006 },
+  { 0x1b8020e5, 0x3000000a },
  { 0x09802100, 0x68000001 },
  { 0x09002102, 0x24000000 },
  { 0x09802103, 0x68000003 },
@@ -1995,7 +2059,7 @@ static cnode ucp_table[] = {
  { 0x0900212e, 0x68000000 },
  { 0x0900212f, 0x14000000 },
  { 0x09802130, 0x24000001 },
-  { 0x09002132, 0x68000000 },
+  { 0x21002132, 0x2400001c },
  { 0x09002133, 0x24000000 },
  { 0x09002134, 0x14000000 },
  { 0x09802135, 0x1c000003 },
@@ -2008,7 +2072,8 @@ static cnode ucp_table[] = {
  { 0x09802146, 0x14000003 },
  { 0x0900214a, 0x68000000 },
  { 0x0900214b, 0x64000000 },
-  { 0x0900214c, 0x68000000 },
+  { 0x0980214c, 0x68000001 },
+  { 0x2100214e, 0x1400ffe4 },
  { 0x09802153, 0x3c00000c },
  { 0x09002160, 0x38000010 },
  { 0x09002161, 0x38000010 },
@@ -2042,7 +2107,9 @@ static cnode ucp_table[] = {
  { 0x0900217d, 0x3800fff0 },
  { 0x0900217e, 0x3800fff0 },
  { 0x0900217f, 0x3800fff0 },
-  { 0x09802180, 0x38000003 },
+  { 0x09802180, 0x38000002 },
+  { 0x09002183, 0x24000001 },
+  { 0x21002184, 0x1400ffff },
  { 0x09802190, 0x64000004 },
  { 0x09802195, 0x68000004 },
  { 0x0980219a, 0x64000001 },
@@ -2073,10 +2140,9 @@ static cnode ucp_table[] = {
  { 0x0900237c, 0x64000000 },
  { 0x0980237d, 0x6800001d },
  { 0x0980239b, 0x64000018 },
-  { 0x090023b4, 0x58000000 },
-  { 0x090023b5, 0x48000000 },
-  { 0x090023b6, 0x54000000 },
-  { 0x098023b7, 0x68000024 },
+  { 0x098023b4, 0x68000027 },
+  { 0x098023dc, 0x64000005 },
+  { 0x098023e2, 0x68000005 },
  { 0x09802400, 0x68000026 },
  { 0x09802440, 0x6800000a },
  { 0x09802460, 0x3c00003b },
@@ -2143,7 +2209,7 @@ static cnode ucp_table[] = {
  { 0x09802600, 0x6800006e },
  { 0x0900266f, 0x64000000 },
  { 0x09802670, 0x6800002c },
-  { 0x098026a0, 0x68000011 },
+  { 0x098026a0, 0x68000012 },
  { 0x09802701, 0x68000003 },
  { 0x09802706, 0x68000003 },
  { 0x0980270c, 0x6800001b },
@@ -2174,6 +2240,7 @@ static cnode ucp_table[] = {
  { 0x098027c0, 0x64000004 },
  { 0x090027c5, 0x58000000 },
  { 0x090027c6, 0x48000000 },
+  { 0x098027c7, 0x64000003 },
  { 0x098027d0, 0x64000015 },
  { 0x090027e6, 0x58000000 },
  { 0x090027e7, 0x48000000 },
@@ -2215,7 +2282,8 @@ static cnode ucp_table[] = {
  { 0x090029fc, 0x58000000 },
  { 0x090029fd, 0x48000000 },
  { 0x098029fe, 0x64000101 },
-  { 0x09802b00, 0x68000013 },
+  { 0x09802b00, 0x6800001a },
+  { 0x09802b20, 0x68000003 },
  { 0x11002c00, 0x24000030 },
  { 0x11002c01, 0x24000030 },
  { 0x11002c02, 0x24000030 },
@@ -2310,6 +2378,23 @@ static cnode ucp_table[] = {
  { 0x11002c5c, 0x1400ffd0 },
  { 0x11002c5d, 0x1400ffd0 },
  { 0x11002c5e, 0x1400ffd0 },
+  { 0x21002c60, 0x24000001 },
+  { 0x21002c61, 0x1400ffff },
+  { 0x21002c62, 0x2400d609 },
+  { 0x21002c63, 0x2400f11a },
+  { 0x21002c64, 0x2400d619 },
+  { 0x21002c65, 0x1400d5d5 },
+  { 0x21002c66, 0x1400d5d8 },
+  { 0x21002c67, 0x24000001 },
+  { 0x21002c68, 0x1400ffff },
+  { 0x21002c69, 0x24000001 },
+  { 0x21002c6a, 0x1400ffff },
+  { 0x21002c6b, 0x24000001 },
+  { 0x21002c6c, 0x1400ffff },
+  { 0x21002c74, 0x14000000 },
+  { 0x21002c75, 0x24000001 },
+  { 0x21002c76, 0x1400ffff },
+  { 0x21002c77, 0x14000000 },
  { 0x0a002c80, 0x24000001 },
  { 0x0a002c81, 0x1400ffff },
  { 0x0a002c82, 0x24000001 },
@@ -2559,6 +2644,8 @@ static cnode ucp_table[] = {
  { 0x3c80a016, 0x1c000476 },
  { 0x3c80a490, 0x68000036 },
  { 0x0980a700, 0x60000016 },
+  { 0x0980a717, 0x18000003 },
+  { 0x0980a720, 0x60000001 },
  { 0x3080a800, 0x1c000001 },
  { 0x3000a802, 0x28000000 },
  { 0x3080a803, 0x1c000002 },
@@ -2570,6 +2657,8 @@ static cnode ucp_table[] = {
  { 0x3080a825, 0x30000001 },
  { 0x3000a827, 0x28000000 },
  { 0x3080a828, 0x68000003 },
+  { 0x4080a840, 0x1c000033 },
+  { 0x4080a874, 0x54000003 },
  { 0x1780ac00, 0x1c002ba3 },
  { 0x0980d800, 0x1000037f },
  { 0x0980db80, 0x1000007f },
@@ -2765,13 +2854,15 @@ static cnode ucp_table[] = {
  { 0x1301018a, 0x3c000000 },
  { 0x29810300, 0x1c00001e },
  { 0x29810320, 0x3c000003 },
-  { 0x12810330, 0x1c000019 },
+  { 0x12810330, 0x1c000010 },
+  { 0x12010341, 0x38000000 },
+  { 0x12810342, 0x1c000007 },
  { 0x1201034a, 0x38000000 },
  { 0x3b810380, 0x1c00001d },
  { 0x3b01039f, 0x54000000 },
  { 0x2a8103a0, 0x1c000023 },
  { 0x2a8103c8, 0x1c000007 },
-  { 0x2a0103d0, 0x68000000 },
+  { 0x2a0103d0, 0x54000000 },
  { 0x2a8103d1, 0x38000004 },
  { 0x0d010400, 0x24000028 },
  { 0x0d010401, 0x24000028 },
@@ -2861,6 +2952,9 @@ static cnode ucp_table[] = {
  { 0x0b810837, 0x1c000001 },
  { 0x0b01083c, 0x1c000000 },
  { 0x0b01083f, 0x1c000000 },
+  { 0x41810900, 0x1c000015 },
+  { 0x41810916, 0x3c000003 },
+  { 0x4101091f, 0x54000000 },
  { 0x1e010a00, 0x1c000000 },
  { 0x1e810a01, 0x30000002 },
  { 0x1e810a05, 0x30000001 },
@@ -2872,6 +2966,9 @@ static cnode ucp_table[] = {
  { 0x1e010a3f, 0x30000000 },
  { 0x1e810a40, 0x3c000007 },
  { 0x1e810a50, 0x54000008 },
+  { 0x3e812000, 0x1c00036e },
+  { 0x3e812400, 0x38000062 },
+  { 0x3e812470, 0x54000003 },
  { 0x0981d000, 0x680000f5 },
  { 0x0981d100, 0x68000026 },
  { 0x0981d12a, 0x6800003a },
@@ -2890,6 +2987,7 @@ static cnode ucp_table[] = {
  { 0x1381d242, 0x30000002 },
  { 0x1301d245, 0x68000000 },
  { 0x0981d300, 0x68000056 },
+  { 0x0981d360, 0x3c000011 },
  { 0x0981d400, 0x24000019 },
  { 0x0981d41a, 0x14000019 },
  { 0x0981d434, 0x24000019 },
@@ -2957,6 +3055,8 @@ static cnode ucp_table[] = {
  { 0x0981d7aa, 0x14000018 },
  { 0x0901d7c3, 0x64000000 },
  { 0x0981d7c4, 0x14000005 },
+  { 0x0901d7ca, 0x24000000 },
+  { 0x0901d7cb, 0x14000000 },
  { 0x0981d7ce, 0x34000031 },
  { 0x16820000, 0x1c00a6d6 },
  { 0x1682f800, 0x1c00021d },