mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
Patch core for PCRE2 support
RFC https://wiki.php.net/rfc/pcre2-migration
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -225,3 +225,4 @@ ext/sqlite3/tests/phpsql*
|
||||
!ext/fileinfo/libmagic.patch
|
||||
!ext/mbstring/oniguruma.patch
|
||||
!scripts/dev/generate-phpt/tests/*.php
|
||||
!ext/pcre/pcre2lib/config.h
|
||||
|
||||
@@ -471,11 +471,11 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep)
|
||||
pcre_cache_entry *pce;
|
||||
zend_string *res;
|
||||
zend_string *repl;
|
||||
int rep_cnt = 0;
|
||||
size_t rep_cnt = 0;
|
||||
|
||||
(void)setlocale(LC_CTYPE, "C");
|
||||
|
||||
opts |= PCRE_MULTILINE;
|
||||
opts |= PCRE2_MULTILINE;
|
||||
convert_libmagic_pattern(&patt, (char*)pat, strlen(pat), opts);
|
||||
if ((pce = pcre_get_compiled_regex_cache(Z_STR(patt))) == NULL) {
|
||||
zval_ptr_dtor(&patt);
|
||||
|
||||
@@ -412,9 +412,9 @@ flush:
|
||||
private int
|
||||
check_fmt(struct magic_set *ms, struct magic *m)
|
||||
{
|
||||
pcre *pce;
|
||||
int re_options, rv = -1;
|
||||
pcre_extra *re_extra;
|
||||
pcre2_code *pce;
|
||||
uint32_t re_options, capture_count;
|
||||
int rv = -1;
|
||||
zend_string *pattern;
|
||||
|
||||
if (strchr(m->desc, '%') == NULL)
|
||||
@@ -422,10 +422,14 @@ check_fmt(struct magic_set *ms, struct magic *m)
|
||||
|
||||
(void)setlocale(LC_CTYPE, "C");
|
||||
pattern = zend_string_init("~%[-0-9.]*s~", sizeof("~%[-0-9.]*s~") - 1, 0);
|
||||
if ((pce = pcre_get_compiled_regex(pattern, &re_extra, &re_options)) == NULL) {
|
||||
if ((pce = pcre_get_compiled_regex(pattern, &capture_count, &re_options)) == NULL) {
|
||||
rv = -1;
|
||||
} else {
|
||||
rv = !pcre_exec(pce, re_extra, m->desc, strlen(m->desc), 0, re_options, NULL, 0);
|
||||
pcre2_match_data *match_data = php_pcre_create_match_data(capture_count, pce);
|
||||
if (match_data) {
|
||||
rv = pcre2_match(pce, m->desc, strlen(m->desc), 0, re_options, match_data, php_pcre_mctx()) > 0;
|
||||
php_pcre_free_match_data(match_data);
|
||||
}
|
||||
}
|
||||
zend_string_release(pattern);
|
||||
(void)setlocale(LC_CTYPE, "");
|
||||
@@ -1725,10 +1729,10 @@ convert_libmagic_pattern(zval *pattern, char *val, int len, int options)
|
||||
}
|
||||
ZSTR_VAL(t)[j++] = '~';
|
||||
|
||||
if (options & PCRE_CASELESS)
|
||||
if (options & PCRE2_CASELESS)
|
||||
ZSTR_VAL(t)[j++] = 'i';
|
||||
|
||||
if (options & PCRE_MULTILINE)
|
||||
if (options & PCRE2_MULTILINE)
|
||||
ZSTR_VAL(t)[j++] = 'm';
|
||||
|
||||
ZSTR_VAL(t)[j]='\0';
|
||||
@@ -1901,10 +1905,10 @@ magiccheck(struct magic_set *ms, struct magic *m)
|
||||
int options = 0;
|
||||
pcre_cache_entry *pce;
|
||||
|
||||
options |= PCRE_MULTILINE;
|
||||
options |= PCRE2_MULTILINE;
|
||||
|
||||
if (m->str_flags & STRING_IGNORE_CASE) {
|
||||
options |= PCRE_CASELESS;
|
||||
options |= PCRE2_CASELESS;
|
||||
}
|
||||
|
||||
convert_libmagic_pattern(&pattern, (char *)m->value.s, m->vallen, options);
|
||||
|
||||
@@ -428,11 +428,10 @@ void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
|
||||
zval *option_val;
|
||||
zend_string *regexp;
|
||||
int regexp_set;
|
||||
pcre *re = NULL;
|
||||
pcre_extra *pcre_extra = NULL;
|
||||
int preg_options = 0;
|
||||
int ovector[3];
|
||||
int matches;
|
||||
pcre2_code *re = NULL;
|
||||
pcre2_match_data *match_data = NULL;
|
||||
uint32_t preg_options, capture_count;
|
||||
int rc;
|
||||
|
||||
/* Parse options */
|
||||
FETCH_STR_OPTION(regexp, "regexp");
|
||||
@@ -442,14 +441,19 @@ void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
|
||||
re = pcre_get_compiled_regex(regexp, &pcre_extra, &preg_options);
|
||||
re = pcre_get_compiled_regex(regexp, &capture_count, &preg_options);
|
||||
if (!re) {
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
|
||||
match_data = php_pcre_create_match_data(capture_count, re);
|
||||
if (!match_data) {
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
rc = pcre2_match(re, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, preg_options, match_data, php_pcre_mctx());
|
||||
php_pcre_free_match_data(match_data);
|
||||
|
||||
/* 0 means that the vector is too small to hold all the captured substring offsets */
|
||||
if (matches < 0) {
|
||||
if (rc < 0) {
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
}
|
||||
@@ -599,12 +603,11 @@ void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
|
||||
* Feel free to use and redistribute this code. But please keep this copyright notice.
|
||||
*
|
||||
*/
|
||||
pcre *re = NULL;
|
||||
pcre_extra *pcre_extra = NULL;
|
||||
int preg_options = 0;
|
||||
int ovector[150]; /* Needs to be a multiple of 3 */
|
||||
int matches;
|
||||
pcre2_code *re = NULL;
|
||||
pcre2_match_data *match_data = NULL;
|
||||
uint32_t preg_options = 0, capture_count;
|
||||
zend_string *sregexp;
|
||||
int rc;
|
||||
const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
|
||||
const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
|
||||
const char *regexp;
|
||||
@@ -624,16 +627,21 @@ void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
|
||||
}
|
||||
|
||||
sregexp = zend_string_init(regexp, regexp_len, 0);
|
||||
re = pcre_get_compiled_regex(sregexp, &pcre_extra, &preg_options);
|
||||
re = pcre_get_compiled_regex(sregexp, &capture_count, &preg_options);
|
||||
if (!re) {
|
||||
zend_string_release(sregexp);
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
zend_string_release(sregexp);
|
||||
matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
|
||||
match_data = php_pcre_create_match_data(capture_count, re);
|
||||
if (!match_data) {
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
rc = pcre2_match(re, Z_STRVAL_P(value), Z_STRLEN_P(value), 0, preg_options, match_data, php_pcre_mctx());
|
||||
php_pcre_free_match_data(match_data);
|
||||
|
||||
/* 0 means that the vector is too small to hold all the captured substring offsets */
|
||||
if (matches < 0) {
|
||||
if (rc < 0) {
|
||||
RETURN_VALIDATION_FAILED
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
#define ZEND_BLACKLIST_BLOCK_SIZE 32
|
||||
|
||||
struct _zend_regexp_list {
|
||||
pcre *re;
|
||||
pcre2_code *re;
|
||||
zend_regexp_list *next;
|
||||
};
|
||||
|
||||
@@ -73,10 +73,12 @@ static void blacklist_report_regexp_error(const char *pcre_error, int pcre_error
|
||||
|
||||
static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist)
|
||||
{
|
||||
const char *pcre_error;
|
||||
int i, pcre_error_offset;
|
||||
PCRE2_UCHAR pcre_error[256];
|
||||
int i, errnumber;
|
||||
PCRE2_SIZE pcre_error_offset;
|
||||
zend_regexp_list **regexp_list_it, *it;
|
||||
char regexp[12*1024], *p, *end, *c, *backtrack = NULL;
|
||||
pcre2_compile_context *cctx = php_pcre_cctx();
|
||||
|
||||
if (blacklist->pos == 0) {
|
||||
/* we have no blacklist to talk about */
|
||||
@@ -177,8 +179,9 @@ static void zend_accel_blacklist_update_regexp(zend_blacklist *blacklist)
|
||||
}
|
||||
it->next = NULL;
|
||||
|
||||
if ((it->re = pcre_compile(regexp, PCRE_NO_AUTO_CAPTURE, &pcre_error, &pcre_error_offset, 0)) == NULL) {
|
||||
if ((it->re = pcre2_compile(regexp, PCRE2_ZERO_TERMINATED, PCRE2_NO_AUTO_CAPTURE, &errnumber, &pcre_error_offset, cctx)) == NULL) {
|
||||
free(it);
|
||||
pcre2_get_error_message(errnumber, pcre_error, sizeof(pcre_error));
|
||||
blacklist_report_regexp_error(pcre_error, pcre_error_offset);
|
||||
return;
|
||||
}
|
||||
@@ -207,7 +210,7 @@ void zend_accel_blacklist_shutdown(zend_blacklist *blacklist)
|
||||
if (blacklist->regexp_list) {
|
||||
zend_regexp_list *temp, *it = blacklist->regexp_list;
|
||||
while (it) {
|
||||
pcre_free(it->re);
|
||||
pcre2_code_free(it->re);
|
||||
temp = it;
|
||||
it = it->next;
|
||||
free(temp);
|
||||
@@ -338,15 +341,24 @@ zend_bool zend_accel_blacklist_is_blacklisted(zend_blacklist *blacklist, char *v
|
||||
{
|
||||
int ret = 0;
|
||||
zend_regexp_list *regexp_list_it = blacklist->regexp_list;
|
||||
pcre2_match_context *mctx = php_pcre_mctx();
|
||||
|
||||
if (regexp_list_it == NULL) {
|
||||
return 0;
|
||||
}
|
||||
while (regexp_list_it != NULL) {
|
||||
if (pcre_exec(regexp_list_it->re, NULL, verify_path, strlen(verify_path), 0, 0, NULL, 0) >= 0) {
|
||||
pcre2_match_data *match_data = php_pcre_create_match_data(0, regexp_list_it->re);
|
||||
if (!match_data) {
|
||||
/* Alloc failed, but next one could still come through and match. */
|
||||
continue;
|
||||
}
|
||||
int rc = pcre2_match(regexp_list_it->re, verify_path, strlen(verify_path), 0, 0, match_data, mctx);
|
||||
if (rc >= 0) {
|
||||
ret = 1;
|
||||
php_pcre_free_match_data(match_data);
|
||||
break;
|
||||
}
|
||||
php_pcre_free_match_data(match_data);
|
||||
regexp_list_it = regexp_list_it->next;
|
||||
}
|
||||
return ret;
|
||||
|
||||
@@ -2,14 +2,16 @@
|
||||
// vim:ft=javascript
|
||||
|
||||
EXTENSION("pcre", "php_pcre.c", false /* never shared */,
|
||||
"-Iext/pcre/pcrelib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
|
||||
ADD_SOURCES("ext/pcre/pcrelib", "pcre_chartables.c pcre_ucd.c pcre_compile.c pcre_config.c pcre_exec.c pcre_fullinfo.c pcre_get.c pcre_globals.c pcre_maketables.c pcre_newline.c pcre_ord2utf8.c pcre_refcount.c pcre_study.c pcre_tables.c pcre_valid_utf8.c pcre_version.c pcre_xclass.c pcre_jit_compile.c", "pcre");
|
||||
"-Iext/pcre/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
|
||||
ADD_SOURCES("ext/pcre/pcre2lib", "pcre2_auto_possess.c pcre2_chartables.c pcre2_compile.c pcre2_config.c pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c pcre2_match_data.c pcre2_newline.c pcre2_ord2utf.c pcre2_pattern_info.c pcre2_serialize.c pcre2_string_utils.c pcre2_study.c pcre2_substitute.c pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c pcre2_xclass.c pcre2_find_bracket.c pcre2_convert.c ", "pcre");
|
||||
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
|
||||
|
||||
AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');
|
||||
AC_DEFINE('HAVE_PCRE', 1, 'Have PCRE library');
|
||||
AC_DEFINE('PCRE2_CODE_UNIT_WIDTH', 8, 'Have PCRE library');
|
||||
AC_DEFINE("PCRE2_STATIC", 1, "");
|
||||
PHP_PCRE="yes";
|
||||
PHP_INSTALL_HEADERS("ext/pcre", "php_pcre.h pcrelib/");
|
||||
PHP_INSTALL_HEADERS("ext/pcre", "php_pcre.h pcre2lib/");
|
||||
ADD_FLAG("CFLAGS_PCRE", " /D HAVE_CONFIG_H");
|
||||
|
||||
ARG_WITH("pcre-jit", "Enable PCRE JIT support", "yes");
|
||||
|
||||
@@ -14,66 +14,67 @@ PHP_ARG_WITH(pcre-jit,,[ --with-pcre-jit Enable PCRE JIT functionality
|
||||
if test "$PHP_PCRE_REGEX" != "yes" && test "$PHP_PCRE_REGEX" != "no"; then
|
||||
AC_MSG_CHECKING([for PCRE headers location])
|
||||
for i in $PHP_PCRE_REGEX $PHP_PCRE_REGEX/include $PHP_PCRE_REGEX/include/pcre $PHP_PCRE_REGEX/local/include; do
|
||||
test -f $i/pcre.h && PCRE_INCDIR=$i
|
||||
test -f $i/pcre2.h && PCRE_INCDIR=$i
|
||||
done
|
||||
|
||||
if test -z "$PCRE_INCDIR"; then
|
||||
AC_MSG_ERROR([Could not find pcre.h in $PHP_PCRE_REGEX])
|
||||
AC_MSG_ERROR([Could not find pcre2.h in $PHP_PCRE_REGEX])
|
||||
fi
|
||||
AC_MSG_RESULT([$PCRE_INCDIR])
|
||||
|
||||
AC_MSG_CHECKING([for PCRE library location])
|
||||
for j in $PHP_PCRE_REGEX $PHP_PCRE_REGEX/$PHP_LIBDIR; do
|
||||
test -f $j/libpcre.a || test -f $j/libpcre.$SHLIB_SUFFIX_NAME && PCRE_LIBDIR=$j
|
||||
test -f $j/libpcre2.a || test -f $j/libpcre2.$SHLIB_SUFFIX_NAME && PCRE_LIBDIR=$j
|
||||
done
|
||||
|
||||
if test -z "$PCRE_LIBDIR" ; then
|
||||
AC_MSG_ERROR([Could not find libpcre.(a|$SHLIB_SUFFIX_NAME) in $PHP_PCRE_REGEX])
|
||||
AC_MSG_ERROR([Could not find libpcre2.(a|$SHLIB_SUFFIX_NAME) in $PHP_PCRE_REGEX])
|
||||
fi
|
||||
AC_MSG_RESULT([$PCRE_LIBDIR])
|
||||
|
||||
changequote({,})
|
||||
pcre_major=`grep PCRE_MAJOR $PCRE_INCDIR/pcre.h | sed -e 's/[^0-9]//g'`
|
||||
pcre_minor=`grep PCRE_MINOR $PCRE_INCDIR/pcre.h | sed -e 's/[^0-9]//g'`
|
||||
pcre_major=`grep PCRE2_MAJOR $PCRE_INCDIR/pcre2.h | sed -e 's/[^0-9]//g'`
|
||||
pcre_minor=`grep PCRE2_MINOR $PCRE_INCDIR/pcre2.h | sed -e 's/[^0-9]//g'`
|
||||
changequote([,])
|
||||
pcre_minor_length=`echo "$pcre_minor" | wc -c | sed -e 's/[^0-9]//g'`
|
||||
if test "$pcre_minor_length" -eq 2 ; then
|
||||
pcre_minor="$pcre_minor"0
|
||||
fi
|
||||
pcre_version=$pcre_major$pcre_minor
|
||||
if test "$pcre_version" -lt 660; then
|
||||
AC_MSG_ERROR([The PCRE extension requires PCRE library version >= 6.6])
|
||||
if test "$pcre_version" -lt 1030; then
|
||||
AC_MSG_ERROR([The PCRE extension requires PCRE library version >= 10.30])
|
||||
fi
|
||||
|
||||
PHP_CHECK_LIBRARY(pcre, pcre_jit_exec,
|
||||
PHP_CHECK_LIBRARY(pcre2, pcre2_jit_exec,
|
||||
[
|
||||
AC_DEFINE(HAVE_PCRE_JIT_SUPPORT, 1, [ ])
|
||||
],[
|
||||
],[
|
||||
-L$PCRE_LIBDIR
|
||||
])
|
||||
PHP_ADD_LIBRARY_WITH_PATH(pcre, $PCRE_LIBDIR)
|
||||
PHP_ADD_LIBRARY_WITH_PATH(pcre2, $PCRE_LIBDIR)
|
||||
|
||||
AC_DEFINE(HAVE_PCRE, 1, [ ])
|
||||
AC_DEFINE(PCRE2_CODE_UNIT_WIDTH, 8, [ ])
|
||||
PHP_ADD_INCLUDE($PCRE_INCDIR)
|
||||
PHP_NEW_EXTENSION(pcre, php_pcre.c, no,, -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h])
|
||||
else
|
||||
AC_MSG_CHECKING([for PCRE library to use])
|
||||
AC_MSG_RESULT([bundled])
|
||||
pcrelib_sources="pcrelib/pcre_chartables.c pcrelib/pcre_ucd.c \
|
||||
pcrelib/pcre_compile.c pcrelib/pcre_config.c pcrelib/pcre_exec.c \
|
||||
pcrelib/pcre_fullinfo.c pcrelib/pcre_get.c pcrelib/pcre_globals.c \
|
||||
pcrelib/pcre_maketables.c pcrelib/pcre_newline.c \
|
||||
pcrelib/pcre_ord2utf8.c pcrelib/pcre_refcount.c pcrelib/pcre_study.c \
|
||||
pcrelib/pcre_tables.c pcrelib/pcre_valid_utf8.c \
|
||||
pcrelib/pcre_version.c pcrelib/pcre_xclass.c \
|
||||
pcrelib/pcre_jit_compile.c"
|
||||
PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcrelib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
|
||||
pcrelib_sources="pcre2lib/pcre2_auto_possess.c pcre2lib/pcre2_chartables.c pcre2lib/pcre2_compile.c \
|
||||
pcre2lib/pcre2_config.c pcre2lib/pcre2_context.c pcre2lib/pcre2_dfa_match.c pcre2lib/pcre2_error.c \
|
||||
pcre2lib/pcre2_jit_compile.c pcre2lib/pcre2_maketables.c pcre2lib/pcre2_match.c pcre2lib/pcre2_match_data.c \
|
||||
pcre2lib/pcre2_newline.c pcre2lib/pcre2_ord2utf.c pcre2lib/pcre2_pattern_info.c pcre2lib/pcre2_serialize.c \
|
||||
pcre2lib/pcre2_string_utils.c pcre2lib/pcre2_study.c pcre2lib/pcre2_substitute.c pcre2lib/pcre2_substring.c \
|
||||
pcre2lib/pcre2_tables.c pcre2lib/pcre2_ucd.c pcre2lib/pcre2_valid_utf.c pcre2lib/pcre2_xclass.c \
|
||||
pcre2lib/pcre2_find_bracket.c pcre2lib/pcre2_convert.c"
|
||||
PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
|
||||
PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcrelib)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcrelib/])
|
||||
PHP_ADD_BUILD_DIR($ext_builddir/pcre2lib)
|
||||
PHP_INSTALL_HEADERS([ext/pcre], [php_pcre.h pcre2lib/])
|
||||
AC_DEFINE(HAVE_BUNDLED_PCRE, 1, [ ])
|
||||
AC_DEFINE(PCRE2_CODE_UNIT_WIDTH, 8, [ ])
|
||||
|
||||
if test "$PHP_PCRE_REGEX" != "no"; then
|
||||
AC_MSG_CHECKING([whether to enable PCRE JIT functionality])
|
||||
|
||||
105
ext/pcre/pcre2lib/config.h
Normal file
105
ext/pcre/pcre2lib/config.h
Normal file
@@ -0,0 +1,105 @@
|
||||
|
||||
#include <php_compat.h>
|
||||
|
||||
#ifdef PHP_WIN32
|
||||
# include <config.w32.h>
|
||||
#else
|
||||
# include <php_config.h>
|
||||
#endif
|
||||
|
||||
#undef PACKAGE_NAME
|
||||
#undef PACKAGE_VERSION
|
||||
#undef PACKAGE_TARNAME
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
#define SUPPORT_UNICODE 1
|
||||
#define SUPPORT_PCRE2_8 1
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C" __attribute__ ((visibility("default")))
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern __attribute__ ((visibility("default")))
|
||||
# endif
|
||||
# define PCRE2_EXP_DEFN __attribute__ ((visibility("default")))
|
||||
#endif
|
||||
|
||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
#if HAVE_PCRE_VALGRIND_SUPPORT
|
||||
#define SUPPORT_VALGRIND 1
|
||||
#endif
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#if HAVE_PCRE_JIT_SUPPORT
|
||||
#define SUPPORT_JIT
|
||||
#endif
|
||||
|
||||
/* This limits the amount of memory that pcre2_match() may use while matching
|
||||
a pattern. The value is in kilobytes. */
|
||||
#ifndef HEAP_LIMIT
|
||||
#define HEAP_LIMIT 20000000
|
||||
#endif
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take for ever to determine that they do not match. The default is set very
|
||||
large so that it does not accidentally catch legitimate cases. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all backtracks, whether or not they are nested.
|
||||
In some environments it is desirable to limit the nesting of backtracking
|
||||
(that is, the depth of tree that is searched) more strictly, in order to
|
||||
restrict the maximum amount of heap memory that is used. The value of
|
||||
MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
|
||||
must be less than the value of MATCH_LIMIT. The default is to use the same
|
||||
value as MATCH_LIMIT. There is a runtime method for setting a different
|
||||
limit. */
|
||||
#ifndef MATCH_LIMIT_DEPTH
|
||||
#define MATCH_LIMIT_DEPTH MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* Defining NEVER_BACKSLASH_C locks out the use of \C in all patterns. */
|
||||
/* #undef NEVER_BACKSLASH_C */
|
||||
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), 5
|
||||
(ANYCRLF), and 6 (NUL). */
|
||||
#ifndef NEWLINE_DEFAULT
|
||||
#define NEWLINE_DEFAULT 2
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,9 +40,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE. The tables are built according to the current
|
||||
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||
make use of its code from here in order to be consistent. */
|
||||
character tables for PCRE2. The tables are built according to the current
|
||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
@@ -52,11 +53,11 @@ make use of its code from here in order to be consistent. */
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#define DFTABLES /* pcre_maketables.c notices this */
|
||||
#include "pcre_maketables.c"
|
||||
#define PCRE2_CODE_UNIT_WIDTH 0 /* Must be set, but not relevant here */
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define DFTABLES /* pcre2_maketables.c notices this */
|
||||
#include "pcre2_maketables.c"
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
@@ -81,7 +82,7 @@ if (argc < i + 1)
|
||||
return 1;
|
||||
}
|
||||
|
||||
tables = pcre_maketables();
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
||||
f = fopen(argv[i], "wb");
|
||||
@@ -100,15 +101,8 @@ fprintf(f,
|
||||
"*************************************************/\n\n"
|
||||
"/* This file was automatically written by the dftables auxiliary\n"
|
||||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256.\n\n");
|
||||
fprintf(f,
|
||||
"The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n");
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256. */\n\n");
|
||||
|
||||
/* Force config.h in z/OS */
|
||||
|
||||
@@ -120,14 +114,22 @@ fprintf(f,
|
||||
"#endif\n\n");
|
||||
#endif
|
||||
|
||||
fprintf(f,
|
||||
"/* The following #includes are present because without them gcc 4.x may remove\n"
|
||||
"the array definition from the final binary if PCRE2 is built into a static\n"
|
||||
"library and dead code stripping is activated. This leads to link errors.\n"
|
||||
"Pulling in the header ensures that the array gets flagged as \"someone\n"
|
||||
"outside this compilation unit might reference this\" and so it will always\n"
|
||||
"be supplied to the linker. */\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"#ifdef HAVE_CONFIG_H\n"
|
||||
"#include \"config.h\"\n"
|
||||
"#endif\n\n"
|
||||
"#include \"pcre_internal.h\"\n\n");
|
||||
"#include \"pcre2_internal.h\"\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"const pcre_uint8 PRIV(default_tables)[] = {\n\n"
|
||||
"const uint8_t PRIV(default_tables)[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
@@ -202,7 +204,7 @@ if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of pcre_chartables.c */\n");
|
||||
fprintf(f, " */\n\n/* End of pcre2_chartables.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
850
ext/pcre/pcre2lib/pcre2.h
Normal file
850
ext/pcre/pcre2lib/pcre2.h
Normal file
@@ -0,0 +1,850 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE2_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 30
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2017-08-14
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
export setting is defined in pcre2_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE2_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE2_STATIC)
|
||||
# ifndef PCRE2_EXP_DECL
|
||||
# define PCRE2_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE2_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE2_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE2_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order so make this easy, all the exported functions have
|
||||
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE2_CALL_CONVENTION
|
||||
#define PCRE2_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
|
||||
uint8_t, UCHAR_MAX, etc are defined. */
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Allow for C++ users compiling this directly. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
#define PCRE2_ENDANCHORED 0x20000000u
|
||||
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
#define PCRE2_ALLOW_EMPTY_CLASS 0x00000001u /* C */
|
||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_NO_DOTSTAR_ANCHOR 0x00008000u /* C */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00010000u /* J M D */
|
||||
#define PCRE2_UCP 0x00020000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00040000u /* C */
|
||||
#define PCRE2_UTF 0x00080000u /* C J M D */
|
||||
#define PCRE2_NEVER_BACKSLASH_C 0x00100000u /* C */
|
||||
#define PCRE2_ALT_CIRCUMFLEX 0x00200000u /* J M D */
|
||||
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
|
||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||
#define PCRE2_LITERAL 0x02000000u /* C */
|
||||
|
||||
/* An additional compile options word is available in the compile context. */
|
||||
|
||||
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
|
||||
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
functions (though pcre2_jit_match() ignores the latter since it bypasses all
|
||||
sanity checks). */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
|
||||
/* These are additional options for pcre2_dfa_match(). */
|
||||
|
||||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* These are additional options for pcre2_substitute(), which passes any others
|
||||
through to pcre2_match(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
|
||||
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
|
||||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
|
||||
|
||||
/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
|
||||
ignored for pcre2_jit_match(). */
|
||||
|
||||
#define PCRE2_NO_JIT 0x00002000u
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_CONVERT_UTF 0x00000001u
|
||||
#define PCRE2_CONVERT_NO_UTF_CHECK 0x00000002u
|
||||
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
|
||||
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
|
||||
#define PCRE2_CONVERT_GLOB 0x00000010u
|
||||
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000030u
|
||||
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000050u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
#define PCRE2_NEWLINE_CRLF 3
|
||||
#define PCRE2_NEWLINE_ANY 4
|
||||
#define PCRE2_NEWLINE_ANYCRLF 5
|
||||
#define PCRE2_NEWLINE_NUL 6
|
||||
|
||||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
||||
/* Error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF8_ERR1 (-3)
|
||||
#define PCRE2_ERROR_UTF8_ERR2 (-4)
|
||||
#define PCRE2_ERROR_UTF8_ERR3 (-5)
|
||||
#define PCRE2_ERROR_UTF8_ERR4 (-6)
|
||||
#define PCRE2_ERROR_UTF8_ERR5 (-7)
|
||||
#define PCRE2_ERROR_UTF8_ERR6 (-8)
|
||||
#define PCRE2_ERROR_UTF8_ERR7 (-9)
|
||||
#define PCRE2_ERROR_UTF8_ERR8 (-10)
|
||||
#define PCRE2_ERROR_UTF8_ERR9 (-11)
|
||||
#define PCRE2_ERROR_UTF8_ERR10 (-12)
|
||||
#define PCRE2_ERROR_UTF8_ERR11 (-13)
|
||||
#define PCRE2_ERROR_UTF8_ERR12 (-14)
|
||||
#define PCRE2_ERROR_UTF8_ERR13 (-15)
|
||||
#define PCRE2_ERROR_UTF8_ERR14 (-16)
|
||||
#define PCRE2_ERROR_UTF8_ERR15 (-17)
|
||||
#define PCRE2_ERROR_UTF8_ERR16 (-18)
|
||||
#define PCRE2_ERROR_UTF8_ERR17 (-19)
|
||||
#define PCRE2_ERROR_UTF8_ERR18 (-20)
|
||||
#define PCRE2_ERROR_UTF8_ERR19 (-21)
|
||||
#define PCRE2_ERROR_UTF8_ERR20 (-22)
|
||||
#define PCRE2_ERROR_UTF8_ERR21 (-23)
|
||||
|
||||
/* Error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF16_ERR1 (-24)
|
||||
#define PCRE2_ERROR_UTF16_ERR2 (-25)
|
||||
#define PCRE2_ERROR_UTF16_ERR3 (-26)
|
||||
|
||||
/* Error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
|
||||
functions, and serializing functions. They are in numerical order. Originally
|
||||
they were in alphabetical order too, but now that PCRE2 is released, the
|
||||
numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_DEPTHLIMIT (-53)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53) /* Obsolete synonym */
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
|
||||
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
#define PCRE2_INFO_ALLOPTIONS 0
|
||||
#define PCRE2_INFO_ARGOPTIONS 1
|
||||
#define PCRE2_INFO_BACKREFMAX 2
|
||||
#define PCRE2_INFO_BSR 3
|
||||
#define PCRE2_INFO_CAPTURECOUNT 4
|
||||
#define PCRE2_INFO_FIRSTCODEUNIT 5
|
||||
#define PCRE2_INFO_FIRSTCODETYPE 6
|
||||
#define PCRE2_INFO_FIRSTBITMAP 7
|
||||
#define PCRE2_INFO_HASCRORLF 8
|
||||
#define PCRE2_INFO_JCHANGED 9
|
||||
#define PCRE2_INFO_JITSIZE 10
|
||||
#define PCRE2_INFO_LASTCODEUNIT 11
|
||||
#define PCRE2_INFO_LASTCODETYPE 12
|
||||
#define PCRE2_INFO_MATCHEMPTY 13
|
||||
#define PCRE2_INFO_MATCHLIMIT 14
|
||||
#define PCRE2_INFO_MAXLOOKBEHIND 15
|
||||
#define PCRE2_INFO_MINLENGTH 16
|
||||
#define PCRE2_INFO_NAMECOUNT 17
|
||||
#define PCRE2_INFO_NAMEENTRYSIZE 18
|
||||
#define PCRE2_INFO_NAMETABLE 19
|
||||
#define PCRE2_INFO_NEWLINE 20
|
||||
#define PCRE2_INFO_DEPTHLIMIT 21
|
||||
#define PCRE2_INFO_RECURSIONLIMIT 21 /* Obsolete synonym */
|
||||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
#define PCRE2_CONFIG_BSR 0
|
||||
#define PCRE2_CONFIG_JIT 1
|
||||
#define PCRE2_CONFIG_JITTARGET 2
|
||||
#define PCRE2_CONFIG_LINKSIZE 3
|
||||
#define PCRE2_CONFIG_MATCHLIMIT 4
|
||||
#define PCRE2_CONFIG_NEWLINE 5
|
||||
#define PCRE2_CONFIG_PARENSLIMIT 6
|
||||
#define PCRE2_CONFIG_DEPTHLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 7 /* Obsolete synonym */
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8 /* Obsolete */
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
typedef uint8_t PCRE2_UCHAR8;
|
||||
typedef uint16_t PCRE2_UCHAR16;
|
||||
typedef uint32_t PCRE2_UCHAR32;
|
||||
|
||||
typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_SIZE_MAX SIZE_MAX
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
||||
#define PCRE2_TYPES_LIST \
|
||||
struct pcre2_real_general_context; \
|
||||
typedef struct pcre2_real_general_context pcre2_general_context; \
|
||||
\
|
||||
struct pcre2_real_compile_context; \
|
||||
typedef struct pcre2_real_compile_context pcre2_compile_context; \
|
||||
\
|
||||
struct pcre2_real_match_context; \
|
||||
typedef struct pcre2_real_match_context pcre2_match_context; \
|
||||
\
|
||||
struct pcre2_real_convert_context; \
|
||||
typedef struct pcre2_real_convert_context pcre2_convert_context; \
|
||||
\
|
||||
struct pcre2_real_code; \
|
||||
typedef struct pcre2_real_code pcre2_code; \
|
||||
\
|
||||
struct pcre2_real_match_data; \
|
||||
typedef struct pcre2_real_match_data pcre2_match_data; \
|
||||
\
|
||||
struct pcre2_real_jit_stack; \
|
||||
typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||
\
|
||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. Define the generic version in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block; \
|
||||
\
|
||||
typedef struct pcre2_callout_enumerate_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
expanded for each width below. Start with functions that give general
|
||||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
||||
#define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
#define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_compile_context_copy(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
|
||||
int (*)(uint32_t, void *), void *);
|
||||
|
||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_context_copy(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_offset_limit(pcre2_match_context *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *, \
|
||||
void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
|
||||
|
||||
#define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_convert_context_copy(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
|
||||
*pcre2_convert_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_convert_context_free(pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
|
||||
|
||||
|
||||
/* Functions concerned with compiling a pattern to PCRE internal code. */
|
||||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
|
||||
pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_info(const pcre2_code *, uint32_t, void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_callout_enumerate(const pcre2_code *, \
|
||||
int (*)(pcre2_callout_enumerate_block *, void *), void *);
|
||||
|
||||
|
||||
/* Functions for running a match and inspecting the result. */
|
||||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_data_create(uint32_t, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
|
||||
*pcre2_match_data_create_from_pattern(const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, int *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
*pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_byname(pcre2_match_data *, PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *, uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_nametable_scan(const pcre2_code *, PCRE2_SPTR, PCRE2_SPTR *, \
|
||||
PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_number_from_name(const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_free(PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
/* Functions for serializing / deserializing compiled patterns. */
|
||||
|
||||
#define PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_encode(const pcre2_code **, int32_t, uint8_t **, \
|
||||
PCRE2_SIZE *, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_decode(pcre2_code **, int32_t, const uint8_t *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int32_t PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_serialize_free(uint8_t *);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_substitute(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for converting pattern source strings. */
|
||||
|
||||
#define PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_pattern_convert(PCRE2_SPTR, PCRE2_SIZE, uint32_t, PCRE2_UCHAR **, \
|
||||
PCRE2_SIZE *, pcre2_convert_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_converted_pattern_free(PCRE2_UCHAR *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
|
||||
uint32_t, pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
|
||||
*pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
||||
/* Other miscellaneous functions. */
|
||||
|
||||
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
|
||||
*pcre2_maketables(pcre2_general_context *); \
|
||||
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
them to be. First we get the width from PCRE2_LOCAL_WIDTH, which is used for
|
||||
generating three versions of everything below. After that, PCRE2_SUFFIX will be
|
||||
re-defined to use PCRE2_CODE_UNIT_WIDTH, for use when macros such as
|
||||
pcre2_compile are called by application code. */
|
||||
|
||||
#define PCRE2_JOIN(a,b) a ## b
|
||||
#define PCRE2_GLUE(a,b) PCRE2_JOIN(a,b)
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a,PCRE2_LOCAL_WIDTH)
|
||||
|
||||
|
||||
/* Data types */
|
||||
|
||||
#define PCRE2_UCHAR PCRE2_SUFFIX(PCRE2_UCHAR)
|
||||
#define PCRE2_SPTR PCRE2_SUFFIX(PCRE2_SPTR)
|
||||
|
||||
#define pcre2_code PCRE2_SUFFIX(pcre2_code_)
|
||||
#define pcre2_jit_callback PCRE2_SUFFIX(pcre2_jit_callback_)
|
||||
#define pcre2_jit_stack PCRE2_SUFFIX(pcre2_jit_stack_)
|
||||
|
||||
#define pcre2_real_code PCRE2_SUFFIX(pcre2_real_code_)
|
||||
#define pcre2_real_general_context PCRE2_SUFFIX(pcre2_real_general_context_)
|
||||
#define pcre2_real_compile_context PCRE2_SUFFIX(pcre2_real_compile_context_)
|
||||
#define pcre2_real_convert_context PCRE2_SUFFIX(pcre2_real_convert_context_)
|
||||
#define pcre2_real_match_context PCRE2_SUFFIX(pcre2_real_match_context_)
|
||||
#define pcre2_real_jit_stack PCRE2_SUFFIX(pcre2_real_jit_stack_)
|
||||
#define pcre2_real_match_data PCRE2_SUFFIX(pcre2_real_match_data_)
|
||||
|
||||
|
||||
/* Data blocks */
|
||||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||
#define pcre2_match_data PCRE2_SUFFIX(pcre2_match_data_)
|
||||
|
||||
|
||||
/* Functions: the complete list in alphabetical order */
|
||||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
#define pcre2_compile_context_create PCRE2_SUFFIX(pcre2_compile_context_create_)
|
||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||
#define pcre2_convert_context_copy PCRE2_SUFFIX(pcre2_convert_context_copy_)
|
||||
#define pcre2_convert_context_create PCRE2_SUFFIX(pcre2_convert_context_create_)
|
||||
#define pcre2_convert_context_free PCRE2_SUFFIX(pcre2_convert_context_free_)
|
||||
#define pcre2_converted_pattern_free PCRE2_SUFFIX(pcre2_converted_pattern_free_)
|
||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||
#define pcre2_match_data_create PCRE2_SUFFIX(pcre2_match_data_create_)
|
||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_convert PCRE2_SUFFIX(pcre2_pattern_convert_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_serialize_decode PCRE2_SUFFIX(pcre2_serialize_decode_)
|
||||
#define pcre2_serialize_encode PCRE2_SUFFIX(pcre2_serialize_encode_)
|
||||
#define pcre2_serialize_free PCRE2_SUFFIX(pcre2_serialize_free_)
|
||||
#define pcre2_serialize_get_number_of_codes PCRE2_SUFFIX(pcre2_serialize_get_number_of_codes_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
|
||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
#define pcre2_substring_get_byname PCRE2_SUFFIX(pcre2_substring_get_byname_)
|
||||
#define pcre2_substring_get_bynumber PCRE2_SUFFIX(pcre2_substring_get_bynumber_)
|
||||
#define pcre2_substring_length_byname PCRE2_SUFFIX(pcre2_substring_length_byname_)
|
||||
#define pcre2_substring_length_bynumber PCRE2_SUFFIX(pcre2_substring_length_bynumber_)
|
||||
#define pcre2_substring_list_get PCRE2_SUFFIX(pcre2_substring_list_get_)
|
||||
#define pcre2_substring_list_free PCRE2_SUFFIX(pcre2_substring_list_free_)
|
||||
#define pcre2_substring_nametable_scan PCRE2_SUFFIX(pcre2_substring_nametable_scan_)
|
||||
#define pcre2_substring_number_from_name PCRE2_SUFFIX(pcre2_substring_number_from_name_)
|
||||
|
||||
/* Keep this old function name for backwards compatibility */
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
|
||||
/* Keep this obsolete function for backwards compatibility: it is now a noop. */
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
||||
/* Now generate all three sets of width-specific structures and function
|
||||
prototypes. */
|
||||
|
||||
#define PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS \
|
||||
PCRE2_TYPES_LIST \
|
||||
PCRE2_STRUCTURE_LIST \
|
||||
PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_GENERAL_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_CONTEXT_FUNCTIONS \
|
||||
PCRE2_CONVERT_FUNCTIONS \
|
||||
PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_SERIALIZE_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 8
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 16
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
#define PCRE2_LOCAL_WIDTH 32
|
||||
PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
#undef PCRE2_LOCAL_WIDTH
|
||||
|
||||
/* Undefine the list macros; they are no longer needed. */
|
||||
|
||||
#undef PCRE2_TYPES_LIST
|
||||
#undef PCRE2_STRUCTURE_LIST
|
||||
#undef PCRE2_GENERAL_INFO_FUNCTIONS
|
||||
#undef PCRE2_GENERAL_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_CONVERT_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_MATCH_CONTEXT_FUNCTIONS
|
||||
#undef PCRE2_COMPILE_FUNCTIONS
|
||||
#undef PCRE2_PATTERN_INFO_FUNCTIONS
|
||||
#undef PCRE2_MATCH_FUNCTIONS
|
||||
#undef PCRE2_SUBSTRING_FUNCTIONS
|
||||
#undef PCRE2_SERIALIZE_FUNCTIONS
|
||||
#undef PCRE2_SUBSTITUTE_FUNCTION
|
||||
#undef PCRE2_JIT_FUNCTIONS
|
||||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of pcre2.h */
|
||||
1291
ext/pcre/pcre2lib/pcre2_auto_possess.c
Normal file
1291
ext/pcre/pcre2lib/pcre2_auto_possess.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,28 +3,30 @@
|
||||
*************************************************/
|
||||
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
are passed to PCRE2 by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
called dftables (which is distributed with PCRE2) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
array definition from the final binary if PCRE2 is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
const pcre_uint8 PRIV(default_tables)[] = {
|
||||
const uint8_t PRIV(default_tables)[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
@@ -193,4 +195,4 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of pcre_chartables.c */
|
||||
/* End of pcre2_chartables.c */
|
||||
9786
ext/pcre/pcre2lib/pcre2_compile.c
Normal file
9786
ext/pcre/pcre2lib/pcre2_compile.c
Normal file
File diff suppressed because it is too large
Load Diff
222
ext/pcre/pcre2lib/pcre2_config.c
Normal file
222
ext/pcre/pcre2lib/pcre2_config.c
Normal file
@@ -0,0 +1,222 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_internal.h to be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE2_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test their values. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* If where is NULL, the length of memory required is returned.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if a numerical value is returned
|
||||
>= 0 if a string value
|
||||
PCRE2_ERROR_BADOPTION if "where" not recognized
|
||||
or JIT target requested when JIT not enabled
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_config(uint32_t what, void *where)
|
||||
{
|
||||
if (where == NULL) /* Requests a length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_STACKRECURSE: /* Obsolete */
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
/* These are handled below */
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((uint32_t *)where) = PCRE2_BSR_ANYCRLF;
|
||||
#else
|
||||
*((uint32_t *)where) = PCRE2_BSR_UNICODE;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
*((uint32_t *)where) = HEAP_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
{
|
||||
const char *v = PRIV(jit_get_target)();
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
#else
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
#endif
|
||||
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
*((uint32_t *)where) = (uint32_t)configured_link_size;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = MATCH_LIMIT_DEPTH;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
*((uint32_t *)where) = NEWLINE_DEFAULT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
*((uint32_t *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
/* This is now obsolete. The stack is no longer used via recursion for
|
||||
handling backtracking in pcre2_match(). */
|
||||
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
*((uint32_t *)where) = 0;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
const char *v = "Unicode not supported";
|
||||
#endif
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
#if defined SUPPORT_UNICODE
|
||||
*((uint32_t *)where) = 1;
|
||||
#else
|
||||
*((uint32_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
before the date. On the other hand, if all four macros are put into a single
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
There are problems using an "obvious" approach like this:
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
|
||||
|
||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||
of STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what
|
||||
we really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
resort to a runtime test. */
|
||||
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
{
|
||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||
return (int)(1 + ((where == NULL)?
|
||||
strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v)));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_config.c */
|
||||
476
ext/pcre/pcre2lib/pcre2_context.c
Normal file
476
ext/pcre/pcre2lib/pcre2_context.c
Normal file
@@ -0,0 +1,476 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Default malloc/free functions *
|
||||
*************************************************/
|
||||
|
||||
/* Ignore the "user data" argument in each case. */
|
||||
|
||||
static void *default_malloc(size_t size, void *data)
|
||||
{
|
||||
(void)data;
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
|
||||
static void default_free(void *block, void *data)
|
||||
{
|
||||
(void)data;
|
||||
free(block);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get a block and save memory control *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
memory control data is to be stored at the start for future use.
|
||||
|
||||
Arguments:
|
||||
size amount of memory required
|
||||
memctl pointer to a memctl block or NULL
|
||||
|
||||
Returns: pointer to memory or NULL on failure
|
||||
*/
|
||||
|
||||
extern void *
|
||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||
{
|
||||
pcre2_memctl *newmemctl;
|
||||
void *yield = (memctl == NULL)? malloc(size) :
|
||||
memctl->malloc(size, memctl->memory_data);
|
||||
if (yield == NULL) return NULL;
|
||||
newmemctl = (pcre2_memctl *)yield;
|
||||
if (memctl == NULL)
|
||||
{
|
||||
newmemctl->malloc = default_malloc;
|
||||
newmemctl->free = default_free;
|
||||
newmemctl->memory_data = NULL;
|
||||
}
|
||||
else *newmemctl = *memctl;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create and initialize contexts *
|
||||
*************************************************/
|
||||
|
||||
/* Initializing for compile and match contexts is done in separate, private
|
||||
functions so that these can be called from functions such as pcre2_compile()
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
option to set up default memory management. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
|
||||
void (*private_free)(void *, void *), void *memory_data)
|
||||
{
|
||||
pcre2_general_context *gcontext;
|
||||
if (private_malloc == NULL) private_malloc = default_malloc;
|
||||
if (private_free == NULL) private_free = default_free;
|
||||
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
|
||||
if (gcontext == NULL) return NULL;
|
||||
gcontext->memctl.malloc = private_malloc;
|
||||
gcontext->memctl.free = private_free;
|
||||
gcontext->memctl.memory_data = memory_data;
|
||||
return gcontext;
|
||||
}
|
||||
|
||||
|
||||
/* A default compile context is set up to save having to initialize at run time
|
||||
when no context is supplied to the compile function. */
|
||||
|
||||
const pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT, /* As it says */
|
||||
0 }; /* Extra options */
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_create(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
|
||||
if (ccontext == NULL) return NULL;
|
||||
*ccontext = PRIV(default_compile_context);
|
||||
if (gcontext != NULL)
|
||||
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
|
||||
return ccontext;
|
||||
}
|
||||
|
||||
|
||||
/* A default match context is set up to save having to initialize at run time
|
||||
when no context is supplied to a match function. */
|
||||
|
||||
const pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef SUPPORT_JIT
|
||||
NULL,
|
||||
NULL,
|
||||
#endif
|
||||
NULL,
|
||||
NULL,
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_DEPTH };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_create(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
|
||||
if (mcontext == NULL) return NULL;
|
||||
*mcontext = PRIV(default_match_context);
|
||||
if (gcontext != NULL)
|
||||
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
|
||||
return mcontext;
|
||||
}
|
||||
|
||||
|
||||
/* A default convert context is set up to save having to initialize at run time
|
||||
when no context is supplied to the convert function. */
|
||||
|
||||
const pcre2_convert_context PRIV(default_convert_context) = {
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
#ifdef _WIN32
|
||||
CHAR_BACKSLASH, /* Default path separator */
|
||||
CHAR_GRAVE_ACCENT /* Default escape character */
|
||||
#else /* Not Windows */
|
||||
CHAR_SLASH, /* Default path separator */
|
||||
CHAR_BACKSLASH /* Default escape character */
|
||||
#endif
|
||||
};
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_convert_context_create(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_convert_context *ccontext = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_real_convert_context), (pcre2_memctl *)gcontext);
|
||||
if (ccontext == NULL) return NULL;
|
||||
*ccontext = PRIV(default_convert_context);
|
||||
if (gcontext != NULL)
|
||||
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
|
||||
return ccontext;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context copy functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_copy(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_general_context *new =
|
||||
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
|
||||
gcontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, gcontext, sizeof(pcre2_real_general_context));
|
||||
return new;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
|
||||
{
|
||||
pcre2_compile_context *new =
|
||||
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
|
||||
ccontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
|
||||
return new;
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_copy(pcre2_match_context *mcontext)
|
||||
{
|
||||
pcre2_match_context *new =
|
||||
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
|
||||
mcontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, mcontext, sizeof(pcre2_real_match_context));
|
||||
return new;
|
||||
}
|
||||
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_convert_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_convert_context_copy(pcre2_convert_context *ccontext)
|
||||
{
|
||||
pcre2_convert_context *new =
|
||||
ccontext->memctl.malloc(sizeof(pcre2_real_convert_context),
|
||||
ccontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, ccontext, sizeof(pcre2_real_convert_context));
|
||||
return new;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Context free functions *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_free(pcre2_general_context *gcontext)
|
||||
{
|
||||
if (gcontext != NULL)
|
||||
gcontext->memctl.free(gcontext, gcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_free(pcre2_compile_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_free(pcre2_match_context *mcontext)
|
||||
{
|
||||
if (mcontext != NULL)
|
||||
mcontext->memctl.free(mcontext, mcontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_convert_context_free(pcre2_convert_context *ccontext)
|
||||
{
|
||||
if (ccontext != NULL)
|
||||
ccontext->memctl.free(ccontext, ccontext->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set values in contexts *
|
||||
*************************************************/
|
||||
|
||||
/* All these functions return 0 for success or PCRE2_ERROR_BADDATA if invalid
|
||||
data is given. Only some of the functions are able to test the validity of the
|
||||
data. */
|
||||
|
||||
|
||||
/* ------------ Compile context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
{
|
||||
case PCRE2_BSR_ANYCRLF:
|
||||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
{
|
||||
case PCRE2_NEWLINE_CR:
|
||||
case PCRE2_NEWLINE_LF:
|
||||
case PCRE2_NEWLINE_CRLF:
|
||||
case PCRE2_NEWLINE_ANY:
|
||||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
case PCRE2_NEWLINE_NUL:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext, uint32_t limit)
|
||||
{
|
||||
ccontext->parens_nest_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
|
||||
{
|
||||
ccontext->extra_options = options;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t, void *), void *user_data)
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
ccontext->stack_guard_data = user_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ------------ Match context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->heap_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->match_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_depth_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->depth_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_offset_limit(pcre2_match_context *mcontext, PCRE2_SIZE limit)
|
||||
{
|
||||
mcontext->offset_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function became obsolete at release 10.30. It is kept as a synonym for
|
||||
backwards compatibility. */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
return pcre2_set_depth_limit(mcontext, limit);
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
(void)mcontext;
|
||||
(void)mymalloc;
|
||||
(void)myfree;
|
||||
(void)mydata;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ------------ Convert context ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_separator(pcre2_convert_context *ccontext, uint32_t separator)
|
||||
{
|
||||
if (separator != CHAR_SLASH && separator != CHAR_BACKSLASH &&
|
||||
separator != CHAR_DOT) return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_separator = separator;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
|
||||
{
|
||||
if (escape > 255 || (escape != 0 && !ispunct(escape)))
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
ccontext->glob_escape = escape;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
||||
1188
ext/pcre/pcre2lib/pcre2_convert.c
Normal file
1188
ext/pcre/pcre2lib/pcre2_convert.c
Normal file
File diff suppressed because it is too large
Load Diff
3857
ext/pcre/pcre2lib/pcre2_dfa_match.c
Normal file
3857
ext/pcre/pcre2lib/pcre2_dfa_match.c
Normal file
File diff suppressed because it is too large
Load Diff
329
ext/pcre/pcre2lib/pcre2_error.c
Normal file
329
ext/pcre/pcre2lib/pcre2_error.c
Normal file
@@ -0,0 +1,329 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
This used to be a table of strings, but in order to reduce the number of
|
||||
relocations needed when a shared library is loaded dynamically, it is now one
|
||||
long string. We cannot use a table of offsets, because the lengths of inserts
|
||||
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
|
||||
pcre2_get_error_message() counts through to the one it wants - this isn't a
|
||||
performance issue because these strings are used only when there is an error.
|
||||
|
||||
Each substring ends with \0 to insert a null character. This includes the final
|
||||
substring, so that the whole string ends with \0\0, which can be detected when
|
||||
counting through. */
|
||||
|
||||
static const unsigned char compile_error_texts[] =
|
||||
"no error\0"
|
||||
"\\ at end of pattern\0"
|
||||
"\\c at end of pattern\0"
|
||||
"unrecognized character follows \\\0"
|
||||
"numbers out of order in {} quantifier\0"
|
||||
/* 5 */
|
||||
"number too big in {} quantifier\0"
|
||||
"missing terminating ] for character class\0"
|
||||
"invalid escape sequence in character class\0"
|
||||
"range out of order in character class\0"
|
||||
"quantifier does not follow a repeatable item\0"
|
||||
/* 10 */
|
||||
"internal error: unexpected repeat\0"
|
||||
"unrecognized character after (? or (?-\0"
|
||||
"POSIX named classes are supported only within a class\0"
|
||||
"POSIX collating elements are not supported\0"
|
||||
"missing closing parenthesis\0"
|
||||
/* 15 */
|
||||
"reference to non-existent subpattern\0"
|
||||
"pattern passed as NULL\0"
|
||||
"unrecognised compile-time option bit(s)\0"
|
||||
"missing ) after (?# comment\0"
|
||||
"parentheses are too deeply nested\0"
|
||||
/* 20 */
|
||||
"regular expression is too large\0"
|
||||
"failed to allocate heap memory\0"
|
||||
"unmatched closing parenthesis\0"
|
||||
"internal error: code overflow\0"
|
||||
"missing closing parenthesis for condition\0"
|
||||
/* 25 */
|
||||
"lookbehind assertion is not fixed length\0"
|
||||
"a relative value of zero is not allowed\0"
|
||||
"conditional group contains more than two branches\0"
|
||||
"assertion expected after (?( or (?(?C)\0"
|
||||
"digit expected after (?+ or (?-\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"this version of PCRE2 does not have Unicode support\0"
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"character code point value in \\x{} or \\o{} is too large\0"
|
||||
/* 35 */
|
||||
"lookbehind is too complicated\0"
|
||||
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
|
||||
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
|
||||
"number after (?C is greater than 255\0"
|
||||
"closing parenthesis for (?C expected\0"
|
||||
/* 40 */
|
||||
"invalid escape sequence in (*VERB) name\0"
|
||||
"unrecognized character after (?P\0"
|
||||
"syntax error in subpattern name (missing terminator)\0"
|
||||
"two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
|
||||
"group name must start with a non-digit\0"
|
||||
/* 45 */
|
||||
"this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
|
||||
"malformed \\P or \\p sequence\0"
|
||||
"unknown property name after \\P or \\p\0"
|
||||
"subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
|
||||
"too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
|
||||
/* 50 */
|
||||
"invalid range in character class\0"
|
||||
"octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
|
||||
"internal error: overran compiling workspace\0"
|
||||
"internal error: previously-checked referenced subpattern not found\0"
|
||||
"DEFINE group contains more than one branch\0"
|
||||
/* 55 */
|
||||
"missing opening brace after \\o\0"
|
||||
"internal error: unknown newline setting\0"
|
||||
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
|
||||
"(?R (recursive pattern call) must be followed by a closing parenthesis\0"
|
||||
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
|
||||
/* 60 */
|
||||
"(*VERB) not recognized or malformed\0"
|
||||
"group number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"internal error: parsed pattern overflow\0"
|
||||
"non-octal character in \\o{} (closing brace missing?)\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed\0"
|
||||
"(*MARK) must have an argument\0"
|
||||
"non-hex character in \\x{} (closing brace missing?)\0"
|
||||
#ifndef EBCDIC
|
||||
"\\c must be followed by a printable ASCII character\0"
|
||||
#else
|
||||
"\\c must be followed by a letter or one of [\\]^_?\0"
|
||||
#endif
|
||||
"\\k is not followed by a braced, angle-bracketed, or quoted name\0"
|
||||
/* 70 */
|
||||
"internal error: unknown meta code in check_lookbehinds()\0"
|
||||
"\\N is not supported in a class\0"
|
||||
"callout string is too long\0"
|
||||
"disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
|
||||
"using UTF is disabled by the application\0"
|
||||
/* 75 */
|
||||
"using UCP is disabled by the application\0"
|
||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"syntax error or number too big in (?(VERSION condition\0"
|
||||
/* 80 */
|
||||
"internal error: unknown opcode in auto_possessify()\0"
|
||||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
/* 85 */
|
||||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
"internal error: unknown code in parsed pattern\0"
|
||||
/* 90 */
|
||||
"internal error: bad code value in parsed_skip()\0"
|
||||
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||
"invalid option bits with PCRE2_LITERAL\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
static const unsigned char match_error_texts[] =
|
||||
"no error\0"
|
||||
"no match\0"
|
||||
"partial match\0"
|
||||
"UTF-8 error: 1 byte missing at end\0"
|
||||
"UTF-8 error: 2 bytes missing at end\0"
|
||||
/* 5 */
|
||||
"UTF-8 error: 3 bytes missing at end\0"
|
||||
"UTF-8 error: 4 bytes missing at end\0"
|
||||
"UTF-8 error: 5 bytes missing at end\0"
|
||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||
/* 10 */
|
||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 6 top bits not 0x80\0"
|
||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||
/* 15 */
|
||||
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: overlong 4-byte sequence\0"
|
||||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated byte with 0x80 bit set\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"bad data value\0"
|
||||
/* 30 */
|
||||
"patterns do not all use the same character tables\0"
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
"bad option value\0"
|
||||
/* 35 */
|
||||
"invalid replacement string\0"
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
/* 40 */
|
||||
"backreference condition or recursion test is not supported for DFA matching\0"
|
||||
"function is not supported for DFA matching\0"
|
||||
"pattern contains an item that is not supported for DFA matching\0"
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"internal error - pattern overwritten?\0"
|
||||
/* 45 */
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown substring\0"
|
||||
/* 50 */
|
||||
"non-unique substring name\0"
|
||||
"NULL argument passed\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
"matching depth limit exceeded\0"
|
||||
"requested value is not available\0"
|
||||
/* 55 */
|
||||
"requested value is not set\0"
|
||||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||
"bad escape sequence in replacement string\0"
|
||||
"expected closing curly bracket in replacement string\0"
|
||||
"bad substitution in replacement string\0"
|
||||
/* 60 */
|
||||
"match with end before start is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
"invalid syntax\0"
|
||||
;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
buffer where to put the message (zero terminated)
|
||||
size size of the buffer in code units
|
||||
|
||||
Returns: length of message if all is well
|
||||
negative on error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size)
|
||||
{
|
||||
const unsigned char *message;
|
||||
PCRE2_SIZE i;
|
||||
int n;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else if (enumber < 0) /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
else /* Invalid error number */
|
||||
{
|
||||
message = (unsigned char *)"\0"; /* Empty message list */
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NUL) {};
|
||||
if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
if (i >= size - 1)
|
||||
{
|
||||
buffer[i] = 0; /* Terminate partial message */
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
buffer[i] = 0;
|
||||
return (int)i;
|
||||
}
|
||||
|
||||
/* End of pcre2_error.c */
|
||||
218
ext/pcre/pcre2lib/pcre2_find_bracket.c
Normal file
218
ext/pcre/pcre2lib/pcre2_find_bracket.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a single function that scans through a compiled pattern
|
||||
until it finds a capturing bracket with the given number, or, if the number is
|
||||
negative, an instance of OP_REVERSE for a lookbehind. The function is called
|
||||
from pcre2_compile.c and also from pcre2_study.c when finding the minimum
|
||||
matching length. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf TRUE in UTF mode
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
PCRE2_SPTR
|
||||
PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
|
||||
if (c == OP_END) return NULL;
|
||||
|
||||
/* XCLASS is used for classes that cannot be represented just by a bit map.
|
||||
This includes negated single high-valued characters. CALLOUT_STR is used for
|
||||
callouts with string arguments. In both cases the length in the table is
|
||||
zero; the actual length is stored in the compiled code. */
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
|
||||
|
||||
/* Handle lookbehind */
|
||||
|
||||
else if (c == OP_REVERSE)
|
||||
{
|
||||
if (number < 0) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA || c == OP_SCBRA ||
|
||||
c == OP_CBRAPOS || c == OP_SCBRAPOS)
|
||||
{
|
||||
int n = (int)GET2(code, 1+LINK_SIZE);
|
||||
if (n == number) return (PCRE2_UCHAR *)code;
|
||||
code += PRIV(OP_lengths)[c];
|
||||
}
|
||||
|
||||
/* Otherwise, we can get the item's length from the table, except that for
|
||||
repeated character types, we have to test for \p and \P, which have an extra
|
||||
two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
|
||||
must add in its length. */
|
||||
|
||||
else
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
|
||||
code += 2;
|
||||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
code += code[1];
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
followed by a multi-byte character. The length in the table is a minimum, so
|
||||
we have to arrange to skip the extra bytes. */
|
||||
|
||||
#ifdef MAYBE_UTF_MULTI
|
||||
if (utf) switch(c)
|
||||
{
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_EXACT:
|
||||
case OP_EXACTI:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_UPTO:
|
||||
case OP_UPTOI:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_MINUPTO:
|
||||
case OP_MINUPTOI:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_POSUPTO:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTPOSUPTOI:
|
||||
case OP_STAR:
|
||||
case OP_STARI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTSTARI:
|
||||
case OP_MINSTAR:
|
||||
case OP_MINSTARI:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSSTARI:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_PLUS:
|
||||
case OP_PLUSI:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_MINPLUS:
|
||||
case OP_MINPLUSI:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSPLUSI:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_QUERY:
|
||||
case OP_QUERYI:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_MINQUERY:
|
||||
case OP_MINQUERYI:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSQUERYI:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSQUERYI:
|
||||
if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
|
||||
break;
|
||||
}
|
||||
#else
|
||||
(void)(utf); /* Keep compiler happy by referencing function argument */
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_find_bracket.c */
|
||||
File diff suppressed because it is too large
Load Diff
896
ext/pcre/pcre2lib/pcre2_intmodedep.h
Normal file
896
ext/pcre/pcre2lib/pcre2_intmodedep.h
Normal file
@@ -0,0 +1,896 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||
These mode-dependent items are kept in a separate file so that they can also be
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
|
||||
Some of the mode-dependent macros are required at different widths for
|
||||
different parts of the pcre2test code (in particular, the included
|
||||
pcre_printint.c file). We undefine them here so that they can be re-defined for
|
||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||
just to undefine them all. */
|
||||
|
||||
#undef ACROSSCHAR
|
||||
#undef BACKCHAR
|
||||
#undef BYTES2CU
|
||||
#undef CHMAX_255
|
||||
#undef CU2BYTES
|
||||
#undef FORWARDCHAR
|
||||
#undef FORWARDCHARTEST
|
||||
#undef GET
|
||||
#undef GET2
|
||||
#undef GETCHAR
|
||||
#undef GETCHARINC
|
||||
#undef GETCHARINCTEST
|
||||
#undef GETCHARLEN
|
||||
#undef GETCHARLENTEST
|
||||
#undef GETCHARTEST
|
||||
#undef GET_EXTRALEN
|
||||
#undef HAS_EXTRALEN
|
||||
#undef IMM2_SIZE
|
||||
#undef MAX_255
|
||||
#undef MAX_MARK
|
||||
#undef MAX_PATTERN_SIZE
|
||||
#undef MAX_UTF_SINGLE_CU
|
||||
#undef NOT_FIRSTCU
|
||||
#undef PUT
|
||||
#undef PUT2
|
||||
#undef PUT2INC
|
||||
#undef PUTCHAR
|
||||
#undef PUTINC
|
||||
#undef TABLE_GET
|
||||
|
||||
|
||||
|
||||
/* -------------------------- MACROS ----------------------------- */
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as at least 16-bit quantities
|
||||
(always stored in big-endian order in 8-bit mode) by default. These are used,
|
||||
for example, to link from the start of a subpattern to its alternatives and its
|
||||
end. The use of 16 bits per offset limits the size of an 8-bit compiled regex
|
||||
to around 64K, which is big enough for almost everybody. However, I received a
|
||||
request for an even bigger limit. For this reason, and also to make the code
|
||||
easier to maintain, the storing and loading of offsets from the compiled code
|
||||
unit string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
values of 3 or 4 are also supported. */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 8)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) & 255))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 8) | (a)[(n)+1])
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) >> 8)), \
|
||||
(a[(n)+2] = (PCRE2_UCHAR)((d) & 255))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])
|
||||
#define MAX_PATTERN_SIZE (1 << 24)
|
||||
|
||||
#elif LINK_SIZE == 4
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 24)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) >> 16)), \
|
||||
(a[(n)+2] = (PCRE2_UCHAR)((d) >> 8)), \
|
||||
(a[(n)+3] = (PCRE2_UCHAR)((d) & 255))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
#if LINK_SIZE == 2
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)(d))
|
||||
#define GET(a,n) \
|
||||
(a[n])
|
||||
#define MAX_PATTERN_SIZE (1 << 16)
|
||||
|
||||
#elif LINK_SIZE == 3 || LINK_SIZE == 4
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (PCRE2_UCHAR)((d) >> 16)), \
|
||||
(a[(n)+1] = (PCRE2_UCHAR)((d) & 65535))
|
||||
#define GET(a,n) \
|
||||
(unsigned int)(((a)[n] << 16) | (a)[(n)+1])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error LINK_SIZE must be 2, 3, or 4
|
||||
#endif
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#undef LINK_SIZE
|
||||
#define LINK_SIZE 1
|
||||
#define PUT(a,n,d) \
|
||||
(a[n] = (d))
|
||||
#define GET(a,n) \
|
||||
(a[n])
|
||||
#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */
|
||||
|
||||
#else
|
||||
#error Unsupported compiling mode
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------- Other mode-specific macros ----------------- */
|
||||
|
||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||
the size of offsets changes. There are used for repeat counts and for other
|
||||
things such as capturing parenthesis numbers in back references.
|
||||
|
||||
Define the number of code units required to hold a 16-bit count/offset, and
|
||||
macros to load and store such a value. For reasons that I do not understand,
|
||||
the expression in the 8-bit GET2 macro is treated by gcc as a signed
|
||||
expression, even when a is declared as unsigned. It seems that any kind of
|
||||
arithmetic results in a signed value. Hence the cast. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define IMM2_SIZE 2
|
||||
#define GET2(a,n) (unsigned int)(((a)[n] << 8) | (a)[(n)+1])
|
||||
#define PUT2(a,n,d) a[n] = (d) >> 8, a[(n)+1] = (d) & 255
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define IMM2_SIZE 1
|
||||
#define GET2(a,n) a[n]
|
||||
#define PUT2(a,n,d) a[n] = d
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. The
|
||||
TABLE_GET macro is used to access elements of tables containing exactly 256
|
||||
items. When code points can be greater than 255, a check is needed before
|
||||
accessing these tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#else
|
||||
#define CHMAX_255(c) TRUE
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* ----------------- Character-handling macros ----------------- */
|
||||
|
||||
/* There is a proposed future special "UTF-21" mode, in which only the lowest
|
||||
21 bits of a 32-bit character are interpreted as UTF, with the remaining 11
|
||||
high-order bits available to the application for other uses. In preparation for
|
||||
the future implementation of this mode, there are macros that load a data item
|
||||
and, if in this special mode, mask it to 21 bits. These macros all have names
|
||||
starting with UCHAR21. In all other modes, including the normal 32-bit
|
||||
library, the macros all have the same simple definitions. When the new mode is
|
||||
implemented, it is expected that these definitions will be varied appropriately
|
||||
using #ifdef when compiling the library that supports the special mode. */
|
||||
|
||||
#define UCHAR21(eptr) (*(eptr))
|
||||
#define UCHAR21TEST(eptr) (*(eptr))
|
||||
#define UCHAR21INC(eptr) (*(eptr)++)
|
||||
#define UCHAR21INCTEST(eptr) (*(eptr)++)
|
||||
|
||||
/* When UTF encoding is being used, a character is no longer just a single
|
||||
byte in 8-bit mode or a single short in 16-bit mode. The macros for character
|
||||
handling generate simple sequences when used in the basic mode, and more
|
||||
complicated ones for UTF characters. GETCHARLENTEST and other macros are not
|
||||
used when UTF is not supported. To make sure they can never even appear when
|
||||
UTF support is omitted, we don't even define them. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
||||
/* #define MAX_UTF_SINGLE_CU */
|
||||
/* #define HAS_EXTRALEN(c) */
|
||||
/* #define GET_EXTRALEN(c) */
|
||||
/* #define NOT_FIRSTCU(c) */
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
/* #define GETCHARLENTEST(c, eptr, len) */
|
||||
/* #define BACKCHAR(eptr) */
|
||||
/* #define FORWARDCHAR(eptr) */
|
||||
/* #define FORWARCCHARTEST(eptr,end) */
|
||||
/* #define ACROSSCHAR(condition, eptr, action) */
|
||||
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 127
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) HASUTF8EXTRALEN(c)
|
||||
|
||||
/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3fu])
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xc0u) == 0x80u)
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0u) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0u) GETUTF8(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (c >= 0xc0u) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && c >= 0xc0u) GETUTF8INC(c, eptr);
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (c >= 0xc0u) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
|
||||
pointer, incrementing length if there are extra bytes. This is called when we
|
||||
do not know if we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && c >= 0xc0u) GETUTF8LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-8 mode - we don't put a test within the macro
|
||||
because almost all calls are already within a block of UTF-8 only code. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
|
||||
#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
while((condition) && ((eptr) & 0xc0u) == 0x80u) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 16-bit support ------------------ */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#define MAYBE_UTF_MULTI /* UTF chars may use multiple code units */
|
||||
|
||||
/* The largest UTF code point that can be encoded as a single code unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU 65535
|
||||
|
||||
/* Tests whether the code point needs extra characters to decode. */
|
||||
|
||||
#define HAS_EXTRALEN(c) (((c) & 0xfc00u) == 0xd800u)
|
||||
|
||||
/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
|
||||
Otherwise it has an undefined behaviour. */
|
||||
|
||||
#define GET_EXTRALEN(c) 1
|
||||
|
||||
/* Returns TRUE, if the given value is not the first code unit of a UTF
|
||||
sequence. */
|
||||
|
||||
#define NOT_FIRSTCU(c) (((c) & 0xfc00u) == 0xdc00u)
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer. */
|
||||
|
||||
#define GETUTF16(c, eptr) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
|
||||
|
||||
/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
|
||||
the pointer. */
|
||||
|
||||
#define GETUTF16INC(c, eptr) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (*eptr++ & 0x3ffu)) + 0x10000u; }
|
||||
|
||||
/* Get the next UTF-16 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16INC(c, eptr);
|
||||
|
||||
/* Base macro to pick up the low surrogate of a UTF-16 character, not
|
||||
advancing the pointer, incrementing the length. */
|
||||
|
||||
#define GETUTF16LEN(c, eptr, len) \
|
||||
{ c = (((c & 0x3ffu) << 10) | (eptr[1] & 0x3ffu)) + 0x10000u; len++; }
|
||||
|
||||
/* Get the next UTF-16 character, not advancing the pointer, incrementing
|
||||
length if there is a low surrogate. This is called when we know we are in
|
||||
UTF-16 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
|
||||
pointer, incrementing length if there is a low surrogate. This is called when
|
||||
we do not know if we are in UTF-16 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if (utf && (c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-16 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-16 only
|
||||
code. */
|
||||
|
||||
#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
|
||||
#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
if ((condition) && ((eptr) & 0xfc00u) == 0xdc00u) action
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
PRIV(ord2utf)(c,p) : (*p = c, 1))
|
||||
|
||||
|
||||
/* ------------------- 32-bit support ------------------ */
|
||||
|
||||
#else
|
||||
|
||||
/* These are trivial for the 32-bit library, since all UTF-32 characters fit
|
||||
into one PCRE2_UCHAR unit. */
|
||||
|
||||
#define MAX_UTF_SINGLE_CU (0x10ffffu)
|
||||
#define HAS_EXTRALEN(c) (0)
|
||||
#define GET_EXTRALEN(c) (0)
|
||||
#define NOT_FIRSTCU(c) (0)
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, testing for UTF-32 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *(eptr);
|
||||
|
||||
/* Get the next UTF-32 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next character, testing for UTF-32 mode, and advancing the pointer.
|
||||
This is called when we don't know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *((eptr)++);
|
||||
|
||||
/* Get the next UTF-32 character, not advancing the pointer, not incrementing
|
||||
length (since all UTF-32 is of length 1). This is called when we know we are in
|
||||
UTF-32 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
GETCHAR(c, eptr)
|
||||
|
||||
/* Get the next UTF-32character, testing for UTF-32 mode, not advancing the
|
||||
pointer, not incrementing the length (since all UTF-32 is of length 1).
|
||||
This is called when we do not know if we are in UTF-32 mode. */
|
||||
|
||||
#define GETCHARLENTEST(c, eptr, len) \
|
||||
GETCHARTEST(c, eptr)
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. This is called only in UTF-32 mode - we don't put a test within the
|
||||
macro because almost all calls are already within a block of UTF-32 only
|
||||
code.
|
||||
|
||||
These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
|
||||
|
||||
#define BACKCHAR(eptr) do { } while (0)
|
||||
|
||||
/* Same as above, just in the other direction. */
|
||||
|
||||
#define FORWARDCHAR(eptr) do { } while (0)
|
||||
#define FORWARDCHARTEST(eptr,end) do { } while (0)
|
||||
|
||||
/* Same as above, but it allows a fully customizable form. */
|
||||
|
||||
#define ACROSSCHAR(condition, eptr, action) do { } while (0)
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) (*p = c, 1)
|
||||
|
||||
#endif /* UTF-32 character handling */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
||||
/* Mode-dependent macros that have the same definition in all modes. */
|
||||
|
||||
#define CU2BYTES(x) ((x)*((PCRE2_CODE_UNIT_WIDTH/8)))
|
||||
#define BYTES2CU(x) ((x)/((PCRE2_CODE_UNIT_WIDTH/8)))
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
|
||||
|
||||
|
||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||
|
||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||
code that uses them is simpler because it assumes this. */
|
||||
|
||||
/* The real general context structure. At present it holds only data for custom
|
||||
memory control. */
|
||||
|
||||
typedef struct pcre2_real_general_context {
|
||||
pcre2_memctl memctl;
|
||||
} pcre2_real_general_context;
|
||||
|
||||
/* The real compile context structure */
|
||||
|
||||
typedef struct pcre2_real_compile_context {
|
||||
pcre2_memctl memctl;
|
||||
int (*stack_guard)(uint32_t, void *);
|
||||
void *stack_guard_data;
|
||||
const uint8_t *tables;
|
||||
PCRE2_SIZE max_pattern_length;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t parens_nest_limit;
|
||||
uint32_t extra_options;
|
||||
} pcre2_real_compile_context;
|
||||
|
||||
/* The real match context structure. */
|
||||
|
||||
typedef struct pcre2_real_match_context {
|
||||
pcre2_memctl memctl;
|
||||
#ifdef SUPPORT_JIT
|
||||
pcre2_jit_callback jit_callback;
|
||||
void *jit_callback_data;
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);
|
||||
void *callout_data;
|
||||
PCRE2_SIZE offset_limit;
|
||||
uint32_t heap_limit;
|
||||
uint32_t match_limit;
|
||||
uint32_t depth_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
||||
/* The real convert context structure. */
|
||||
|
||||
typedef struct pcre2_real_convert_context {
|
||||
pcre2_memctl memctl;
|
||||
uint32_t glob_separator;
|
||||
uint32_t glob_escape;
|
||||
} pcre2_real_convert_context;
|
||||
|
||||
/* The real compiled code structure. The type for the blocksize field is
|
||||
defined specially because it is required in pcre2_serialize_decode() when
|
||||
copying the size from possibly unaligned memory into a variable of the same
|
||||
type. Use a macro rather than a typedef to avoid compiler warnings when this
|
||||
file is included multiple times by pcre2test. LOOKBEHIND_MAX specifies the
|
||||
largest lookbehind that is supported. (OP_REVERSE in a pattern has a 16-bit
|
||||
argument in 8-bit and 16-bit modes, so we need no more than a 16-bit field
|
||||
here.) */
|
||||
|
||||
#undef CODE_BLOCKSIZE_TYPE
|
||||
#define CODE_BLOCKSIZE_TYPE size_t
|
||||
|
||||
#undef LOOKBEHIND_MAX
|
||||
#define LOOKBEHIND_MAX UINT16_MAX
|
||||
|
||||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
CODE_BLOCKSIZE_TYPE blocksize; /* Total (bytes) that was malloc-ed */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_heap; /* Limit set in the pattern */
|
||||
uint32_t limit_match; /* Limit set in the pattern */
|
||||
uint32_t limit_depth; /* Limit set in the pattern */
|
||||
uint32_t first_codeunit; /* Starting code unit */
|
||||
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||
uint16_t bsr_convention; /* What \R matches */
|
||||
uint16_t newline_convention; /* What is a newline? */
|
||||
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
||||
uint16_t minlength; /* Minimum length of match */
|
||||
uint16_t top_bracket; /* Highest numbered group */
|
||||
uint16_t top_backref; /* Highest numbered back reference */
|
||||
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||
uint16_t name_count; /* Number of name entries in the table */
|
||||
} pcre2_real_code;
|
||||
|
||||
/* The real match data structure. Define ovector large so that array bound
|
||||
checkers don't grumble. Memory for this structure is obtained by calling
|
||||
pcre2_match_data_create(), which sets the size as the offset of ovector plus
|
||||
pairs of elements for each capturing group. (See also the heapframe structure
|
||||
below.) */
|
||||
|
||||
typedef struct pcre2_real_match_data {
|
||||
pcre2_memctl memctl;
|
||||
const pcre2_real_code *code; /* The pattern used for the match */
|
||||
PCRE2_SPTR subject; /* The subject that was matched */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||
uint16_t oveccount; /* Number of pairs */
|
||||
int rc; /* The return code from the match */
|
||||
PCRE2_SIZE ovector[10000];/* The first field */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
/* ----------------------- PRIVATE STRUCTURES ----------------------------- */
|
||||
|
||||
/* These structures are not needed for pcre2test. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
/* Structures for checking for mutual recursion when scanning compiled or
|
||||
parsed code. */
|
||||
|
||||
typedef struct recurse_check {
|
||||
struct recurse_check *prev;
|
||||
PCRE2_SPTR group;
|
||||
} recurse_check;
|
||||
|
||||
typedef struct parsed_recurse_check {
|
||||
struct parsed_recurse_check *prev;
|
||||
uint32_t *groupptr;
|
||||
} parsed_recurse_check;
|
||||
|
||||
/* Structure for building a cache when filling in recursion offsets. */
|
||||
|
||||
typedef struct recurse_cache {
|
||||
PCRE2_SPTR group;
|
||||
int groupnumber;
|
||||
} recurse_cache;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer;
|
||||
PCRE2_UCHAR *current_branch;
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for building a list of named groups during the first pass of
|
||||
compiling. */
|
||||
|
||||
typedef struct named_group {
|
||||
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||
uint32_t number; /* Group number */
|
||||
uint16_t length; /* Length of the name */
|
||||
uint16_t isdup; /* TRUE if a duplicate */
|
||||
} named_group;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_block {
|
||||
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *cbits; /* Points to character type table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SPTR start_workspace; /* The start of working space */
|
||||
PCRE2_SPTR start_code; /* The start of the compiled code */
|
||||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
PCRE2_SIZE workspace_size; /* Size of workspace */
|
||||
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
|
||||
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
|
||||
uint16_t names_found; /* Number of entries so far */
|
||||
uint16_t name_entry_size; /* Size of each entry */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
named_group *named_groups; /* Points to vector in pre-compile */
|
||||
uint32_t named_group_list_size; /* Number of entries in the list */
|
||||
uint32_t external_options; /* External (initial) options */
|
||||
uint32_t external_flags; /* External flag bits to be set */
|
||||
uint32_t bracount; /* Count of capturing parentheses */
|
||||
uint32_t lastcapture; /* Last capture encountered */
|
||||
uint32_t *parsed_pattern; /* Parsed pattern buffer */
|
||||
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
|
||||
uint32_t *groupinfo; /* Group info vector */
|
||||
uint32_t top_backref; /* Maximum back reference */
|
||||
uint32_t backref_map; /* Bitmap of low back refs */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
uint32_t class_range_start; /* Overall class range start */
|
||||
uint32_t class_range_end; /* Overall class range end */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||
int parens_depth; /* Depth of nested parentheses */
|
||||
int assert_depth; /* Depth of nested assertions */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL had_recurse; /* Had a recursion or subroutine call */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
} compile_block;
|
||||
|
||||
/* Structure for keeping the properties of the in-memory stack used
|
||||
by the JIT matcher. */
|
||||
|
||||
typedef struct pcre2_real_jit_stack {
|
||||
pcre2_memctl memctl;
|
||||
void* stack;
|
||||
} pcre2_real_jit_stack;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern when running pcre_dfa_match(). */
|
||||
|
||||
typedef struct dfa_recursion_info {
|
||||
struct dfa_recursion_info *prevrec;
|
||||
PCRE2_SPTR subject_position;
|
||||
uint32_t group_num;
|
||||
} dfa_recursion_info;
|
||||
|
||||
/* Structure for "stack" frames that are used for remembering backtracking
|
||||
positions during matching. As these are used in a vector, with the ovector item
|
||||
being extended, the size of the structure must be a multiple of PCRE2_SIZE. The
|
||||
only way to check this at compile time is to force an error by generating an
|
||||
array with a negative size. By putting this in a typedef (which is never used),
|
||||
we don't generate any code when all is well. */
|
||||
|
||||
typedef struct heapframe {
|
||||
|
||||
/* The first set of fields are variables that have to be preserved over calls
|
||||
to RRMATCH(), but which do not need to be copied to new frames. */
|
||||
|
||||
PCRE2_SPTR ecode; /* The current position in the pattern */
|
||||
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
|
||||
PCRE2_SIZE length; /* Used for character, string, or code lengths */
|
||||
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
|
||||
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
|
||||
uint32_t rdepth; /* "Recursion" depth */
|
||||
uint32_t group_frame_type; /* Type information for group frames */
|
||||
uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
|
||||
uint8_t return_id; /* Where to go on in internal "return" */
|
||||
uint8_t op; /* Processing opcode */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR occu[6]; /* Used for other case code units */
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
PCRE2_UCHAR occu[2]; /* Used for other case code units */
|
||||
#else
|
||||
PCRE2_UCHAR occu[1]; /* Used for other case code units */
|
||||
#endif
|
||||
|
||||
/* The rest have to be copied from the previous frame whenever a new frame
|
||||
becomes current. The final field is specified as a large vector so that
|
||||
runtime array bound checks don't catch references to it. However, for any
|
||||
specific call to pcre2_match() the memory allocated for each frame structure
|
||||
allows for exactly the right size ovector for the number of capturing
|
||||
parentheses. */
|
||||
|
||||
PCRE2_SPTR eptr; /* MUST BE FIRST */
|
||||
PCRE2_SPTR start_match; /* Can be adjusted by \K */
|
||||
PCRE2_SPTR mark; /* Most recent mark on the success path */
|
||||
uint32_t current_recurse; /* Current (deepest) recursion number */
|
||||
uint32_t capture_last; /* Most recent capture */
|
||||
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
|
||||
PCRE2_SIZE offset_top; /* Offset after highest capture */
|
||||
PCRE2_SIZE ovector[10000]; /* Must be last in the structure */
|
||||
} heapframe;
|
||||
|
||||
typedef char check_heapframe_size[
|
||||
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching (pcre2_match() and friends). */
|
||||
|
||||
typedef struct match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
|
||||
heapframe *match_frames; /* Points to vector of frames */
|
||||
heapframe *match_frames_top; /* Points after the end of the vector */
|
||||
heapframe *stack_frames; /* The original vector on the stack */
|
||||
PCRE2_SIZE heap_limit; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
uint16_t name_count; /* Number of names in name table */
|
||||
uint16_t name_entry_size; /* Size of entry in names table */
|
||||
PCRE2_SPTR name_table; /* Table of group names */
|
||||
PCRE2_SPTR start_code; /* For use when recursing */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of the subject string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
|
||||
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
|
||||
uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
} match_block;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
functions. */
|
||||
|
||||
typedef struct dfa_match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of calls of internal function */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
dfa_recursion_info *recursive; /* Linked list of recursion data */
|
||||
} dfa_match_block;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
/* End of pcre2_intmodedep.h */
|
||||
File diff suppressed because it is too large
Load Diff
189
ext/pcre/pcre2lib/pcre2_jit_match.c
Normal file
189
ext/pcre/pcre2lib/pcre2_jit_match.c
Normal file
@@ -0,0 +1,189 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
|
||||
static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func)
|
||||
{
|
||||
sljit_u8 local_space[MACHINE_STACK_SIZE];
|
||||
struct sljit_stack local_stack;
|
||||
|
||||
local_stack.max_limit = local_space;
|
||||
local_stack.limit = local_space;
|
||||
local_stack.base = local_space + MACHINE_STACK_SIZE;
|
||||
local_stack.top = local_space + MACHINE_STACK_SIZE;
|
||||
arguments->stack = &local_stack;
|
||||
return executable_func(arguments);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Do a JIT pattern match *
|
||||
*************************************************/
|
||||
|
||||
/* This function runs a JIT pattern match.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
jit_stack points to a JIT stack
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
-1 => failed to match (PCRE_ERROR_NOMATCH)
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)subject;
|
||||
(void)length;
|
||||
(void)start_offset;
|
||||
(void)options;
|
||||
(void)match_data;
|
||||
(void)mcontext;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
executable_functions *functions = (executable_functions *)re->executable_jit;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
uint32_t oveccount = match_data->oveccount;
|
||||
uint32_t max_oveccount;
|
||||
union {
|
||||
void *executable_func;
|
||||
jit_function call_executable_func;
|
||||
} convert_executable_func;
|
||||
jit_arguments arguments;
|
||||
int rc;
|
||||
int index = 0;
|
||||
|
||||
if ((options & PCRE2_PARTIAL_HARD) != 0)
|
||||
index = 2;
|
||||
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
|
||||
index = 1;
|
||||
|
||||
if (functions->executable_funcs[index] == NULL)
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
/* Sanity checks should be handled by pcre_exec. */
|
||||
arguments.str = subject + start_offset;
|
||||
arguments.begin = subject;
|
||||
arguments.end = subject + length;
|
||||
arguments.match_data = match_data;
|
||||
arguments.startchar_ptr = subject;
|
||||
arguments.mark_ptr = NULL;
|
||||
arguments.options = options;
|
||||
|
||||
if (mcontext != NULL)
|
||||
{
|
||||
arguments.callout = mcontext->callout;
|
||||
arguments.callout_data = mcontext->callout_data;
|
||||
arguments.offset_limit = mcontext->offset_limit;
|
||||
arguments.limit_match = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
if (mcontext->jit_callback != NULL)
|
||||
jit_stack = mcontext->jit_callback(mcontext->jit_callback_data);
|
||||
else
|
||||
jit_stack = (pcre2_jit_stack *)mcontext->jit_callback_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
arguments.callout = NULL;
|
||||
arguments.callout_data = NULL;
|
||||
arguments.offset_limit = PCRE2_UNSET;
|
||||
arguments.limit_match = (MATCH_LIMIT < re->limit_match)?
|
||||
MATCH_LIMIT : re->limit_match;
|
||||
jit_stack = NULL;
|
||||
}
|
||||
|
||||
/* JIT only need two offsets for each ovector entry. Hence
|
||||
the last 1/3 of the ovector will never be touched. */
|
||||
|
||||
max_oveccount = functions->top_bracket;
|
||||
if (oveccount > max_oveccount)
|
||||
oveccount = max_oveccount;
|
||||
arguments.oveccount = oveccount << 1;
|
||||
|
||||
|
||||
convert_executable_func.executable_func = functions->executable_funcs[index];
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
arguments.stack = (struct sljit_stack *)(jit_stack->stack);
|
||||
rc = convert_executable_func.call_executable_func(&arguments);
|
||||
}
|
||||
else
|
||||
rc = jit_machine_stack_exec(&arguments, convert_executable_func.call_executable_func);
|
||||
|
||||
if (rc > (int)oveccount)
|
||||
rc = 0;
|
||||
match_data->code = re;
|
||||
match_data->subject = subject;
|
||||
match_data->rc = rc;
|
||||
match_data->startchar = arguments.startchar_ptr - subject;
|
||||
match_data->leftchar = 0;
|
||||
match_data->rightchar = 0;
|
||||
match_data->mark = arguments.mark_ptr;
|
||||
match_data->matchedby = PCRE2_MATCHEDBY_JIT;
|
||||
|
||||
return match_data->rc;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_match.c */
|
||||
227
ext/pcre/pcre2lib/pcre2_jit_misc.c
Normal file
227
ext/pcre/pcre2lib/pcre2_jit_misc.c
Normal file
@@ -0,0 +1,227 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_FROM_PCRE2_JIT_COMPILE
|
||||
#error This file must be included from pcre2_jit_compile.c.
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free JIT read-only data *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free_rodata)(void *current, void *allocator_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)current;
|
||||
(void)allocator_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
void *next;
|
||||
|
||||
SLJIT_UNUSED_ARG(allocator_data);
|
||||
|
||||
while (current != NULL)
|
||||
{
|
||||
next = *(void**)current;
|
||||
SLJIT_FREE(current, allocator_data);
|
||||
current = next;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
/*************************************************
|
||||
* Free JIT compiled code *
|
||||
*************************************************/
|
||||
|
||||
void
|
||||
PRIV(jit_free)(void *executable_jit, pcre2_memctl *memctl)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
(void)memctl;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
executable_functions *functions = (executable_functions *)executable_jit;
|
||||
void *allocator_data = memctl;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++)
|
||||
{
|
||||
if (functions->executable_funcs[i] != NULL)
|
||||
sljit_free_code(functions->executable_funcs[i]);
|
||||
PRIV(jit_free_rodata)(functions->read_only_data_heads[i], allocator_data);
|
||||
}
|
||||
|
||||
SLJIT_FREE(functions, allocator_data);
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free unused JIT memory *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)gcontext; /* Suppress warning */
|
||||
#else /* SUPPORT_JIT */
|
||||
SLJIT_UNUSED_ARG(gcontext);
|
||||
sljit_free_unused_memory_exec();
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Allocate a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_create(size_t startsize, size_t maxsize,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)gcontext;
|
||||
(void)startsize;
|
||||
(void)maxsize;
|
||||
return NULL;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
pcre2_jit_stack *jit_stack;
|
||||
|
||||
if (startsize < 1 || maxsize < 1)
|
||||
return NULL;
|
||||
if (startsize > maxsize)
|
||||
startsize = maxsize;
|
||||
startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
|
||||
maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
|
||||
|
||||
jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
|
||||
if (jit_stack == NULL) return NULL;
|
||||
jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl);
|
||||
return jit_stack;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Assign a JIT stack to a pattern *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_assign(pcre2_match_context *mcontext, pcre2_jit_callback callback,
|
||||
void *callback_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)mcontext;
|
||||
(void)callback;
|
||||
(void)callback_data;
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
if (mcontext == NULL) return;
|
||||
mcontext->jit_callback = callback;
|
||||
mcontext->jit_callback_data = callback_data;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)jit_stack;
|
||||
#else /* SUPPORT_JIT */
|
||||
if (jit_stack != NULL)
|
||||
{
|
||||
sljit_free_stack((struct sljit_stack *)(jit_stack->stack), &jit_stack->memctl);
|
||||
jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data);
|
||||
}
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get target CPU type *
|
||||
*************************************************/
|
||||
|
||||
const char*
|
||||
PRIV(jit_get_target)(void)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
return "JIT is not supported";
|
||||
#else /* SUPPORT_JIT */
|
||||
return sljit_get_platform_name();
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get size of JIT code *
|
||||
*************************************************/
|
||||
|
||||
size_t
|
||||
PRIV(jit_get_size)(void *executable_jit)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
(void)executable_jit;
|
||||
return 0;
|
||||
#else /* SUPPORT_JIT */
|
||||
sljit_uw *executable_sizes = ((executable_functions *)executable_jit)->executable_sizes;
|
||||
SLJIT_COMPILE_ASSERT(JIT_NUMBER_OF_COMPILE_MODES == 3, number_of_compile_modes_changed);
|
||||
return executable_sizes[0] + executable_sizes[1] + executable_sizes[2];
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_misc.c */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,53 +39,53 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_maketables(), which builds
|
||||
character tables for PCRE in the current locale. The file is compiled on its
|
||||
own as part of the PCRE library. However, it is also included in the
|
||||
/* This module contains the external function pcre2_maketables(), which builds
|
||||
character tables for PCRE2 in the current locale. The file is compiled on its
|
||||
own as part of the PCRE2 library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
# ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
# endif
|
||||
# include "pcre_internal.h"
|
||||
# include "pcre2_internal.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE character tables *
|
||||
* Create PCRE2 character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE and returns
|
||||
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||
a pointer to them. They are build using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via PUBL(malloc)(), but when
|
||||
compiled inside dftables, use malloc().
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||
program) malloc() is used, and the function has a different name so as not to
|
||||
clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none
|
||||
Arguments: none when DFTABLES is defined
|
||||
else a PCRE2 general context or NULL
|
||||
Returns: pointer to the contiguous block of data
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
const unsigned char *
|
||||
pcre_maketables(void)
|
||||
#elif defined COMPILE_PCRE16
|
||||
const unsigned char *
|
||||
pcre16_maketables(void)
|
||||
#elif defined COMPILE_PCRE32
|
||||
const unsigned char *
|
||||
pcre32_maketables(void)
|
||||
#endif
|
||||
#ifdef DFTABLES /* Included in freestanding dftables.c program */
|
||||
static const uint8_t *maketables(void)
|
||||
{
|
||||
unsigned char *yield, *p;
|
||||
int i;
|
||||
uint8_t *yield = (uint8_t *)malloc(tables_length);
|
||||
|
||||
#ifndef DFTABLES
|
||||
yield = (unsigned char*)(PUBL(malloc))(tables_length);
|
||||
#else
|
||||
yield = (unsigned char*)malloc(tables_length);
|
||||
#endif
|
||||
#else /* Not DFTABLES, compiling the library */
|
||||
PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION
|
||||
pcre2_maketables(pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *yield = (uint8_t *)((gcontext != NULL)?
|
||||
gcontext->memctl.malloc(tables_length, gcontext->memctl.memory_data) :
|
||||
malloc(tables_length));
|
||||
#endif /* DFTABLES */
|
||||
|
||||
int i;
|
||||
uint8_t *p;
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
@@ -153,4 +154,4 @@ for (i = 0; i < 256; i++)
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of pcre_maketables.c */
|
||||
/* End of pcre2_maketables.c */
|
||||
7006
ext/pcre/pcre2lib/pcre2_match.c
Normal file
7006
ext/pcre/pcre2lib/pcre2_match.c
Normal file
File diff suppressed because it is too large
Load Diff
147
ext/pcre/pcre2lib/pcre2_match_data.c
Normal file
147
ext/pcre/pcre2lib/pcre2_match_data.c
Normal file
@@ -0,0 +1,147 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector pairs. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_match_data *yield;
|
||||
if (oveccount < 1) oveccount = 1;
|
||||
yield = PRIV(memctl_malloc)(
|
||||
offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (yield == NULL) return NULL;
|
||||
yield->oveccount = oveccount;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create a match data block using pattern data *
|
||||
*************************************************/
|
||||
|
||||
/* If no context is supplied, use the memory allocator from the code. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
|
||||
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
||||
gcontext);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free a match data block *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
{
|
||||
if (match_data != NULL)
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get last mark in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SPTR PCRE2_CALL_CONVENTION
|
||||
pcre2_get_mark(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->mark;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get pointer to ovector *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->ovector;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get number of ovector slots *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN uint32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->oveccount;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get starting code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->startchar;
|
||||
}
|
||||
|
||||
/* End of pcre2_match_data.c */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
referring to a different set of newline characters. At present, PCRE2 supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
@@ -51,7 +52,7 @@ http://unicode.org/unicode/reports/tr18/. */
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
@@ -59,8 +60,10 @@ http://unicode.org/unicode/reports/tr18/. */
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr is less than the end of the string.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
@@ -73,28 +76,30 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
|
||||
BOOL utf)
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
pcre_uint32 c;
|
||||
(void)utf;
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf)
|
||||
{
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
c = *ptr;
|
||||
uint32_t c;
|
||||
|
||||
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF: *lenptr = 1; return TRUE;
|
||||
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
default: return FALSE;
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
@@ -106,25 +111,36 @@ else switch(c)
|
||||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF: *lenptr = 1; return TRUE;
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#ifdef COMPILE_PCRE8
|
||||
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
#endif /* Not EBCDIC */
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default: return FALSE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,8 +150,10 @@ else switch(c)
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||
value of ptr is greater than the start of the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
@@ -148,23 +166,23 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
|
||||
BOOL utf)
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
pcre_uint32 c;
|
||||
(void)utf;
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UTF */
|
||||
c = *ptr;
|
||||
|
||||
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||
else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
@@ -172,8 +190,12 @@ if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||
default: return FALSE;
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
@@ -189,22 +211,33 @@ else switch(c)
|
||||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#ifdef COMPILE_PCRE8
|
||||
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
#else /* COMPILE_PCRE16 || COMPILE_PCRE32 */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
#endif /* COMPILE_PCRE8 */
|
||||
#endif /* NotEBCDIC */
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default: return FALSE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_newline.c */
|
||||
/* End of pcre2_newline.c */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,39 +39,51 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
/* This file contains a function that converts a Unicode character code point
|
||||
into a UTF string. The behaviour is different for each code unit width. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#define COMPILE_PCRE8
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* If SUPPORT_UNICODE is not defined, this function will never be called.
|
||||
Supply a dummy function because some compilers do not like empty source
|
||||
modules. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0;
|
||||
}
|
||||
#else /* SUPPORT_UNICODE */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
* Convert code point to UTF *
|
||||
*************************************************/
|
||||
|
||||
/* This function takes an integer value in the range 0 - 0x10ffff
|
||||
and encodes it as a UTF-8 character in 1 to 4 pcre_uchars.
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 pcre_uchars long
|
||||
buffer pointer to buffer for result
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
Returns: number of code units placed in the buffer
|
||||
*/
|
||||
|
||||
unsigned
|
||||
int
|
||||
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
|
||||
unsigned int
|
||||
PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
register int i, j;
|
||||
/* Convert to UTF-8 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int i, j;
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
|
||||
buffer += i;
|
||||
@@ -82,13 +95,26 @@ for (j = i; j > 0; j--)
|
||||
*buffer = PRIV(utf8_table2)[i] | cvalue;
|
||||
return i + 1;
|
||||
|
||||
/* Convert to UTF-16 */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (cvalue <= 0xffff)
|
||||
{
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
}
|
||||
cvalue -= 0x10000;
|
||||
*buffer++ = 0xd800 | (cvalue >> 10);
|
||||
*buffer = 0xdc00 | (cvalue & 0x3ff);
|
||||
return 2;
|
||||
|
||||
/* Convert to UTF-32 */
|
||||
|
||||
#else
|
||||
|
||||
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
|
||||
(void)(buffer); /* called when SUPPORT_UTF is not defined. */
|
||||
return 0;
|
||||
|
||||
*buffer = (PCRE2_UCHAR)cvalue;
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
/* End of pcre_ord2utf.c */
|
||||
426
ext/pcre/pcre2lib/pcre2_pattern_info.c
Normal file
426
ext/pcre/pcre2lib/pcre2_pattern_info.c
Normal file
@@ -0,0 +1,426 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
what what information is required
|
||||
where where to put the information; if NULL, return length
|
||||
|
||||
Returns: 0 when data returned
|
||||
> 0 when length requested
|
||||
< 0 on error or unset value
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
{
|
||||
const pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
|
||||
if (where == NULL) /* Requests field length */
|
||||
{
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
case PCRE2_INFO_BSR:
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
return sizeof(uint32_t);
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
return sizeof(const uint8_t *);
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
case PCRE2_INFO_SIZE:
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
return sizeof(size_t);
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
return sizeof(PCRE2_SPTR);
|
||||
}
|
||||
}
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
switch(what)
|
||||
{
|
||||
case PCRE2_INFO_ALLOPTIONS:
|
||||
*((uint32_t *)where) = re->overall_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_ARGOPTIONS:
|
||||
*((uint32_t *)where) = re->compile_options;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BACKREFMAX:
|
||||
*((uint32_t *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_BSR:
|
||||
*((uint32_t *)where) = re->bsr_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_CAPTURECOUNT:
|
||||
*((uint32_t *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_DEPTHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_depth;
|
||||
if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
|
||||
((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
|
||||
re->first_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
*((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
|
||||
&(re->start_bitmap[0]) : NULL;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_FRAMESIZE:
|
||||
*((size_t *)where) = offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
*((uint32_t *)where) = re->limit_heap;
|
||||
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) = (re->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(re->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
*((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
|
||||
re->last_codeunit : 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHEMPTY:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MAXLOOKBEHIND:
|
||||
*((uint32_t *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_MINLENGTH:
|
||||
*((uint32_t *)where) = re->minlength;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMEENTRYSIZE:
|
||||
*((uint32_t *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMECOUNT:
|
||||
*((uint32_t *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
*((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
*((uint32_t *)where) = re->newline_convention;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_SIZE:
|
||||
*((size_t *)where) = re->blocksize;
|
||||
break;
|
||||
|
||||
default: return PCRE2_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout enumerator *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
code points to compiled code
|
||||
callback function called for each callout block
|
||||
callout_data user data passed to the callback
|
||||
|
||||
Returns: 0 when successfully completed
|
||||
< 0 on local error
|
||||
!= 0 for callback error
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_callout_enumerate(const pcre2_code *code,
|
||||
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
|
||||
{
|
||||
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||
pcre2_callout_enumerate_block cb;
|
||||
PCRE2_SPTR cc;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf;
|
||||
#endif
|
||||
|
||||
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
cb.version = 0;
|
||||
cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
|
||||
+ re->name_count * re->name_entry_size;
|
||||
|
||||
while (TRUE)
|
||||
{
|
||||
int rc;
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_END:
|
||||
return 0;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_EXACT:
|
||||
case OP_POSSTAR:
|
||||
case OP_POSPLUS:
|
||||
case OP_POSQUERY:
|
||||
case OP_POSUPTO:
|
||||
case OP_STARI:
|
||||
case OP_MINSTARI:
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_EXACTI:
|
||||
case OP_POSSTARI:
|
||||
case OP_POSPLUSI:
|
||||
case OP_POSQUERYI:
|
||||
case OP_POSUPTOI:
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTPOSQUERY:
|
||||
case OP_NOTPOSUPTO:
|
||||
case OP_NOTSTARI:
|
||||
case OP_NOTMINSTARI:
|
||||
case OP_NOTPLUSI:
|
||||
case OP_NOTMINPLUSI:
|
||||
case OP_NOTQUERYI:
|
||||
case OP_NOTMINQUERYI:
|
||||
case OP_NOTUPTOI:
|
||||
case OP_NOTMINUPTOI:
|
||||
case OP_NOTEXACTI:
|
||||
case OP_NOTPOSSTARI:
|
||||
case OP_NOTPOSPLUSI:
|
||||
case OP_NOTPOSQUERYI:
|
||||
case OP_NOTPOSUPTOI:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEPOSQUERY:
|
||||
case OP_TYPEPOSUPTO:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
|
||||
#endif
|
||||
break;
|
||||
|
||||
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||
case OP_XCLASS:
|
||||
cc += GET(cc, 1);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
cc += PRIV(OP_lengths)[*cc] + cc[1];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = cc[1 + 2*LINK_SIZE];
|
||||
cb.callout_string_offset = 0;
|
||||
cb.callout_string_length = 0;
|
||||
cb.callout_string = NULL;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
cb.pattern_position = GET(cc, 1);
|
||||
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||
cb.callout_number = 0;
|
||||
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
|
||||
cb.callout_string_length =
|
||||
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
|
||||
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
|
||||
rc = callback(&cb, callout_data);
|
||||
if (rc != 0) return rc;
|
||||
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||
break;
|
||||
|
||||
default:
|
||||
cc += PRIV(OP_lengths)[*cc];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_pattern_info.c */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,104 +41,102 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is used in two places:
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. It can also optionally be included in
|
||||
pcre2_compile.c for detailed debugging in error situations. */
|
||||
|
||||
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
|
||||
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
|
||||
compiles. In this case PCRE_INCLUDED is defined.
|
||||
|
||||
(2) It is also compiled separately and linked with pcretest.c, which can be
|
||||
asked to print out a compiled regex for debugging purposes. */
|
||||
/* Tables of operator names. The same 8-bit table is used for all code unit
|
||||
widths, so it must be defined only once. The list itself is defined in
|
||||
pcre2_internal.h, which is #included by pcre2test before this file. */
|
||||
|
||||
#ifndef PCRE_INCLUDED
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#ifndef OP_LISTS_DEFINED
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
#define OP_LISTS_DEFINED
|
||||
#endif
|
||||
|
||||
/* For pcretest program. */
|
||||
#define PRIV(name) name
|
||||
/* The functions and tables herein must all have mode-dependent names. */
|
||||
|
||||
/* We have to include pcre_internal.h because we need the internal info for
|
||||
displaying the results of pcre_study() and we also need to know about the
|
||||
internal macros, structures, and other internal data values; pcretest has
|
||||
"inside information" compared to a program that strictly follows the PCRE API.
|
||||
#define OP_lengths PCRE2_SUFFIX(OP_lengths_)
|
||||
#define get_ucpname PCRE2_SUFFIX(get_ucpname_)
|
||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||
#define print_char PCRE2_SUFFIX(print_char_)
|
||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||
|
||||
Although pcre_internal.h does itself include pcre.h, we explicitly include it
|
||||
here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
|
||||
appropriately for an application, not for building PCRE. */
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
The contents of the table are, however, mode-dependent. */
|
||||
|
||||
#include "pcre.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
/* These are the funtions that are contained within. It doesn't seem worth
|
||||
having a separate .h file just for this. */
|
||||
|
||||
#endif /* PCRE_INCLUDED */
|
||||
|
||||
#ifdef PCRE_INCLUDED
|
||||
static /* Keep the following function as private. */
|
||||
#endif
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
|
||||
#elif defined COMPILE_PCRE16
|
||||
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
|
||||
#elif defined COMPILE_PCRE32
|
||||
void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
|
||||
#endif
|
||||
|
||||
/* Macro that decides whether a character should be output as a literal or in
|
||||
hexadecimal. We don't use isprint() because that can vary from system to system
|
||||
(even without the use of locales) and we want the output always to be the same,
|
||||
for testing purposes. */
|
||||
|
||||
#ifdef EBCDIC
|
||||
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
|
||||
#else
|
||||
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
|
||||
#endif
|
||||
|
||||
/* The table of operator names. */
|
||||
|
||||
static const char *priv_OP_names[] = { OP_NAME_LIST };
|
||||
|
||||
/* This table of operator lengths is not actually used by the working code,
|
||||
but its size is needed for a check that ensures it is the correct size for the
|
||||
number of opcodes (thus catching update omissions). */
|
||||
|
||||
static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
|
||||
static const uint8_t OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print single- or multi-byte character *
|
||||
* Print one character from a string *
|
||||
*************************************************/
|
||||
|
||||
/* In UTF mode the character may occupy more than one code unit.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr pointer to first code unit of the character
|
||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||
|
||||
Returns: number of additional code units used
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
|
||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
{
|
||||
pcre_uint32 c = *ptr;
|
||||
uint32_t c = *ptr;
|
||||
BOOL one_code_unit = !utf;
|
||||
|
||||
#ifndef SUPPORT_UTF
|
||||
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%x}", c);
|
||||
return 0;
|
||||
/* If UTF is supported and requested, check for a valid single code unit. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
one_code_unit = c < 0x80;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
one_code_unit = (c & 0xfc00) != 0xd800;
|
||||
#else
|
||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
if (!utf || (c & 0xc0) != 0xc0)
|
||||
if (one_code_unit)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
return 0;
|
||||
#else
|
||||
|
||||
/* Malformed UTF-8 should occur only if the sanity check has been turned off.
|
||||
Rather than swallow random bytes, just stop if we hit a bad one. Print it with
|
||||
\X instead of \x as an indication. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if ((c & 0xc0) != 0xc0)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
@@ -146,198 +145,181 @@ else
|
||||
c = (c & PRIV(utf8_table3)[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
/* This is a check for malformed UTF-8; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow random bytes, just stop if
|
||||
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid secondary byte */
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
/* The byte is OK */
|
||||
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
#elif defined COMPILE_PCRE16
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
Print it with \X instead of \x as an indication. */
|
||||
|
||||
if (!utf || (c & 0xfc00) != 0xd800)
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This is a check for malformed UTF-16; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow a low surrogate, just stop if
|
||||
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||
|
||||
if ((ptr[1] & 0xfc00) != 0xdc00)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#elif defined COMPILE_PCRE32
|
||||
|
||||
if (!utf || (c & 0xfffff800u) != 0xd800u)
|
||||
{
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
|
||||
else if (c <= 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This is a check for malformed UTF-32; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow a surrogate, just stop if
|
||||
we hit one. Print it with \X instead of \x as an indication. */
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
}
|
||||
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000;
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return 1;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
|
||||
|
||||
#endif /* COMPILE_PCRE[8|16|32] */
|
||||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||
as an indication. */
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print uchar string (regardless of utf) *
|
||||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* These take no account of UTF as they always print each individual code unit.
|
||||
The string is zero-terminated for print_custring(); the length is given for
|
||||
print_custring_bylen().
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr point to the string
|
||||
len length for print_custring_bylen()
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_puchar(FILE *f, PCRE_PUCHAR ptr)
|
||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||
{
|
||||
while (*ptr != '\0')
|
||||
{
|
||||
register pcre_uint32 c = *ptr++;
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||
{
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
uint32_t c = *ptr++;
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||
function should not be called in such configurations, because a pattern that
|
||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||
into the main code, however, we just put one into this function. */
|
||||
|
||||
static const char *
|
||||
get_ucpname(unsigned int ptype, unsigned int pvalue)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
#ifdef SUPPORT_UNICODE
|
||||
int i;
|
||||
for (i = PRIV(utt_size) - 1; i >= 0; i--)
|
||||
{
|
||||
if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break;
|
||||
}
|
||||
return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??";
|
||||
#else
|
||||
/* It gets harder and harder to shut off unwanted compiler warnings. */
|
||||
ptype = ptype * pvalue;
|
||||
return (ptype == pvalue)? "??" : "??";
|
||||
#endif
|
||||
#else /* No UTF support */
|
||||
(void)ptype;
|
||||
(void)pvalue;
|
||||
return "??";
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print Unicode property value *
|
||||
*************************************************/
|
||||
|
||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||
pseudo-property that contains a pointer to a list of case-equivalent
|
||||
characters. This is used only when UCP support is available and UTF mode is
|
||||
selected. It should never occur otherwise, but just in case it does, have
|
||||
something ready to print. */
|
||||
characters.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
|
||||
print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
||||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1],
|
||||
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
|
||||
code[2]), after);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *not = (*code == OP_PROP)? "" : "not ";
|
||||
#ifndef SUPPORT_UCP
|
||||
fprintf(f, "%s%sclist %d%s", before, not, code[2], after);
|
||||
#else
|
||||
const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + code[2];
|
||||
fprintf (f, "%s%sclist", before, not);
|
||||
while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
|
||||
fprintf(f, "%s", after);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled regex *
|
||||
* Print compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* Make this function work for a regex with integers either byte order.
|
||||
However, we assume that what we are passed is a compiled regex. The
|
||||
print_lengths flag controls whether offsets and lengths of items are printed.
|
||||
They can be turned off from pcretest so that automatic tests on bytecode can be
|
||||
written that do not depend on the value of LINK_SIZE. */
|
||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||
printed. Lenths can be turned off from pcre2test so that automatic tests on
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
|
||||
#ifdef PCRE_INCLUDED
|
||||
static /* Keep the following function as private. */
|
||||
#endif
|
||||
#if defined COMPILE_PCRE8
|
||||
void
|
||||
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
|
||||
#elif defined COMPILE_PCRE16
|
||||
void
|
||||
pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
|
||||
#elif defined COMPILE_PCRE32
|
||||
void
|
||||
pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
|
||||
#endif
|
||||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
pcre2_printint(pcre2_code *re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
REAL_PCRE *re = (REAL_PCRE *)external_re;
|
||||
pcre_uchar *codestart, *code;
|
||||
BOOL utf;
|
||||
PCRE2_SPTR codestart, nametable, code;
|
||||
uint32_t nesize = re->name_entry_size;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
|
||||
unsigned int options = re->options;
|
||||
int offset = re->name_table_offset;
|
||||
int count = re->name_count;
|
||||
int size = re->name_entry_size;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
|
||||
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
|
||||
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
|
||||
options = ((options << 24) & 0xff000000) |
|
||||
((options << 8) & 0x00ff0000) |
|
||||
((options >> 8) & 0x0000ff00) |
|
||||
((options >> 24) & 0x000000ff);
|
||||
}
|
||||
|
||||
code = codestart = (pcre_uchar *)re + offset + count * size;
|
||||
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
|
||||
utf = (options & PCRE_UTF8) != 0;
|
||||
nametable = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
|
||||
code = codestart = nametable + re->name_count * re->name_entry_size;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
pcre_uchar *ccode;
|
||||
PCRE2_SPTR ccode;
|
||||
uint32_t c;
|
||||
int i;
|
||||
const char *flag = " ";
|
||||
pcre_uint32 c;
|
||||
unsigned int extra = 0;
|
||||
|
||||
if (print_lengths)
|
||||
@@ -356,13 +338,13 @@ for(;;)
|
||||
|
||||
case OP_TABLE_LENGTH:
|
||||
case OP_TABLE_LENGTH +
|
||||
((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
|
||||
(sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
|
||||
break;
|
||||
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
|
||||
(sizeof(OP_lengths) == OP_TABLE_LENGTH)):
|
||||
return;
|
||||
/* ========================================================================== */
|
||||
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", priv_OP_names[*code]);
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
@@ -394,7 +376,7 @@ for(;;)
|
||||
case OP_SCBRAPOS:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
@@ -411,29 +393,27 @@ for(;;)
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", priv_OP_names[*code]);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CLOSE:
|
||||
fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1));
|
||||
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_DNCREF:
|
||||
{
|
||||
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
|
||||
IMM2_SIZE;
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond ref <", flag);
|
||||
print_puchar(f, entry);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
@@ -448,16 +428,19 @@ for(;;)
|
||||
|
||||
case OP_DNRREF:
|
||||
{
|
||||
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
|
||||
IMM2_SIZE;
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s Cond recurse <", flag);
|
||||
print_puchar(f, entry);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_DEF:
|
||||
fprintf(f, " Cond def");
|
||||
case OP_FALSE:
|
||||
fprintf(f, " Cond false");
|
||||
break;
|
||||
|
||||
case OP_TRUE:
|
||||
fprintf(f, " Cond true");
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
@@ -490,6 +473,7 @@ for(;;)
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " %s ", flag);
|
||||
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
@@ -497,10 +481,10 @@ for(;;)
|
||||
print_prop(f, code + 1, "", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, "%s", priv_OP_names[code[1]]);
|
||||
else fprintf(f, "%s", OP_names[code[1]]);
|
||||
}
|
||||
else extra = print_char(f, code+1, utf);
|
||||
fprintf(f, "%s", priv_OP_names[*code]);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACTI:
|
||||
@@ -531,7 +515,7 @@ for(;;)
|
||||
print_prop(f, code + IMM2_SIZE + 1, " ", " ");
|
||||
extra = 2;
|
||||
}
|
||||
else fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]);
|
||||
else fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
@@ -571,7 +555,7 @@ for(;;)
|
||||
case OP_NOTPOSQUERY:
|
||||
fprintf(f, " %s [^", flag);
|
||||
extra = print_char(f, code + 1, utf);
|
||||
fprintf(f, "]%s", priv_OP_names[*code]);
|
||||
fprintf(f, "]%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACTI:
|
||||
@@ -598,7 +582,7 @@ for(;;)
|
||||
case OP_RECURSE:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", priv_OP_names[*code]);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REFI:
|
||||
@@ -606,7 +590,7 @@ for(;;)
|
||||
/* Fall through */
|
||||
case OP_REF:
|
||||
fprintf(f, " %s \\%d", flag, GET2(code,1));
|
||||
ccode = code + priv_OP_lengths[*code];
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_DNREFI:
|
||||
@@ -614,18 +598,32 @@ for(;;)
|
||||
/* Fall through */
|
||||
case OP_DNREF:
|
||||
{
|
||||
pcre_uchar *entry = (pcre_uchar *)re + offset + (GET2(code, 1) * size) +
|
||||
IMM2_SIZE;
|
||||
PCRE2_SPTR entry = nametable + (GET2(code, 1) * nesize) + IMM2_SIZE;
|
||||
fprintf(f, " %s \\k<", flag);
|
||||
print_puchar(f, entry);
|
||||
print_custring(f, entry);
|
||||
fprintf(f, ">%d", GET2(code, 1 + IMM2_SIZE));
|
||||
}
|
||||
ccode = code + priv_OP_lengths[*code];
|
||||
ccode = code + OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
|
||||
GET(code, 2 + LINK_SIZE));
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1 + 2*LINK_SIZE],
|
||||
GET(code, 1), GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_CALLOUT_STR:
|
||||
c = code[1 + 4*LINK_SIZE];
|
||||
fprintf(f, " %s %c", OP_names[*code], c);
|
||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||
if (c == PRIV(callout_start_delims)[i])
|
||||
{
|
||||
c = PRIV(callout_end_delims)[i];
|
||||
break;
|
||||
}
|
||||
fprintf(f, "%c %d %d %d", c, GET(code, 1 + 3*LINK_SIZE), GET(code, 1),
|
||||
GET(code, 1 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
@@ -641,12 +639,11 @@ for(;;)
|
||||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
int i;
|
||||
unsigned int min, max;
|
||||
BOOL printmap;
|
||||
BOOL invertmap = FALSE;
|
||||
pcre_uint8 *map;
|
||||
pcre_uint8 inverted_map[32];
|
||||
uint8_t *map;
|
||||
uint8_t inverted_map[32];
|
||||
|
||||
fprintf(f, " [");
|
||||
|
||||
@@ -672,7 +669,7 @@ for(;;)
|
||||
|
||||
if (printmap)
|
||||
{
|
||||
map = (pcre_uint8 *)ccode;
|
||||
map = (uint8_t *)ccode;
|
||||
if (invertmap)
|
||||
{
|
||||
for (i = 0; i < 32; i++) inverted_map[i] = ~map[i];
|
||||
@@ -699,14 +696,14 @@ for(;;)
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
ccode += 32 / sizeof(pcre_uchar);
|
||||
ccode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
pcre_uchar ch;
|
||||
PCRE2_UCHAR ch;
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
BOOL not = FALSE;
|
||||
@@ -776,8 +773,8 @@ for(;;)
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
fprintf(f, "%s", priv_OP_names[*ccode]);
|
||||
extra += priv_OP_lengths[*ccode];
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
@@ -789,7 +786,7 @@ for(;;)
|
||||
else fprintf(f, "{%u,%u}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
|
||||
extra += priv_OP_lengths[*ccode];
|
||||
extra += OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
@@ -805,13 +802,13 @@ for(;;)
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
fprintf(f, " %s ", priv_OP_names[*code]);
|
||||
print_puchar(f, code + 2);
|
||||
fprintf(f, " %s ", OP_names[*code]);
|
||||
print_custring_bylen(f, code + 2, code[1]);
|
||||
extra += code[1];
|
||||
break;
|
||||
|
||||
case OP_THEN:
|
||||
fprintf(f, " %s", priv_OP_names[*code]);
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CIRCM:
|
||||
@@ -822,13 +819,13 @@ for(;;)
|
||||
/* Anything else is just an item with no data, but possibly a flag. */
|
||||
|
||||
default:
|
||||
fprintf(f, " %s %s", flag, priv_OP_names[*code]);
|
||||
fprintf(f, " %s %s", flag, OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += priv_OP_lengths[*code] + extra;
|
||||
code += OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_printint.src */
|
||||
/* End of pcre2_printint.c */
|
||||
268
ext/pcre/pcre2lib/pcre2_serialize.c
Normal file
268
ext/pcre/pcre2lib/pcre2_serialize.c
Normal file
@@ -0,0 +1,268 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions for serializing and deserializing
|
||||
a sequence of compiled codes. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
#define SERIALIZED_DATA_MAGIC 0x50523253u
|
||||
|
||||
/* Deserialization is limited to the current PCRE version and
|
||||
character width. */
|
||||
|
||||
#define SERIALIZED_DATA_VERSION \
|
||||
((PCRE2_MAJOR) | ((PCRE2_MINOR) << 16))
|
||||
|
||||
#define SERIALIZED_DATA_CONFIG \
|
||||
(sizeof(PCRE2_UCHAR) | ((sizeof(void*)) << 8) | ((sizeof(PCRE2_SIZE)) << 16))
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Serialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_encode(const pcre2_code **codes, int32_t number_of_codes,
|
||||
uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
uint8_t *bytes;
|
||||
uint8_t *dst_bytes;
|
||||
int32_t i;
|
||||
PCRE2_SIZE total_size;
|
||||
const pcre2_real_code *re;
|
||||
const uint8_t *tables;
|
||||
pcre2_serialized_data *data;
|
||||
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
if (codes == NULL || serialized_bytes == NULL || serialized_size == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
|
||||
/* Compute total size. */
|
||||
total_size = sizeof(pcre2_serialized_data) + tables_length;
|
||||
tables = NULL;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
if (codes[i] == NULL) return PCRE2_ERROR_NULL;
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||
if (tables == NULL)
|
||||
tables = re->tables;
|
||||
else if (tables != re->tables)
|
||||
return PCRE2_ERROR_MIXEDTABLES;
|
||||
total_size += re->blocksize;
|
||||
}
|
||||
|
||||
/* Initialize the byte stream. */
|
||||
bytes = memctl->malloc(total_size + sizeof(pcre2_memctl), memctl->memory_data);
|
||||
if (bytes == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
/* The controller is stored as a hidden parameter. */
|
||||
memcpy(bytes, memctl, sizeof(pcre2_memctl));
|
||||
bytes += sizeof(pcre2_memctl);
|
||||
|
||||
data = (pcre2_serialized_data *)bytes;
|
||||
data->magic = SERIALIZED_DATA_MAGIC;
|
||||
data->version = SERIALIZED_DATA_VERSION;
|
||||
data->config = SERIALIZED_DATA_CONFIG;
|
||||
data->number_of_codes = number_of_codes;
|
||||
|
||||
/* Copy all compiled code data. */
|
||||
dst_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
memcpy(dst_bytes, tables, tables_length);
|
||||
dst_bytes += tables_length;
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
re = (const pcre2_real_code *)(codes[i]);
|
||||
memcpy(dst_bytes, (char *)re, re->blocksize);
|
||||
dst_bytes += re->blocksize;
|
||||
}
|
||||
|
||||
*serialized_bytes = bytes;
|
||||
*serialized_size = total_size;
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Deserialize compiled patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_decode(pcre2_code **codes, int32_t number_of_codes,
|
||||
const uint8_t *bytes, pcre2_general_context *gcontext)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
const pcre2_memctl *memctl = (gcontext != NULL) ?
|
||||
&gcontext->memctl : &PRIV(default_compile_context).memctl;
|
||||
|
||||
const uint8_t *src_bytes;
|
||||
pcre2_real_code *dst_re;
|
||||
uint8_t *tables;
|
||||
int32_t i, j;
|
||||
|
||||
/* Sanity checks. */
|
||||
|
||||
if (data == NULL || codes == NULL) return PCRE2_ERROR_NULL;
|
||||
if (number_of_codes <= 0) return PCRE2_ERROR_BADDATA;
|
||||
if (data->number_of_codes <= 0) return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
if (number_of_codes > data->number_of_codes)
|
||||
number_of_codes = data->number_of_codes;
|
||||
|
||||
src_bytes = bytes + sizeof(pcre2_serialized_data);
|
||||
|
||||
/* Decode tables. The reference count for the tables is stored immediately
|
||||
following them. */
|
||||
|
||||
tables = memctl->malloc(tables_length + sizeof(PCRE2_SIZE), memctl->memory_data);
|
||||
if (tables == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
memcpy(tables, src_bytes, tables_length);
|
||||
*(PCRE2_SIZE *)(tables + tables_length) = number_of_codes;
|
||||
src_bytes += tables_length;
|
||||
|
||||
/* Decode the byte stream. We must not try to read the size from the compiled
|
||||
code block in the stream, because it might be unaligned, which causes errors on
|
||||
hardware such as Sparc-64 that doesn't like unaligned memory accesses. The type
|
||||
of the blocksize field is given its own name to ensure that it is the same here
|
||||
as in the block. */
|
||||
|
||||
for (i = 0; i < number_of_codes; i++)
|
||||
{
|
||||
CODE_BLOCKSIZE_TYPE blocksize;
|
||||
memcpy(&blocksize, src_bytes + offsetof(pcre2_real_code, blocksize),
|
||||
sizeof(CODE_BLOCKSIZE_TYPE));
|
||||
if (blocksize <= sizeof(pcre2_real_code))
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
|
||||
/* The allocator provided by gcontext replaces the original one. */
|
||||
|
||||
dst_re = (pcre2_real_code *)PRIV(memctl_malloc)(blocksize,
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (dst_re == NULL)
|
||||
{
|
||||
memctl->free(tables, memctl->memory_data);
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
memctl->free(codes[j], memctl->memory_data);
|
||||
codes[j] = NULL;
|
||||
}
|
||||
return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
/* The new allocator must be preserved. */
|
||||
|
||||
memcpy(((uint8_t *)dst_re) + sizeof(pcre2_memctl),
|
||||
src_bytes + sizeof(pcre2_memctl), blocksize - sizeof(pcre2_memctl));
|
||||
if (dst_re->magic_number != MAGIC_NUMBER ||
|
||||
dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
|
||||
dst_re->name_count > MAX_NAME_COUNT)
|
||||
{
|
||||
memctl->free(dst_re, memctl->memory_data);
|
||||
return PCRE2_ERROR_BADSERIALIZEDDATA;
|
||||
}
|
||||
|
||||
/* At the moment only one table is supported. */
|
||||
|
||||
dst_re->tables = tables;
|
||||
dst_re->executable_jit = NULL;
|
||||
dst_re->flags |= PCRE2_DEREF_TABLES;
|
||||
|
||||
codes[i] = dst_re;
|
||||
src_bytes += blocksize;
|
||||
}
|
||||
|
||||
return number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get the number of serialized patterns *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN int32_t PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_get_number_of_codes(const uint8_t *bytes)
|
||||
{
|
||||
const pcre2_serialized_data *data = (const pcre2_serialized_data *)bytes;
|
||||
|
||||
if (data == NULL) return PCRE2_ERROR_NULL;
|
||||
if (data->magic != SERIALIZED_DATA_MAGIC) return PCRE2_ERROR_BADMAGIC;
|
||||
if (data->version != SERIALIZED_DATA_VERSION) return PCRE2_ERROR_BADMODE;
|
||||
if (data->config != SERIALIZED_DATA_CONFIG) return PCRE2_ERROR_BADMODE;
|
||||
|
||||
return data->number_of_codes;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free the allocated stream *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_serialize_free(uint8_t *bytes)
|
||||
{
|
||||
if (bytes != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)(bytes - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_serialize.c */
|
||||
201
ext/pcre/pcre2lib/pcre2_string_utils.c
Normal file
201
ext/pcre/pcre2lib/pcre2_string_utils.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains internal functions for comparing and finding the length
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
functions work only on 8-bit data. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp)(PCRE2_SPTR str1, PCRE2_SPTR str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcmp_c8)(PCRE2_SPTR str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
while (*str1 != '\0' || *str2 != '\0')
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
len the length
|
||||
|
||||
Returns: 0, 1, or -1
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
|
||||
{
|
||||
PCRE2_UCHAR c1, c2;
|
||||
for (; len > 0; len--)
|
||||
{
|
||||
c1 = *str1++;
|
||||
c2 = *str2++;
|
||||
if (c1 != c2) return ((c1 > c2) << 1) - 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the length of a PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strlen)(PCRE2_SPTR str)
|
||||
{
|
||||
PCRE2_SIZE c = 0;
|
||||
while (*str++ != 0) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy 8-bit 0-terminated string to PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/* Arguments:
|
||||
str1 buffer to receive the string
|
||||
str2 8-bit string to be copied
|
||||
|
||||
Returns: the number of code units used (excluding trailing zero)
|
||||
*/
|
||||
|
||||
PCRE2_SIZE
|
||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||
{
|
||||
PCRE2_UCHAR *t = str1;
|
||||
while (*str2 != 0) *t++ = *str2++;
|
||||
*t = 0;
|
||||
return t - str1;
|
||||
}
|
||||
|
||||
/* End of pcre2_string_utils.c */
|
||||
File diff suppressed because it is too large
Load Diff
858
ext/pcre/pcre2lib/pcre2_substitute.c
Normal file
858
ext/pcre/pcre2lib/pcre2_substitute.c
Normal file
@@ -0,0 +1,858 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
#define PTR_STACK_SIZE 20
|
||||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find end of substitute text *
|
||||
*************************************************/
|
||||
|
||||
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||
constructions. This requires the identification of unescaped : and }
|
||||
characters. This function scans for such. It must deal with nested ${
|
||||
constructions. The pointer to the text is updated, either to the required end
|
||||
character, or to where an error was detected.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression (for options)
|
||||
ptrptr points to the pointer to the start of the text (updated)
|
||||
ptrend end of the whole string
|
||||
last TRUE if the last expected string (only } recognized)
|
||||
|
||||
Returns: 0 on success
|
||||
negative error code on failure
|
||||
*/
|
||||
|
||||
static int
|
||||
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
|
||||
BOOL last)
|
||||
{
|
||||
int rc = 0;
|
||||
uint32_t nestlevel = 0;
|
||||
BOOL literal = FALSE;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
|
||||
for (; ptr < ptrend; ptr++)
|
||||
{
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
if (nestlevel == 0) goto EXIT;
|
||||
nestlevel--;
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
|
||||
|
||||
else if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
nestlevel++;
|
||||
ptr += 1;
|
||||
}
|
||||
}
|
||||
|
||||
else if (*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int erc;
|
||||
int errorcode;
|
||||
uint32_t ch;
|
||||
|
||||
if (ptr < ptrend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
case CHAR_l:
|
||||
case CHAR_U:
|
||||
case CHAR_u:
|
||||
ptr += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
ptr += 1; /* Must point after \ */
|
||||
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||||
code->overall_options, FALSE, NULL);
|
||||
ptr -= 1; /* Back to last code unit of escape */
|
||||
if (errorcode != 0)
|
||||
{
|
||||
rc = errorcode;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
switch(erc)
|
||||
{
|
||||
case 0: /* Data character */
|
||||
case ESC_E: /* Isolated \E is ignored */
|
||||
break;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
|
||||
|
||||
EXIT:
|
||||
*ptrptr = ptr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match and substitute *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and creates a new
|
||||
string with substitutions. The first 7 arguments are the same as for
|
||||
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
|
||||
|
||||
Arguments:
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block, or is NULL
|
||||
context points a PCRE2 context
|
||||
replacement points to the replacement string
|
||||
rlength length of replacement string
|
||||
buffer where to put the substituted string
|
||||
blength points to length of buffer; updated to length of string
|
||||
|
||||
Returns: >= 0 number of substitutions made
|
||||
< 0 an error code
|
||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||
*/
|
||||
|
||||
/* This macro checks for space in the buffer before copying into it. On
|
||||
overflow, either give an error immediately, or keep on, accumulating the
|
||||
length. */
|
||||
|
||||
#define CHECKMEMCPY(from,length) \
|
||||
if (!overflowed && lengthleft < length) \
|
||||
{ \
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
|
||||
overflowed = TRUE; \
|
||||
extra_needed = length - lengthleft; \
|
||||
} \
|
||||
else if (overflowed) \
|
||||
{ \
|
||||
extra_needed += length; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
|
||||
buff_offset += length; \
|
||||
lengthleft -= length; \
|
||||
}
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||
{
|
||||
int rc;
|
||||
int subs;
|
||||
int forcecase = 0;
|
||||
int forcecasereset = 0;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR repend;
|
||||
PCRE2_SIZE extra_needed = 0;
|
||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
|
||||
/* Partial matching is not valid. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* If no match data block is provided, create one. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context *gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
match_data_created = TRUE;
|
||||
}
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
/* Find lengths of zero-terminated strings and the end of the replacement. */
|
||||
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||
repend = replacement + rlength;
|
||||
|
||||
/* Check UTF replacement string if necessary. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Save the substitute options and remove them from the match options. */
|
||||
|
||||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Copy up to the start offset */
|
||||
|
||||
if (start_offset > length)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
rc = PCRE2_ERROR_BADOFFSET;
|
||||
goto EXIT;
|
||||
}
|
||||
CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
{
|
||||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) options |= PCRE2_NO_UTF_CHECK; /* Only need to check once */
|
||||
#endif
|
||||
|
||||
/* Any error other than no match returns the error code. No match when not
|
||||
doing the special after-empty-match global rematch, or when at the end of the
|
||||
subject, breaks the global loop. Otherwise, advance the starting point by one
|
||||
character, copying it to the output, and try again. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
PCRE2_SIZE save_start;
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
|
||||
if (goptions == 0 || start_offset >= length) break;
|
||||
|
||||
/* Advance by one code point. Then, if CRLF is a valid newline sequence and
|
||||
we have advanced into the middle of it, advance one more code point. In
|
||||
other words, do not start in the middle of CRLF, even if CR and LF on their
|
||||
own are valid newlines. */
|
||||
|
||||
save_start = start_offset++;
|
||||
if (subject[start_offset-1] == CHAR_CR &&
|
||||
code->newline_convention != PCRE2_NEWLINE_CR &&
|
||||
code->newline_convention != PCRE2_NEWLINE_LF &&
|
||||
start_offset < length &&
|
||||
subject[start_offset] == CHAR_LF)
|
||||
start_offset++;
|
||||
|
||||
/* Otherwise, in UTF mode, advance past any secondary code points. */
|
||||
|
||||
else if ((code->overall_options & PCRE2_UTF) != 0)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
|
||||
start_offset++;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
while (start_offset < length &&
|
||||
(subject[start_offset] & 0xfc00) == 0xdc00)
|
||||
start_offset++;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past, reset the special global options, and
|
||||
continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
are not supported. */
|
||||
|
||||
if (ovector[1] < ovector[0])
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
if (subs == INT_MAX)
|
||||
{
|
||||
rc = PCRE2_ERROR_TOOMANYREPLACE;
|
||||
goto EXIT;
|
||||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
must handle nested substrings that are to be reprocessed. */
|
||||
|
||||
ptr = replacement;
|
||||
for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
||||
/* If at the end of a nested substring, pop the stack. */
|
||||
|
||||
if (ptr >= repend)
|
||||
{
|
||||
if (ptrstackptr <= 0) break; /* End of replacement string */
|
||||
repend = ptrstack[--ptrstackptr];
|
||||
ptr = ptrstack[--ptrstackptr];
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle the next character */
|
||||
|
||||
if (literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
goto LOADLITERAL;
|
||||
}
|
||||
|
||||
/* Not in literal mode. */
|
||||
|
||||
if (*ptr == CHAR_DOLLAR_SIGN)
|
||||
{
|
||||
int group, n;
|
||||
uint32_t special = 0;
|
||||
BOOL inparens;
|
||||
BOOL star;
|
||||
PCRE2_SIZE sublength;
|
||||
PCRE2_SPTR text1_start = NULL;
|
||||
PCRE2_SPTR text1_end = NULL;
|
||||
PCRE2_SPTR text2_start = NULL;
|
||||
PCRE2_SPTR text2_end = NULL;
|
||||
PCRE2_UCHAR next;
|
||||
PCRE2_UCHAR name[33];
|
||||
|
||||
if (++ptr >= repend) goto BAD;
|
||||
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
|
||||
|
||||
group = -1;
|
||||
n = 0;
|
||||
inparens = FALSE;
|
||||
star = FALSE;
|
||||
|
||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
inparens = TRUE;
|
||||
}
|
||||
|
||||
if (next == CHAR_ASTERISK)
|
||||
{
|
||||
if (++ptr >= repend) goto BAD;
|
||||
next = *ptr;
|
||||
star = TRUE;
|
||||
}
|
||||
|
||||
if (!star && next >= CHAR_0 && next <= CHAR_9)
|
||||
{
|
||||
group = next - CHAR_0;
|
||||
while (++ptr < repend)
|
||||
{
|
||||
next = *ptr;
|
||||
if (next < CHAR_0 || next > CHAR_9) break;
|
||||
group = group * 10 + next - CHAR_0;
|
||||
|
||||
/* A check for a number greater than the hightest captured group
|
||||
is sufficient here; no need for a separate overflow check. If unknown
|
||||
groups are to be treated as unset, just skip over any remaining
|
||||
digits and carry on. */
|
||||
|
||||
if (group > code->top_bracket)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = PCRE2_ERROR_NOSUBSTRING;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint8_t *ctypes = code->tables + ctypes_offset;
|
||||
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
|
||||
{
|
||||
name[n++] = next;
|
||||
if (n > 32) goto BAD;
|
||||
if (++ptr >= repend) break;
|
||||
next = *ptr;
|
||||
}
|
||||
if (n == 0) goto BAD;
|
||||
name[n] = 0;
|
||||
}
|
||||
|
||||
/* In extended mode we recognize ${name:+set text:unset text} and
|
||||
${name:-default text}. */
|
||||
|
||||
if (inparens)
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
!star && ptr < repend - 2 && next == CHAR_COLON)
|
||||
{
|
||||
special = *(++ptr);
|
||||
if (special != CHAR_PLUS && special != CHAR_MINUS)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSTITUTION;
|
||||
goto PTREXIT;
|
||||
}
|
||||
|
||||
text1_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text1_end = ptr;
|
||||
|
||||
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
|
||||
{
|
||||
text2_start = ++ptr;
|
||||
rc = find_text_end(code, &ptr, repend, TRUE);
|
||||
if (rc != 0) goto PTREXIT;
|
||||
text2_end = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
|
||||
{
|
||||
rc = PCRE2_ERROR_REPMISSINGBRACE;
|
||||
goto PTREXIT;
|
||||
}
|
||||
}
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
/* Have found a syntactically correct group number or name, or *name.
|
||||
Only *MARK is currently recognized. */
|
||||
|
||||
if (star)
|
||||
{
|
||||
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
|
||||
{
|
||||
PCRE2_SPTR mark = pcre2_get_mark(match_data);
|
||||
if (mark != NULL)
|
||||
{
|
||||
PCRE2_SPTR mark_start = mark;
|
||||
while (*mark != 0) mark++;
|
||||
fraglength = mark - mark_start;
|
||||
CHECKMEMCPY(mark_start, fraglength);
|
||||
}
|
||||
}
|
||||
else goto BAD;
|
||||
}
|
||||
|
||||
/* Substitute the contents of a group. We don't use substring_copy
|
||||
functions any more, in order to support case forcing. */
|
||||
|
||||
else
|
||||
{
|
||||
PCRE2_SPTR subptr, subptrend;
|
||||
|
||||
/* Find a number for a named group. In case there are duplicate names,
|
||||
search for the first one that is set. If the name is not found when
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
|
||||
non-existent group. */
|
||||
|
||||
if (group < 0)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
group = code->top_bracket + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rc < 0) goto PTREXIT;
|
||||
for (entry = first; entry <= last; entry += rc)
|
||||
{
|
||||
uint32_t ng = GET2(entry, 0);
|
||||
if (ng < ovector_count)
|
||||
{
|
||||
if (group < 0) group = ng; /* First in ovector */
|
||||
if (ovector[ng*2] != PCRE2_UNSET)
|
||||
{
|
||||
group = ng; /* First that is set */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If group is still negative, it means we did not find a group
|
||||
that is in the ovector. Just set the first group. */
|
||||
|
||||
if (group < 0) group = GET2(first, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* We now have a group that is identified by number. Find the length of
|
||||
the captured string. If a group in a non-special substitution is unset
|
||||
when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
|
||||
|
||||
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||
if (rc < 0)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_NOSUBSTRING &&
|
||||
(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
|
||||
{
|
||||
rc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; /* Non-unset errors */
|
||||
if (special == 0) /* Plain substitution */
|
||||
{
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
|
||||
goto PTREXIT; /* Else error */
|
||||
}
|
||||
}
|
||||
|
||||
/* If special is '+' we have a 'set' and possibly an 'unset' text,
|
||||
both of which are reprocessed when used. If special is '-' we have a
|
||||
default text for when the group is unset; it must be reprocessed. */
|
||||
|
||||
if (special != 0)
|
||||
{
|
||||
if (special == CHAR_MINUS)
|
||||
{
|
||||
if (rc == 0) goto LITERAL_SUBSTITUTE;
|
||||
text2_start = text1_start;
|
||||
text2_end = text1_end;
|
||||
}
|
||||
|
||||
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
|
||||
ptrstack[ptrstackptr++] = ptr;
|
||||
ptrstack[ptrstackptr++] = repend;
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
ptr = text1_start;
|
||||
repend = text1_end;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = text2_start;
|
||||
repend = text2_end;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise we have a literal substitution of a group's contents. */
|
||||
|
||||
LITERAL_SUBSTITUTE:
|
||||
subptr = subject + ovector[group*2];
|
||||
subptrend = subject + ovector[group*2 + 1];
|
||||
|
||||
/* Substitute a literal string, possibly forcing alphabetic case. */
|
||||
|
||||
while (subptr < subptrend)
|
||||
{
|
||||
GETCHARINCTEST(ch, subptr);
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1 << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
forcecase = forcecasereset;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle an escape sequence in extended mode. We can use check_escape()
|
||||
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
|
||||
the case-forcing escapes are not supported in pcre2_compile() so must be
|
||||
recognized here. */
|
||||
|
||||
else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
|
||||
*ptr == CHAR_BACKSLASH)
|
||||
{
|
||||
int errorcode;
|
||||
|
||||
if (ptr < repend - 1) switch (ptr[1])
|
||||
{
|
||||
case CHAR_L:
|
||||
forcecase = forcecasereset = -1;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_l:
|
||||
forcecase = -1;
|
||||
forcecasereset = 0;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_U:
|
||||
forcecase = forcecasereset = 1;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
case CHAR_u:
|
||||
forcecase = 1;
|
||||
forcecasereset = 0;
|
||||
ptr += 2;
|
||||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
ptr++; /* Point after \ */
|
||||
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||||
code->overall_options, FALSE, NULL);
|
||||
if (errorcode != 0) goto BADESCAPE;
|
||||
|
||||
switch(rc)
|
||||
{
|
||||
case ESC_E:
|
||||
forcecase = forcecasereset = 0;
|
||||
continue;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
goto LITERAL;
|
||||
|
||||
default:
|
||||
goto BADESCAPE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle a literal code unit */
|
||||
|
||||
else
|
||||
{
|
||||
LOADLITERAL:
|
||||
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
|
||||
|
||||
LITERAL:
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||
ch = UCD_OTHERCASE(ch);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (((code->tables + cbits_offset +
|
||||
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||
)[ch/8] & (1 << (ch%8))) == 0)
|
||||
ch = (code->tables + fcc_offset)[ch];
|
||||
}
|
||||
forcecase = forcecasereset;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) chlen = PRIV(ord2utf)(ch, temp); else
|
||||
#endif
|
||||
{
|
||||
temp[0] = ch;
|
||||
chlen = 1;
|
||||
}
|
||||
CHECKMEMCPY(temp, chlen);
|
||||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output. Update the start offset to
|
||||
point to the rest of the subject string. If we matched an empty string,
|
||||
do the magic for global matches. */
|
||||
|
||||
start_offset = ovector[1];
|
||||
goptions = (ovector[0] != ovector[1])? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject. */
|
||||
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp , 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
needed. Otherwise, an overflow generates an immediate error return. */
|
||||
|
||||
if (overflowed)
|
||||
{
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
*blength = buff_length + extra_needed;
|
||||
}
|
||||
|
||||
/* After a successful execution, return the number of substitutions and set the
|
||||
length of buffer used, excluding the trailing zero. */
|
||||
|
||||
else
|
||||
{
|
||||
rc = subs;
|
||||
*blength = buff_offset - 1;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (match_data_created) pcre2_match_data_free(match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
NOROOM:
|
||||
rc = PCRE2_ERROR_NOMEMORY;
|
||||
goto EXIT;
|
||||
|
||||
BAD:
|
||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||
goto PTREXIT;
|
||||
|
||||
BADESCAPE:
|
||||
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||
|
||||
PTREXIT:
|
||||
*blength = (PCRE2_SIZE)(ptr - replacement);
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* End of pcre2_substitute.c */
|
||||
542
ext/pcre/pcre2lib/pcre2_substring.c
Normal file
542
ext/pcre/pcre2lib/pcre2_substring.c
Normal file
@@ -0,0 +1,542 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative error code:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from copy_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
|
||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy numbered captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by number.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
sizeptr the size of the buffer, updated to the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: buffer too small
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
buffer[size] = 0;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract named captured string *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new memory. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match_data
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer to the new memory
|
||||
sizeptr where to put the length of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract captured string to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
memory.
|
||||
|
||||
Arguments:
|
||||
match_data points to match data
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the new memory
|
||||
sizeptr where to put the size of the substring
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
int rc;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_UCHAR *yield;
|
||||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
|
||||
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
yield[size] = 0;
|
||||
*stringptr = yield;
|
||||
*sizeptr = size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_free(PCRE2_UCHAR *string)
|
||||
{
|
||||
if (string != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a named substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a named captured substring. If the regex
|
||||
permits duplicate names, the first substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringname the name of the required substring
|
||||
sizeptr where to put the length
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_byname(pcre2_match_data *match_data,
|
||||
PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SPTR first, last, entry;
|
||||
int failrc, entrysize;
|
||||
if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
return PCRE2_ERROR_DFA_UFUNC;
|
||||
entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
|
||||
&first, &last);
|
||||
if (entrysize < 0) return entrysize;
|
||||
failrc = PCRE2_ERROR_UNAVAILABLE;
|
||||
for (entry = first; entry <= last; entry += entrysize)
|
||||
{
|
||||
uint32_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount)
|
||||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Get length of a numbered substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
length to zero.
|
||||
|
||||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringnumber the number of the required substring
|
||||
sizeptr where to put the length, if not NULL
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector is too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
uint32_t stringnumber, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
int count = match_data->rc;
|
||||
if (count == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
|
||||
count = 0;
|
||||
}
|
||||
else if (count < 0) return count; /* Match failed */
|
||||
|
||||
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
{
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
else /* Matched using pcre2_dfa_match() */
|
||||
{
|
||||
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
|
||||
left = match_data->ovector[stringnumber*2];
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Extract all captured strings to new memory *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
handled.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
listptr set to point to the list of pointers
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||
or a match failure code
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
PCRE2_SIZE **lengthsptr)
|
||||
{
|
||||
int i, count, count2;
|
||||
PCRE2_SIZE size;
|
||||
PCRE2_SIZE *lensp;
|
||||
pcre2_memctl *memp;
|
||||
PCRE2_UCHAR **listp;
|
||||
PCRE2_UCHAR *sp;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
if ((count = match_data->rc) < 0) return count; /* Match failed */
|
||||
if (count == 0) count = match_data->oveccount; /* Ovector too small */
|
||||
|
||||
count2 = 2*count;
|
||||
ovector = match_data->ovector;
|
||||
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
|
||||
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
|
||||
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||
|
||||
if (lengthsptr == NULL)
|
||||
{
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
lensp = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
|
||||
*listp++ = sp;
|
||||
if (lensp != NULL) *lensp++ = size;
|
||||
sp += size;
|
||||
*sp++ = 0;
|
||||
}
|
||||
|
||||
*listp = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free memory obtained by substring_list_get *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_list_get()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_free(PCRE2_SPTR *list)
|
||||
{
|
||||
if (list != NULL)
|
||||
{
|
||||
pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
|
||||
memctl->free(memctl, memctl->memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a unique group with the given name. If duplicate names are
|
||||
permitted, and the name is not unique, an error is generated.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
|
||||
otherwise, if firstptr and lastptr are NULL:
|
||||
a group number for a unique substring
|
||||
else PCRE2_ERROR_NOUNIQUESUBSTRING
|
||||
otherwise:
|
||||
the length of each entry, having set firstptr and lastptr
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
|
||||
PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
|
||||
{
|
||||
uint16_t bot = 0;
|
||||
uint16_t top = code->name_count;
|
||||
uint16_t entrysize = code->name_entry_size;
|
||||
PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
|
||||
|
||||
while (top > bot)
|
||||
{
|
||||
uint16_t mid = (top + bot) / 2;
|
||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||
if (c == 0)
|
||||
{
|
||||
PCRE2_SPTR first;
|
||||
PCRE2_SPTR last;
|
||||
PCRE2_SPTR lastentry;
|
||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||
first = last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||
when it is known that names are unique. If there are duplicate names, it is not
|
||||
defined which number is returned.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parenthesis, or a negative number
|
||||
PCRE2_ERROR_NOSUBSTRING if not found
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
PCRE2_SPTR stringname)
|
||||
{
|
||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||
}
|
||||
|
||||
/* End of pcre2_substring.c */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2017 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,47 +38,65 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef PCRE_INCLUDED
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
PCRE2 code modules. The tables are also #included by the pcre2test program,
|
||||
which uses macros to change their names from _pcre2_xxx to xxxx, thereby
|
||||
avoiding name clashes with the library. In this case, PCRE2_PCRE2TEST is
|
||||
defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#endif /* PCRE_INCLUDED */
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
the definition is next to the definition of the opcodes in pcre2_internal.h.
|
||||
This is mode-dependent, so is skipped when this file is included by pcre2test. */
|
||||
|
||||
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
|
||||
#endif
|
||||
|
||||
/* Tables of horizontal and vertical whitespace characters, suitable for
|
||||
adding to classes. */
|
||||
|
||||
const pcre_uint32 PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
const pcre_uint32 PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
const uint32_t PRIV(hspace_list)[] = { HSPACE_LIST };
|
||||
const uint32_t PRIV(vspace_list)[] = { VSPACE_LIST };
|
||||
|
||||
/* These tables are the pairs of delimiters that are valid for callout string
|
||||
arguments. For each starting delimiter there must be a matching ending
|
||||
delimiter, which in fact is different only for bracket-like delimiters. */
|
||||
|
||||
const uint32_t PRIV(callout_start_delims)[] = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_LEFT_CURLY_BRACKET, 0 };
|
||||
|
||||
const uint32_t PRIV(callout_end_delims[]) = {
|
||||
CHAR_GRAVE_ACCENT, CHAR_APOSTROPHE, CHAR_QUOTATION_MARK,
|
||||
CHAR_CIRCUMFLEX_ACCENT, CHAR_PERCENT_SIGN, CHAR_NUMBER_SIGN,
|
||||
CHAR_DOLLAR_SIGN, CHAR_RIGHT_CURLY_BRACKET, 0 };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These tables are required by pcre2test in 16- or 32-bit mode, as well
|
||||
as for the library in 8-bit mode, because pcre2test uses UTF-8 internally for
|
||||
handling wide characters. */
|
||||
|
||||
#if defined PCRE2_PCRE2TEST || \
|
||||
(defined SUPPORT_UNICODE && \
|
||||
defined PCRE2_CODE_UNIT_WIDTH && \
|
||||
PCRE2_CODE_UNIT_WIDTH == 8)
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
|
||||
|| (defined PCRE_INCLUDED && (defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32))
|
||||
|
||||
/* These tables are also required by pcretest in 16- or 32-bit mode. */
|
||||
|
||||
const int PRIV(utf8_table1)[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
@@ -92,19 +111,20 @@ const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const pcre_uint8 PRIV(utf8_table4)[] = {
|
||||
const uint8_t PRIV(utf8_table4)[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE[16|32])*/
|
||||
#endif /* UTF-8 support needed */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const pcre_uint32 PRIV(ucp_gentype)[] = {
|
||||
const uint32_t PRIV(ucp_gentype)[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
@@ -117,18 +137,18 @@ const pcre_uint32 PRIV(ucp_gentype)[] = {
|
||||
|
||||
/* This table encodes the rules for finding the end of an extended grapheme
|
||||
cluster. Every code point has a grapheme break property which is one of the
|
||||
ucp_gbXX values defined in ucp.h. The 2-dimensional table is indexed by the
|
||||
properties of two adjacent code points. The left property selects a word from
|
||||
the table, and the right property selects a bit from that word like this:
|
||||
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
|
||||
the properties of two adjacent code points. The left property selects a word
|
||||
from the table, and the right property selects a bit from that word like this:
|
||||
|
||||
ucp_gbtable[left-property] & (1 << right-property)
|
||||
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
|
||||
|
||||
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||
two code points. The breaking rules are as follows:
|
||||
|
||||
1. Break at the start and end of text (pretty obviously).
|
||||
|
||||
2. Do not break between a CR and LF; otherwise, break before and after
|
||||
2. Do not break between a CR and LF; otherwise, break before and after
|
||||
controls.
|
||||
|
||||
3. Do not break Hangul syllable sequences, the rules for which are:
|
||||
@@ -137,44 +157,62 @@ two code points. The breaking rules are as follows:
|
||||
LV or V may be followed by V or T
|
||||
LVT or T may be followed by T
|
||||
|
||||
4. Do not break before extending characters.
|
||||
4. Do not break before extending characters or zero-width-joiner (ZWJ).
|
||||
|
||||
The next two rules are only for extended grapheme clusters (but that's what we
|
||||
The following rules are only for extended grapheme clusters (but that's what we
|
||||
are implementing).
|
||||
|
||||
5. Do not break before SpacingMarks.
|
||||
|
||||
6. Do not break after Prepend characters.
|
||||
|
||||
7. Otherwise, break everywhere.
|
||||
7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
|
||||
by E_Modifier). Extend characters are allowed before the modifier; this
|
||||
cannot be represented in this table, the code has to deal with it.
|
||||
|
||||
8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or
|
||||
E_Base_GAZ).
|
||||
|
||||
9. Do not break within emoji flag sequences. That is, do not break between
|
||||
regional indicator (RI) symbols if there are an odd number of RI characters
|
||||
before the break point. This table encodes "join RI characters"; the code
|
||||
has to deal with checking for previous adjoining RIs.
|
||||
|
||||
10. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
const pcre_uint32 PRIV(ucp_gbtable[]) = {
|
||||
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 3 Extend */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)|
|
||||
(1<<ucp_gbV)|(1<<ucp_gbT)|(1<<ucp_gbLV)|
|
||||
(1<<ucp_gbLVT)|(1<<ucp_gbOther),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark), /* 5 SpacingMark */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbL)| /* 6 L */
|
||||
(1<<ucp_gbV)|(1<<ucp_gbLV)|(1<<ucp_gbLVT),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 7 V */
|
||||
(1<<ucp_gbT),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 8 T */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbV)| /* 9 LV */
|
||||
(1<<ucp_gbT),
|
||||
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbT), /* 10 LVT */
|
||||
ESZ, /* 3 Extend */
|
||||
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
|
||||
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
|
||||
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
|
||||
(1<<ucp_gbRegionalIndicator)|
|
||||
(1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
|
||||
(1<<ucp_gbE_Base_GAZ)|
|
||||
(1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
|
||||
ESZ, /* 5 SpacingMark */
|
||||
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
|
||||
(1<<ucp_gbLVT),
|
||||
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 7 V */
|
||||
ESZ|(1<<ucp_gbT), /* 8 T */
|
||||
ESZ|(1<<ucp_gbV)|(1<<ucp_gbT), /* 9 LV */
|
||||
ESZ|(1<<ucp_gbT), /* 10 LVT */
|
||||
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
|
||||
(1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark) /* 12 Other */
|
||||
ESZ, /* 12 Other */
|
||||
ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */
|
||||
ESZ, /* 14 E_Modifier */
|
||||
ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
|
||||
ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
|
||||
ESZ /* 12 Glue_After_Zwj */
|
||||
};
|
||||
|
||||
#undef ESZ
|
||||
|
||||
#ifdef SUPPORT_JIT
|
||||
/* This table reverses PRIV(ucp_gentype). We can save the cost
|
||||
of a memory load. */
|
||||
@@ -190,7 +228,7 @@ const int PRIV(ucp_typerange)[] = {
|
||||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
/* The PRIV(utt)[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
@@ -207,6 +245,9 @@ version. Like all other character and string literals that are compared against
|
||||
the regular expression pattern, we must use STR_ macros instead of literal
|
||||
strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
|
||||
#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0"
|
||||
#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
|
||||
#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Any0 STR_A STR_n STR_y "\0"
|
||||
#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
|
||||
#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
|
||||
@@ -216,6 +257,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
|
||||
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0"
|
||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
|
||||
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
|
||||
@@ -254,6 +296,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Han0 STR_H STR_a STR_n "\0"
|
||||
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
|
||||
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
|
||||
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
|
||||
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
|
||||
#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
|
||||
#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
|
||||
@@ -290,6 +333,8 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0"
|
||||
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
@@ -301,16 +346,20 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||
#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Nd0 STR_N STR_d "\0"
|
||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||
#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
|
||||
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||
#define STRING_Nl0 STR_N STR_l "\0"
|
||||
#define STRING_No0 STR_N STR_o "\0"
|
||||
#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
|
||||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
@@ -318,6 +367,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0"
|
||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_P0 STR_P "\0"
|
||||
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||
@@ -342,11 +392,13 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||
#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0"
|
||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
#define STRING_So0 STR_S STR_o "\0"
|
||||
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
|
||||
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
|
||||
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
|
||||
#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
|
||||
@@ -357,6 +409,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
|
||||
#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
|
||||
#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
|
||||
#define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0"
|
||||
#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
|
||||
#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
|
||||
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||
@@ -373,11 +426,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
||||
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
|
||||
#define STRING_Yi0 STR_Y STR_i "\0"
|
||||
#define STRING_Z0 STR_Z "\0"
|
||||
#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
|
||||
#define STRING_Zl0 STR_Z STR_l "\0"
|
||||
#define STRING_Zp0 STR_Z STR_p "\0"
|
||||
#define STRING_Zs0 STR_Z STR_s "\0"
|
||||
|
||||
const char PRIV(utt_names)[] =
|
||||
STRING_Adlam0
|
||||
STRING_Ahom0
|
||||
STRING_Anatolian_Hieroglyphs0
|
||||
STRING_Any0
|
||||
STRING_Arabic0
|
||||
STRING_Armenian0
|
||||
@@ -387,6 +444,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Bassa_Vah0
|
||||
STRING_Batak0
|
||||
STRING_Bengali0
|
||||
STRING_Bhaiksuki0
|
||||
STRING_Bopomofo0
|
||||
STRING_Brahmi0
|
||||
STRING_Braille0
|
||||
@@ -425,6 +483,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Han0
|
||||
STRING_Hangul0
|
||||
STRING_Hanunoo0
|
||||
STRING_Hatran0
|
||||
STRING_Hebrew0
|
||||
STRING_Hiragana0
|
||||
STRING_Imperial_Aramaic0
|
||||
@@ -461,6 +520,8 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Manichaean0
|
||||
STRING_Marchen0
|
||||
STRING_Masaram_Gondi0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
@@ -472,16 +533,20 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Modi0
|
||||
STRING_Mongolian0
|
||||
STRING_Mro0
|
||||
STRING_Multani0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nabataean0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Newa0
|
||||
STRING_Nko0
|
||||
STRING_Nl0
|
||||
STRING_No0
|
||||
STRING_Nushu0
|
||||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Hungarian0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_North_Arabian0
|
||||
STRING_Old_Permic0
|
||||
@@ -489,6 +554,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osage0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
STRING_Pahawh_Hmong0
|
||||
@@ -513,11 +579,13 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Siddham0
|
||||
STRING_SignWriting0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
STRING_So0
|
||||
STRING_Sora_Sompeng0
|
||||
STRING_Soyombo0
|
||||
STRING_Sundanese0
|
||||
STRING_Syloti_Nagri0
|
||||
STRING_Syriac0
|
||||
@@ -528,6 +596,7 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Tai_Viet0
|
||||
STRING_Takri0
|
||||
STRING_Tamil0
|
||||
STRING_Tangut0
|
||||
STRING_Telugu0
|
||||
STRING_Thaana0
|
||||
STRING_Thai0
|
||||
@@ -544,184 +613,201 @@ const char PRIV(utt_names)[] =
|
||||
STRING_Xwd0
|
||||
STRING_Yi0
|
||||
STRING_Z0
|
||||
STRING_Zanabazar_Square0
|
||||
STRING_Zl0
|
||||
STRING_Zp0
|
||||
STRING_Zs0;
|
||||
|
||||
const ucp_type_table PRIV(utt)[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Bassa_Vah },
|
||||
{ 53, PT_SC, ucp_Batak },
|
||||
{ 59, PT_SC, ucp_Bengali },
|
||||
{ 67, PT_SC, ucp_Bopomofo },
|
||||
{ 76, PT_SC, ucp_Brahmi },
|
||||
{ 83, PT_SC, ucp_Braille },
|
||||
{ 91, PT_SC, ucp_Buginese },
|
||||
{ 100, PT_SC, ucp_Buhid },
|
||||
{ 106, PT_GC, ucp_C },
|
||||
{ 108, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 128, PT_SC, ucp_Carian },
|
||||
{ 135, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 154, PT_PC, ucp_Cc },
|
||||
{ 157, PT_PC, ucp_Cf },
|
||||
{ 160, PT_SC, ucp_Chakma },
|
||||
{ 167, PT_SC, ucp_Cham },
|
||||
{ 172, PT_SC, ucp_Cherokee },
|
||||
{ 181, PT_PC, ucp_Cn },
|
||||
{ 184, PT_PC, ucp_Co },
|
||||
{ 187, PT_SC, ucp_Common },
|
||||
{ 194, PT_SC, ucp_Coptic },
|
||||
{ 201, PT_PC, ucp_Cs },
|
||||
{ 204, PT_SC, ucp_Cuneiform },
|
||||
{ 214, PT_SC, ucp_Cypriot },
|
||||
{ 222, PT_SC, ucp_Cyrillic },
|
||||
{ 231, PT_SC, ucp_Deseret },
|
||||
{ 239, PT_SC, ucp_Devanagari },
|
||||
{ 250, PT_SC, ucp_Duployan },
|
||||
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 280, PT_SC, ucp_Elbasan },
|
||||
{ 288, PT_SC, ucp_Ethiopic },
|
||||
{ 297, PT_SC, ucp_Georgian },
|
||||
{ 306, PT_SC, ucp_Glagolitic },
|
||||
{ 317, PT_SC, ucp_Gothic },
|
||||
{ 324, PT_SC, ucp_Grantha },
|
||||
{ 332, PT_SC, ucp_Greek },
|
||||
{ 338, PT_SC, ucp_Gujarati },
|
||||
{ 347, PT_SC, ucp_Gurmukhi },
|
||||
{ 356, PT_SC, ucp_Han },
|
||||
{ 360, PT_SC, ucp_Hangul },
|
||||
{ 367, PT_SC, ucp_Hanunoo },
|
||||
{ 375, PT_SC, ucp_Hebrew },
|
||||
{ 382, PT_SC, ucp_Hiragana },
|
||||
{ 391, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 408, PT_SC, ucp_Inherited },
|
||||
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 440, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 463, PT_SC, ucp_Javanese },
|
||||
{ 472, PT_SC, ucp_Kaithi },
|
||||
{ 479, PT_SC, ucp_Kannada },
|
||||
{ 487, PT_SC, ucp_Katakana },
|
||||
{ 496, PT_SC, ucp_Kayah_Li },
|
||||
{ 505, PT_SC, ucp_Kharoshthi },
|
||||
{ 516, PT_SC, ucp_Khmer },
|
||||
{ 522, PT_SC, ucp_Khojki },
|
||||
{ 529, PT_SC, ucp_Khudawadi },
|
||||
{ 539, PT_GC, ucp_L },
|
||||
{ 541, PT_LAMP, 0 },
|
||||
{ 544, PT_SC, ucp_Lao },
|
||||
{ 548, PT_SC, ucp_Latin },
|
||||
{ 554, PT_SC, ucp_Lepcha },
|
||||
{ 561, PT_SC, ucp_Limbu },
|
||||
{ 567, PT_SC, ucp_Linear_A },
|
||||
{ 576, PT_SC, ucp_Linear_B },
|
||||
{ 585, PT_SC, ucp_Lisu },
|
||||
{ 590, PT_PC, ucp_Ll },
|
||||
{ 593, PT_PC, ucp_Lm },
|
||||
{ 596, PT_PC, ucp_Lo },
|
||||
{ 599, PT_PC, ucp_Lt },
|
||||
{ 602, PT_PC, ucp_Lu },
|
||||
{ 605, PT_SC, ucp_Lycian },
|
||||
{ 612, PT_SC, ucp_Lydian },
|
||||
{ 619, PT_GC, ucp_M },
|
||||
{ 621, PT_SC, ucp_Mahajani },
|
||||
{ 630, PT_SC, ucp_Malayalam },
|
||||
{ 640, PT_SC, ucp_Mandaic },
|
||||
{ 648, PT_SC, ucp_Manichaean },
|
||||
{ 659, PT_PC, ucp_Mc },
|
||||
{ 662, PT_PC, ucp_Me },
|
||||
{ 665, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 678, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 692, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 730, PT_SC, ucp_Miao },
|
||||
{ 735, PT_PC, ucp_Mn },
|
||||
{ 738, PT_SC, ucp_Modi },
|
||||
{ 743, PT_SC, ucp_Mongolian },
|
||||
{ 753, PT_SC, ucp_Mro },
|
||||
{ 757, PT_SC, ucp_Myanmar },
|
||||
{ 765, PT_GC, ucp_N },
|
||||
{ 767, PT_SC, ucp_Nabataean },
|
||||
{ 777, PT_PC, ucp_Nd },
|
||||
{ 780, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 792, PT_SC, ucp_Nko },
|
||||
{ 796, PT_PC, ucp_Nl },
|
||||
{ 799, PT_PC, ucp_No },
|
||||
{ 802, PT_SC, ucp_Ogham },
|
||||
{ 808, PT_SC, ucp_Ol_Chiki },
|
||||
{ 817, PT_SC, ucp_Old_Italic },
|
||||
{ 828, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 846, PT_SC, ucp_Old_Permic },
|
||||
{ 857, PT_SC, ucp_Old_Persian },
|
||||
{ 869, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 887, PT_SC, ucp_Old_Turkic },
|
||||
{ 898, PT_SC, ucp_Oriya },
|
||||
{ 904, PT_SC, ucp_Osmanya },
|
||||
{ 912, PT_GC, ucp_P },
|
||||
{ 914, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 927, PT_SC, ucp_Palmyrene },
|
||||
{ 937, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 949, PT_PC, ucp_Pc },
|
||||
{ 952, PT_PC, ucp_Pd },
|
||||
{ 955, PT_PC, ucp_Pe },
|
||||
{ 958, PT_PC, ucp_Pf },
|
||||
{ 961, PT_SC, ucp_Phags_Pa },
|
||||
{ 970, PT_SC, ucp_Phoenician },
|
||||
{ 981, PT_PC, ucp_Pi },
|
||||
{ 984, PT_PC, ucp_Po },
|
||||
{ 987, PT_PC, ucp_Ps },
|
||||
{ 990, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1006, PT_SC, ucp_Rejang },
|
||||
{ 1013, PT_SC, ucp_Runic },
|
||||
{ 1019, PT_GC, ucp_S },
|
||||
{ 1021, PT_SC, ucp_Samaritan },
|
||||
{ 1031, PT_SC, ucp_Saurashtra },
|
||||
{ 1042, PT_PC, ucp_Sc },
|
||||
{ 1045, PT_SC, ucp_Sharada },
|
||||
{ 1053, PT_SC, ucp_Shavian },
|
||||
{ 1061, PT_SC, ucp_Siddham },
|
||||
{ 1069, PT_SC, ucp_Sinhala },
|
||||
{ 1077, PT_PC, ucp_Sk },
|
||||
{ 1080, PT_PC, ucp_Sm },
|
||||
{ 1083, PT_PC, ucp_So },
|
||||
{ 1086, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1099, PT_SC, ucp_Sundanese },
|
||||
{ 1109, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1122, PT_SC, ucp_Syriac },
|
||||
{ 1129, PT_SC, ucp_Tagalog },
|
||||
{ 1137, PT_SC, ucp_Tagbanwa },
|
||||
{ 1146, PT_SC, ucp_Tai_Le },
|
||||
{ 1153, PT_SC, ucp_Tai_Tham },
|
||||
{ 1162, PT_SC, ucp_Tai_Viet },
|
||||
{ 1171, PT_SC, ucp_Takri },
|
||||
{ 1177, PT_SC, ucp_Tamil },
|
||||
{ 1183, PT_SC, ucp_Telugu },
|
||||
{ 1190, PT_SC, ucp_Thaana },
|
||||
{ 1197, PT_SC, ucp_Thai },
|
||||
{ 1202, PT_SC, ucp_Tibetan },
|
||||
{ 1210, PT_SC, ucp_Tifinagh },
|
||||
{ 1219, PT_SC, ucp_Tirhuta },
|
||||
{ 1227, PT_SC, ucp_Ugaritic },
|
||||
{ 1236, PT_SC, ucp_Vai },
|
||||
{ 1240, PT_SC, ucp_Warang_Citi },
|
||||
{ 1252, PT_ALNUM, 0 },
|
||||
{ 1256, PT_PXSPACE, 0 },
|
||||
{ 1260, PT_SPACE, 0 },
|
||||
{ 1264, PT_UCNC, 0 },
|
||||
{ 1268, PT_WORD, 0 },
|
||||
{ 1272, PT_SC, ucp_Yi },
|
||||
{ 1275, PT_GC, ucp_Z },
|
||||
{ 1277, PT_PC, ucp_Zl },
|
||||
{ 1280, PT_PC, ucp_Zp },
|
||||
{ 1283, PT_PC, ucp_Zs }
|
||||
{ 0, PT_SC, ucp_Adlam },
|
||||
{ 6, PT_SC, ucp_Ahom },
|
||||
{ 11, PT_SC, ucp_Anatolian_Hieroglyphs },
|
||||
{ 33, PT_ANY, 0 },
|
||||
{ 37, PT_SC, ucp_Arabic },
|
||||
{ 44, PT_SC, ucp_Armenian },
|
||||
{ 53, PT_SC, ucp_Avestan },
|
||||
{ 61, PT_SC, ucp_Balinese },
|
||||
{ 70, PT_SC, ucp_Bamum },
|
||||
{ 76, PT_SC, ucp_Bassa_Vah },
|
||||
{ 86, PT_SC, ucp_Batak },
|
||||
{ 92, PT_SC, ucp_Bengali },
|
||||
{ 100, PT_SC, ucp_Bhaiksuki },
|
||||
{ 110, PT_SC, ucp_Bopomofo },
|
||||
{ 119, PT_SC, ucp_Brahmi },
|
||||
{ 126, PT_SC, ucp_Braille },
|
||||
{ 134, PT_SC, ucp_Buginese },
|
||||
{ 143, PT_SC, ucp_Buhid },
|
||||
{ 149, PT_GC, ucp_C },
|
||||
{ 151, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 171, PT_SC, ucp_Carian },
|
||||
{ 178, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 197, PT_PC, ucp_Cc },
|
||||
{ 200, PT_PC, ucp_Cf },
|
||||
{ 203, PT_SC, ucp_Chakma },
|
||||
{ 210, PT_SC, ucp_Cham },
|
||||
{ 215, PT_SC, ucp_Cherokee },
|
||||
{ 224, PT_PC, ucp_Cn },
|
||||
{ 227, PT_PC, ucp_Co },
|
||||
{ 230, PT_SC, ucp_Common },
|
||||
{ 237, PT_SC, ucp_Coptic },
|
||||
{ 244, PT_PC, ucp_Cs },
|
||||
{ 247, PT_SC, ucp_Cuneiform },
|
||||
{ 257, PT_SC, ucp_Cypriot },
|
||||
{ 265, PT_SC, ucp_Cyrillic },
|
||||
{ 274, PT_SC, ucp_Deseret },
|
||||
{ 282, PT_SC, ucp_Devanagari },
|
||||
{ 293, PT_SC, ucp_Duployan },
|
||||
{ 302, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 323, PT_SC, ucp_Elbasan },
|
||||
{ 331, PT_SC, ucp_Ethiopic },
|
||||
{ 340, PT_SC, ucp_Georgian },
|
||||
{ 349, PT_SC, ucp_Glagolitic },
|
||||
{ 360, PT_SC, ucp_Gothic },
|
||||
{ 367, PT_SC, ucp_Grantha },
|
||||
{ 375, PT_SC, ucp_Greek },
|
||||
{ 381, PT_SC, ucp_Gujarati },
|
||||
{ 390, PT_SC, ucp_Gurmukhi },
|
||||
{ 399, PT_SC, ucp_Han },
|
||||
{ 403, PT_SC, ucp_Hangul },
|
||||
{ 410, PT_SC, ucp_Hanunoo },
|
||||
{ 418, PT_SC, ucp_Hatran },
|
||||
{ 425, PT_SC, ucp_Hebrew },
|
||||
{ 432, PT_SC, ucp_Hiragana },
|
||||
{ 441, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 458, PT_SC, ucp_Inherited },
|
||||
{ 468, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 490, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 513, PT_SC, ucp_Javanese },
|
||||
{ 522, PT_SC, ucp_Kaithi },
|
||||
{ 529, PT_SC, ucp_Kannada },
|
||||
{ 537, PT_SC, ucp_Katakana },
|
||||
{ 546, PT_SC, ucp_Kayah_Li },
|
||||
{ 555, PT_SC, ucp_Kharoshthi },
|
||||
{ 566, PT_SC, ucp_Khmer },
|
||||
{ 572, PT_SC, ucp_Khojki },
|
||||
{ 579, PT_SC, ucp_Khudawadi },
|
||||
{ 589, PT_GC, ucp_L },
|
||||
{ 591, PT_LAMP, 0 },
|
||||
{ 594, PT_SC, ucp_Lao },
|
||||
{ 598, PT_SC, ucp_Latin },
|
||||
{ 604, PT_SC, ucp_Lepcha },
|
||||
{ 611, PT_SC, ucp_Limbu },
|
||||
{ 617, PT_SC, ucp_Linear_A },
|
||||
{ 626, PT_SC, ucp_Linear_B },
|
||||
{ 635, PT_SC, ucp_Lisu },
|
||||
{ 640, PT_PC, ucp_Ll },
|
||||
{ 643, PT_PC, ucp_Lm },
|
||||
{ 646, PT_PC, ucp_Lo },
|
||||
{ 649, PT_PC, ucp_Lt },
|
||||
{ 652, PT_PC, ucp_Lu },
|
||||
{ 655, PT_SC, ucp_Lycian },
|
||||
{ 662, PT_SC, ucp_Lydian },
|
||||
{ 669, PT_GC, ucp_M },
|
||||
{ 671, PT_SC, ucp_Mahajani },
|
||||
{ 680, PT_SC, ucp_Malayalam },
|
||||
{ 690, PT_SC, ucp_Mandaic },
|
||||
{ 698, PT_SC, ucp_Manichaean },
|
||||
{ 709, PT_SC, ucp_Marchen },
|
||||
{ 717, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 731, PT_PC, ucp_Mc },
|
||||
{ 734, PT_PC, ucp_Me },
|
||||
{ 737, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 750, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 764, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 781, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 802, PT_SC, ucp_Miao },
|
||||
{ 807, PT_PC, ucp_Mn },
|
||||
{ 810, PT_SC, ucp_Modi },
|
||||
{ 815, PT_SC, ucp_Mongolian },
|
||||
{ 825, PT_SC, ucp_Mro },
|
||||
{ 829, PT_SC, ucp_Multani },
|
||||
{ 837, PT_SC, ucp_Myanmar },
|
||||
{ 845, PT_GC, ucp_N },
|
||||
{ 847, PT_SC, ucp_Nabataean },
|
||||
{ 857, PT_PC, ucp_Nd },
|
||||
{ 860, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 872, PT_SC, ucp_Newa },
|
||||
{ 877, PT_SC, ucp_Nko },
|
||||
{ 881, PT_PC, ucp_Nl },
|
||||
{ 884, PT_PC, ucp_No },
|
||||
{ 887, PT_SC, ucp_Nushu },
|
||||
{ 893, PT_SC, ucp_Ogham },
|
||||
{ 899, PT_SC, ucp_Ol_Chiki },
|
||||
{ 908, PT_SC, ucp_Old_Hungarian },
|
||||
{ 922, PT_SC, ucp_Old_Italic },
|
||||
{ 933, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 951, PT_SC, ucp_Old_Permic },
|
||||
{ 962, PT_SC, ucp_Old_Persian },
|
||||
{ 974, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 992, PT_SC, ucp_Old_Turkic },
|
||||
{ 1003, PT_SC, ucp_Oriya },
|
||||
{ 1009, PT_SC, ucp_Osage },
|
||||
{ 1015, PT_SC, ucp_Osmanya },
|
||||
{ 1023, PT_GC, ucp_P },
|
||||
{ 1025, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1038, PT_SC, ucp_Palmyrene },
|
||||
{ 1048, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1060, PT_PC, ucp_Pc },
|
||||
{ 1063, PT_PC, ucp_Pd },
|
||||
{ 1066, PT_PC, ucp_Pe },
|
||||
{ 1069, PT_PC, ucp_Pf },
|
||||
{ 1072, PT_SC, ucp_Phags_Pa },
|
||||
{ 1081, PT_SC, ucp_Phoenician },
|
||||
{ 1092, PT_PC, ucp_Pi },
|
||||
{ 1095, PT_PC, ucp_Po },
|
||||
{ 1098, PT_PC, ucp_Ps },
|
||||
{ 1101, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1117, PT_SC, ucp_Rejang },
|
||||
{ 1124, PT_SC, ucp_Runic },
|
||||
{ 1130, PT_GC, ucp_S },
|
||||
{ 1132, PT_SC, ucp_Samaritan },
|
||||
{ 1142, PT_SC, ucp_Saurashtra },
|
||||
{ 1153, PT_PC, ucp_Sc },
|
||||
{ 1156, PT_SC, ucp_Sharada },
|
||||
{ 1164, PT_SC, ucp_Shavian },
|
||||
{ 1172, PT_SC, ucp_Siddham },
|
||||
{ 1180, PT_SC, ucp_SignWriting },
|
||||
{ 1192, PT_SC, ucp_Sinhala },
|
||||
{ 1200, PT_PC, ucp_Sk },
|
||||
{ 1203, PT_PC, ucp_Sm },
|
||||
{ 1206, PT_PC, ucp_So },
|
||||
{ 1209, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1222, PT_SC, ucp_Soyombo },
|
||||
{ 1230, PT_SC, ucp_Sundanese },
|
||||
{ 1240, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1253, PT_SC, ucp_Syriac },
|
||||
{ 1260, PT_SC, ucp_Tagalog },
|
||||
{ 1268, PT_SC, ucp_Tagbanwa },
|
||||
{ 1277, PT_SC, ucp_Tai_Le },
|
||||
{ 1284, PT_SC, ucp_Tai_Tham },
|
||||
{ 1293, PT_SC, ucp_Tai_Viet },
|
||||
{ 1302, PT_SC, ucp_Takri },
|
||||
{ 1308, PT_SC, ucp_Tamil },
|
||||
{ 1314, PT_SC, ucp_Tangut },
|
||||
{ 1321, PT_SC, ucp_Telugu },
|
||||
{ 1328, PT_SC, ucp_Thaana },
|
||||
{ 1335, PT_SC, ucp_Thai },
|
||||
{ 1340, PT_SC, ucp_Tibetan },
|
||||
{ 1348, PT_SC, ucp_Tifinagh },
|
||||
{ 1357, PT_SC, ucp_Tirhuta },
|
||||
{ 1365, PT_SC, ucp_Ugaritic },
|
||||
{ 1374, PT_SC, ucp_Vai },
|
||||
{ 1378, PT_SC, ucp_Warang_Citi },
|
||||
{ 1390, PT_ALNUM, 0 },
|
||||
{ 1394, PT_PXSPACE, 0 },
|
||||
{ 1398, PT_SPACE, 0 },
|
||||
{ 1402, PT_UCNC, 0 },
|
||||
{ 1406, PT_WORD, 0 },
|
||||
{ 1410, PT_SC, ucp_Yi },
|
||||
{ 1413, PT_GC, ucp_Z },
|
||||
{ 1415, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1432, PT_PC, ucp_Zl },
|
||||
{ 1435, PT_PC, ucp_Zp },
|
||||
{ 1438, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
/* End of pcre2_tables.c */
|
||||
4046
ext/pcre/pcre2lib/pcre2_ucd.c
Normal file
4046
ext/pcre/pcre2lib/pcre2_ucd.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,9 +1,46 @@
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
|
||||
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the UCD access macros. New values that are added for new releases of Unicode
|
||||
@@ -14,7 +51,7 @@ the same as the values that are generated by the maint/MultiStage2.py script,
|
||||
where the equivalent property descriptive names are listed in vectors.
|
||||
|
||||
ALSO: The specific values of the first two enums are assumed for the table
|
||||
called catposstab in pcre_compile.c. */
|
||||
called catposstab in pcre2_compile.c. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
@@ -63,9 +100,7 @@ enum {
|
||||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
/* These are grapheme break properties. Note that the code for processing them
|
||||
assumes that the values are less than 16. If more values are added that take
|
||||
the number to 16 or more, the code will have to be rewritten. */
|
||||
/* These are grapheme break properties. */
|
||||
|
||||
enum {
|
||||
ucp_gbCR, /* 0 */
|
||||
@@ -80,7 +115,12 @@ enum {
|
||||
ucp_gbLV, /* 9 Hangul syllable type LV */
|
||||
ucp_gbLVT, /* 10 Hangul syllable type LVT */
|
||||
ucp_gbRegionalIndicator, /* 11 */
|
||||
ucp_gbOther /* 12 */
|
||||
ucp_gbOther, /* 12 */
|
||||
ucp_gbE_Base, /* 13 */
|
||||
ucp_gbE_Modifier, /* 14 */
|
||||
ucp_gbE_Base_GAZ, /* 15 */
|
||||
ucp_gbZWJ, /* 16 */
|
||||
ucp_gbGlue_After_Zwj /* 17 */
|
||||
};
|
||||
|
||||
/* These are the script identifications. */
|
||||
@@ -147,13 +187,13 @@ enum {
|
||||
ucp_Tifinagh,
|
||||
ucp_Ugaritic,
|
||||
ucp_Yi,
|
||||
/* New for Unicode 5.0: */
|
||||
/* New for Unicode 5.0 */
|
||||
ucp_Balinese,
|
||||
ucp_Cuneiform,
|
||||
ucp_Nko,
|
||||
ucp_Phags_Pa,
|
||||
ucp_Phoenician,
|
||||
/* New for Unicode 5.1: */
|
||||
/* New for Unicode 5.1 */
|
||||
ucp_Carian,
|
||||
ucp_Cham,
|
||||
ucp_Kayah_Li,
|
||||
@@ -165,7 +205,7 @@ enum {
|
||||
ucp_Saurashtra,
|
||||
ucp_Sundanese,
|
||||
ucp_Vai,
|
||||
/* New for Unicode 5.2: */
|
||||
/* New for Unicode 5.2 */
|
||||
ucp_Avestan,
|
||||
ucp_Bamum,
|
||||
ucp_Egyptian_Hieroglyphs,
|
||||
@@ -181,11 +221,11 @@ enum {
|
||||
ucp_Samaritan,
|
||||
ucp_Tai_Tham,
|
||||
ucp_Tai_Viet,
|
||||
/* New for Unicode 6.0.0: */
|
||||
/* New for Unicode 6.0.0 */
|
||||
ucp_Batak,
|
||||
ucp_Brahmi,
|
||||
ucp_Mandaic,
|
||||
/* New for Unicode 6.1.0: */
|
||||
/* New for Unicode 6.1.0 */
|
||||
ucp_Chakma,
|
||||
ucp_Meroitic_Cursive,
|
||||
ucp_Meroitic_Hieroglyphs,
|
||||
@@ -193,7 +233,7 @@ enum {
|
||||
ucp_Sharada,
|
||||
ucp_Sora_Sompeng,
|
||||
ucp_Takri,
|
||||
/* New for Unicode 7.0.0: */
|
||||
/* New for Unicode 7.0.0 */
|
||||
ucp_Bassa_Vah,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Duployan,
|
||||
@@ -216,9 +256,27 @@ enum {
|
||||
ucp_Pau_Cin_Hau,
|
||||
ucp_Siddham,
|
||||
ucp_Tirhuta,
|
||||
ucp_Warang_Citi
|
||||
ucp_Warang_Citi,
|
||||
/* New for Unicode 8.0.0 */
|
||||
ucp_Ahom,
|
||||
ucp_Anatolian_Hieroglyphs,
|
||||
ucp_Hatran,
|
||||
ucp_Multani,
|
||||
ucp_Old_Hungarian,
|
||||
ucp_SignWriting,
|
||||
/* New for Unicode 10.0.0 (no update since 8.0.0) */
|
||||
ucp_Adlam,
|
||||
ucp_Bhaiksuki,
|
||||
ucp_Marchen,
|
||||
ucp_Newa,
|
||||
ucp_Osage,
|
||||
ucp_Tangut,
|
||||
ucp_Masaram_Gondi,
|
||||
ucp_Nushu,
|
||||
ucp_Soyombo,
|
||||
ucp_Zanabazar_Square
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
||||
/* End of ucp.h */
|
||||
/* End of pcre2_ucp.h */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2013 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,107 +39,131 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF-8 character
|
||||
strings. */
|
||||
|
||||
/* This module contains an internal function for validating UTF character
|
||||
strings. This file is also #included by the pcre2test program, which uses
|
||||
macros to change names from _pcre2_xxx to xxxx, thereby avoiding name clashes
|
||||
with the library. In this case, PCRE2_PCRE2TEST is defined. */
|
||||
|
||||
#ifndef PCRE2_PCRE2TEST /* We're compiling the library */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "pcre2_internal.h"
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Dummy function when Unicode is not supported *
|
||||
*************************************************/
|
||||
|
||||
/* This function should never be called when Unicode is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
(void)string;
|
||||
(void)length;
|
||||
(void)erroroffset;
|
||||
return 0;
|
||||
}
|
||||
#else /* UTF is supported */
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF-8 string *
|
||||
* Validate a UTF string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
check that a supposed UTF-8 string is actually valid. The early check means
|
||||
check that a supposed UTF string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying an
|
||||
invalid string are then undefined.
|
||||
|
||||
Originally, this function checked according to RFC 2279, allowing for values in
|
||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked.
|
||||
|
||||
From release 8.13 more information about the details of the error are passed
|
||||
back in the returned value:
|
||||
|
||||
PCRE_UTF8_ERR0 No error
|
||||
PCRE_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE_UTF8_ERR14 3-byte character with value 0xd000-0xdfff is not permitted
|
||||
PCRE_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
PCRE_UTF8_ERR22 Unused (was non-character)
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
length length of string
|
||||
errp pointer to an error position offset variable
|
||||
|
||||
Returns: = 0 if the string is a valid UTF-8 string
|
||||
> 0 otherwise, setting the offset of the bad character
|
||||
Returns: == 0 if the string is a valid UTF string
|
||||
!= 0 otherwise, setting the offset of the bad character
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
register PCRE_PUCHAR p;
|
||||
PCRE2_SPTR p;
|
||||
uint32_t c;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = (int)(p - string);
|
||||
}
|
||||
/* ----------------- Check a UTF-8 string ----------------- */
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
||||
/* Originally, this function checked according to RFC 2279, allowing for values
|
||||
in the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were
|
||||
in the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd000 to 0xdfff is excluded. However, the format of 5-byte and 6-byte
|
||||
characters is still checked. Error returns are as follows:
|
||||
|
||||
PCRE2_ERROR_UTF8_ERR1 Missing 1 byte at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR2 Missing 2 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR3 Missing 3 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR4 Missing 4 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR5 Missing 5 bytes at the end of the string
|
||||
PCRE2_ERROR_UTF8_ERR6 2nd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR7 3rd-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR8 4th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR9 5th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR10 6th-byte's two top bits are not 0x80
|
||||
PCRE2_ERROR_UTF8_ERR11 5-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR12 6-byte character is not permitted by RFC 3629
|
||||
PCRE2_ERROR_UTF8_ERR13 4-byte character with value > 0x10ffff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR14 3-byte character with value 0xd800-0xdfff is not permitted
|
||||
PCRE2_ERROR_UTF8_ERR15 Overlong 2-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR16 Overlong 3-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR17 Overlong 4-byte sequence
|
||||
PCRE2_ERROR_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
|
||||
PCRE2_ERROR_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
|
||||
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
register pcre_uchar ab, c, d;
|
||||
uint32_t ab, d;
|
||||
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if (c < 128) continue; /* ASCII character */
|
||||
|
||||
if (c < 0xc0) /* Isolated 10xx xxxx byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE_UTF8_ERR20;
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR20;
|
||||
}
|
||||
|
||||
if (c >= 0xfe) /* Invalid 0xfe or 0xff bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
return PCRE_UTF8_ERR21;
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
return PCRE2_ERROR_UTF8_ERR21;
|
||||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab)
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
|
||||
if (length < ab) /* Missing bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string); /* Missing bytes */
|
||||
return ab - length; /* Codes ERR1 to ERR5 */
|
||||
*erroroffset = (PCRE2_SIZE)(p - string);
|
||||
switch(ab - length)
|
||||
{
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
}
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
||||
@@ -147,7 +172,7 @@ for (p = string; length-- > 0; p++)
|
||||
if (((d = *(++p)) & 0xc0) != 0x80)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE_UTF8_ERR6;
|
||||
return PCRE2_ERROR_UTF8_ERR6;
|
||||
}
|
||||
|
||||
/* For each length, check that the remaining bytes start with the 0x80 bit
|
||||
@@ -162,7 +187,7 @@ for (p = string; length-- > 0; p++)
|
||||
case 1: if ((c & 0x3e) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 1;
|
||||
return PCRE_UTF8_ERR15;
|
||||
return PCRE2_ERROR_UTF8_ERR15;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -174,17 +199,17 @@ for (p = string; length-- > 0; p++)
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if (c == 0xe0 && (d & 0x20) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR16;
|
||||
return PCRE2_ERROR_UTF8_ERR16;
|
||||
}
|
||||
if (c == 0xed && d >= 0xa0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR14;
|
||||
return PCRE2_ERROR_UTF8_ERR14;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -196,22 +221,22 @@ for (p = string; length-- > 0; p++)
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if (c == 0xf0 && (d & 0x30) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR17;
|
||||
return PCRE2_ERROR_UTF8_ERR17;
|
||||
}
|
||||
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR13;
|
||||
return PCRE2_ERROR_UTF8_ERR13;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -227,22 +252,22 @@ for (p = string; length-- > 0; p++)
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR9;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if (c == 0xf8 && (d & 0x38) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR18;
|
||||
return PCRE2_ERROR_UTF8_ERR18;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -253,27 +278,27 @@ for (p = string; length-- > 0; p++)
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 2;
|
||||
return PCRE_UTF8_ERR7;
|
||||
return PCRE2_ERROR_UTF8_ERR7;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 3;
|
||||
return PCRE_UTF8_ERR8;
|
||||
return PCRE2_ERROR_UTF8_ERR8;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 4;
|
||||
return PCRE_UTF8_ERR9;
|
||||
return PCRE2_ERROR_UTF8_ERR9;
|
||||
}
|
||||
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE_UTF8_ERR10;
|
||||
return PCRE2_ERROR_UTF8_ERR10;
|
||||
}
|
||||
if (c == 0xfc && (d & 0x3c) == 0)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - 5;
|
||||
return PCRE_UTF8_ERR19;
|
||||
return PCRE2_ERROR_UTF8_ERR19;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -285,17 +310,89 @@ for (p = string; length-- > 0; p++)
|
||||
if (ab > 3)
|
||||
{
|
||||
*erroroffset = (int)(p - string) - ab;
|
||||
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
|
||||
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
#else /* Not SUPPORT_UTF */
|
||||
(void)(string); /* Keep picky compilers happy */
|
||||
(void)(length);
|
||||
(void)(erroroffset);
|
||||
#endif
|
||||
|
||||
return PCRE_UTF8_ERR0; /* This indicates success */
|
||||
/* ----------------- Check a UTF-16 string ----------------- */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* There's not so much work, nor so many errors, for UTF-16.
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||
PCRE2_ERROR_UTF16_ERR2 Invalid low surrogate
|
||||
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; p++)
|
||||
{
|
||||
c = *p;
|
||||
length--;
|
||||
|
||||
if ((c & 0xf800) != 0xd800)
|
||||
{
|
||||
/* Normal UTF-16 code point. Neither high nor low surrogate. */
|
||||
}
|
||||
else if ((c & 0x0400) == 0)
|
||||
{
|
||||
/* High surrogate. Must be a followed by a low surrogate. */
|
||||
if (length == 0)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR1;
|
||||
}
|
||||
p++;
|
||||
length--;
|
||||
if ((*p & 0xfc00) != 0xdc00)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Isolated low surrogate. Always an error. */
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF16_ERR3;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
|
||||
|
||||
/* ----------------- Check a UTF-32 string ----------------- */
|
||||
|
||||
#else
|
||||
|
||||
/* There is very little to do for a UTF-32 string.
|
||||
PCRE2_ERROR_UTF32_ERR1 Surrogate character
|
||||
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
|
||||
*/
|
||||
|
||||
for (p = string; length > 0; length--, p++)
|
||||
{
|
||||
c = *p;
|
||||
if ((c & 0xfffff800u) != 0xd800u)
|
||||
{
|
||||
/* Normal UTF-32 code point. Neither high nor low surrogate. */
|
||||
if (c > 0x10ffffu)
|
||||
{
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF32_ERR2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A surrogate */
|
||||
*erroroffset = p - string;
|
||||
return PCRE2_ERROR_UTF32_ERR1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* End of pcre_valid_utf8.c */
|
||||
/* End of pcre2_valid_utf.c */
|
||||
@@ -6,7 +6,8 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2013 University of Cambridge
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,46 +38,46 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class. It is used by both pcre_exec() and pcre_def_exec(). */
|
||||
class. It is used by pcre2_auto_possessify() and by both pcre2_match() and
|
||||
pcre2_def_match(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255 and/or Unicode properties.
|
||||
might contain codepoints above 255 and/or Unicode properties.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
data points to the flag code unit of the XCLASS data
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
|
||||
PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
|
||||
{
|
||||
pcre_uchar t;
|
||||
PCRE2_UCHAR t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
(void)utf;
|
||||
#ifdef COMPILE_PCRE8
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
|
||||
utf = TRUE;
|
||||
#endif
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
/* Code points < 256 are matched against a bitmap, if one is present. If not,
|
||||
we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
@@ -84,37 +85,37 @@ if (c < 256)
|
||||
if ((*data & XCL_HASPROP) == 0)
|
||||
{
|
||||
if ((*data & XCL_MAP) == 0) return negated;
|
||||
return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
|
||||
return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
|
||||
}
|
||||
if ((*data & XCL_MAP) != 0 &&
|
||||
(((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
|
||||
(((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
pcre_uint32 x, y;
|
||||
uint32_t x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
x = *data++;
|
||||
x = *data++;
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
GETCHARINC(x, data); /* macro generates multiple statements */
|
||||
@@ -129,7 +130,7 @@ while ((t = *data++) != XCL_END)
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
#ifdef SUPPORT_UNICODE
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
@@ -259,10 +260,12 @@ while ((t = *data++) != XCL_END)
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcre_xclass.c */
|
||||
/* End of pcre2_xclass.c */
|
||||
@@ -124,10 +124,10 @@
|
||||
/* SLJIT_REWRITABLE_JUMP is 0x1000. */
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
# define PATCH_MB 0x4
|
||||
# define PATCH_MW 0x8
|
||||
# define PATCH_MB 0x4
|
||||
# define PATCH_MW 0x8
|
||||
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||
# define PATCH_MD 0x10
|
||||
# define PATCH_MD 0x10
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -1561,6 +1561,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
|
||||
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
|
||||
if (src != SLJIT_IMM) {
|
||||
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src));
|
||||
CHECK_ARGUMENT(srcw == 0);
|
||||
}
|
||||
|
||||
if ((type & 0xff) <= SLJIT_NOT_ZERO)
|
||||
@@ -1586,6 +1587,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
|
||||
|
||||
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
|
||||
{
|
||||
/* Any offset is allowed. */
|
||||
SLJIT_UNUSED_ARG(offset);
|
||||
|
||||
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
|
||||
@@ -1225,7 +1225,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
|
||||
sljit_s32 dst_reg,
|
||||
sljit_s32 src, sljit_sw srcw);
|
||||
|
||||
/* Copies the base address of SLJIT_SP + offset to dst.
|
||||
/* Copies the base address of SLJIT_SP + offset to dst. The offset can be
|
||||
anything to negate the effect of relative addressing. For example if an
|
||||
array of sljit_sw values is stored on the stack from offset 0x40, and R0
|
||||
contains the offset of an array item plus 0x120, this item can be
|
||||
overwritten by two SLJIT instructions:
|
||||
|
||||
sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120);
|
||||
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5);
|
||||
|
||||
Flags: - (may destroy flags) */
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
|
||||
|
||||
@@ -498,12 +498,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
|
||||
{
|
||||
sljit_sw fir = 0;
|
||||
|
||||
switch (feature_type) {
|
||||
case SLJIT_HAS_FPU:
|
||||
#ifdef SLJIT_IS_FPU_AVAILABLE
|
||||
return SLJIT_IS_FPU_AVAILABLE;
|
||||
#elif defined(__GNUC__)
|
||||
sljit_sw fir;
|
||||
asm ("cfc1 %0, $0" : "=r"(fir));
|
||||
return (fir >> 22) & 0x1;
|
||||
#else
|
||||
@@ -517,7 +518,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
|
||||
#endif
|
||||
|
||||
default:
|
||||
return 0;
|
||||
return fir;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -760,7 +760,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
|
||||
(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
|
||||
#endif
|
||||
|
||||
static const sljit_ins data_transfer_insts[64 + 8] = {
|
||||
static const sljit_ins data_transfer_insts[64 + 16] = {
|
||||
|
||||
/* -------- Unsigned -------- */
|
||||
|
||||
@@ -869,11 +869,20 @@ static const sljit_ins data_transfer_insts[64 + 8] = {
|
||||
/* d n x s */ HI(31) | LO(727) /* stfdx */,
|
||||
/* d n x l */ HI(31) | LO(599) /* lfdx */,
|
||||
|
||||
/* d w i s */ HI(55) /* stfdu */,
|
||||
/* d w i l */ HI(51) /* lfdu */,
|
||||
/* d w x s */ HI(31) | LO(759) /* stfdux */,
|
||||
/* d w x l */ HI(31) | LO(631) /* lfdux */,
|
||||
|
||||
/* s n i s */ HI(52) /* stfs */,
|
||||
/* s n i l */ HI(48) /* lfs */,
|
||||
/* s n x s */ HI(31) | LO(663) /* stfsx */,
|
||||
/* s n x l */ HI(31) | LO(535) /* lfsx */,
|
||||
|
||||
/* s w i s */ HI(53) /* stfsu */,
|
||||
/* s w i l */ HI(49) /* lfsu */,
|
||||
/* s w x s */ HI(31) | LO(695) /* stfsux */,
|
||||
/* s w x l */ HI(31) | LO(567) /* lfsux */,
|
||||
};
|
||||
|
||||
#undef ARCH_32_64
|
||||
@@ -1753,7 +1762,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
|
||||
/* Floating point operators */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
|
||||
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 5))
|
||||
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
|
||||
|
||||
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
|
||||
@@ -2282,16 +2291,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
|
||||
FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
|
||||
|
||||
invert = 0;
|
||||
cr_bit = 0;
|
||||
|
||||
switch (type & 0xff) {
|
||||
case SLJIT_LESS:
|
||||
case SLJIT_SIG_LESS:
|
||||
cr_bit = 0;
|
||||
break;
|
||||
|
||||
case SLJIT_GREATER_EQUAL:
|
||||
case SLJIT_SIG_GREATER_EQUAL:
|
||||
cr_bit = 0;
|
||||
invert = 1;
|
||||
break;
|
||||
|
||||
421
ext/pcre/pcre2lib/sljit/sljitProtExecAllocator.c
Normal file
421
ext/pcre/pcre2lib/sljit/sljitProtExecAllocator.c
Normal file
@@ -0,0 +1,421 @@
|
||||
/*
|
||||
* Stack-less Just-In-Time compiler
|
||||
*
|
||||
* Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification, are
|
||||
* permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
* conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
* of conditions and the following disclaimer in the documentation and/or other materials
|
||||
* provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
||||
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
||||
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This file contains a simple executable memory allocator
|
||||
|
||||
It is assumed, that executable code blocks are usually medium (or sometimes
|
||||
large) memory blocks, and the allocator is not too frequently called (less
|
||||
optimized than other allocators). Thus, using it as a generic allocator is
|
||||
not suggested.
|
||||
|
||||
How does it work:
|
||||
Memory is allocated in continuous memory areas called chunks by alloc_chunk()
|
||||
Chunk format:
|
||||
[ block ][ block ] ... [ block ][ block terminator ]
|
||||
|
||||
All blocks and the block terminator is started with block_header. The block
|
||||
header contains the size of the previous and the next block. These sizes
|
||||
can also contain special values.
|
||||
Block size:
|
||||
0 - The block is a free_block, with a different size member.
|
||||
1 - The block is a block terminator.
|
||||
n - The block is used at the moment, and the value contains its size.
|
||||
Previous block size:
|
||||
0 - This is the first block of the memory chunk.
|
||||
n - The size of the previous block.
|
||||
|
||||
Using these size values we can go forward or backward on the block chain.
|
||||
The unused blocks are stored in a chain list pointed by free_blocks. This
|
||||
list is useful if we need to find a suitable memory area when the allocator
|
||||
is called.
|
||||
|
||||
When a block is freed, the new free block is connected to its adjacent free
|
||||
blocks if possible.
|
||||
|
||||
[ free block ][ used block ][ free block ]
|
||||
and "used block" is freed, the three blocks are connected together:
|
||||
[ one big free block ]
|
||||
*/
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* System (OS) functions */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
/* 64 KByte. */
|
||||
#define CHUNK_SIZE 0x10000
|
||||
|
||||
struct chunk_header {
|
||||
void *executable;
|
||||
int fd;
|
||||
};
|
||||
|
||||
/*
|
||||
alloc_chunk / free_chunk :
|
||||
* allocate executable system memory chunks
|
||||
* the size is always divisible by CHUNK_SIZE
|
||||
allocator_grab_lock / allocator_release_lock :
|
||||
* make the allocator thread safe
|
||||
* can be empty if the OS (or the application) does not support threading
|
||||
* only the allocator requires this lock, sljit is fully thread safe
|
||||
as it only uses local variables
|
||||
*/
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
#ifndef O_NOATIME
|
||||
#define O_NOATIME 0
|
||||
#endif
|
||||
|
||||
#ifdef __O_TMPFILE
|
||||
#ifndef O_TMPFILE
|
||||
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int mkostemp(char *template, int flags);
|
||||
char *secure_getenv(const char *name);
|
||||
|
||||
static SLJIT_INLINE int create_tempfile(void)
|
||||
{
|
||||
int fd;
|
||||
|
||||
char tmp_name[256];
|
||||
size_t tmp_name_len;
|
||||
char *dir;
|
||||
size_t len;
|
||||
|
||||
#ifdef P_tmpdir
|
||||
len = (P_tmpdir != NULL) ? strlen(P_tmpdir) : 0;
|
||||
|
||||
if (len > 0 && len < sizeof(tmp_name)) {
|
||||
strcpy(tmp_name, P_tmpdir);
|
||||
tmp_name_len = len;
|
||||
}
|
||||
else {
|
||||
strcpy(tmp_name, "/tmp");
|
||||
tmp_name_len = 4;
|
||||
}
|
||||
#else
|
||||
strcpy(tmp_name, "/tmp");
|
||||
tmp_name_len = 4;
|
||||
#endif
|
||||
|
||||
dir = secure_getenv("TMPDIR");
|
||||
if (dir) {
|
||||
len = strlen(dir);
|
||||
if (len > 0 && len < sizeof(tmp_name)) {
|
||||
strcpy(tmp_name, dir);
|
||||
tmp_name_len = len;
|
||||
}
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(tmp_name_len > 0 && tmp_name_len < sizeof(tmp_name));
|
||||
|
||||
while (tmp_name_len > 0 && tmp_name[tmp_name_len - 1] == '/') {
|
||||
tmp_name_len--;
|
||||
tmp_name[tmp_name_len] = '\0';
|
||||
}
|
||||
|
||||
#ifdef O_TMPFILE
|
||||
fd = open(tmp_name, O_TMPFILE | O_EXCL | O_RDWR | O_NOATIME | O_CLOEXEC, S_IRUSR | S_IWUSR);
|
||||
if (fd != -1)
|
||||
return fd;
|
||||
#endif
|
||||
|
||||
if (tmp_name_len + 7 >= sizeof(tmp_name))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
strcpy(tmp_name + tmp_name_len, "/XXXXXX");
|
||||
fd = mkostemp(tmp_name, O_CLOEXEC | O_NOATIME);
|
||||
|
||||
if (fd == -1)
|
||||
return fd;
|
||||
|
||||
if (unlink(tmp_name)) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
|
||||
{
|
||||
struct chunk_header *retval;
|
||||
int fd;
|
||||
|
||||
fd = create_tempfile();
|
||||
if (fd == -1)
|
||||
return NULL;
|
||||
|
||||
if (ftruncate(fd, size)) {
|
||||
close(fd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
retval = (struct chunk_header *)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
|
||||
if (retval == MAP_FAILED) {
|
||||
close(fd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
retval->executable = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
|
||||
|
||||
if (retval->executable == MAP_FAILED) {
|
||||
munmap(retval, size);
|
||||
close(fd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
retval->fd = fd;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
|
||||
{
|
||||
struct chunk_header *header = ((struct chunk_header *)chunk) - 1;
|
||||
|
||||
int fd = header->fd;
|
||||
munmap(header->executable, size);
|
||||
munmap(header, size);
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
/* Common functions */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#define CHUNK_MASK (~(CHUNK_SIZE - 1))
|
||||
|
||||
struct block_header {
|
||||
sljit_uw size;
|
||||
sljit_uw prev_size;
|
||||
sljit_sw executable_offset;
|
||||
};
|
||||
|
||||
struct free_block {
|
||||
struct block_header header;
|
||||
struct free_block *next;
|
||||
struct free_block *prev;
|
||||
sljit_uw size;
|
||||
};
|
||||
|
||||
#define AS_BLOCK_HEADER(base, offset) \
|
||||
((struct block_header*)(((sljit_u8*)base) + offset))
|
||||
#define AS_FREE_BLOCK(base, offset) \
|
||||
((struct free_block*)(((sljit_u8*)base) + offset))
|
||||
#define MEM_START(base) ((void*)((base) + 1))
|
||||
#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7)
|
||||
|
||||
static struct free_block* free_blocks;
|
||||
static sljit_uw allocated_size;
|
||||
static sljit_uw total_size;
|
||||
|
||||
static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size)
|
||||
{
|
||||
free_block->header.size = 0;
|
||||
free_block->size = size;
|
||||
|
||||
free_block->next = free_blocks;
|
||||
free_block->prev = NULL;
|
||||
if (free_blocks)
|
||||
free_blocks->prev = free_block;
|
||||
free_blocks = free_block;
|
||||
}
|
||||
|
||||
static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
|
||||
{
|
||||
if (free_block->next)
|
||||
free_block->next->prev = free_block->prev;
|
||||
|
||||
if (free_block->prev)
|
||||
free_block->prev->next = free_block->next;
|
||||
else {
|
||||
SLJIT_ASSERT(free_blocks == free_block);
|
||||
free_blocks = free_block->next;
|
||||
}
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
|
||||
{
|
||||
struct chunk_header *chunk_header;
|
||||
struct block_header *header;
|
||||
struct block_header *next_header;
|
||||
struct free_block *free_block;
|
||||
sljit_uw chunk_size;
|
||||
sljit_sw executable_offset;
|
||||
|
||||
allocator_grab_lock();
|
||||
if (size < (64 - sizeof(struct block_header)))
|
||||
size = (64 - sizeof(struct block_header));
|
||||
size = ALIGN_SIZE(size);
|
||||
|
||||
free_block = free_blocks;
|
||||
while (free_block) {
|
||||
if (free_block->size >= size) {
|
||||
chunk_size = free_block->size;
|
||||
if (chunk_size > size + 64) {
|
||||
/* We just cut a block from the end of the free block. */
|
||||
chunk_size -= size;
|
||||
free_block->size = chunk_size;
|
||||
header = AS_BLOCK_HEADER(free_block, chunk_size);
|
||||
header->prev_size = chunk_size;
|
||||
header->executable_offset = free_block->header.executable_offset;
|
||||
AS_BLOCK_HEADER(header, size)->prev_size = size;
|
||||
}
|
||||
else {
|
||||
sljit_remove_free_block(free_block);
|
||||
header = (struct block_header*)free_block;
|
||||
size = chunk_size;
|
||||
}
|
||||
allocated_size += size;
|
||||
header->size = size;
|
||||
allocator_release_lock();
|
||||
return MEM_START(header);
|
||||
}
|
||||
free_block = free_block->next;
|
||||
}
|
||||
|
||||
chunk_size = sizeof(struct chunk_header) + sizeof(struct block_header);
|
||||
chunk_size = (chunk_size + size + CHUNK_SIZE - 1) & CHUNK_MASK;
|
||||
|
||||
chunk_header = alloc_chunk(chunk_size);
|
||||
if (!chunk_header) {
|
||||
allocator_release_lock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
executable_offset = (sljit_sw)((sljit_u8*)chunk_header->executable - (sljit_u8*)chunk_header);
|
||||
|
||||
chunk_size -= sizeof(struct chunk_header) + sizeof(struct block_header);
|
||||
total_size += chunk_size;
|
||||
|
||||
header = (struct block_header *)(chunk_header + 1);
|
||||
|
||||
header->prev_size = 0;
|
||||
header->executable_offset = executable_offset;
|
||||
if (chunk_size > size + 64) {
|
||||
/* Cut the allocated space into a free and a used block. */
|
||||
allocated_size += size;
|
||||
header->size = size;
|
||||
chunk_size -= size;
|
||||
|
||||
free_block = AS_FREE_BLOCK(header, size);
|
||||
free_block->header.prev_size = size;
|
||||
free_block->header.executable_offset = executable_offset;
|
||||
sljit_insert_free_block(free_block, chunk_size);
|
||||
next_header = AS_BLOCK_HEADER(free_block, chunk_size);
|
||||
}
|
||||
else {
|
||||
/* All space belongs to this allocation. */
|
||||
allocated_size += chunk_size;
|
||||
header->size = chunk_size;
|
||||
next_header = AS_BLOCK_HEADER(header, chunk_size);
|
||||
}
|
||||
next_header->size = 1;
|
||||
next_header->prev_size = chunk_size;
|
||||
next_header->executable_offset = executable_offset;
|
||||
allocator_release_lock();
|
||||
return MEM_START(header);
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
|
||||
{
|
||||
struct block_header *header;
|
||||
struct free_block* free_block;
|
||||
|
||||
allocator_grab_lock();
|
||||
header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
|
||||
header = AS_BLOCK_HEADER(header, -header->executable_offset);
|
||||
allocated_size -= header->size;
|
||||
|
||||
/* Connecting free blocks together if possible. */
|
||||
|
||||
/* If header->prev_size == 0, free_block will equal to header.
|
||||
In this case, free_block->header.size will be > 0. */
|
||||
free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size);
|
||||
if (SLJIT_UNLIKELY(!free_block->header.size)) {
|
||||
free_block->size += header->size;
|
||||
header = AS_BLOCK_HEADER(free_block, free_block->size);
|
||||
header->prev_size = free_block->size;
|
||||
}
|
||||
else {
|
||||
free_block = (struct free_block*)header;
|
||||
sljit_insert_free_block(free_block, header->size);
|
||||
}
|
||||
|
||||
header = AS_BLOCK_HEADER(free_block, free_block->size);
|
||||
if (SLJIT_UNLIKELY(!header->size)) {
|
||||
free_block->size += ((struct free_block*)header)->size;
|
||||
sljit_remove_free_block((struct free_block*)header);
|
||||
header = AS_BLOCK_HEADER(free_block, free_block->size);
|
||||
header->prev_size = free_block->size;
|
||||
}
|
||||
|
||||
/* The whole chunk is free. */
|
||||
if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) {
|
||||
/* If this block is freed, we still have (allocated_size / 2) free space. */
|
||||
if (total_size - free_block->size > (allocated_size * 3 / 2)) {
|
||||
total_size -= free_block->size;
|
||||
sljit_remove_free_block(free_block);
|
||||
free_chunk(free_block, free_block->size + sizeof(struct block_header));
|
||||
}
|
||||
}
|
||||
|
||||
allocator_release_lock();
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
|
||||
{
|
||||
struct free_block* free_block;
|
||||
struct free_block* next_free_block;
|
||||
|
||||
allocator_grab_lock();
|
||||
|
||||
free_block = free_blocks;
|
||||
while (free_block) {
|
||||
next_free_block = free_block->next;
|
||||
if (!free_block->header.prev_size &&
|
||||
AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
|
||||
total_size -= free_block->size;
|
||||
sljit_remove_free_block(free_block);
|
||||
free_chunk(free_block, free_block->size + sizeof(struct block_header));
|
||||
}
|
||||
free_block = next_free_block;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
|
||||
allocator_release_lock();
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr)
|
||||
{
|
||||
return ((struct block_header *)(ptr))[-1].executable_offset;
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
THE MAIN PCRE LIBRARY
|
||||
---------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2017 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
||||
-------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER LIBRARY
|
||||
-----------------------
|
||||
|
||||
Written by: Google Inc.
|
||||
|
||||
Copyright (c) 2007-2012 Google Inc
|
||||
All rights reserved
|
||||
|
||||
####
|
||||
@@ -1,5 +0,0 @@
|
||||
PCRE LICENCE
|
||||
|
||||
Please see the file LICENCE in the PCRE distribution for licensing details.
|
||||
|
||||
End
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,528 +0,0 @@
|
||||
Technical Notes about PCRE
|
||||
--------------------------
|
||||
|
||||
These are very rough technical notes that record potentially useful information
|
||||
about PCRE internals. For information about testing PCRE, see the pcretest
|
||||
documentation and the comment at the head of the RunTest file.
|
||||
|
||||
|
||||
Historical note 1
|
||||
-----------------
|
||||
|
||||
Many years ago I implemented some regular expression functions to an algorithm
|
||||
suggested by Martin Richards. These were not Unix-like in form, and were quite
|
||||
restricted in what they could do by comparison with Perl. The interesting part
|
||||
about the algorithm was that the amount of space required to hold the compiled
|
||||
form of an expression was known in advance. The code to apply an expression did
|
||||
not operate by backtracking, as the original Henry Spencer code and current
|
||||
Perl code does, but instead checked all possibilities simultaneously by keeping
|
||||
a list of current states and checking all of them as it advanced through the
|
||||
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
|
||||
algorithm", though it was not a traditional Finite State Machine (FSM). When
|
||||
the pattern was all used up, all remaining states were possible matches, and
|
||||
the one matching the longest subset of the subject string was chosen. This did
|
||||
not necessarily maximize the individual wild portions of the pattern, as is
|
||||
expected in Unix and Perl-style regular expressions.
|
||||
|
||||
|
||||
Historical note 2
|
||||
-----------------
|
||||
|
||||
By contrast, the code originally written by Henry Spencer (which was
|
||||
subsequently heavily modified for Perl) compiles the expression twice: once in
|
||||
a dummy mode in order to find out how much store will be needed, and then for
|
||||
real. (The Perl version probably doesn't do this any more; I'm talking about
|
||||
the original library.) The execution function operates by backtracking and
|
||||
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
|
||||
matches individual wild portions of the pattern. This is an "NFA algorithm" in
|
||||
Friedl's terminology.
|
||||
|
||||
|
||||
OK, here's the real stuff
|
||||
-------------------------
|
||||
|
||||
For the set of functions that form the "basic" PCRE library (which are
|
||||
unrelated to those mentioned above), I tried at first to invent an algorithm
|
||||
that used an amount of store bounded by a multiple of the number of characters
|
||||
in the pattern, to save on compiling time. However, because of the greater
|
||||
complexity in Perl regular expressions, I couldn't do this. In any case, a
|
||||
first pass through the pattern is helpful for other reasons.
|
||||
|
||||
|
||||
Support for 16-bit and 32-bit data strings
|
||||
-------------------------------------------
|
||||
|
||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
|
||||
different libraries. In the description that follows, the word "short" is used
|
||||
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
|
||||
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
|
||||
However, so as not to over-complicate the text, the names of PCRE functions are
|
||||
given in 8-bit form only.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
--------------------------------------------
|
||||
|
||||
Up to and including release 6.7, PCRE worked by running a very degenerate first
|
||||
pass to calculate a maximum store size, and then a second pass to do the real
|
||||
compile - which might use a bit less than the predicted amount of memory. The
|
||||
idea was that this would turn out faster than the Henry Spencer code because
|
||||
the first pass is degenerate and the second pass can just store stuff straight
|
||||
into the vector, which it knows is big enough.
|
||||
|
||||
|
||||
Computing the memory requirement: how it is
|
||||
-------------------------------------------
|
||||
|
||||
By the time I was working on a potential 6.8 release, the degenerate first pass
|
||||
had become very complicated and hard to maintain. Indeed one of the early
|
||||
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
|
||||
I had a flash of inspiration as to how I could run the real compile function in
|
||||
a "fake" mode that enables it to compute how much memory it would need, while
|
||||
actually only ever using a few hundred bytes of working memory, and without too
|
||||
many tests of the mode that might slow it down. So I refactored the compiling
|
||||
functions to work this way. This got rid of about 600 lines of source. It
|
||||
should make future maintenance and development easier. As this was such a major
|
||||
change, I never released 6.8, instead upping the number to 7.0 (other quite
|
||||
major changes were also present in the 7.0 release).
|
||||
|
||||
A side effect of this work was that the previous limit of 200 on the nesting
|
||||
depth of parentheses was removed. However, there is a downside: pcre_compile()
|
||||
runs more slowly than before (30% or more, depending on the pattern) because it
|
||||
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||
big issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by pcre_compile(). This is a safety feature for environments
|
||||
with small stacks where the patterns are provided by users.
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
|
||||
The "traditional", and original, matching function is called pcre_exec(), and
|
||||
it implements an NFA algorithm, similar to the original Henry Spencer algorithm
|
||||
and the way that Perl works. This is not surprising, since it is intended to be
|
||||
as compatible with Perl as possible. This is the function most users of PCRE
|
||||
will use most of the time. From release 8.20, if PCRE is compiled with
|
||||
just-in-time (JIT) support, and studying a compiled pattern with JIT is
|
||||
successful, the JIT code is run instead of the normal pcre_exec() code, but the
|
||||
result is the same.
|
||||
|
||||
|
||||
Supplementary matching function
|
||||
-------------------------------
|
||||
|
||||
From PCRE 6.0, there is also a supplementary matching function called
|
||||
pcre_dfa_exec(). This implements a DFA matching algorithm that searches
|
||||
simultaneously for all possible matches that start at one point in the subject
|
||||
string. (Going back to my roots: see Historical Note 1 above.) This function
|
||||
intreprets the same compiled pattern data as pcre_exec(); however, not all the
|
||||
facilities are available, and those that are do not always work in quite the
|
||||
same way. See the user documentation for details.
|
||||
|
||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
|
||||
others) may change in the middle of patterns. From PCRE 8.13, their processing
|
||||
is handled entirely at compile time by generating different opcodes for the
|
||||
different settings. The runtime functions do not need to keep track of an
|
||||
options state any more.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
|
||||
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
|
||||
variable length. The first unit in an item contains an opcode, and the length
|
||||
of the item is either implicit in the opcode or contained in the data that
|
||||
follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
||||
4-byte values for these offsets, although this impairs the performance. (3-byte
|
||||
LINK_SIZE values are available only in 8-bit mode.) Specifing a LINK_SIZE
|
||||
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||
than 64K are going to be processed. In this description, we assume the "normal"
|
||||
compilation options. Data values that are counts (e.g. quantifiers) are two
|
||||
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
|
||||
and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
|
||||
These items are all just one unit long
|
||||
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
OP_CIRC ^ (start of data)
|
||||
OP_CIRCM ^ multiline mode (start of data or after newline)
|
||||
OP_NOT_WORD_BOUNDARY \W
|
||||
OP_WORD_BOUNDARY \w
|
||||
OP_NOT_DIGIT \D
|
||||
OP_DIGIT \d
|
||||
OP_NOT_HSPACE \H
|
||||
OP_HSPACE \h
|
||||
OP_NOT_WHITESPACE \S
|
||||
OP_WHITESPACE \s
|
||||
OP_NOT_VSPACE \V
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match.
|
||||
|
||||
|
||||
Backtracking control verbs with optional data
|
||||
---------------------------------------------
|
||||
|
||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||
following in the same format as OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
---------------------------
|
||||
|
||||
The OP_CHAR opcode is followed by a single character that is to be matched
|
||||
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
||||
the character may be more than one unit long. In UTF-32 mode, characters
|
||||
are always exactly one unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
|
||||
The common repeats (*, +, ?), when applied to a single character, use the
|
||||
following opcodes, which come in caseful and caseless versions:
|
||||
|
||||
Caseful Caseless
|
||||
OP_STAR OP_STARI
|
||||
OP_MINSTAR OP_MINSTARI
|
||||
OP_POSSTAR OP_POSSTARI
|
||||
OP_PLUS OP_PLUSI
|
||||
OP_MINPLUS OP_MINPLUSI
|
||||
OP_POSPLUS OP_POSPLUSI
|
||||
OP_QUERY OP_QUERYI
|
||||
OP_MINQUERY OP_MINQUERYI
|
||||
OP_POSQUERY OP_POSQUERYI
|
||||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
|
||||
minimizing versions. Those with "POS" in their names are possessive versions.
|
||||
Other repeats make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
OP_MINUPTO OP_MINUPTOI
|
||||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a count and then the repeated character. OP_UPTO
|
||||
matches from 0 to the given number. A repeat with a non-zero minimum and a
|
||||
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
|
||||
OPT_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
-------------------------
|
||||
|
||||
Repeats of things like \d are done exactly as for single characters, except
|
||||
that instead of a character, the opcode for the type is stored in the data
|
||||
unit. The opcodes are:
|
||||
|
||||
OP_TYPESTAR
|
||||
OP_TYPEMINSTAR
|
||||
OP_TYPEPOSSTAR
|
||||
OP_TYPEPLUS
|
||||
OP_TYPEMINPLUS
|
||||
OP_TYPEPOSPLUS
|
||||
OP_TYPEQUERY
|
||||
OP_TYPEMINQUERY
|
||||
OP_TYPEPOSQUERY
|
||||
OP_TYPEUPTO
|
||||
OP_TYPEMINUPTO
|
||||
OP_TYPEPOSUPTO
|
||||
OP_TYPEEXACT
|
||||
|
||||
|
||||
Match by Unicode property
|
||||
-------------------------
|
||||
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two units that encode the desired property as a type and a
|
||||
value. The types are a set of #defines of the form PT_xxx, and the values are
|
||||
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
|
||||
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
|
||||
PT_SC (Script).
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
value.
|
||||
|
||||
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]).
|
||||
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
|
||||
mode, subject characters with values greater than 255 can be handled correctly.
|
||||
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
|
||||
are less than 256, followed by a list of pairs (for a range) and single
|
||||
characters. In caseless mode, both cases are explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
|
||||
this is a negative class, and XCL_MAP indicates that a bit map is present.
|
||||
There follows the bit map, if XCL_MAP is set, and then a sequence of items
|
||||
coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
|
||||
This means that if no other items in the class set bits in the map, a map is
|
||||
not needed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number if the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference by name generates OP_DNREF
|
||||
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
|
||||
in the group name table of the first entry for the requred name, followed by
|
||||
the number of groups with the same name.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last three are single-unit items, with no data. The others are
|
||||
followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
myself, can be round, square, curly, or pointy. Hence this usage rather than
|
||||
"parentheses".]
|
||||
|
||||
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
|
||||
capturing brackets and it used a different opcode for each one. From release
|
||||
3.5, the limit was removed by putting the bracket number into the data for
|
||||
higher-numbered brackets. From release 7.0 all capturing brackets are handled
|
||||
this way, using the single opcode OP_CBRA.
|
||||
|
||||
A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number is a count that immediately follows the offset.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively (see below for possessive repetitions). All three are followed by
|
||||
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
|
||||
bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
single-unit opcodes that tell the matcher that skipping the following
|
||||
subpattern entirely is a valid branch. In the case of the first two, not
|
||||
skipping the pattern is also valid (greedy and non-greedy). The third is used
|
||||
when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
|
||||
because it may be called as a subroutine from elsewhere in the regex.
|
||||
|
||||
A subpattern with an indefinite maximum repetition is replicated in the
|
||||
compiled data its minimum number of times (or once with OP_BRAZERO if the
|
||||
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
|
||||
as appropriate.
|
||||
|
||||
A subpattern with a bounded maximum repetition is replicated in a nested
|
||||
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
|
||||
before each replication after the minimum, so that, for example, (abc){2,5} is
|
||||
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group
|
||||
has the same number.
|
||||
|
||||
When a repeated subpattern has an unbounded upper limit, it is checked to see
|
||||
whether it could match an empty string. If this is the case, the opcode in the
|
||||
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
When a repeated group (capturing or non-capturing) is marked as possessive by
|
||||
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
|
||||
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCPBRPOS instead
|
||||
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||
within the atomic group; the latter when there are. The distinction is needed
|
||||
for when there is a backtrack to before the group - any captures within the
|
||||
group must be reset, so it is necessary to retain backtracking points inside
|
||||
the group even after it is complete in order to do this. When there are no
|
||||
captures in an atomic group, all the backtracking can be discarded when it is
|
||||
complete. This is more efficient, and also uses less stack.
|
||||
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||
pointer in the subject string. In ASCII mode, the count is a number of units,
|
||||
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
|
||||
mode each character occupies exactly one unit. A separate count is present in
|
||||
each alternative of a lookbehind assertion, allowing them to have different
|
||||
fixed lengths.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
-----------------------
|
||||
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF (with a value of zero for "the whole
|
||||
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
|
||||
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
|
||||
conditional subpattern always starts with one of the assertions.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current regex, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by aLINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
|
||||
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
|
||||
not strictly a recursion.
|
||||
|
||||
|
||||
Callout
|
||||
-------
|
||||
|
||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||
cases there follows a count giving the offset in the pattern string to the
|
||||
start of the following item, and another count giving the length of this item.
|
||||
These values make is possible for pcretest to output useful tracing information
|
||||
using automatic callouts.
|
||||
|
||||
Philip Hazel
|
||||
November 2013
|
||||
@@ -1,93 +0,0 @@
|
||||
PCRE LICENCE
|
||||
------------
|
||||
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself. The data
|
||||
in the testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a set of C++ wrapper functions, and a
|
||||
just-in-time compiler that can be used to optimize pattern matching. These
|
||||
are both optional features that can be omitted when the library is built.
|
||||
|
||||
|
||||
THE BASIC LIBRARY FUNCTIONS
|
||||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2017 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
PCRE JUST-IN-TIME COMPILATION SUPPORT
|
||||
-------------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
STACK-LESS JUST-IN-TIME COMPILER
|
||||
--------------------------------
|
||||
|
||||
Written by: Zoltan Herczeg
|
||||
Email local part: hzmester
|
||||
Emain domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE C++ WRAPPER FUNCTIONS
|
||||
-------------------------
|
||||
|
||||
Contributed by: Google Inc.
|
||||
|
||||
Copyright (c) 2007-2012, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
|
||||
THE "BSD" LICENCE
|
||||
-----------------
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the name of Google
|
||||
Inc. nor the names of their contributors may be used to endorse or
|
||||
promote products derived from this software without specific prior
|
||||
written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
End
|
||||
@@ -1,737 +0,0 @@
|
||||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 8.41 13-June-2017
|
||||
-------------------------
|
||||
|
||||
This is a bug-fix release.
|
||||
|
||||
|
||||
Release 8.40 11-January-2017
|
||||
----------------------------
|
||||
|
||||
This is a bug-fix release.
|
||||
|
||||
|
||||
Release 8.39 14-June-2016
|
||||
-------------------------
|
||||
|
||||
Some appropriate PCRE2 JIT improvements have been retro-fitted to PCRE1. Apart
|
||||
from that, this is another bug-fix release. Note that this library (now called
|
||||
PCRE1) is now being maintained for bug fixes only. New projects are advised to
|
||||
use the new PCRE2 libraries.
|
||||
|
||||
|
||||
Release 8.38 23-November-2015
|
||||
-----------------------------
|
||||
|
||||
This is bug-fix release. Note that this library (now called PCRE1) is now being
|
||||
maintained for bug fixes only. New projects are advised to use the new PCRE2
|
||||
libraries.
|
||||
|
||||
|
||||
Release 8.37 28-April-2015
|
||||
--------------------------
|
||||
|
||||
This is bug-fix release. Note that this library (now called PCRE1) is now being
|
||||
maintained for bug fixes only. New projects are advised to use the new PCRE2
|
||||
libraries.
|
||||
|
||||
|
||||
Release 8.36 26-September-2014
|
||||
------------------------------
|
||||
|
||||
This is primarily a bug-fix release. However, in addition, the Unicode data
|
||||
tables have been updated to Unicode 7.0.0.
|
||||
|
||||
|
||||
Release 8.35 04-April-2014
|
||||
--------------------------
|
||||
|
||||
There have been performance improvements for classes containing non-ASCII
|
||||
characters and the "auto-possessification" feature has been extended. Other
|
||||
minor improvements have been implemented and bugs fixed. There is a new callout
|
||||
feature to enable applications to do detailed stack checks at compile time, to
|
||||
avoid running out of stack for deeply nested parentheses. The JIT compiler has
|
||||
been extended with experimental support for ARM-64, MIPS-64, and PPC-LE.
|
||||
|
||||
|
||||
Release 8.34 15-December-2013
|
||||
-----------------------------
|
||||
|
||||
As well as fixing the inevitable bugs, performance has been improved by
|
||||
refactoring and extending the amount of "auto-possessification" that PCRE does.
|
||||
Other notable changes:
|
||||
|
||||
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||
an empty string. If it can, pcretest shows this in its information output.
|
||||
|
||||
. A back reference to a named subpattern when there is more than one of the
|
||||
same name now checks them in the order in which they appear in the pattern.
|
||||
The first one that is set is used for the reference. Previously only the
|
||||
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||
|
||||
. Unicode character properties were updated from Unicode 6.3.0.
|
||||
|
||||
. The character VT has been added to the set of characters that match \s and
|
||||
are generally treated as white space, following this same change in Perl
|
||||
5.18. There is now no difference between "Perl space" and "POSIX space".
|
||||
|
||||
. Perl has changed its handling of \8 and \9. If there is no previously
|
||||
encountered capturing group of those numbers, they are treated as the
|
||||
literal characters 8 and 9 instead of a binary zero followed by the
|
||||
literals. PCRE now does the same.
|
||||
|
||||
. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||
possible to specify values greater than 0777 and also making them
|
||||
unambiguous.
|
||||
|
||||
. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||
were matched by \h.
|
||||
|
||||
. Add JIT support for the 64 bit TileGX architecture.
|
||||
|
||||
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||
does in Unicode mode.
|
||||
|
||||
. Perl no longer allows group names to start with digits, so I have made this
|
||||
change also in PCRE.
|
||||
|
||||
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||
|
||||
|
||||
Release 8.33 28-May-2013
|
||||
--------------------------
|
||||
|
||||
A number of bugs are fixed, and some performance improvements have been made.
|
||||
There are also some new features, of which these are the most important:
|
||||
|
||||
. The behaviour of the backtracking verbs has been rationalized and
|
||||
documented in more detail.
|
||||
|
||||
. JIT now supports callouts and all of the backtracking verbs.
|
||||
|
||||
. Unicode validation has been updated in the light of Unicode Corrigendum #9,
|
||||
which points out that "non characters" are not "characters that may not
|
||||
appear in Unicode strings" but rather "characters that are reserved for
|
||||
internal use and have only local meaning".
|
||||
|
||||
. (*LIMIT_MATCH=d) and (*LIMIT_RECURSION=d) have been added so that the
|
||||
creator of a pattern can specify lower (but not higher) limits for the
|
||||
matching process.
|
||||
|
||||
. The PCRE_NEVER_UTF option is available to prevent pattern-writers from using
|
||||
the (*UTF) feature, as this could be a security issue.
|
||||
|
||||
|
||||
Release 8.32 30-November-2012
|
||||
-----------------------------
|
||||
|
||||
This release fixes a number of bugs, but also has some new features. These are
|
||||
the highlights:
|
||||
|
||||
. There is now support for 32-bit character strings and UTF-32. Like the
|
||||
16-bit support, this is done by compiling a separate 32-bit library.
|
||||
|
||||
. \X now matches a Unicode extended grapheme cluster.
|
||||
|
||||
. Case-independent matching of Unicode characters that have more than one
|
||||
"other case" now makes all three (or more) characters equivalent. This
|
||||
applies, for example, to Greek Sigma, which has two lowercase versions.
|
||||
|
||||
. Unicode character properties are updated to Unicode 6.2.0.
|
||||
|
||||
. The EBCDIC support, which had decayed, has had a spring clean.
|
||||
|
||||
. A number of JIT optimizations have been added, which give faster JIT
|
||||
execution speed. In addition, a new direct interface to JIT execution is
|
||||
available. This bypasses some of the sanity checks of pcre_exec() to give a
|
||||
noticeable speed-up.
|
||||
|
||||
. A number of issues in pcregrep have been fixed, making it more compatible
|
||||
with GNU grep. In particular, --exclude and --include (and variants) apply
|
||||
to all files now, not just those obtained from scanning a directory
|
||||
recursively. In Windows environments, the default action for directories is
|
||||
now "skip" instead of "read" (which provokes an error).
|
||||
|
||||
. If the --only-matching (-o) option in pcregrep is specified multiple
|
||||
times, each one causes appropriate output. For example, -o1 -o2 outputs the
|
||||
substrings matched by the 1st and 2nd capturing parentheses. A separating
|
||||
string can be specified by --om-separator (default empty).
|
||||
|
||||
. When PCRE is built via Autotools using a version of gcc that has the
|
||||
"visibility" feature, it is used to hide internal library functions that are
|
||||
not part of the public API.
|
||||
|
||||
|
||||
Release 8.31 06-July-2012
|
||||
-------------------------
|
||||
|
||||
This is mainly a bug-fixing release, with a small number of developments:
|
||||
|
||||
. The JIT compiler now supports partial matching and the (*MARK) and
|
||||
(*COMMIT) verbs.
|
||||
|
||||
. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
|
||||
pattern.
|
||||
|
||||
. There should be a performance improvement when using the heap instead of the
|
||||
stack for recursion.
|
||||
|
||||
. pcregrep can now be linked with libedit as an alternative to libreadline.
|
||||
|
||||
. pcregrep now has a --file-list option where the list of files to scan is
|
||||
given as a file.
|
||||
|
||||
. pcregrep now recognizes binary files and there are related options.
|
||||
|
||||
. The Unicode tables have been updated to 6.1.0.
|
||||
|
||||
As always, the full list of changes is in the ChangeLog file.
|
||||
|
||||
|
||||
Release 8.30 04-February-2012
|
||||
-----------------------------
|
||||
|
||||
Release 8.30 introduces a major new feature: support for 16-bit character
|
||||
strings, compiled as a separate library. There are a few changes to the
|
||||
8-bit library, in addition to some bug fixes.
|
||||
|
||||
. The pcre_info() function, which has been obsolete for over 10 years, has
|
||||
been removed.
|
||||
|
||||
. When a compiled pattern was saved to a file and later reloaded on a host
|
||||
with different endianness, PCRE used automatically to swap the bytes in some
|
||||
of the data fields. With the advent of the 16-bit library, where more of this
|
||||
swapping is needed, it is no longer done automatically. Instead, the bad
|
||||
endianness is detected and a specific error is given. The user can then call
|
||||
a new function called pcre_pattern_to_host_byte_order() (or an equivalent
|
||||
16-bit function) to do the swap.
|
||||
|
||||
. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode
|
||||
code points and are now faulted. (They are the so-called "surrogates"
|
||||
that are reserved for coding high values in UTF-16.)
|
||||
|
||||
|
||||
Release 8.21 12-Dec-2011
|
||||
------------------------
|
||||
|
||||
This is almost entirely a bug-fix release. The only new feature is the ability
|
||||
to obtain the size of the memory used by the JIT compiler.
|
||||
|
||||
|
||||
Release 8.20 21-Oct-2011
|
||||
------------------------
|
||||
|
||||
The main change in this release is the inclusion of Zoltan Herczeg's
|
||||
just-in-time compiler support, which can be accessed by building PCRE with
|
||||
--enable-jit. Large performance benefits can be had in many situations. 8.20
|
||||
also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up
|
||||
a number of infelicities and differences from Perl.
|
||||
|
||||
|
||||
Release 8.13 16-Aug-2011
|
||||
------------------------
|
||||
|
||||
This is mainly a bug-fix release. There has been a lot of internal refactoring.
|
||||
The Unicode tables have been updated. The only new feature in the library is
|
||||
the passing of *MARK information to callouts. Some additions have been made to
|
||||
pcretest to make testing easier and more comprehensive. There is a new option
|
||||
for pcregrep to adjust its internal buffer size.
|
||||
|
||||
|
||||
Release 8.12 15-Jan-2011
|
||||
------------------------
|
||||
|
||||
This release fixes some bugs in pcregrep, one of which caused the tests to fail
|
||||
on 64-bit big-endian systems. There are no changes to the code of the library.
|
||||
|
||||
|
||||
Release 8.11 10-Dec-2010
|
||||
------------------------
|
||||
|
||||
A number of bugs in the library and in pcregrep have been fixed. As always, see
|
||||
ChangeLog for details. The following are the non-bug-fix changes:
|
||||
|
||||
. Added --match-limit and --recursion-limit to pcregrep.
|
||||
|
||||
. Added an optional parentheses number to the -o and --only-matching options
|
||||
of pcregrep.
|
||||
|
||||
. Changed the way PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and
|
||||
\B.
|
||||
|
||||
. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a
|
||||
bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD.
|
||||
|
||||
. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_
|
||||
START_OPTIMIZE option, which is now allowed at compile time
|
||||
|
||||
|
||||
Release 8.10 25-Jun-2010
|
||||
------------------------
|
||||
|
||||
There are two major additions: support for (*MARK) and friends, and the option
|
||||
PCRE_UCP, which changes the behaviour of \b, \d, \s, and \w (and their
|
||||
opposites) so that they make use of Unicode properties. There are also a number
|
||||
of lesser new features, and several bugs have been fixed. A new option,
|
||||
--line-buffered, has been added to pcregrep, for use when it is connected to
|
||||
pipes.
|
||||
|
||||
|
||||
Release 8.02 19-Mar-2010
|
||||
------------------------
|
||||
|
||||
Another bug-fix release.
|
||||
|
||||
|
||||
Release 8.01 19-Jan-2010
|
||||
------------------------
|
||||
|
||||
This is a bug-fix release. Several bugs in the code itself and some bugs and
|
||||
infelicities in the build system have been fixed.
|
||||
|
||||
|
||||
Release 8.00 19-Oct-09
|
||||
----------------------
|
||||
|
||||
Bugs have been fixed in the library and in pcregrep. There are also some
|
||||
enhancements. Restrictions on patterns used for partial matching have been
|
||||
removed, extra information is given for partial matches, the partial matching
|
||||
process has been improved, and an option to make a partial match override a
|
||||
full match is available. The "study" process has been enhanced by finding a
|
||||
lower bound matching length. Groups with duplicate numbers may now have
|
||||
duplicated names without the use of PCRE_DUPNAMES. However, they may not have
|
||||
different names. The documentation has been revised to reflect these changes.
|
||||
The version number has been expanded to 3 digits as it is clear that the rate
|
||||
of change is not slowing down.
|
||||
|
||||
|
||||
Release 7.9 11-Apr-09
|
||||
---------------------
|
||||
|
||||
Mostly bugfixes and tidies with just a couple of minor functional additions.
|
||||
|
||||
|
||||
Release 7.8 05-Sep-08
|
||||
---------------------
|
||||
|
||||
More bug fixes, plus a performance improvement in Unicode character property
|
||||
lookup.
|
||||
|
||||
|
||||
Release 7.7 07-May-08
|
||||
---------------------
|
||||
|
||||
This is once again mainly a bug-fix release, but there are a couple of new
|
||||
features.
|
||||
|
||||
|
||||
Release 7.6 28-Jan-08
|
||||
---------------------
|
||||
|
||||
The main reason for having this release so soon after 7.5 is because it fixes a
|
||||
potential buffer overflow problem in pcre_compile() when run in UTF-8 mode. In
|
||||
addition, the CMake configuration files have been brought up to date.
|
||||
|
||||
|
||||
Release 7.5 10-Jan-08
|
||||
---------------------
|
||||
|
||||
This is mainly a bug-fix release. However the ability to link pcregrep with
|
||||
libz or libbz2 and the ability to link pcretest with libreadline have been
|
||||
added. Also the --line-offsets and --file-offsets options were added to
|
||||
pcregrep.
|
||||
|
||||
|
||||
Release 7.4 21-Sep-07
|
||||
---------------------
|
||||
|
||||
The only change of specification is the addition of options to control whether
|
||||
\R matches any Unicode line ending (the default) or just CR, LF, and CRLF.
|
||||
Otherwise, the changes are bug fixes and a refactoring to reduce the number of
|
||||
relocations needed in a shared library. There have also been some documentation
|
||||
updates, in particular, some more information about using CMake to build PCRE
|
||||
has been added to the NON-UNIX-USE file.
|
||||
|
||||
|
||||
Release 7.3 28-Aug-07
|
||||
---------------------
|
||||
|
||||
Most changes are bug fixes. Some that are not:
|
||||
|
||||
1. There is some support for Perl 5.10's experimental "backtracking control
|
||||
verbs" such as (*PRUNE).
|
||||
|
||||
2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more
|
||||
restrictive in the strings it accepts.
|
||||
|
||||
3. Checking for potential integer overflow has been made more dynamic, and as a
|
||||
consequence there is no longer a hard limit on the size of a subpattern that
|
||||
has a limited repeat count.
|
||||
|
||||
4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec()
|
||||
no longer advance by two characters instead of one when an unanchored match
|
||||
fails at CRLF if there are explicit CR or LF matches within the pattern.
|
||||
This gets rid of some anomalous effects that previously occurred.
|
||||
|
||||
5. Some PCRE-specific settings for varying the newline options at the start of
|
||||
a pattern have been added.
|
||||
|
||||
|
||||
Release 7.2 19-Jun-07
|
||||
---------------------
|
||||
|
||||
WARNING: saved patterns that were compiled by earlier versions of PCRE must be
|
||||
recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v,
|
||||
and \V).
|
||||
|
||||
Correction to the notes for 7.1: the note about shared libraries for Windows is
|
||||
wrong. Previously, three libraries were built, but each could function
|
||||
independently. For example, the pcreposix library also included all the
|
||||
functions from the basic pcre library. The change is that the three libraries
|
||||
are no longer independent. They are like the Unix libraries. To use the
|
||||
pcreposix functions, for example, you need to link with both the pcreposix and
|
||||
the basic pcre library.
|
||||
|
||||
Some more features from Perl 5.10 have been added:
|
||||
|
||||
(?-n) and (?+n) relative references for recursion and subroutines.
|
||||
|
||||
(?(-n) and (?(+n) relative references as conditions.
|
||||
|
||||
\k{name} and \g{name} are synonyms for \k<name>.
|
||||
|
||||
\K to reset the start of the matched string; for example, (foo)\Kbar
|
||||
matches bar preceded by foo, but only sets bar as the matched string.
|
||||
|
||||
(?| introduces a group where the capturing parentheses in each alternative
|
||||
start from the same number; for example, (?|(abc)|(xyz)) sets capturing
|
||||
parentheses number 1 in both cases.
|
||||
|
||||
\h, \H, \v, \V match horizontal and vertical whitespace, respectively.
|
||||
|
||||
|
||||
Release 7.1 24-Apr-07
|
||||
---------------------
|
||||
|
||||
There is only one new feature in this release: a linebreak setting of
|
||||
PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which
|
||||
recognizes only CRLF, CR, and LF as linebreaks.
|
||||
|
||||
A few bugs are fixed (see ChangeLog for details), but the major change is a
|
||||
complete re-implementation of the build system. This now has full Autotools
|
||||
support and so is now "standard" in some sense. It should help with compiling
|
||||
PCRE in a wide variety of environments.
|
||||
|
||||
NOTE: when building shared libraries for Windows, three dlls are now built,
|
||||
called libpcre, libpcreposix, and libpcrecpp. Previously, everything was
|
||||
included in a single dll.
|
||||
|
||||
Another important change is that the dftables auxiliary program is no longer
|
||||
compiled and run at "make" time by default. Instead, a default set of character
|
||||
tables (assuming ASCII coding) is used. If you want to use dftables to generate
|
||||
the character tables as previously, add --enable-rebuild-chartables to the
|
||||
"configure" command. You must do this if you are compiling PCRE to run on a
|
||||
system that uses EBCDIC code.
|
||||
|
||||
There is a discussion about character tables in the README file. The default is
|
||||
not to use dftables so that that there is no problem when cross-compiling.
|
||||
|
||||
|
||||
Release 7.0 19-Dec-06
|
||||
---------------------
|
||||
|
||||
This release has a new major number because there have been some internal
|
||||
upheavals to facilitate the addition of new optimizations and other facilities,
|
||||
and to make subsequent maintenance and extension easier. Compilation is likely
|
||||
to be a bit slower, but there should be no major effect on runtime performance.
|
||||
Previously compiled patterns are NOT upwards compatible with this release. If
|
||||
you have saved compiled patterns from a previous release, you will have to
|
||||
re-compile them. Important changes that are visible to users are:
|
||||
|
||||
1. The Unicode property tables have been updated to Unicode 5.0.0, which adds
|
||||
some more scripts.
|
||||
|
||||
2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline
|
||||
sequence as a newline.
|
||||
|
||||
3. The \R escape matches a single Unicode newline sequence as a single unit.
|
||||
|
||||
4. New features that will appear in Perl 5.10 are now in PCRE. These include
|
||||
alternative Perl syntax for named parentheses, and Perl syntax for
|
||||
recursion.
|
||||
|
||||
5. The C++ wrapper interface has been extended by the addition of a
|
||||
QuoteMeta function and the ability to allow copy construction and
|
||||
assignment.
|
||||
|
||||
For a complete list of changes, see the ChangeLog file.
|
||||
|
||||
|
||||
Release 6.7 04-Jul-06
|
||||
---------------------
|
||||
|
||||
The main additions to this release are the ability to use the same name for
|
||||
multiple sets of parentheses, and support for CRLF line endings in both the
|
||||
library and pcregrep (and in pcretest for testing).
|
||||
|
||||
Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
|
||||
significantly reduced for certain subject strings.
|
||||
|
||||
|
||||
Release 6.5 01-Feb-06
|
||||
---------------------
|
||||
|
||||
Important changes in this release:
|
||||
|
||||
1. A number of new features have been added to pcregrep.
|
||||
|
||||
2. The Unicode property tables have been updated to Unicode 4.1.0, and the
|
||||
supported properties have been extended with script names such as "Arabic",
|
||||
and the derived properties "Any" and "L&". This has necessitated a change to
|
||||
the interal format of compiled patterns. Any saved compiled patterns that
|
||||
use \p or \P must be recompiled.
|
||||
|
||||
3. The specification of recursion in patterns has been changed so that all
|
||||
recursive subpatterns are automatically treated as atomic groups. Thus, for
|
||||
example, (?R) is treated as if it were (?>(?R)). This is necessary because
|
||||
otherwise there are situations where recursion does not work.
|
||||
|
||||
See the ChangeLog for a complete list of changes, which include a number of bug
|
||||
fixes and tidies.
|
||||
|
||||
|
||||
Release 6.0 07-Jun-05
|
||||
---------------------
|
||||
|
||||
The release number has been increased to 6.0 because of the addition of several
|
||||
major new pieces of functionality.
|
||||
|
||||
A new function, pcre_dfa_exec(), which implements pattern matching using a DFA
|
||||
algorithm, has been added. This has a number of advantages for certain cases,
|
||||
though it does run more slowly, and lacks the ability to capture substrings. On
|
||||
the other hand, it does find all matches, not just the first, and it works
|
||||
better for partial matching. The pcrematching man page discusses the
|
||||
differences.
|
||||
|
||||
The pcretest program has been enhanced so that it can make use of the new
|
||||
pcre_dfa_exec() matching function and the extra features it provides.
|
||||
|
||||
The distribution now includes a C++ wrapper library. This is built
|
||||
automatically if a C++ compiler is found. The pcrecpp man page discusses this
|
||||
interface.
|
||||
|
||||
The code itself has been re-organized into many more files, one for each
|
||||
function, so it no longer requires everything to be linked in when static
|
||||
linkage is used. As a consequence, some internal functions have had to have
|
||||
their names exposed. These functions all have names starting with _pcre_. They
|
||||
are undocumented, and are not intended for use by outside callers.
|
||||
|
||||
The pcregrep program has been enhanced with new functionality such as
|
||||
multiline-matching and options for output more matching context. See the
|
||||
ChangeLog for a complete list of changes to the library and the utility
|
||||
programs.
|
||||
|
||||
|
||||
Release 5.0 13-Sep-04
|
||||
---------------------
|
||||
|
||||
The licence under which PCRE is released has been changed to the more
|
||||
conventional "BSD" licence.
|
||||
|
||||
In the code, some bugs have been fixed, and there are also some major changes
|
||||
in this release (which is why I've increased the number to 5.0). Some changes
|
||||
are internal rearrangements, and some provide a number of new facilities. The
|
||||
new features are:
|
||||
|
||||
1. There's an "automatic callout" feature that inserts callouts before every
|
||||
item in the regex, and there's a new callout field that gives the position
|
||||
in the pattern - useful for debugging and tracing.
|
||||
|
||||
2. The extra_data structure can now be used to pass in a set of character
|
||||
tables at exec time. This is useful if compiled regex are saved and re-used
|
||||
at a later time when the tables may not be at the same address. If the
|
||||
default internal tables are used, the pointer saved with the compiled
|
||||
pattern is now set to NULL, which means that you don't need to do anything
|
||||
special unless you are using custom tables.
|
||||
|
||||
3. It is possible, with some restrictions on the content of the regex, to
|
||||
request "partial" matching. A special return code is given if all of the
|
||||
subject string matched part of the regex. This could be useful for testing
|
||||
an input field as it is being typed.
|
||||
|
||||
4. There is now some optional support for Unicode character properties, which
|
||||
means that the patterns items such as \p{Lu} and \X can now be used. Only
|
||||
the general category properties are supported. If PCRE is compiled with this
|
||||
support, an additional 90K data structure is include, which increases the
|
||||
size of the library dramatically.
|
||||
|
||||
5. There is support for saving compiled patterns and re-using them later.
|
||||
|
||||
6. There is support for running regular expressions that were compiled on a
|
||||
different host with the opposite endianness.
|
||||
|
||||
7. The pcretest program has been extended to accommodate the new features.
|
||||
|
||||
The main internal rearrangement is that sequences of literal characters are no
|
||||
longer handled as strings. Instead, each character is handled on its own. This
|
||||
makes some UTF-8 handling easier, and makes the support of partial matching
|
||||
possible. Compiled patterns containing long literal strings will be larger as a
|
||||
result of this change; I hope that performance will not be much affected.
|
||||
|
||||
|
||||
Release 4.5 01-Dec-03
|
||||
---------------------
|
||||
|
||||
Again mainly a bug-fix and tidying release, with only a couple of new features:
|
||||
|
||||
1. It's possible now to compile PCRE so that it does not use recursive
|
||||
function calls when matching. Instead it gets memory from the heap. This slows
|
||||
things down, but may be necessary on systems with limited stacks.
|
||||
|
||||
2. UTF-8 string checking has been tightened to reject overlong sequences and to
|
||||
check that a starting offset points to the start of a character. Failure of the
|
||||
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.
|
||||
|
||||
3. PCRE can now be compiled for systems that use EBCDIC code.
|
||||
|
||||
|
||||
Release 4.4 21-Aug-03
|
||||
---------------------
|
||||
|
||||
This is mainly a bug-fix and tidying release. The only new feature is that PCRE
|
||||
checks UTF-8 strings for validity by default. There is an option to suppress
|
||||
this, just in case anybody wants that teeny extra bit of performance.
|
||||
|
||||
|
||||
Releases 4.1 - 4.3
|
||||
------------------
|
||||
|
||||
Sorry, I forgot about updating the NEWS file for these releases. Please take a
|
||||
look at ChangeLog.
|
||||
|
||||
|
||||
Release 4.0 17-Feb-03
|
||||
---------------------
|
||||
|
||||
There have been a lot of changes for the 4.0 release, adding additional
|
||||
functionality and mending bugs. Below is a list of the highlights of the new
|
||||
functionality. For full details of these features, please consult the
|
||||
documentation. For a complete list of changes, see the ChangeLog file.
|
||||
|
||||
1. Support for Perl's \Q...\E escapes.
|
||||
|
||||
2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
|
||||
package. They provide some syntactic sugar for simple cases of "atomic
|
||||
grouping".
|
||||
|
||||
3. Support for the \G assertion. It is true when the current matching position
|
||||
is at the start point of the match.
|
||||
|
||||
4. A new feature that provides some of the functionality that Perl provides
|
||||
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
|
||||
is for the caller to provide an optional function, by setting pcre_callout to
|
||||
its entry point. To get the function called, the regex must include (?C) at
|
||||
appropriate points.
|
||||
|
||||
5. Support for recursive calls to individual subpatterns. This makes it really
|
||||
easy to get totally confused.
|
||||
|
||||
6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
|
||||
name a group.
|
||||
|
||||
7. Several extensions to UTF-8 support; it is now fairly complete. There is an
|
||||
option for pcregrep to make it operate in UTF-8 mode.
|
||||
|
||||
8. The single man page has been split into a number of separate man pages.
|
||||
These also give rise to individual HTML pages which are put in a separate
|
||||
directory. There is an index.html page that lists them all. Some hyperlinking
|
||||
between the pages has been installed.
|
||||
|
||||
|
||||
Release 3.5 15-Aug-01
|
||||
---------------------
|
||||
|
||||
1. The configuring system has been upgraded to use later versions of autoconf
|
||||
and libtool. By default it builds both a shared and a static library if the OS
|
||||
supports it. You can use --disable-shared or --disable-static on the configure
|
||||
command if you want only one of them.
|
||||
|
||||
2. The pcretest utility is now installed along with pcregrep because it is
|
||||
useful for users (to test regexs) and by doing this, it automatically gets
|
||||
relinked by libtool. The documentation has been turned into a man page, so
|
||||
there are now .1, .txt, and .html versions in /doc.
|
||||
|
||||
3. Upgrades to pcregrep:
|
||||
(i) Added long-form option names like gnu grep.
|
||||
(ii) Added --help to list all options with an explanatory phrase.
|
||||
(iii) Added -r, --recursive to recurse into sub-directories.
|
||||
(iv) Added -f, --file to read patterns from a file.
|
||||
|
||||
4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
|
||||
script, to force use of CR or LF instead of \n in the source. On non-Unix
|
||||
systems, the value can be set in config.h.
|
||||
|
||||
5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
|
||||
absolute limit. Changed the text of the error message to make this clear, and
|
||||
likewise updated the man page.
|
||||
|
||||
6. The limit of 99 on the number of capturing subpatterns has been removed.
|
||||
The new limit is 65535, which I hope will not be a "real" limit.
|
||||
|
||||
|
||||
Release 3.3 01-Aug-00
|
||||
---------------------
|
||||
|
||||
There is some support for UTF-8 character strings. This is incomplete and
|
||||
experimental. The documentation describes what is and what is not implemented.
|
||||
Otherwise, this is just a bug-fixing release.
|
||||
|
||||
|
||||
Release 3.0 01-Feb-00
|
||||
---------------------
|
||||
|
||||
1. A "configure" script is now used to configure PCRE for Unix systems. It
|
||||
builds a Makefile, a config.h file, and the pcre-config script.
|
||||
|
||||
2. PCRE is built as a shared library by default.
|
||||
|
||||
3. There is support for POSIX classes such as [:alpha:].
|
||||
|
||||
5. There is an experimental recursion feature.
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00
|
||||
|
||||
Please note that there has been a change in the API such that a larger
|
||||
ovector is required at matching time, to provide some additional workspace.
|
||||
The new man page has details. This change was necessary in order to support
|
||||
some of the new functionality in Perl 5.005.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00
|
||||
|
||||
Another (I hope this is the last!) change has been made to the API for the
|
||||
pcre_compile() function. An additional argument has been added to make it
|
||||
possible to pass over a pointer to character tables built in the current
|
||||
locale by pcre_maketables(). To use the default tables, this new argument
|
||||
should be passed as NULL.
|
||||
|
||||
IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05
|
||||
|
||||
Yet another (and again I hope this really is the last) change has been made
|
||||
to the API for the pcre_exec() function. An additional argument has been
|
||||
added to make it possible to start the match other than at the start of the
|
||||
subject string. This is important if there are lookbehinds. The new man
|
||||
page has the details, but you just want to convert existing programs, all
|
||||
you need to do is to stick in a new fifth argument to pcre_exec(), with a
|
||||
value of zero. For example, change
|
||||
|
||||
pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
|
||||
to
|
||||
pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)
|
||||
|
||||
****
|
||||
@@ -1,7 +0,0 @@
|
||||
Compiling PCRE on non-Unix systems
|
||||
----------------------------------
|
||||
|
||||
This has been renamed to better reflect its contents. Please see the file
|
||||
NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools.
|
||||
|
||||
####
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,459 +0,0 @@
|
||||
|
||||
#include <php_compat.h>
|
||||
|
||||
#ifdef PHP_WIN32
|
||||
# include <config.w32.h>
|
||||
#else
|
||||
# include <php_config.h>
|
||||
#endif
|
||||
|
||||
#undef PACKAGE_NAME
|
||||
#undef PACKAGE_VERSION
|
||||
#undef PACKAGE_TARNAME
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
#define SUPPORT_UCP
|
||||
#define SUPPORT_UTF8
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C" __attribute__ ((visibility("default")))
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern __attribute__ ((visibility("default")))
|
||||
# endif
|
||||
# define PCRE_EXP_DEFN __attribute__ ((visibility("default")))
|
||||
# define PCRE_EXP_DATA_DEFN __attribute__ ((visibility("default")))
|
||||
#endif
|
||||
|
||||
|
||||
/* Exclude these below definitions when building within PHP */
|
||||
#ifndef ZEND_API
|
||||
|
||||
/* config.h. Generated from config.h.in by configure. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
|
||||
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the facilities, config.h.in is converted by
|
||||
"configure", or config-cmake.h.in is converted by CMake, into config.h. If you
|
||||
are going to build PCRE "by hand" without using "configure" or CMake, you
|
||||
should copy the distributed config.h.generic to config.h, and then edit the
|
||||
macro definitions to be the way you need them. You must then add
|
||||
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
|
||||
at the start of every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||
#undef BSR_ANYCRLF
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. You must also edit the
|
||||
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
|
||||
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
|
||||
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
|
||||
strings are in EBCDIC. If you do not define this macro, PCRE will assume
|
||||
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
|
||||
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
|
||||
#undef EBCDIC
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. The value must also be set in the
|
||||
NEWLINE macro below. On systems that can use "configure" or CMake to set
|
||||
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
|
||||
#undef EBCDIC_NL25
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#ifndef HAVE_BCOPY
|
||||
#define HAVE_BCOPY 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#ifndef HAVE_BZLIB_H
|
||||
#define HAVE_BZLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#ifndef HAVE_DIRENT_H
|
||||
#define HAVE_DIRENT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#ifndef HAVE_DLFCN_H
|
||||
#define HAVE_DLFCN_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <editline/readline.h> header file. */
|
||||
/*#undef HAVE_EDITLINE_READLINE_H*/
|
||||
|
||||
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
|
||||
/* #undef HAVE_EDIT_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#ifndef HAVE_INTTYPES_H
|
||||
#define HAVE_INTTYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#ifndef HAVE_LIMITS_H
|
||||
#define HAVE_LIMITS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `long long'. */
|
||||
#ifndef HAVE_LONG_LONG
|
||||
#define HAVE_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#define HAVE_MEMMOVE 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#ifndef HAVE_MEMORY_H
|
||||
#define HAVE_MEMORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#ifndef HAVE_READLINE_HISTORY_H
|
||||
#define HAVE_READLINE_HISTORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#ifndef HAVE_READLINE_READLINE_H
|
||||
#define HAVE_READLINE_READLINE_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#ifndef HAVE_STDINT_H
|
||||
#define HAVE_STDINT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#ifndef HAVE_STDLIB_H
|
||||
#define HAVE_STDLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#ifndef HAVE_STRERROR
|
||||
#define HAVE_STRERROR 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string> header file. */
|
||||
#ifndef HAVE_STRING
|
||||
#define HAVE_STRING 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#ifndef HAVE_STRINGS_H
|
||||
#define HAVE_STRINGS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#ifndef HAVE_STRING_H
|
||||
#define HAVE_STRING_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have `strtoimax'. */
|
||||
/* #undef HAVE_STRTOIMAX */
|
||||
|
||||
/* Define to 1 if you have `strtoll'. */
|
||||
/* #undef HAVE_STRTOLL */
|
||||
|
||||
/* Define to 1 if you have `strtoq'. */
|
||||
#ifndef HAVE_STRTOQ
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#ifndef HAVE_SYS_STAT_H
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#ifndef HAVE_SYS_TYPES_H
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||
/* #undef HAVE_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#ifndef HAVE_UNISTD_H
|
||||
#define HAVE_UNISTD_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#ifndef HAVE_UNSIGNED_LONG_LONG
|
||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 or 0, depending whether the compiler supports simple visibility
|
||||
declarations. */
|
||||
/* #undef HAVE_VISIBILITY */
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#ifndef HAVE_ZLIB_H
|
||||
#define HAVE_ZLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have `_strtoi64'. */
|
||||
/* #undef HAVE__STRTOI64 */
|
||||
|
||||
/* Exclude these above definitions when building within PHP */
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
/* This is ignored unless you are using libtool. */
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. On systems that
|
||||
support it, "configure" can be used to override this default default. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||
match(). To have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. On systems that support it,
|
||||
"configure" can be used to override the default. */
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* The value of NEWLINE determines the default newline character sequence.
|
||||
PCRE client programs can override this by selecting other values at run
|
||||
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
|
||||
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
|
||||
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
|
||||
0x25) that are used as the NL line terminator that is equivalent to ASCII
|
||||
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
|
||||
or -2 (ANYCRLF). */
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE 10
|
||||
#endif
|
||||
|
||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
|
||||
/* #undef NO_MINUS_C_MINUS_O */
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||
recursion in the match() function; instead it creates its own stack by
|
||||
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.
|
||||
*/
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 8.41"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "8.41"
|
||||
|
||||
/* to make a symbol visible */
|
||||
/* #undef PCRECPP_EXP_DECL */
|
||||
|
||||
/* to make a symbol visible */
|
||||
/* #undef PCRECPP_EXP_DEFN */
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern. */
|
||||
#ifndef PARENS_NEST_LIMIT
|
||||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||
pcregrep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcregrep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
/* #undef PCREGREP_BUFSIZE */
|
||||
|
||||
/* to make a symbol visible */
|
||||
/* #undef PCREPOSIX_EXP_DECL */
|
||||
|
||||
/* to make a symbol visible */
|
||||
/* #undef PCREPOSIX_EXP_DEFN */
|
||||
|
||||
/* to make a symbol visible */
|
||||
/* #undef PCRE_EXP_DATA_DEFN */
|
||||
|
||||
/* to make a symbol visible */
|
||||
/* #undef PCRE_EXP_DECL */
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, a suitable
|
||||
__declspec value is used for Windows systems; in other environments
|
||||
"extern" is used for a C compiler and "extern C" for a C++ compiler.
|
||||
This macro apears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
/* #undef PCRE_EXP_DEFN */
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE_STATIC */
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE
|
||||
requires three integers per substring, whereas the POSIX interface provides
|
||||
only two. If the number of expected substrings is small, the wrapper
|
||||
function uses space on the stack, because this is faster than using
|
||||
malloc() for each call. The threshold above which the stack is no longer
|
||||
used is defined by POSIX_MALLOC_THRESHOLD. */
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
/* #undef PTHREAD_CREATE_JOINABLE */
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#ifndef STDC_HEADERS
|
||||
#define STDC_HEADERS 1
|
||||
#endif
|
||||
|
||||
/* Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||
are able to generate code coverage reports. */
|
||||
#undef SUPPORT_GCOV
|
||||
|
||||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
#if HAVE_PCRE_JIT_SUPPORT
|
||||
#define SUPPORT_JIT
|
||||
#endif
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||
#undef SUPPORT_LIBEDIT
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||
able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE library. */
|
||||
/* #undef SUPPORT_PCRE16 */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE library. */
|
||||
/* #undef SUPPORT_PCRE32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE library. */
|
||||
/* #undef SUPPORT_PCRE8 */
|
||||
|
||||
/* Define to any value to enable JIT support in pcregrep. */
|
||||
/* #undef SUPPORT_PCREGREP_JIT */
|
||||
|
||||
/* Define to enable support for Unicode properties */
|
||||
/* #undef SUPPORT_UCP */
|
||||
|
||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible with
|
||||
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
/* #undef SUPPORT_UTF8 */
|
||||
|
||||
/* Valgrind support to find invalid memory reads. */
|
||||
#if HAVE_PCRE_VALGRIND_SUPPORT
|
||||
#define SUPPORT_VALGRIND 1
|
||||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "8.41"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to the type of a signed integer type of width exactly 64 bits if
|
||||
such a type exists and the standard includes do not define it. */
|
||||
/* #undef int64_t */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,677 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 8
|
||||
#define PCRE_MINOR 41
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2017-07-05
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Public options. Some are compile-time only, some are run-time only, and some
|
||||
are both. Most of the compile-time options are saved with the compiled regex so
|
||||
that they can be inspected during studying (and therefore JIT compiling). Note
|
||||
that pcre_study() has its own set of options. Originally, all the options
|
||||
defined here used distinct bits. However, almost all the bits in a 32-bit word
|
||||
are now used, so in order to conserve them, option bits that were previously
|
||||
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
|
||||
also be used for compile-time options that affect only compiling and are not
|
||||
relevant for studying or JIT compiling.
|
||||
|
||||
Some options for pcre_compile() change its behaviour but do not affect the
|
||||
behaviour of the execution functions. Other options are passed through to the
|
||||
execution functions and affect their behaviour, with or without affecting the
|
||||
behaviour of pcre_compile().
|
||||
|
||||
Options that can be passed to pcre_compile() are tagged Cx below, with these
|
||||
variants:
|
||||
|
||||
C1 Affects compile only
|
||||
C2 Does not affect compile; affects exec, dfa_exec
|
||||
C3 Affects compile, exec, dfa_exec
|
||||
C4 Affects compile, exec, dfa_exec, study
|
||||
C5 Affects compile, exec, study
|
||||
|
||||
Options that can be set for pcre_exec() and/or pcre_dfa_exec() are flagged with
|
||||
E and D, respectively. They take precedence over C3, C4, and C5 settings passed
|
||||
from pcre_compile(). Those that are compatible with JIT execution are flagged
|
||||
with J. */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001 /* C1 */
|
||||
#define PCRE_MULTILINE 0x00000002 /* C1 */
|
||||
#define PCRE_DOTALL 0x00000004 /* C1 */
|
||||
#define PCRE_EXTENDED 0x00000008 /* C1 */
|
||||
#define PCRE_ANCHORED 0x00000010 /* C4 E D */
|
||||
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* C2 */
|
||||
#define PCRE_EXTRA 0x00000040 /* C1 */
|
||||
#define PCRE_NOTBOL 0x00000080 /* E D J */
|
||||
#define PCRE_NOTEOL 0x00000100 /* E D J */
|
||||
#define PCRE_UNGREEDY 0x00000200 /* C1 */
|
||||
#define PCRE_NOTEMPTY 0x00000400 /* E D J */
|
||||
#define PCRE_UTF8 0x00000800 /* C4 ) */
|
||||
#define PCRE_UTF16 0x00000800 /* C4 ) Synonyms */
|
||||
#define PCRE_UTF32 0x00000800 /* C4 ) */
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000 /* C1 */
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000 /* C1 E D J ) */
|
||||
#define PCRE_NO_UTF16_CHECK 0x00002000 /* C1 E D J ) Synonyms */
|
||||
#define PCRE_NO_UTF32_CHECK 0x00002000 /* C1 E D J ) */
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
|
||||
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
|
||||
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
|
||||
|
||||
/* This pair use the same bit. */
|
||||
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
|
||||
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
|
||||
|
||||
/* This pair use the same bit. */
|
||||
#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
|
||||
#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
|
||||
|
||||
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
|
||||
#define PCRE_DUPNAMES 0x00080000 /* C1 */
|
||||
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
|
||||
#define PCRE_NEWLINE_LF 0x00200000 /* C3 E D */
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000 /* C3 E D */
|
||||
#define PCRE_NEWLINE_ANY 0x00400000 /* C3 E D */
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000 /* C3 E D */
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000 /* C3 E D */
|
||||
#define PCRE_BSR_UNICODE 0x01000000 /* C3 E D */
|
||||
#define PCRE_JAVASCRIPT_COMPAT 0x02000000 /* C5 */
|
||||
#define PCRE_NO_START_OPTIMIZE 0x04000000 /* C2 E D ) Synonyms */
|
||||
#define PCRE_NO_START_OPTIMISE 0x04000000 /* C2 E D ) */
|
||||
#define PCRE_PARTIAL_HARD 0x08000000 /* E D J */
|
||||
#define PCRE_NOTEMPTY_ATSTART 0x10000000 /* E D J */
|
||||
#define PCRE_UCP 0x20000000 /* C3 */
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16/32 */
|
||||
#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16/32 */
|
||||
#define PCRE_ERROR_BADUTF32 (-10) /* Same for 8/16/32 */
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||
#define PCRE_ERROR_INTERNAL (-14)
|
||||
#define PCRE_ERROR_BADCOUNT (-15)
|
||||
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
#define PCRE_ERROR_BADOFFSET (-24)
|
||||
#define PCRE_ERROR_SHORTUTF8 (-25)
|
||||
#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */
|
||||
#define PCRE_ERROR_RECURSELOOP (-26)
|
||||
#define PCRE_ERROR_JIT_STACKLIMIT (-27)
|
||||
#define PCRE_ERROR_BADMODE (-28)
|
||||
#define PCRE_ERROR_BADENDIANNESS (-29)
|
||||
#define PCRE_ERROR_DFA_BADRESTART (-30)
|
||||
#define PCRE_ERROR_JIT_BADOPTION (-31)
|
||||
#define PCRE_ERROR_BADLENGTH (-32)
|
||||
#define PCRE_ERROR_UNSET (-33)
|
||||
|
||||
/* Specific error codes for UTF-8 validity checks */
|
||||
|
||||
#define PCRE_UTF8_ERR0 0
|
||||
#define PCRE_UTF8_ERR1 1
|
||||
#define PCRE_UTF8_ERR2 2
|
||||
#define PCRE_UTF8_ERR3 3
|
||||
#define PCRE_UTF8_ERR4 4
|
||||
#define PCRE_UTF8_ERR5 5
|
||||
#define PCRE_UTF8_ERR6 6
|
||||
#define PCRE_UTF8_ERR7 7
|
||||
#define PCRE_UTF8_ERR8 8
|
||||
#define PCRE_UTF8_ERR9 9
|
||||
#define PCRE_UTF8_ERR10 10
|
||||
#define PCRE_UTF8_ERR11 11
|
||||
#define PCRE_UTF8_ERR12 12
|
||||
#define PCRE_UTF8_ERR13 13
|
||||
#define PCRE_UTF8_ERR14 14
|
||||
#define PCRE_UTF8_ERR15 15
|
||||
#define PCRE_UTF8_ERR16 16
|
||||
#define PCRE_UTF8_ERR17 17
|
||||
#define PCRE_UTF8_ERR18 18
|
||||
#define PCRE_UTF8_ERR19 19
|
||||
#define PCRE_UTF8_ERR20 20
|
||||
#define PCRE_UTF8_ERR21 21
|
||||
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
|
||||
|
||||
/* Specific error codes for UTF-16 validity checks */
|
||||
|
||||
#define PCRE_UTF16_ERR0 0
|
||||
#define PCRE_UTF16_ERR1 1
|
||||
#define PCRE_UTF16_ERR2 2
|
||||
#define PCRE_UTF16_ERR3 3
|
||||
#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
|
||||
|
||||
/* Specific error codes for UTF-32 validity checks */
|
||||
|
||||
#define PCRE_UTF32_ERR0 0
|
||||
#define PCRE_UTF32_ERR1 1
|
||||
#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
|
||||
#define PCRE_UTF32_ERR3 3
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
#define PCRE_INFO_MINLENGTH 15
|
||||
#define PCRE_INFO_JIT 16
|
||||
#define PCRE_INFO_JITSIZE 17
|
||||
#define PCRE_INFO_MAXLOOKBEHIND 18
|
||||
#define PCRE_INFO_FIRSTCHARACTER 19
|
||||
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
|
||||
#define PCRE_INFO_REQUIREDCHAR 21
|
||||
#define PCRE_INFO_REQUIREDCHARFLAGS 22
|
||||
#define PCRE_INFO_MATCHLIMIT 23
|
||||
#define PCRE_INFO_RECURSIONLIMIT 24
|
||||
#define PCRE_INFO_MATCH_EMPTY 25
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
#define PCRE_CONFIG_JIT 9
|
||||
#define PCRE_CONFIG_UTF16 10
|
||||
#define PCRE_CONFIG_JITTARGET 11
|
||||
#define PCRE_CONFIG_UTF32 12
|
||||
#define PCRE_CONFIG_PARENS_LIMIT 13
|
||||
|
||||
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_STUDY_JIT_COMPILE 0x0001
|
||||
#define PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE 0x0002
|
||||
#define PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE 0x0004
|
||||
#define PCRE_STUDY_EXTRA_NEEDED 0x0008
|
||||
|
||||
/* Bit flags for the pcre[16|32]_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
#define PCRE_EXTRA_MARK 0x0020
|
||||
#define PCRE_EXTRA_EXECUTABLE_JIT 0x0040
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
struct real_pcre16; /* declaration; the definition is private */
|
||||
typedef struct real_pcre16 pcre16;
|
||||
|
||||
struct real_pcre32; /* declaration; the definition is private */
|
||||
typedef struct real_pcre32 pcre32;
|
||||
|
||||
struct real_pcre_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre_jit_stack pcre_jit_stack;
|
||||
|
||||
struct real_pcre16_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre16_jit_stack pcre16_jit_stack;
|
||||
|
||||
struct real_pcre32_jit_stack; /* declaration; the definition is private */
|
||||
typedef struct real_pcre32_jit_stack pcre32_jit_stack;
|
||||
|
||||
/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
|
||||
a 16 bit wide signed data type. Otherwise it can be a dummy data type since
|
||||
pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
|
||||
#ifndef PCRE_UCHAR16
|
||||
#define PCRE_UCHAR16 unsigned short
|
||||
#endif
|
||||
|
||||
#ifndef PCRE_SPTR16
|
||||
#define PCRE_SPTR16 const PCRE_UCHAR16 *
|
||||
#endif
|
||||
|
||||
/* If PCRE is compiled with 32 bit character support, PCRE_UCHAR32 must contain
|
||||
a 32 bit wide signed data type. Otherwise it can be a dummy data type since
|
||||
pcre32 functions are not implemented. There is a check for this in pcre_internal.h. */
|
||||
#ifndef PCRE_UCHAR32
|
||||
#define PCRE_UCHAR32 unsigned int
|
||||
#endif
|
||||
|
||||
#ifndef PCRE_SPTR32
|
||||
#define PCRE_SPTR32 const PCRE_UCHAR32 *
|
||||
#endif
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such as way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
unsigned char **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre_extra;
|
||||
|
||||
/* Same structure as above, but with 16 bit char pointers. */
|
||||
|
||||
typedef struct pcre16_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
PCRE_UCHAR16 **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre16_extra;
|
||||
|
||||
/* Same structure as above, but with 32 bit char pointers. */
|
||||
|
||||
typedef struct pcre32_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
PCRE_UCHAR32 **mark; /* For passing back a mark pointer */
|
||||
void *executable_jit; /* Contains a pointer to a compiled jit code */
|
||||
} pcre32_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const unsigned char *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Same structure as above, but with 16 bit char pointers. */
|
||||
|
||||
typedef struct pcre16_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR16 subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre16_callout_block;
|
||||
|
||||
/* Same structure as above, but with 32 bit char pointers. */
|
||||
|
||||
typedef struct pcre32_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR32 subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------- Added for Version 2 -------------------------- */
|
||||
const PCRE_UCHAR32 *mark; /* Pointer to current mark or NULL */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre32_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
PCRE_EXP_DECL int (*pcre_stack_guard)(void);
|
||||
|
||||
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *);
|
||||
PCRE_EXP_DECL int (*pcre16_stack_guard)(void);
|
||||
|
||||
PCRE_EXP_DECL void *(*pcre32_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre32_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre32_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre32_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre32_callout)(pcre32_callout_block *);
|
||||
PCRE_EXP_DECL int (*pcre32_stack_guard)(void);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
PCRE_EXP_DECL int pcre_stack_guard(void);
|
||||
|
||||
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_free(void *);
|
||||
PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *);
|
||||
PCRE_EXP_DECL int pcre16_stack_guard(void);
|
||||
|
||||
PCRE_EXP_DECL void *pcre32_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre32_free(void *);
|
||||
PCRE_EXP_DECL void *pcre32_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre32_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre32_callout(pcre32_callout_block *);
|
||||
PCRE_EXP_DECL int pcre32_stack_guard(void);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* User defined callback which provides a stack just before the match starts. */
|
||||
|
||||
typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
|
||||
typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
|
||||
typedef pcre32_jit_stack *(*pcre32_jit_callback)(void *);
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre32 *pcre32_compile(PCRE_SPTR32, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL pcre32 *pcre32_compile2(PCRE_SPTR32, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre16_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre32_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||
int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
|
||||
PCRE_EXP_DECL int pcre32_copy_named_substring(const pcre32 *, PCRE_SPTR32,
|
||||
int *, int, PCRE_SPTR32, PCRE_UCHAR32 *, int);
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int,
|
||||
char *, int);
|
||||
PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
|
||||
PCRE_UCHAR16 *, int);
|
||||
PCRE_EXP_DECL int pcre32_copy_substring(PCRE_SPTR32, int *, int, int,
|
||||
PCRE_UCHAR32 *, int);
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre32_dfa_exec(const pcre32 *, const pcre32_extra *,
|
||||
PCRE_SPTR32, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int);
|
||||
PCRE_EXP_DECL int pcre32_exec(const pcre32 *, const pcre32_extra *,
|
||||
PCRE_SPTR32, int, int, int, int *, int);
|
||||
PCRE_EXP_DECL int pcre_jit_exec(const pcre *, const pcre_extra *,
|
||||
PCRE_SPTR, int, int, int, int *, int,
|
||||
pcre_jit_stack *);
|
||||
PCRE_EXP_DECL int pcre16_jit_exec(const pcre16 *, const pcre16_extra *,
|
||||
PCRE_SPTR16, int, int, int, int *, int,
|
||||
pcre16_jit_stack *);
|
||||
PCRE_EXP_DECL int pcre32_jit_exec(const pcre32 *, const pcre32_extra *,
|
||||
PCRE_SPTR32, int, int, int, int *, int,
|
||||
pcre32_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
|
||||
PCRE_EXP_DECL void pcre32_free_substring(PCRE_SPTR32);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL void pcre32_free_substring_list(PCRE_SPTR32 *);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre32_fullinfo(const pcre32 *, const pcre32_extra *, int,
|
||||
void *);
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
|
||||
int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre32_get_named_substring(const pcre32 *, PCRE_SPTR32,
|
||||
int *, int, PCRE_SPTR32, PCRE_SPTR32 *);
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
|
||||
PCRE_EXP_DECL int pcre32_get_stringnumber(const pcre32 *, PCRE_SPTR32);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
|
||||
PCRE_UCHAR16 **, PCRE_UCHAR16 **);
|
||||
PCRE_EXP_DECL int pcre32_get_stringtable_entries(const pcre32 *, PCRE_SPTR32,
|
||||
PCRE_UCHAR32 **, PCRE_UCHAR32 **);
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int,
|
||||
PCRE_SPTR16 *);
|
||||
PCRE_EXP_DECL int pcre32_get_substring(PCRE_SPTR32, int *, int, int,
|
||||
PCRE_SPTR32 *);
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int,
|
||||
PCRE_SPTR16 **);
|
||||
PCRE_EXP_DECL int pcre32_get_substring_list(PCRE_SPTR32, int *, int,
|
||||
PCRE_SPTR32 **);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
|
||||
PCRE_EXP_DECL const unsigned char *pcre32_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int);
|
||||
PCRE_EXP_DECL int pcre32_refcount(pcre32 *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
|
||||
PCRE_EXP_DECL pcre32_extra *pcre32_study(const pcre32 *, int, const char **);
|
||||
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
|
||||
PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
|
||||
PCRE_EXP_DECL void pcre32_free_study(pcre32_extra *);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
PCRE_EXP_DECL const char *pcre16_version(void);
|
||||
PCRE_EXP_DECL const char *pcre32_version(void);
|
||||
|
||||
/* Utility functions for byte order swaps. */
|
||||
PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *, pcre32_extra *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
|
||||
PCRE_SPTR16, int, int *, int);
|
||||
PCRE_EXP_DECL int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *,
|
||||
PCRE_SPTR32, int, int *, int);
|
||||
|
||||
/* JIT compiler related functions. */
|
||||
|
||||
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL pcre32_jit_stack *pcre32_jit_stack_alloc(int, int);
|
||||
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *);
|
||||
PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
|
||||
pcre_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
||||
pcre16_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
|
||||
pcre32_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
|
||||
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
|
||||
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,190 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Keep the original link size. */
|
||||
static int real_link_size = LINK_SIZE;
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* This function has an extensible interface so that additional items can be
|
||||
added compatibly.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_config(int what, void *where)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_config(int what, void *where)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_config(int what, void *where)
|
||||
#endif
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_CONFIG_UTF8:
|
||||
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UTF16:
|
||||
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UTF32:
|
||||
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
|
||||
*((int *)where) = 0;
|
||||
return PCRE_ERROR_BADOPTION;
|
||||
#else
|
||||
#if defined SUPPORT_UTF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
#endif
|
||||
|
||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||
#ifdef SUPPORT_UCP
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_JITTARGET:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((const char **)where) = PRIV(jit_get_target)();
|
||||
#else
|
||||
*((const char **)where) = NULL;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = real_link_size;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_PARENS_LIMIT:
|
||||
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_config.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,245 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2013 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_fullinfo(), which returns
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is a newer "info" function which has an extensible interface so
|
||||
that additional items can be added compatibly.
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
extra_data points extra data, or NULL
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
int what, void *where)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
|
||||
int what, void *where)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_fullinfo(const pcre32 *argument_re, const pcre32_extra *extra_data,
|
||||
int what, void *where)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
|
||||
const pcre_study_data *study = NULL;
|
||||
|
||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
|
||||
means that the pattern is likely compiled with different endianness. */
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
return re->magic_number == REVERSED_MAGIC_NUMBER?
|
||||
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
|
||||
|
||||
/* Check that this pattern was compiled in the correct bit mode */
|
||||
|
||||
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_INFO_OPTIONS:
|
||||
*((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_STUDYSIZE:
|
||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JITSIZE:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((size_t *)where) =
|
||||
(extra_data != NULL &&
|
||||
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||
extra_data->executable_jit != NULL)?
|
||||
PRIV(jit_get_size)(extra_data->executable_jit) : 0;
|
||||
#else
|
||||
*((size_t *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_INFO_CAPTURECOUNT:
|
||||
*((int *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_BACKREFMAX:
|
||||
*((int *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_FIRSTSET) != 0)? (int)re->first_char :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTCHARACTER:
|
||||
*((pcre_uint32 *)where) =
|
||||
(re->flags & PCRE_FIRSTSET) != 0 ? re->first_char : 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTCHARACTERFLAGS:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_FIRSTSET) != 0) ? 1 :
|
||||
((re->flags & PCRE_STARTLINE) != 0) ? 2 : 0;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
block, not the internal copy (with flipped integer fields). */
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const pcre_uint8 **)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MINLENGTH:
|
||||
*((int *)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
|
||||
(int)(study->minlength) : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JIT:
|
||||
*((int *)where) = extra_data != NULL &&
|
||||
(extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
|
||||
extra_data->executable_jit != NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0)? (int)re->req_char : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_REQUIREDCHAR:
|
||||
*((pcre_uint32 *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0) ? re->req_char : 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_REQUIREDCHARFLAGS:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0);
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
*((int *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMECOUNT:
|
||||
*((int *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMETABLE:
|
||||
*((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_DEFAULT_TABLES:
|
||||
*((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
|
||||
break;
|
||||
|
||||
/* From release 8.00 this will always return TRUE because NOPARTIAL is
|
||||
no longer ever set (the restrictions have been removed). */
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MAXLOOKBEHIND:
|
||||
*((int *)where) = re->max_lookbehind;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MATCHLIMIT:
|
||||
if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
|
||||
*((pcre_uint32 *)where) = re->limit_match;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_RECURSIONLIMIT:
|
||||
if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
|
||||
*((pcre_uint32 *)where) = re->limit_recursion;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MATCH_EMPTY:
|
||||
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcre_fullinfo.c */
|
||||
@@ -1,669 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
|
||||
#endif
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
pcre_uchar *nametable;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE16
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE32
|
||||
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
pcre_uchar *entry = nametable + entrysize*mid;
|
||||
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||
if (c == 0) return GET2(entry, 0);
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
|
||||
PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname,
|
||||
PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr)
|
||||
#endif
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
pcre_uchar *nametable, *lastentry;
|
||||
|
||||
#ifdef COMPILE_PCRE8
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE16
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
#ifdef COMPILE_PCRE32
|
||||
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
#endif
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
pcre_uchar *entry = nametable + entrysize*mid;
|
||||
int c = STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(entry + IMM2_SIZE));
|
||||
if (c == 0)
|
||||
{
|
||||
pcre_uchar *first = entry;
|
||||
pcre_uchar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (STRCMP_UC_UC((pcre_uchar *)stringname,
|
||||
(pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
#if defined COMPILE_PCRE8
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
#elif defined COMPILE_PCRE16
|
||||
*firstptr = (PCRE_UCHAR16 *)first;
|
||||
*lastptr = (PCRE_UCHAR16 *)last;
|
||||
#elif defined COMPILE_PCRE32
|
||||
*firstptr = (PCRE_UCHAR32 *)first;
|
||||
*lastptr = (PCRE_UCHAR32 *)last;
|
||||
#endif
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
stringcount number of captured substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector,
|
||||
int stringcount)
|
||||
#elif defined COMPILE_PCRE16
|
||||
static int
|
||||
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector,
|
||||
int stringcount)
|
||||
#elif defined COMPILE_PCRE32
|
||||
static int
|
||||
get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector,
|
||||
int stringcount)
|
||||
#endif
|
||||
{
|
||||
const REAL_PCRE *re = (const REAL_PCRE *)code;
|
||||
int entrysize;
|
||||
pcre_uchar *entry;
|
||||
#if defined COMPILE_PCRE8
|
||||
char *first, *last;
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_UCHAR16 *first, *last;
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_UCHAR32 *first, *last;
|
||||
#endif
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#elif defined COMPILE_PCRE16
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre16_get_stringnumber(code, stringname);
|
||||
entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#elif defined COMPILE_PCRE32
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre32_get_stringnumber(code, stringname);
|
||||
entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last);
|
||||
#endif
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
|
||||
{
|
||||
int n = GET2(entry, 0);
|
||||
if (n < stringcount && ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return GET2(entry, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_UCHAR16 *buffer, int size)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_UCHAR32 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject,
|
||||
int *ovector, int stringcount, const char *stringname,
|
||||
char *buffer, int size)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||
PCRE_UCHAR16 *buffer, int size)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR32 stringname,
|
||||
PCRE_UCHAR32 *buffer, int size)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||
if (n <= 0) return n;
|
||||
#if defined COMPILE_PCRE8
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#elif defined COMPILE_PCRE16
|
||||
return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#elif defined COMPILE_PCRE32
|
||||
return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
PCRE_SPTR16 **listptr)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount,
|
||||
PCRE_SPTR32 **listptr)
|
||||
#endif
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(pcre_uchar *);
|
||||
int double_count = stringcount * 2;
|
||||
pcre_uchar **stringlist;
|
||||
pcre_uchar *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
size += sizeof(pcre_uchar *) + IN_UCHARS(1);
|
||||
if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
stringlist = (pcre_uchar **)(PUBL(malloc))(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
*listptr = (const char **)stringlist;
|
||||
#elif defined COMPILE_PCRE16
|
||||
*listptr = (PCRE_SPTR16 *)stringlist;
|
||||
#elif defined COMPILE_PCRE32
|
||||
*listptr = (PCRE_SPTR32 *)stringlist;
|
||||
#endif
|
||||
p = (pcre_uchar *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
|
||||
memcpy(p, subject + ovector[i], IN_UCHARS(len));
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (PUBL(free))()
|
||||
directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre16_free_substring_list(PCRE_SPTR16 *pointer)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre32_free_substring_list(PCRE_SPTR32 *pointer)
|
||||
#endif
|
||||
{
|
||||
(PUBL(free))((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_SPTR16 *stringptr)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount,
|
||||
int stringnumber, PCRE_SPTR32 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int yield;
|
||||
pcre_uchar *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
|
||||
substring[yield] = 0;
|
||||
#if defined COMPILE_PCRE8
|
||||
*stringptr = (const char *)substring;
|
||||
#elif defined COMPILE_PCRE16
|
||||
*stringptr = (PCRE_SPTR16)substring;
|
||||
#elif defined COMPILE_PCRE32
|
||||
*stringptr = (PCRE_SPTR32)substring;
|
||||
#endif
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_named_substring(const pcre *code, const char *subject,
|
||||
int *ovector, int stringcount, const char *stringname,
|
||||
const char **stringptr)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR16 stringname,
|
||||
PCRE_SPTR16 *stringptr)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject,
|
||||
int *ovector, int stringcount, PCRE_SPTR32 stringname,
|
||||
PCRE_SPTR32 *stringptr)
|
||||
#endif
|
||||
{
|
||||
int n = get_first_set(code, stringname, ovector, stringcount);
|
||||
if (n <= 0) return n;
|
||||
#if defined COMPILE_PCRE8
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#elif defined COMPILE_PCRE16
|
||||
return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#elif defined COMPILE_PCRE32
|
||||
return pcre32_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (PUBL(free))()
|
||||
directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring(const char *pointer)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre16_free_substring(PCRE_SPTR16 pointer)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre32_free_substring(PCRE_SPTR32 pointer)
|
||||
#endif
|
||||
{
|
||||
(PUBL(free))((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcre_get.c */
|
||||
@@ -1,86 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2014 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains global variables that are exported by the PCRE library.
|
||||
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads.
|
||||
|
||||
For MS Visual Studio and Symbian OS, there are problems in initializing these
|
||||
variables to non-local functions. In these cases, therefore, an indirection via
|
||||
a local function is used.
|
||||
|
||||
Also, when compiling for Virtual Pascal, things are done differently, and
|
||||
global variables are not used. */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#if defined _MSC_VER || defined __SYMBIAN32__
|
||||
static void* LocalPcreMalloc(size_t aSize)
|
||||
{
|
||||
return malloc(aSize);
|
||||
}
|
||||
static void LocalPcreFree(void* aPtr)
|
||||
{
|
||||
free(aPtr);
|
||||
}
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
@@ -1,92 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_refcount(), which is an
|
||||
auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Maintain reference count *
|
||||
*************************************************/
|
||||
|
||||
/* The reference count is a 16-bit field, initialized to zero. It is not
|
||||
possible to transfer a non-zero count from one host to a different host that
|
||||
has a different byte order - though I can't see why anyone in their right mind
|
||||
would ever want to do that!
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
adjust value to add to the count
|
||||
|
||||
Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre16_refcount(pcre16 *argument_re, int adjust)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre32_refcount(pcre32 *argument_re, int adjust)
|
||||
#endif
|
||||
{
|
||||
REAL_PCRE *re = (REAL_PCRE *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
|
||||
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
|
||||
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||
(adjust + re->ref_count > 65535)? 65535 :
|
||||
re->ref_count + adjust;
|
||||
return re->ref_count;
|
||||
}
|
||||
|
||||
/* End of pcre_refcount.c */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,98 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_version(), which returns a
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return version string *
|
||||
*************************************************/
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test its value. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
|
||||
production releases. Originally, it was used naively in this code:
|
||||
|
||||
return XSTRING(PCRE_MAJOR)
|
||||
"." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE_PRERELEASE)
|
||||
" " XSTRING(PCRE_DATE);
|
||||
|
||||
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
|
||||
STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out the gcc treats this case as a single empty string - which is what we
|
||||
really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. To cope with both ways of
|
||||
handling this, I had resort to some messy hackery that does a test at run time.
|
||||
I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre_version(void)
|
||||
#elif defined COMPILE_PCRE16
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre16_version(void)
|
||||
#elif defined COMPILE_PCRE32
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre32_version(void)
|
||||
#endif
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcre_version.c */
|
||||
@@ -1,406 +0,0 @@
|
||||
/*************************************************
|
||||
* PCRE DEMONSTRATION PROGRAM *
|
||||
*************************************************/
|
||||
|
||||
/* This is a demonstration program to illustrate the most straightforward ways
|
||||
of calling the PCRE regular expression library from a C program. See the
|
||||
pcresample documentation for a short discussion ("man pcresample" if you have
|
||||
the PCRE man pages installed).
|
||||
|
||||
In Unix-like environments, if PCRE is installed in your standard system
|
||||
libraries, you should be able to compile this program using this command:
|
||||
|
||||
gcc -Wall pcredemo.c -lpcre -o pcredemo
|
||||
|
||||
If PCRE is not installed in a standard place, it is likely to be installed with
|
||||
support for the pkg-config mechanism. If you have pkg-config, you can compile
|
||||
this program using this command:
|
||||
|
||||
gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
|
||||
|
||||
If you do not have pkg-config, you may have to use this:
|
||||
|
||||
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
|
||||
-R/usr/local/lib -lpcre -o pcredemo
|
||||
|
||||
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
|
||||
library files for PCRE are installed on your system. Only some operating
|
||||
systems (e.g. Solaris) use the -R option.
|
||||
|
||||
Building under Windows:
|
||||
|
||||
If you want to statically link this program against a non-dll .a file, you must
|
||||
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
|
||||
pcre_free() exported functions will be declared __declspec(dllimport), with
|
||||
unwanted results. So in this environment, uncomment the following line. */
|
||||
|
||||
/* #define PCRE_STATIC */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pcre.h>
|
||||
|
||||
#define OVECCOUNT 30 /* should be a multiple of 3 */
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
pcre *re;
|
||||
const char *error;
|
||||
char *pattern;
|
||||
char *subject;
|
||||
unsigned char *name_table;
|
||||
unsigned int option_bits;
|
||||
int erroffset;
|
||||
int find_all;
|
||||
int crlf_is_newline;
|
||||
int namecount;
|
||||
int name_entry_size;
|
||||
int ovector[OVECCOUNT];
|
||||
int subject_length;
|
||||
int rc, i;
|
||||
int utf8;
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* First, sort out the command line. There is only one possible option at *
|
||||
* the moment, "-g" to request repeated matching to find all occurrences, *
|
||||
* like Perl's /g option. We set the variable find_all to a non-zero value *
|
||||
* if the -g option is present. Apart from that, there must be exactly two *
|
||||
* arguments. *
|
||||
**************************************************************************/
|
||||
|
||||
find_all = 0;
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
if (strcmp(argv[i], "-g") == 0) find_all = 1;
|
||||
else break;
|
||||
}
|
||||
|
||||
/* After the options, we require exactly two arguments, which are the pattern,
|
||||
and the subject string. */
|
||||
|
||||
if (argc - i != 2)
|
||||
{
|
||||
printf("Two arguments required: a regex and a subject string\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
pattern = argv[i];
|
||||
subject = argv[i+1];
|
||||
subject_length = (int)strlen(subject);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* Now we are going to compile the regular expression pattern, and handle *
|
||||
* and errors that are detected. *
|
||||
*************************************************************************/
|
||||
|
||||
re = pcre_compile(
|
||||
pattern, /* the pattern */
|
||||
0, /* default options */
|
||||
&error, /* for error message */
|
||||
&erroffset, /* for error offset */
|
||||
NULL); /* use default character tables */
|
||||
|
||||
/* Compilation failed: print the error message and exit */
|
||||
|
||||
if (re == NULL)
|
||||
{
|
||||
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the compilation succeeded, we call PCRE again, in order to do a *
|
||||
* pattern match against the subject string. This does just ONE match. If *
|
||||
* further matching is needed, it will be done below. *
|
||||
*************************************************************************/
|
||||
|
||||
rc = pcre_exec(
|
||||
re, /* the compiled pattern */
|
||||
NULL, /* no extra data - we didn't study the pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
ovector, /* output vector for substring information */
|
||||
OVECCOUNT); /* number of elements in the output vector */
|
||||
|
||||
/* Matching failed: handle error cases */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
switch(rc)
|
||||
{
|
||||
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
|
||||
/*
|
||||
Handle other special cases if you like
|
||||
*/
|
||||
default: printf("Matching error %d\n", rc); break;
|
||||
}
|
||||
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
|
||||
printf("\nMatch succeeded at offset %d\n", ovector[0]);
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* We have found the first match within the subject string. If the output *
|
||||
* vector wasn't big enough, say so. Then output any substrings that were *
|
||||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough */
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
rc = OVECCOUNT/3;
|
||||
printf("ovector only has room for %d captured substrings\n", rc - 1);
|
||||
}
|
||||
|
||||
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||
application you might want to do things other than print them. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
char *substring_start = subject + ovector[2*i];
|
||||
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, substring_length, substring_start);
|
||||
}
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
* That concludes the basic part of this demonstration program. We have *
|
||||
* compiled a pattern, and performed a single match. The code that follows *
|
||||
* shows first how to access named substrings, and then how to code for *
|
||||
* repeated matches on the same subject. *
|
||||
**************************************************************************/
|
||||
|
||||
/* See if there are any named substrings, and if so, show them by name. First
|
||||
we have to extract the count of named parentheses from the pattern. */
|
||||
|
||||
(void)pcre_fullinfo(
|
||||
re, /* the compiled pattern */
|
||||
NULL, /* no extra data - we didn't study the pattern */
|
||||
PCRE_INFO_NAMECOUNT, /* number of named substrings */
|
||||
&namecount); /* where to put the answer */
|
||||
|
||||
if (namecount <= 0) printf("No named substrings\n"); else
|
||||
{
|
||||
unsigned char *tabptr;
|
||||
printf("Named substrings\n");
|
||||
|
||||
/* Before we can access the substrings, we must extract the table for
|
||||
translating names to numbers, and the size of each entry in the table. */
|
||||
|
||||
(void)pcre_fullinfo(
|
||||
re, /* the compiled pattern */
|
||||
NULL, /* no extra data - we didn't study the pattern */
|
||||
PCRE_INFO_NAMETABLE, /* address of the table */
|
||||
&name_table); /* where to put the answer */
|
||||
|
||||
(void)pcre_fullinfo(
|
||||
re, /* the compiled pattern */
|
||||
NULL, /* no extra data - we didn't study the pattern */
|
||||
PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
|
||||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. */
|
||||
|
||||
tabptr = name_table;
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
* If the "-g" option was given on the command line, we want to continue *
|
||||
* to search for additional matches in the subject string, in a similar *
|
||||
* way to the /g option in Perl. This turns out to be trickier than you *
|
||||
* might think because of the possibility of matching an empty string. *
|
||||
* What happens is as follows: *
|
||||
* *
|
||||
* If the previous match was NOT for an empty string, we can just start *
|
||||
* the next match at the end of the previous one. *
|
||||
* *
|
||||
* If the previous match WAS for an empty string, we can't do that, as it *
|
||||
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
|
||||
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
|
||||
* The first of these tells PCRE that an empty string at the start of the *
|
||||
* subject is not a valid match; other possibilities must be tried. The *
|
||||
* second flag restricts PCRE to one match attempt at the initial string *
|
||||
* position. If this match succeeds, an alternative to the empty string *
|
||||
* match has been found, and we can print it and proceed round the loop, *
|
||||
* advancing by the length of whatever was found. If this match does not *
|
||||
* succeed, we still stay in the loop, advancing by just one character. *
|
||||
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
|
||||
* more than one byte. *
|
||||
* *
|
||||
* However, there is a complication concerned with newlines. When the *
|
||||
* newline convention is such that CRLF is a valid newline, we must *
|
||||
* advance by two characters rather than one. The newline convention can *
|
||||
* be set in the regex by (*CR), etc.; if not, we must find the default. *
|
||||
*************************************************************************/
|
||||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre_free(re); /* Release the memory used for the compiled pattern */
|
||||
return 0; /* Finish unless -g was given */
|
||||
}
|
||||
|
||||
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
|
||||
sequence. First, find the options with which the regex was compiled; extract
|
||||
the UTF-8 state, and mask off all but the newline options. */
|
||||
|
||||
(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
|
||||
utf8 = option_bits & PCRE_UTF8;
|
||||
option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
|
||||
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
|
||||
|
||||
/* If no newline options were set, find the default newline convention from the
|
||||
build configuration. */
|
||||
|
||||
if (option_bits == 0)
|
||||
{
|
||||
int d;
|
||||
(void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
|
||||
/* Note that these values are always the ASCII ones, even in
|
||||
EBCDIC environments. CR = 13, NL = 10. */
|
||||
option_bits = (d == 13)? PCRE_NEWLINE_CR :
|
||||
(d == 10)? PCRE_NEWLINE_LF :
|
||||
(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
|
||||
(d == -2)? PCRE_NEWLINE_ANYCRLF :
|
||||
(d == -1)? PCRE_NEWLINE_ANY : 0;
|
||||
}
|
||||
|
||||
/* See if CRLF is a valid newline sequence. */
|
||||
|
||||
crlf_is_newline =
|
||||
option_bits == PCRE_NEWLINE_ANY ||
|
||||
option_bits == PCRE_NEWLINE_CRLF ||
|
||||
option_bits == PCRE_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int options = 0; /* Normally no options */
|
||||
int start_offset = ovector[1]; /* Start at end of previous match */
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are
|
||||
at the end of the subject. Otherwise, arrange to run another match at the
|
||||
same point to see if a non-empty match can be found. */
|
||||
|
||||
if (ovector[0] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == subject_length) break;
|
||||
options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
|
||||
}
|
||||
|
||||
/* Run the next matching operation */
|
||||
|
||||
rc = pcre_exec(
|
||||
re, /* the compiled pattern */
|
||||
NULL, /* no extra data - we didn't study the pattern */
|
||||
subject, /* the subject string */
|
||||
subject_length, /* the length of the subject */
|
||||
start_offset, /* starting offset in the subject */
|
||||
options, /* options */
|
||||
ovector, /* output vector for substring information */
|
||||
OVECCOUNT); /* number of elements in the output vector */
|
||||
|
||||
/* This time, a result of NOMATCH isn't an error. If the value in "options"
|
||||
is zero, it just means we have found all possible matches, so the loop ends.
|
||||
Otherwise, it means we have failed to find a non-empty-string match at a
|
||||
point where there was a previous empty-string match. In this case, we do what
|
||||
Perl does: advance the matching position by one character, and continue. We
|
||||
do this by setting the "end of previous match" offset, because that is picked
|
||||
up at the top of the loop as the point at which to start again.
|
||||
|
||||
There are two complications: (a) When CRLF is a valid newline sequence, and
|
||||
the current position is just before it, advance by an extra byte. (b)
|
||||
Otherwise we must ensure that we skip an entire UTF-8 character if we are in
|
||||
UTF-8 mode. */
|
||||
|
||||
if (rc == PCRE_ERROR_NOMATCH)
|
||||
{
|
||||
if (options == 0) break; /* All matches found */
|
||||
ovector[1] = start_offset + 1; /* Advance one byte */
|
||||
if (crlf_is_newline && /* If CRLF is newline & */
|
||||
start_offset < subject_length - 1 && /* we are at CRLF, */
|
||||
subject[start_offset] == '\r' &&
|
||||
subject[start_offset + 1] == '\n')
|
||||
ovector[1] += 1; /* Advance by one more. */
|
||||
else if (utf8) /* Otherwise, ensure we */
|
||||
{ /* advance a whole UTF-8 */
|
||||
while (ovector[1] < subject_length) /* character. */
|
||||
{
|
||||
if ((subject[ovector[1]] & 0xc0) != 0x80) break;
|
||||
ovector[1] += 1;
|
||||
}
|
||||
}
|
||||
continue; /* Go round the loop again */
|
||||
}
|
||||
|
||||
/* Other matching errors are not recoverable. */
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
printf("Matching error %d\n", rc);
|
||||
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded */
|
||||
|
||||
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
|
||||
|
||||
/* The match succeeded, but the output vector wasn't big enough. */
|
||||
|
||||
if (rc == 0)
|
||||
{
|
||||
rc = OVECCOUNT/3;
|
||||
printf("ovector only has room for %d captured substrings\n", rc - 1);
|
||||
}
|
||||
|
||||
/* As before, show substrings stored in the output vector by number, and then
|
||||
also any named substrings. */
|
||||
|
||||
for (i = 0; i < rc; i++)
|
||||
{
|
||||
char *substring_start = subject + ovector[2*i];
|
||||
int substring_length = ovector[2*i+1] - ovector[2*i];
|
||||
printf("%2d: %.*s\n", i, substring_length, substring_start);
|
||||
}
|
||||
|
||||
if (namecount <= 0) printf("No named substrings\n"); else
|
||||
{
|
||||
unsigned char *tabptr = name_table;
|
||||
printf("Named substrings\n");
|
||||
for (i = 0; i < namecount; i++)
|
||||
{
|
||||
int n = (tabptr[0] << 8) | tabptr[1];
|
||||
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
|
||||
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
|
||||
tabptr += name_entry_size;
|
||||
}
|
||||
}
|
||||
} /* End of loop to find second and subsequent matches */
|
||||
|
||||
printf("\n");
|
||||
pcre_free(re); /* Release memory used for the compiled pattern */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcredemo.c */
|
||||
@@ -1,432 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module is a wrapper that provides a POSIX API to the underlying PCRE
|
||||
functions. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for
|
||||
compiling these functions. This must come before including pcreposix.h, where
|
||||
they are set for an application (using these functions) if they have not
|
||||
previously been set. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllexport)
|
||||
# define PCREPOSIX_EXP_DEFN __declspec(dllexport)
|
||||
#endif
|
||||
|
||||
/* We include pcre.h before pcre_internal.h so that the PCRE library functions
|
||||
are declared as "import" for Windows by defining PCRE_EXP_DECL as "import".
|
||||
This is needed even though pcre_internal.h itself includes pcre.h, because it
|
||||
does so after it has set PCRE_EXP_DECL to "export" if it is not already set. */
|
||||
|
||||
#include "pcre.h"
|
||||
#include "pcre_internal.h"
|
||||
#include "pcreposix.h"
|
||||
|
||||
|
||||
/* Table to translate PCRE compile time error codes into POSIX error codes. */
|
||||
|
||||
static const int eint[] = {
|
||||
0, /* no error */
|
||||
REG_EESCAPE, /* \ at end of pattern */
|
||||
REG_EESCAPE, /* \c at end of pattern */
|
||||
REG_EESCAPE, /* unrecognized character follows \ */
|
||||
REG_BADBR, /* numbers out of order in {} quantifier */
|
||||
/* 5 */
|
||||
REG_BADBR, /* number too big in {} quantifier */
|
||||
REG_EBRACK, /* missing terminating ] for character class */
|
||||
REG_ECTYPE, /* invalid escape sequence in character class */
|
||||
REG_ERANGE, /* range out of order in character class */
|
||||
REG_BADRPT, /* nothing to repeat */
|
||||
/* 10 */
|
||||
REG_BADRPT, /* operand of unlimited repeat could match the empty string */
|
||||
REG_ASSERT, /* internal error: unexpected repeat */
|
||||
REG_BADPAT, /* unrecognized character after (? */
|
||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
||||
REG_EPAREN, /* missing ) */
|
||||
/* 15 */
|
||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
||||
REG_INVARG, /* erroffset passed as NULL */
|
||||
REG_INVARG, /* unknown option bit(s) set */
|
||||
REG_EPAREN, /* missing ) after comment */
|
||||
REG_ESIZE, /* parentheses nested too deeply */
|
||||
/* 20 */
|
||||
REG_ESIZE, /* regular expression too large */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_EPAREN, /* unmatched parentheses */
|
||||
REG_ASSERT, /* internal error: code overflow */
|
||||
REG_BADPAT, /* unrecognized character after (?< */
|
||||
/* 25 */
|
||||
REG_BADPAT, /* lookbehind assertion is not fixed length */
|
||||
REG_BADPAT, /* malformed number or name after (?( */
|
||||
REG_BADPAT, /* conditional group contains more than two branches */
|
||||
REG_BADPAT, /* assertion expected after (?( */
|
||||
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */
|
||||
/* 30 */
|
||||
REG_ECTYPE, /* unknown POSIX class name */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */
|
||||
REG_BADPAT, /* spare error */
|
||||
REG_BADPAT, /* character value in \x{} or \o{} is too large */
|
||||
/* 35 */
|
||||
REG_BADPAT, /* invalid condition (?(0) */
|
||||
REG_BADPAT, /* \C not allowed in lookbehind assertion */
|
||||
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
|
||||
REG_BADPAT, /* number after (?C is > 255 */
|
||||
REG_BADPAT, /* closing ) for (?C expected */
|
||||
/* 40 */
|
||||
REG_BADPAT, /* recursive call could loop indefinitely */
|
||||
REG_BADPAT, /* unrecognized character after (?P */
|
||||
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */
|
||||
REG_BADPAT, /* two named subpatterns have the same name */
|
||||
REG_BADPAT, /* invalid UTF-8 string */
|
||||
/* 45 */
|
||||
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */
|
||||
REG_BADPAT, /* malformed \P or \p sequence */
|
||||
REG_BADPAT, /* unknown property name after \P or \p */
|
||||
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */
|
||||
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */
|
||||
/* 50 */
|
||||
REG_BADPAT, /* repeated subpattern is too long */
|
||||
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */
|
||||
REG_BADPAT, /* internal error: overran compiling workspace */
|
||||
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */
|
||||
REG_BADPAT, /* DEFINE group contains more than one branch */
|
||||
/* 55 */
|
||||
REG_BADPAT, /* repeating a DEFINE group is not allowed */
|
||||
REG_INVARG, /* inconsistent NEWLINE options */
|
||||
REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */
|
||||
REG_BADPAT, /* a numbered reference must not be zero */
|
||||
REG_BADPAT, /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */
|
||||
/* 60 */
|
||||
REG_BADPAT, /* (*VERB) not recognized */
|
||||
REG_BADPAT, /* number is too big */
|
||||
REG_BADPAT, /* subpattern name expected */
|
||||
REG_BADPAT, /* digit expected after (?+ */
|
||||
REG_BADPAT, /* ] is an invalid data character in JavaScript compatibility mode */
|
||||
/* 65 */
|
||||
REG_BADPAT, /* different names for subpatterns of the same number are not allowed */
|
||||
REG_BADPAT, /* (*MARK) must have an argument */
|
||||
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UCP support */
|
||||
REG_BADPAT, /* \c must be followed by an ASCII character */
|
||||
REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */
|
||||
/* 70 */
|
||||
REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */
|
||||
REG_BADPAT, /* \N is not supported in a class */
|
||||
REG_BADPAT, /* too many forward references */
|
||||
REG_BADPAT, /* disallowed UTF-8/16/32 code point (>= 0xd800 && <= 0xdfff) */
|
||||
REG_BADPAT, /* invalid UTF-16 string (should not occur) */
|
||||
/* 75 */
|
||||
REG_BADPAT, /* overlong MARK name */
|
||||
REG_BADPAT, /* character value in \u.... sequence is too large */
|
||||
REG_BADPAT, /* invalid UTF-32 string (should not occur) */
|
||||
REG_BADPAT, /* setting UTF is disabled by the application */
|
||||
REG_BADPAT, /* non-hex character in \\x{} (closing brace missing?) */
|
||||
/* 80 */
|
||||
REG_BADPAT, /* non-octal character in \o{} (closing brace missing?) */
|
||||
REG_BADPAT, /* missing opening brace after \o */
|
||||
REG_BADPAT, /* parentheses too deeply nested */
|
||||
REG_BADPAT, /* invalid range in character class */
|
||||
REG_BADPAT, /* group name must start with a non-digit */
|
||||
/* 85 */
|
||||
REG_BADPAT, /* parentheses too deeply nested (stack check) */
|
||||
REG_BADPAT, /* missing digits in \x{} or \o{} */
|
||||
REG_BADPAT /* pattern too complicated */
|
||||
};
|
||||
|
||||
/* Table of texts corresponding to POSIX error codes */
|
||||
|
||||
static const char *const pstring[] = {
|
||||
"", /* Dummy for value 0 */
|
||||
"internal error", /* REG_ASSERT */
|
||||
"invalid repeat counts in {}", /* BADBR */
|
||||
"pattern error", /* BADPAT */
|
||||
"? * + invalid", /* BADRPT */
|
||||
"unbalanced {}", /* EBRACE */
|
||||
"unbalanced []", /* EBRACK */
|
||||
"collation error - not relevant", /* ECOLLATE */
|
||||
"bad class", /* ECTYPE */
|
||||
"bad escape sequence", /* EESCAPE */
|
||||
"empty expression", /* EMPTY */
|
||||
"unbalanced ()", /* EPAREN */
|
||||
"bad range inside []", /* ERANGE */
|
||||
"expression too big", /* ESIZE */
|
||||
"failed to get memory", /* ESPACE */
|
||||
"bad back reference", /* ESUBREG */
|
||||
"bad argument", /* INVARG */
|
||||
"match failed" /* NOMATCH */
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Translate error code to string *
|
||||
*************************************************/
|
||||
|
||||
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION
|
||||
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
|
||||
{
|
||||
const char *message, *addmessage;
|
||||
size_t length, addlength;
|
||||
|
||||
message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
|
||||
"unknown error code" : pstring[errcode];
|
||||
length = strlen(message) + 1;
|
||||
|
||||
addmessage = " at offset ";
|
||||
addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
|
||||
strlen(addmessage) + 6 : 0;
|
||||
|
||||
if (errbuf_size > 0)
|
||||
{
|
||||
if (addlength > 0 && errbuf_size >= length + addlength)
|
||||
sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
|
||||
else
|
||||
{
|
||||
strncpy(errbuf, message, errbuf_size - 1);
|
||||
errbuf[errbuf_size-1] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return length + addlength;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store held by a regex *
|
||||
*************************************************/
|
||||
|
||||
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
regfree(regex_t *preg)
|
||||
{
|
||||
(PUBL(free))(preg->re_pcre);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Compile a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
Arguments:
|
||||
preg points to a structure for recording the compiled expression
|
||||
pattern the pattern to compile
|
||||
cflags compilation flags
|
||||
|
||||
Returns: 0 on success
|
||||
various non-zero codes on failure
|
||||
*/
|
||||
|
||||
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
const char *errorptr;
|
||||
int erroffset;
|
||||
int errorcode;
|
||||
int options = 0;
|
||||
int re_nsub = 0;
|
||||
|
||||
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
|
||||
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
|
||||
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL;
|
||||
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE;
|
||||
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8;
|
||||
if ((cflags & REG_UCP) != 0) options |= PCRE_UCP;
|
||||
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;
|
||||
|
||||
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
|
||||
&erroffset, NULL);
|
||||
preg->re_erroffset = erroffset;
|
||||
|
||||
/* Safety: if the error code is too big for the translation vector (which
|
||||
should not happen, but we all make mistakes), return REG_BADPAT. */
|
||||
|
||||
if (preg->re_pcre == NULL)
|
||||
{
|
||||
return (errorcode < (int)(sizeof(eint)/sizeof(const int)))?
|
||||
eint[errorcode] : REG_BADPAT;
|
||||
}
|
||||
|
||||
(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
|
||||
&re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match a regular expression *
|
||||
*************************************************/
|
||||
|
||||
/* Unfortunately, PCRE requires 3 ints of working space for each captured
|
||||
substring, so we have to get and release working store instead of just using
|
||||
the POSIX structures as was done in earlier releases when PCRE needed only 2
|
||||
ints. However, if the number of possible capturing brackets is small, use a
|
||||
block of store on the stack, to reduce the use of malloc/free. The threshold is
|
||||
in a macro that can be changed at configure time.
|
||||
|
||||
If REG_NOSUB was specified at compile time, the PCRE_NO_AUTO_CAPTURE flag will
|
||||
be set. When this is the case, the nmatch and pmatch arguments are ignored, and
|
||||
the only result is yes/no/error. */
|
||||
|
||||
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
regexec(const regex_t *preg, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
int rc, so, eo;
|
||||
int options = 0;
|
||||
int *ovector = NULL;
|
||||
int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
|
||||
BOOL allocated_ovector = FALSE;
|
||||
BOOL nosub =
|
||||
(REAL_PCRE_OPTIONS((const pcre *)preg->re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0;
|
||||
|
||||
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
|
||||
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY;
|
||||
|
||||
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
||||
|
||||
/* When no string data is being returned, or no vector has been passed in which
|
||||
to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
|
||||
the return data is large enough. */
|
||||
|
||||
if (nosub || pmatch == NULL) nmatch = 0;
|
||||
|
||||
else if (nmatch > 0)
|
||||
{
|
||||
if (nmatch <= POSIX_MALLOC_THRESHOLD)
|
||||
{
|
||||
ovector = &(small_ovector[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nmatch > INT_MAX/(sizeof(int) * 3)) return REG_ESPACE;
|
||||
ovector = (int *)malloc(sizeof(int) * nmatch * 3);
|
||||
if (ovector == NULL) return REG_ESPACE;
|
||||
allocated_ovector = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings.
|
||||
The man page from OS X says "REG_STARTEND affects only the location of the
|
||||
string, not how it is matched". That is why the "so" value is used to bump the
|
||||
start location rather than being passed as a PCRE "starting offset". */
|
||||
|
||||
if ((eflags & REG_STARTEND) != 0)
|
||||
{
|
||||
if (pmatch == NULL) return REG_INVARG;
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
else
|
||||
{
|
||||
so = 0;
|
||||
eo = (int)strlen(string);
|
||||
}
|
||||
|
||||
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
|
||||
0, options, ovector, (int)(nmatch * 3));
|
||||
|
||||
if (rc == 0) rc = (int)nmatch; /* All captured slots were filled in */
|
||||
|
||||
/* Successful match */
|
||||
|
||||
if (rc >= 0)
|
||||
{
|
||||
size_t i;
|
||||
if (!nosub)
|
||||
{
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = ovector[i*2] + so;
|
||||
pmatch[i].rm_eo = ovector[i*2+1] + so;
|
||||
}
|
||||
if (allocated_ovector) free(ovector);
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Unsuccessful match */
|
||||
|
||||
if (allocated_ovector) free(ovector);
|
||||
switch(rc)
|
||||
{
|
||||
/* ========================================================================== */
|
||||
/* These cases are never obeyed. This is a fudge that causes a compile-time
|
||||
error if the vector eint, which is indexed by compile-time error number, is
|
||||
not the correct length. It seems to be the only way to do such a check at
|
||||
compile time, as the sizeof() operator does not work in the C preprocessor.
|
||||
As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */
|
||||
|
||||
case 0:
|
||||
case (sizeof(eint)/sizeof(int) == ERRCOUNT):
|
||||
return REG_ASSERT;
|
||||
/* ========================================================================== */
|
||||
|
||||
case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
|
||||
case PCRE_ERROR_NULL: return REG_INVARG;
|
||||
case PCRE_ERROR_BADOPTION: return REG_INVARG;
|
||||
case PCRE_ERROR_BADMAGIC: return REG_INVARG;
|
||||
case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
|
||||
case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
|
||||
case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
|
||||
case PCRE_ERROR_BADUTF8: return REG_INVARG;
|
||||
case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
|
||||
case PCRE_ERROR_BADMODE: return REG_INVARG;
|
||||
default: return REG_ASSERT;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcreposix.c */
|
||||
@@ -1,146 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _PCREPOSIX_H
|
||||
#define _PCREPOSIX_H
|
||||
|
||||
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
|
||||
Compatible Regular Expression library. It defines the things POSIX says should
|
||||
be there. I hope.
|
||||
|
||||
Copyright (c) 1997-2012 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options, mostly defined by POSIX, but with some extras. */
|
||||
|
||||
#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
|
||||
#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
|
||||
#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
|
||||
#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */
|
||||
#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
|
||||
#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
|
||||
#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
|
||||
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
|
||||
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
|
||||
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
|
||||
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE_UCP */
|
||||
|
||||
/* This is not used by PCRE, but by defining it we make it easier
|
||||
to slot PCRE into existing programs that make POSIX calls. */
|
||||
|
||||
#define REG_EXTENDED 0
|
||||
|
||||
/* Error values. Not all these are relevant or used by the wrapper. */
|
||||
|
||||
enum {
|
||||
REG_ASSERT = 1, /* internal error ? */
|
||||
REG_BADBR, /* invalid repeat counts in {} */
|
||||
REG_BADPAT, /* pattern error */
|
||||
REG_BADRPT, /* ? * + invalid */
|
||||
REG_EBRACE, /* unbalanced {} */
|
||||
REG_EBRACK, /* unbalanced [] */
|
||||
REG_ECOLLATE, /* collation error - not relevant */
|
||||
REG_ECTYPE, /* bad class */
|
||||
REG_EESCAPE, /* bad escape sequence */
|
||||
REG_EMPTY, /* empty expression */
|
||||
REG_EPAREN, /* unbalanced () */
|
||||
REG_ERANGE, /* bad range inside [] */
|
||||
REG_ESIZE, /* expression too big */
|
||||
REG_ESPACE, /* failed to get memory */
|
||||
REG_ESUBREG, /* bad back reference */
|
||||
REG_INVARG, /* bad argument */
|
||||
REG_NOMATCH /* match failed */
|
||||
};
|
||||
|
||||
|
||||
/* The structure representing a compiled regular expression. */
|
||||
|
||||
typedef struct {
|
||||
void *re_pcre;
|
||||
size_t re_nsub;
|
||||
size_t re_erroffset;
|
||||
} regex_t;
|
||||
|
||||
/* The structure in which a captured offset is returned. */
|
||||
|
||||
typedef int regoff_t;
|
||||
|
||||
typedef struct {
|
||||
regoff_t rm_so;
|
||||
regoff_t rm_eo;
|
||||
} regmatch_t;
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export settings are needed, and are set in pcreposix.c before including this
|
||||
file. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
|
||||
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
|
||||
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCREPOSIX_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCREPOSIX_EXP_DECL extern "C"
|
||||
# define PCREPOSIX_EXP_DEFN extern "C"
|
||||
# else
|
||||
# define PCREPOSIX_EXP_DECL extern
|
||||
# define PCREPOSIX_EXP_DEFN extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The functions */
|
||||
|
||||
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
|
||||
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
|
||||
regmatch_t *, int);
|
||||
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
|
||||
PCREPOSIX_EXP_DECL void regfree(regex_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcreposix.h */
|
||||
BIN
ext/pcre/pcrelib/testdata/grepbinary
vendored
BIN
ext/pcre/pcrelib/testdata/grepbinary
vendored
Binary file not shown.
3
ext/pcre/pcrelib/testdata/grepfilelist
vendored
3
ext/pcre/pcrelib/testdata/grepfilelist
vendored
@@ -1,3 +0,0 @@
|
||||
testdata/grepinputv
|
||||
|
||||
testdata/grepinputx
|
||||
611
ext/pcre/pcrelib/testdata/grepinput
vendored
611
ext/pcre/pcrelib/testdata/grepinput
vendored
@@ -1,611 +0,0 @@
|
||||
This is a file of miscellaneous text that is used as test data for checking
|
||||
that the pcregrep command is working correctly. The file must be more than 24K
|
||||
long so that it needs more than a single read() call to process it. New
|
||||
features should be added at the end, because some of the tests involve the
|
||||
output of line numbers, and we don't want these to change.
|
||||
|
||||
PATTERN at the start of a line.
|
||||
In the middle of a line, PATTERN appears.
|
||||
|
||||
This pattern is in lower case.
|
||||
|
||||
Here follows a whole lot of stuff that makes the file over 24K long.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
|
||||
The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the
|
||||
lazy dog. The quick brown fox jumps over the lazy dog. The quick brown fox
|
||||
jumps over the lazy dog. The quick brown fox jumps over the lazy dog. The quick
|
||||
brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
aaaaa0
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
bbbbbb
|
||||
cccccccccccccccccccccccccccccccccccccccccc
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
eeeee
|
||||
aaaaa2
|
||||
ffffffffff
|
||||
|
||||
This is a line before the binary zero.
|
||||
This line contains a binary zero here > | ||||