1
0
mirror of https://github.com/php/php-src.git synced 2026-03-30 20:22:36 +02:00

MFB: Replace ereg code with pcre, fix duplicate macro names and segfault. Patch by Mikko

This commit is contained in:
Scott MacVicar
2008-08-29 09:50:57 +00:00
parent a69b38f7e9
commit 83abc6c537
4 changed files with 226 additions and 70 deletions

View File

@@ -1118,7 +1118,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
m->mask_op = 0;
if (*l == '~') {
if (!IS_STRING(m->type))
if (!IS_LIBMAGIC_STRING(m->type))
m->mask_op |= FILE_OPINVERSE;
else if (ms->flags & MAGIC_CHECK)
file_magwarn(ms, "'~' invalid for string types");
@@ -1128,7 +1128,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
m->str_flags = 0;
m->num_mask = 0;
if ((op = get_op(*l)) != -1) {
if (!IS_STRING(m->type)) {
if (!IS_LIBMAGIC_STRING(m->type)) {
uint64_t val;
++l;
m->mask_op |= op;
@@ -2066,7 +2066,7 @@ bs1(struct magic *m)
m->offset = swap4((uint32_t)m->offset);
m->in_offset = swap4((uint32_t)m->in_offset);
m->lineno = swap4((uint32_t)m->lineno);
if (IS_STRING(m->type)) {
if (IS_LIBMAGIC_STRING(m->type)) {
m->str_range = swap4(m->str_range);
m->str_flags = swap4(m->str_flags);
}

View File

@@ -46,7 +46,11 @@
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
#include <regex.h>
#include "php.h"
#include "ext/standard/php_string.h"
#include "ext/pcre/php_pcre.h"
#include <sys/types.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
@@ -165,7 +169,7 @@ struct magic {
#define FILE_LEDOUBLE 38
#define FILE_NAMES_SIZE 39/* size of array to contain all names */
#define IS_STRING(t) \
#define IS_LIBMAGIC_STRING(t) \
((t) == FILE_STRING || \
(t) == FILE_PSTRING || \
(t) == FILE_BESTRING16 || \

View File

@@ -74,7 +74,7 @@ file_mdump(struct magic *m)
if (m->mask_op & FILE_OPINVERSE)
(void) fputc('~', stderr);
if (IS_STRING(m->type)) {
if (IS_LIBMAGIC_STRING(m->type)) {
if (m->str_flags) {
(void) fputc('/', stderr);
if (m->str_flags & STRING_COMPACT_BLANK)

View File

@@ -36,6 +36,10 @@
#include <stdlib.h>
#include <time.h>
#ifndef PREG_OFFSET_CAPTURE
# define PREG_OFFSET_CAPTURE (1<<8)
#endif
#ifndef lint
FILE_RCSID("@(#)$File: softmagic.c,v 1.117 2008/03/01 22:21:49 rrt Exp $")
@@ -281,25 +285,18 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
private int
check_fmt(struct magic_set *ms, struct magic *m)
{
regex_t rx = {0};
int rc;
pcre *pce;
int re_options;
pcre_extra *re_extra;
if (strchr(MAGIC_DESC, '%') == NULL) {
return 0;
}
rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
if (rc) {
char errmsg[512];
(void)regerror(rc, &rx, errmsg, sizeof(errmsg));
file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
if ((pce = pcre_get_compiled_regex("~%[-0-9.]*s~", &re_extra, &re_options TSRMLS_CC)) == NULL) {
return -1;
} else {
regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1);
rc = regexec(&rx, MAGIC_DESC, rx.re_nsub + 1, pmatch, 0);
efree(pmatch);
regfree(&rx);
return !rc;
return !pcre_exec(pce, re_extra, MAGIC_DESC, strlen(MAGIC_DESC), 0, re_options, NULL, 0);
}
}
@@ -1488,6 +1485,66 @@ file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags)
return file_strncmp(a, b, len, flags);
}
private void
convert_libmagic_pattern(zval *pattern, int options)
{
int i, j=0;
char *t;
t = (char *) safe_emalloc(Z_STRLEN_P(pattern), 2, 5);
memset(t, '\0', sizeof(t));
t[j++] = '~';
for (i=0; i<Z_STRLEN_P(pattern); i++, j++) {
switch (Z_STRVAL_P(pattern)[i]) {
case '?':
t[j] = '.';
break;
case '*':
t[j++] = '.';
t[j] = '*';
break;
case '.':
t[j++] = '\\';
t[j] = '.';
break;
case '\\':
t[j++] = '\\';
t[j] = '\\';
break;
case '(':
t[j++] = '\\';
t[j] = '(';
break;
case ')':
t[j++] = '\\';
t[j] = ')';
break;
case '~':
t[j++] = '\\';
t[j] = '~';
break;
default:
t[j] = Z_STRVAL_P(pattern)[i];
break;
}
}
t[j++] = '~';
if (options & PCRE_CASELESS)
t[j++] = 'm';
if (options & PCRE_MULTILINE)
t[j++] = 'i';
t[j]=0;
Z_STRVAL_P(pattern) = t;
Z_STRLEN_P(pattern) = j;
}
private int
magiccheck(struct magic_set *ms, struct magic *m)
{
@@ -1642,61 +1699,156 @@ magiccheck(struct magic_set *ms, struct magic *m)
}
break;
}
case FILE_REGEX: {
int rc;
regex_t rx = {0};
char errmsg[512];
if (ms->search.s == NULL)
return 0;
l = 0;
rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NEWLINE|((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
if (rc) {
(void)regerror(rc, &rx, errmsg, sizeof(errmsg));
file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
v = (uint64_t)-1;
} else {
regmatch_t *pmatch = (regmatch_t *)ecalloc(sizeof(regmatch_t), rx.re_nsub + 1);
#ifndef REG_STARTEND
#define REG_STARTEND 0
size_t l = ms->search.s_len - 1;
char c = ms->search.s[l];
((char *)(intptr_t)ms->search.s)[l] = '\0';
#else
pmatch[0].rm_so = 0;
pmatch[0].rm_eo = ms->search.s_len;
#endif
rc = regexec(&rx, (const char *)ms->search.s, 1, pmatch, REG_STARTEND);
#if REG_STARTEND == 0
((char *)(intptr_t)ms->search.s)[l] = c;
#endif
switch (rc) {
case 0:
ms->search.s += (int)pmatch[0].rm_so;
ms->search.offset += (size_t)pmatch[0].rm_so;
ms->search.rm_len = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
v = 0;
break;
case REG_NOMATCH:
v = 1;
break;
default:
(void)regerror(rc, &rx, errmsg, sizeof(errmsg));
file_magerror(ms, "regexec error %d, (%s)", rc, errmsg);
v = (uint64_t)-1;
break;
}
regfree(&rx);
efree(pmatch);
zval *pattern;
int options = 0;
pcre_cache_entry *pce;
MAKE_STD_ZVAL(pattern);
Z_STRVAL_P(pattern) = (char *)m->value.s;
Z_STRLEN_P(pattern) = m->vallen;
Z_TYPE_P(pattern) = IS_STRING;
options |= PCRE_MULTILINE;
if (m->str_flags & STRING_IGNORE_CASE) {
options |= PCRE_CASELESS;
}
if (v == (uint64_t)-1) {
convert_libmagic_pattern(pattern, options);
if ((pce = pcre_get_compiled_regex_cache(Z_STRVAL_P(pattern), Z_STRLEN_P(pattern) TSRMLS_CC)) == NULL) {
return -1;
} else {
/* pce now contains the compiled regex */
zval *retval;
zval *subpats;
char *haystack;
MAKE_STD_ZVAL(retval);
ALLOC_INIT_ZVAL(subpats);
/* Cut the search len from haystack, equals to REG_STARTEND */
haystack = estrndup(ms->search.s, ms->search.s_len);
/* match v = 0, no match v = 1 */
php_pcre_match_impl(pce, haystack, ms->search.s_len, retval, subpats, 1, 1, PREG_OFFSET_CAPTURE, 0 TSRMLS_CC);
/* Free haystack */
efree(haystack);
if (Z_LVAL_P(retval) < 0) {
zval_ptr_dtor(&subpats);
FREE_ZVAL(retval);
efree(Z_STRVAL_P(pattern));
efree(pattern);
return -1;
} else if ((Z_LVAL_P(retval) > 0) && (Z_TYPE_P(subpats) == IS_ARRAY)) {
/* Need to fetch global match which equals pmatch[0] */
HashTable *ht = Z_ARRVAL_P(subpats);
HashPosition outer_pos;
zval *pattern_match = NULL, *pattern_offset = NULL;
zend_hash_internal_pointer_reset_ex(ht, &outer_pos);
if (zend_hash_has_more_elements_ex(ht, &outer_pos) == SUCCESS &&
zend_hash_move_forward_ex(ht, &outer_pos)) {
zval **ppzval;
/* The first element (should be) is the global match
Need to move to the inner array to get the global match */
if (zend_hash_get_current_data_ex(ht, (void**)&ppzval, &outer_pos) != FAILURE) {
HashTable *inner_ht;
HashPosition inner_pos;
zval **match, **offset;
zval tmpcopy = **ppzval, matchcopy, offsetcopy;
zval_copy_ctor(&tmpcopy);
INIT_PZVAL(&tmpcopy);
inner_ht = Z_ARRVAL(tmpcopy);
/* If everything goes according to the master plan
tmpcopy now contains two elements:
0 = the match
1 = starting position of the match */
zend_hash_internal_pointer_reset_ex(inner_ht, &inner_pos);
if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
if (zend_hash_get_current_data_ex(inner_ht, (void**)&match, &inner_pos) != FAILURE) {
matchcopy = **match;
zval_copy_ctor(&matchcopy);
INIT_PZVAL(&matchcopy);
convert_to_string(&matchcopy);
MAKE_STD_ZVAL(pattern_match);
Z_STRVAL_P(pattern_match) = (char *)Z_STRVAL(matchcopy);
Z_STRLEN_P(pattern_match) = Z_STRLEN(matchcopy);
Z_TYPE_P(pattern_match) = IS_STRING;
zval_dtor(&matchcopy);
}
}
if (zend_hash_has_more_elements_ex(inner_ht, &inner_pos) == SUCCESS &&
zend_hash_move_forward_ex(inner_ht, &inner_pos)) {
if (zend_hash_get_current_data_ex(inner_ht, (void**)&offset, &inner_pos) != FAILURE) {
offsetcopy = **offset;
zval_copy_ctor(&offsetcopy);
INIT_PZVAL(&offsetcopy);
convert_to_long(&offsetcopy);
MAKE_STD_ZVAL(pattern_offset);
Z_LVAL_P(pattern_offset) = Z_LVAL(offsetcopy);
Z_TYPE_P(pattern_offset) = IS_LONG;
zval_dtor(&offsetcopy);
}
}
zval_dtor(&tmpcopy);
}
if ((pattern_match != NULL) && (pattern_offset != NULL)) {
ms->search.s += (int)Z_LVAL_P(pattern_offset); /* this is where the match starts */
ms->search.offset += (size_t)Z_LVAL_P(pattern_offset); /* this is where the match starts as size_t */
ms->search.rm_len = Z_STRLEN_P(pattern_match) /* This is the length of the matched pattern */;
v = 0;
efree(pattern_match);
efree(pattern_offset);
} else {
zval_ptr_dtor(&subpats);
FREE_ZVAL(retval);
efree(Z_STRVAL_P(pattern));
efree(pattern);
return -1;
}
}
} else {
v = 1;
}
zval_ptr_dtor(&subpats);
FREE_ZVAL(retval);
}
break;
efree(Z_STRVAL_P(pattern));
efree(pattern);
break;
}
default:
file_magerror(ms, "invalid type %d in magiccheck()", m->type);
return -1;