mirror of
https://github.com/php/php-src.git
synced 2026-03-28 10:12:18 +01:00
The change reduces the input size for patterns that use POSIX character classes. This is still better than reverting to the patterns from the older version, as the upstream data also uses POSIX classes in later versions. The input reduction speeds up pattern matching in some cases. This patch is functionally almost the same as upstream, but might show some differences when the input is very long. While the magic data in the newer versions is a functional improvement, with jit=0 it might show a performance regression. The slowdown is negligible in normal usage and is still acceptable for malicious input. If functional regressions show up, this patch should be reverted and the test timings should be adapted instead.
251 lines
9.9 KiB
Diff
251 lines
9.9 KiB
Diff
diff -u a/magic/Magdir/c-lang b/magic/Magdir/c-lang
|
|
--- a/magic/Magdir/c-lang 2017-08-14 09:40:38.000000000 +0200
|
|
+++ b/magic/Magdir/c-lang 2018-11-10 13:06:33.171398452 +0100
|
|
@@ -13,61 +13,61 @@
|
|
# C
|
|
# Check for class if include is found, otherwise class is beaten by include becouse of lowered strength
|
|
0 regex \^#include C
|
|
->0 regex \^class[[:space:]]+
|
|
+>0 regex/4096 \^class[[:space:]]+
|
|
>>&0 regex \\{[\.\*]\\}(;)?$ \b++
|
|
>&0 clear x source text
|
|
!:strength + 13
|
|
!:mime text/x-c
|
|
-0 regex \^#[[:space:]]*pragma C source text
|
|
+0 regex/4096 \^#[[:space:]]*pragma C source text
|
|
!:mime text/x-c
|
|
-0 regex \^#[[:space:]]*(if\|ifn)def
|
|
->&0 regex \^#[[:space:]]*endif$ C source text
|
|
+0 regex/4096 \^#[[:space:]]*(if\|ifn)def
|
|
+>&0 regex/4096 \^#[[:space:]]*endif$ C source text
|
|
!:mime text/x-c
|
|
-0 regex \^#[[:space:]]*(if\|ifn)def
|
|
->&0 regex \^#[[:space:]]*define C source text
|
|
+0 regex/4096 \^#[[:space:]]*(if\|ifn)def
|
|
+>&0 regex/4096 \^#[[:space:]]*define C source text
|
|
!:mime text/x-c
|
|
-0 regex \^[[:space:]]*char(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
|
|
+0 regex/4096 \^[[:space:]]*char(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
|
|
!:mime text/x-c
|
|
-0 regex \^[[:space:]]*double(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
|
|
+0 regex/4096 \^[[:space:]]*double(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
|
|
!:mime text/x-c
|
|
-0 regex \^[[:space:]]*extern[[:space:]]+ C source text
|
|
+0 regex/4096 \^[[:space:]]*extern[[:space:]]+ C source text
|
|
!:mime text/x-c
|
|
-0 regex \^[[:space:]]*float(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
|
|
+0 regex/4096 \^[[:space:]]*float(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
|
|
!:mime text/x-c
|
|
-0 regex \^struct[[:space:]]+ C source text
|
|
+0 regex/4096 \^struct[[:space:]]+ C source text
|
|
!:mime text/x-c
|
|
-0 regex \^union[[:space:]]+ C source text
|
|
+0 regex/4096 \^union[[:space:]]+ C source text
|
|
!:mime text/x-c
|
|
0 search/8192 main(
|
|
->&0 regex \\)[[:space:]]*\\{ C source text
|
|
+>&0 regex/4096 \\)[[:space:]]*\\{ C source text
|
|
!:mime text/x-c
|
|
|
|
# C++
|
|
# The strength of these rules is increased so they beat the C rules above
|
|
-0 regex \^namespace[[:space:]]+[_[:alpha:]]{1,30}[[:space:]]*\\{ C++ source text
|
|
+0 regex/4096 \^namespace[[:space:]]+[_[:alpha:]]{1,30}[[:space:]]*\\{ C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
# using namespace [namespace] or using std::[lib]
|
|
-0 regex \^using[[:space:]]+(namespace\ )?std(::)?[[:alpha:]]*[[:space:]]*; C++ source text
|
|
+0 regex/4096 \^using[[:space:]]+(namespace\ )?std(::)?[[:alpha:]]*[[:space:]]*; C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
-0 regex \^[[:space:]]*template[[:space:]]*<.*>[[:space:]]*$ C++ source text
|
|
+0 regex/4096 \^[[:space:]]*template[[:space:]]*<.*>[[:space:]]*$ C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
-0 regex \^[[:space:]]*virtual[[:space:]]+.*[};][[:space:]]*$ C++ source text
|
|
+0 regex/4096 \^[[:space:]]*virtual[[:space:]]+.*[};][[:space:]]*$ C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
# But class alone is reduced to avoid beating php (Jens Schleusener)
|
|
-0 regex \^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$ C++ source text
|
|
+0 regex/4096 \^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$ C++ source text
|
|
!:strength + 13
|
|
!:mime text/x-c++
|
|
-0 regex \^[[:space:]]*public: C++ source text
|
|
+0 regex/4096 \^[[:space:]]*public: C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
-0 regex \^[[:space:]]*private: C++ source text
|
|
+0 regex/4096 \^[[:space:]]*private: C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
-0 regex \^[[:space:]]*protected: C++ source text
|
|
+0 regex/4096 \^[[:space:]]*protected: C++ source text
|
|
!:strength + 30
|
|
!:mime text/x-c++
|
|
|
|
diff -u a/magic/Magdir/elf b/magic/Magdir/elf
|
|
--- a/magic/Magdir/elf 2018-03-11 01:46:42.000000000 +0100
|
|
+++ b/magic/Magdir/elf 2018-11-10 12:27:29.858619326 +0100
|
|
@@ -48,9 +48,8 @@
|
|
!:mime application/x-object
|
|
>16 leshort 2 executable,
|
|
!:mime application/x-executable
|
|
->16 leshort 3 ${x?pie executable:shared object}
|
|
-
|
|
-!:mime application/x-${x?pie-executable:sharedlib}
|
|
+>16 leshort 3 shared object,
|
|
+!:mime application/x-sharedlib
|
|
>16 leshort 4 core file
|
|
!:mime application/x-coredump
|
|
# Core file detection is not reliable.
|
|
diff -u a/magic/Magdir/fsav b/magic/Magdir/fsav
|
|
--- a/magic/Magdir/fsav 2017-03-17 22:35:28.000000000 +0100
|
|
+++ b/magic/Magdir/fsav 2018-11-10 12:27:29.858619326 +0100
|
|
@@ -48,13 +48,15 @@
|
|
>11 string >\0 Clam AntiVirus database %-.23s
|
|
>>34 string :
|
|
>>>35 string !: \b, version
|
|
->>>>35 string x \b%-.1s
|
|
->>>>>36 string !:
|
|
+>>>>35 string x \b %-.1s
|
|
+>>>>>36 string !:
|
|
>>>>>>36 string x \b%-.1s
|
|
>>>>>>>37 string !:
|
|
>>>>>>>>37 string x \b%-.1s
|
|
>>>>>>>>>38 string !:
|
|
>>>>>>>>>>38 string x \b%-.1s
|
|
+>>>>>>>>>>>39 string !:
|
|
+>>>>>>>>>>>>39 string x \b%-.1s
|
|
>512 string \037\213 \b, gzipped
|
|
>769 string ustar\0 \b, tarred
|
|
|
|
diff -u a/magic/Magdir/images b/magic/Magdir/images
|
|
--- a/magic/Magdir/images 2018-03-11 01:46:42.000000000 +0100
|
|
+++ b/magic/Magdir/images 2018-11-10 12:27:29.858619326 +0100
|
|
@@ -464,7 +464,9 @@
|
|
!:mime image/x-unknown
|
|
|
|
# GIF
|
|
+# Strength set up to beat 0x55AA DOS/MBR signature word lookups (+65)
|
|
0 string GIF8 GIF image data
|
|
+!:strength +80
|
|
!:mime image/gif
|
|
!:apple 8BIMGIFf
|
|
>4 string 7a \b, version 8%s,
|
|
Only in b/magic/Magdir/: images.orig
|
|
diff -u a/magic/Magdir/linux b/magic/Magdir/linux
|
|
--- a/magic/Magdir/linux 2017-03-17 22:35:28.000000000 +0100
|
|
+++ b/magic/Magdir/linux 2018-11-10 12:27:29.862619309 +0100
|
|
@@ -94,6 +94,16 @@
|
|
# From Daniel Novotny <dnovotny@redhat.com>
|
|
# swap file for PowerPC
|
|
65526 string SWAPSPACE2 Linux/ppc swap file
|
|
+>0x400 long x version %d,
|
|
+>0x404 long x size %d pages,
|
|
+>1052 string \0 no label,
|
|
+>1052 string >\0 LABEL=%s,
|
|
+>0x40c belong x UUID=%08x
|
|
+>0x410 beshort x \b-%04x
|
|
+>0x412 beshort x \b-%04x
|
|
+>0x414 beshort x \b-%04x
|
|
+>0x416 belong x \b-%08x
|
|
+>0x41a beshort x \b%04x
|
|
16374 string SWAPSPACE2 Linux/ia64 swap file
|
|
#
|
|
# Linux kernel boot images, from Albert Cahalan <acahalan@cs.uml.edu>
|
|
diff -u a/magic/Magdir/python b/magic/Magdir/python
|
|
--- a/magic/Magdir/python 2017-08-14 09:40:38.000000000 +0200
|
|
+++ b/magic/Magdir/python 2018-11-10 13:06:36.799268528 +0100
|
|
@@ -63,7 +63,7 @@
|
|
!:mime text/x-python
|
|
|
|
# import module [as abrev]
|
|
-0 regex \^import\ [_[:alpha:]]+\ as\ [[:alpha:]][[:space:]]*$ Python script text executable
|
|
+0 regex/4096 \^import\ [_[:alpha:]]+\ as\ [[:alpha:]][[:space:]]*$ Python script text executable
|
|
!:mime text/x-python
|
|
|
|
# comments
|
|
@@ -79,7 +79,7 @@
|
|
# except: or finally:
|
|
# block
|
|
0 search/4096 try:
|
|
->&0 regex \^[[:space:]]*except.*:$ Python script text executable
|
|
+>&0 regex/4096 \^[[:space:]]*except.*:$ Python script text executable
|
|
!:strength + 15
|
|
!:mime text/x-python
|
|
>&0 search/4096 finally: Python script text executable
|
|
@@ -91,7 +91,7 @@
|
|
!:mime text/x-python
|
|
|
|
# def name(*args, **kwargs):
|
|
-0 regex \^[[:space:]]{0,50}def\ {1,50}[_a-zA-Z]{1,100}
|
|
+0 regex/4096 \^[[:space:]]{0,50}def\ {1,50}[_a-zA-Z]{1,100}
|
|
>&0 regex \\(([[:alpha:]*_,\ ]){0,255}\\):$ Python script text executable
|
|
!:strength + 15
|
|
!:mime text/x-python
|
|
diff -u a/magic/Magdir/rpm b/magic/Magdir/rpm
|
|
--- a/magic/Magdir/rpm 2013-01-11 17:45:23.000000000 +0100
|
|
+++ b/magic/Magdir/rpm 2018-11-10 12:27:29.858619326 +0100
|
|
@@ -29,6 +29,7 @@
|
|
>>8 beshort 17 SuperH
|
|
>>8 beshort 18 Xtensa
|
|
>>8 beshort 255 noarch
|
|
+>>10 string x %s
|
|
|
|
#delta RPM Daniel Novotny (dnovotny@redhat.com)
|
|
0 string drpm Delta RPM
|
|
diff -u a/magic/Magdir/ruby b/magic/Magdir/ruby
|
|
--- a/magic/Magdir/ruby 2017-08-14 15:39:18.000000000 +0200
|
|
+++ b/magic/Magdir/ruby 2018-11-10 13:06:28.703557761 +0100
|
|
@@ -22,30 +22,30 @@
|
|
# What looks like ruby, but does not have a shebang
|
|
# (modules and such)
|
|
# From: Lubomir Rintel <lkundrak@v3.sk>
|
|
-0 regex \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'
|
|
+0 regex/4096 \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'
|
|
>0 regex def\ [a-z]|\ do$
|
|
->>&0 regex \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
|
|
+>>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
|
|
!:strength + 30
|
|
!:mime text/x-ruby
|
|
-0 regex \^[[:space:]]*(class|module)[[:space:]][A-Z]
|
|
+0 regex/4096 \^[[:space:]]*(class|module)[[:space:]][A-Z]
|
|
>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
|
|
->>&0 regex \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
|
|
+>>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
|
|
!:strength + 30
|
|
!:mime text/x-ruby
|
|
# Classes with no modules or defs, beats simple ASCII
|
|
-0 regex \^[[:space:]]*(class|module)[[:space:]][A-Z]
|
|
->&0 regex \^[[:space:]]*end([[:space:]]+[;#if].*)?$ Ruby script text
|
|
+0 regex/4096 \^[[:space:]]*(class|module)[[:space:]][A-Z]
|
|
+>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#if].*)?$ Ruby script text
|
|
!:strength + 10
|
|
!:mime text/x-ruby
|
|
# Looks for function definition to balance python magic
|
|
# def name (args)
|
|
# end
|
|
-0 regex \^[[:space:]]*def\ [a-z]|def\ [[:alpha:]]+::[a-z]
|
|
->&0 regex \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
|
|
+0 regex/4096 \^[[:space:]]*def\ [a-z]|def\ [[:alpha:]]+::[a-z]
|
|
+>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
|
|
!:strength + 10
|
|
!:mime text/x-ruby
|
|
|
|
-0 regex \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+' Ruby script text
|
|
+0 regex/4096 \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+' Ruby script text
|
|
!:mime text/x-ruby
|
|
-0 regex \^[[:space:]]*include\ ([A-Z]+[a-z]*(::))+ Ruby script text
|
|
+0 regex/4096 \^[[:space:]]*include\ ([A-Z]+[a-z]*(::))+ Ruby script text
|
|
!:mime text/x-ruby
|
|
diff -u a/magic/Magdir/securitycerts b/magic/Magdir/securitycerts
|
|
--- a/magic/Magdir/securitycerts 2009-09-19 18:28:12.000000000 +0200
|
|
+++ b/magic/Magdir/securitycerts 2018-11-10 12:27:29.858619326 +0100
|
|
@@ -4,3 +4,5 @@
|
|
0 search/1 -----BEGIN\ CERTIFICATE------ RFC1421 Security Certificate text
|
|
0 search/1 -----BEGIN\ NEW\ CERTIFICATE RFC1421 Security Certificate Signing Request text
|
|
0 belong 0xedfeedfe Sun 'jks' Java Keystore File data
|
|
+
|
|
+0 string \0volume_key volume_key escrow packet
|