1
0
mirror of https://github.com/php/php-src.git synced 2026-03-28 10:12:18 +01:00
Files
archived-php-src/ext/fileinfo/magicdata.patch
Anatol Belski 2a062f3c02 Rework magic data once more
The change reduces the input size on patterns using posix character
classes. It is still better than reverting to the patterns from the
older version, as the upstream data uses posix classes also in the later
versions. The input reduction speeds up the pattern matching in some
cases.

This patch is functionally almost same as upstream, but might show some diff
when the input is very long. While the magic data in the newer versions is
functionally an improvement, with jit=0 it might show a performance regression.
The slowdown is negligible in the normal usage and is still acceptable for the
malicious input. If some functional regressions show up, this patch should be
reverted and the tests timing should be adapted instead.
2018-11-14 22:23:24 +01:00

251 lines
9.9 KiB
Diff

diff -u a/magic/Magdir/c-lang b/magic/Magdir/c-lang
--- a/magic/Magdir/c-lang 2017-08-14 09:40:38.000000000 +0200
+++ b/magic/Magdir/c-lang 2018-11-10 13:06:33.171398452 +0100
@@ -13,61 +13,61 @@
# C
# Check for class if include is found, otherwise class is beaten by include becouse of lowered strength
0 regex \^#include C
->0 regex \^class[[:space:]]+
+>0 regex/4096 \^class[[:space:]]+
>>&0 regex \\{[\.\*]\\}(;)?$ \b++
>&0 clear x source text
!:strength + 13
!:mime text/x-c
-0 regex \^#[[:space:]]*pragma C source text
+0 regex/4096 \^#[[:space:]]*pragma C source text
!:mime text/x-c
-0 regex \^#[[:space:]]*(if\|ifn)def
->&0 regex \^#[[:space:]]*endif$ C source text
+0 regex/4096 \^#[[:space:]]*(if\|ifn)def
+>&0 regex/4096 \^#[[:space:]]*endif$ C source text
!:mime text/x-c
-0 regex \^#[[:space:]]*(if\|ifn)def
->&0 regex \^#[[:space:]]*define C source text
+0 regex/4096 \^#[[:space:]]*(if\|ifn)def
+>&0 regex/4096 \^#[[:space:]]*define C source text
!:mime text/x-c
-0 regex \^[[:space:]]*char(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
+0 regex/4096 \^[[:space:]]*char(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
!:mime text/x-c
-0 regex \^[[:space:]]*double(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
+0 regex/4096 \^[[:space:]]*double(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
!:mime text/x-c
-0 regex \^[[:space:]]*extern[[:space:]]+ C source text
+0 regex/4096 \^[[:space:]]*extern[[:space:]]+ C source text
!:mime text/x-c
-0 regex \^[[:space:]]*float(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
+0 regex/4096 \^[[:space:]]*float(\ \\*|\\*)(.+)(=.*)?;[[:space:]]*$ C source text
!:mime text/x-c
-0 regex \^struct[[:space:]]+ C source text
+0 regex/4096 \^struct[[:space:]]+ C source text
!:mime text/x-c
-0 regex \^union[[:space:]]+ C source text
+0 regex/4096 \^union[[:space:]]+ C source text
!:mime text/x-c
0 search/8192 main(
->&0 regex \\)[[:space:]]*\\{ C source text
+>&0 regex/4096 \\)[[:space:]]*\\{ C source text
!:mime text/x-c
# C++
# The strength of these rules is increased so they beat the C rules above
-0 regex \^namespace[[:space:]]+[_[:alpha:]]{1,30}[[:space:]]*\\{ C++ source text
+0 regex/4096 \^namespace[[:space:]]+[_[:alpha:]]{1,30}[[:space:]]*\\{ C++ source text
!:strength + 30
!:mime text/x-c++
# using namespace [namespace] or using std::[lib]
-0 regex \^using[[:space:]]+(namespace\ )?std(::)?[[:alpha:]]*[[:space:]]*; C++ source text
+0 regex/4096 \^using[[:space:]]+(namespace\ )?std(::)?[[:alpha:]]*[[:space:]]*; C++ source text
!:strength + 30
!:mime text/x-c++
-0 regex \^[[:space:]]*template[[:space:]]*<.*>[[:space:]]*$ C++ source text
+0 regex/4096 \^[[:space:]]*template[[:space:]]*<.*>[[:space:]]*$ C++ source text
!:strength + 30
!:mime text/x-c++
-0 regex \^[[:space:]]*virtual[[:space:]]+.*[};][[:space:]]*$ C++ source text
+0 regex/4096 \^[[:space:]]*virtual[[:space:]]+.*[};][[:space:]]*$ C++ source text
!:strength + 30
!:mime text/x-c++
# But class alone is reduced to avoid beating php (Jens Schleusener)
-0 regex \^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$ C++ source text
+0 regex/4096 \^[[:space:]]*class[[:space:]]+[[:digit:][:alpha:]:_]+[[:space:]]*\\{(.*[\n]*)*\\}(;)?$ C++ source text
!:strength + 13
!:mime text/x-c++
-0 regex \^[[:space:]]*public: C++ source text
+0 regex/4096 \^[[:space:]]*public: C++ source text
!:strength + 30
!:mime text/x-c++
-0 regex \^[[:space:]]*private: C++ source text
+0 regex/4096 \^[[:space:]]*private: C++ source text
!:strength + 30
!:mime text/x-c++
-0 regex \^[[:space:]]*protected: C++ source text
+0 regex/4096 \^[[:space:]]*protected: C++ source text
!:strength + 30
!:mime text/x-c++
diff -u a/magic/Magdir/elf b/magic/Magdir/elf
--- a/magic/Magdir/elf 2018-03-11 01:46:42.000000000 +0100
+++ b/magic/Magdir/elf 2018-11-10 12:27:29.858619326 +0100
@@ -48,9 +48,8 @@
!:mime application/x-object
>16 leshort 2 executable,
!:mime application/x-executable
->16 leshort 3 ${x?pie executable:shared object}
-
-!:mime application/x-${x?pie-executable:sharedlib}
+>16 leshort 3 shared object,
+!:mime application/x-sharedlib
>16 leshort 4 core file
!:mime application/x-coredump
# Core file detection is not reliable.
diff -u a/magic/Magdir/fsav b/magic/Magdir/fsav
--- a/magic/Magdir/fsav 2017-03-17 22:35:28.000000000 +0100
+++ b/magic/Magdir/fsav 2018-11-10 12:27:29.858619326 +0100
@@ -48,13 +48,15 @@
>11 string >\0 Clam AntiVirus database %-.23s
>>34 string :
>>>35 string !: \b, version
->>>>35 string x \b%-.1s
->>>>>36 string !:
+>>>>35 string x \b %-.1s
+>>>>>36 string !:
>>>>>>36 string x \b%-.1s
>>>>>>>37 string !:
>>>>>>>>37 string x \b%-.1s
>>>>>>>>>38 string !:
>>>>>>>>>>38 string x \b%-.1s
+>>>>>>>>>>>39 string !:
+>>>>>>>>>>>>39 string x \b%-.1s
>512 string \037\213 \b, gzipped
>769 string ustar\0 \b, tarred
diff -u a/magic/Magdir/images b/magic/Magdir/images
--- a/magic/Magdir/images 2018-03-11 01:46:42.000000000 +0100
+++ b/magic/Magdir/images 2018-11-10 12:27:29.858619326 +0100
@@ -464,7 +464,9 @@
!:mime image/x-unknown
# GIF
+# Strength set up to beat 0x55AA DOS/MBR signature word lookups (+65)
0 string GIF8 GIF image data
+!:strength +80
!:mime image/gif
!:apple 8BIMGIFf
>4 string 7a \b, version 8%s,
Only in b/magic/Magdir/: images.orig
diff -u a/magic/Magdir/linux b/magic/Magdir/linux
--- a/magic/Magdir/linux 2017-03-17 22:35:28.000000000 +0100
+++ b/magic/Magdir/linux 2018-11-10 12:27:29.862619309 +0100
@@ -94,6 +94,16 @@
# From Daniel Novotny <dnovotny@redhat.com>
# swap file for PowerPC
65526 string SWAPSPACE2 Linux/ppc swap file
+>0x400 long x version %d,
+>0x404 long x size %d pages,
+>1052 string \0 no label,
+>1052 string >\0 LABEL=%s,
+>0x40c belong x UUID=%08x
+>0x410 beshort x \b-%04x
+>0x412 beshort x \b-%04x
+>0x414 beshort x \b-%04x
+>0x416 belong x \b-%08x
+>0x41a beshort x \b%04x
16374 string SWAPSPACE2 Linux/ia64 swap file
#
# Linux kernel boot images, from Albert Cahalan <acahalan@cs.uml.edu>
diff -u a/magic/Magdir/python b/magic/Magdir/python
--- a/magic/Magdir/python 2017-08-14 09:40:38.000000000 +0200
+++ b/magic/Magdir/python 2018-11-10 13:06:36.799268528 +0100
@@ -63,7 +63,7 @@
!:mime text/x-python
# import module [as abrev]
-0 regex \^import\ [_[:alpha:]]+\ as\ [[:alpha:]][[:space:]]*$ Python script text executable
+0 regex/4096 \^import\ [_[:alpha:]]+\ as\ [[:alpha:]][[:space:]]*$ Python script text executable
!:mime text/x-python
# comments
@@ -79,7 +79,7 @@
# except: or finally:
# block
0 search/4096 try:
->&0 regex \^[[:space:]]*except.*:$ Python script text executable
+>&0 regex/4096 \^[[:space:]]*except.*:$ Python script text executable
!:strength + 15
!:mime text/x-python
>&0 search/4096 finally: Python script text executable
@@ -91,7 +91,7 @@
!:mime text/x-python
# def name(*args, **kwargs):
-0 regex \^[[:space:]]{0,50}def\ {1,50}[_a-zA-Z]{1,100}
+0 regex/4096 \^[[:space:]]{0,50}def\ {1,50}[_a-zA-Z]{1,100}
>&0 regex \\(([[:alpha:]*_,\ ]){0,255}\\):$ Python script text executable
!:strength + 15
!:mime text/x-python
diff -u a/magic/Magdir/rpm b/magic/Magdir/rpm
--- a/magic/Magdir/rpm 2013-01-11 17:45:23.000000000 +0100
+++ b/magic/Magdir/rpm 2018-11-10 12:27:29.858619326 +0100
@@ -29,6 +29,7 @@
>>8 beshort 17 SuperH
>>8 beshort 18 Xtensa
>>8 beshort 255 noarch
+>>10 string x %s
#delta RPM Daniel Novotny (dnovotny@redhat.com)
0 string drpm Delta RPM
diff -u a/magic/Magdir/ruby b/magic/Magdir/ruby
--- a/magic/Magdir/ruby 2017-08-14 15:39:18.000000000 +0200
+++ b/magic/Magdir/ruby 2018-11-10 13:06:28.703557761 +0100
@@ -22,30 +22,30 @@
# What looks like ruby, but does not have a shebang
# (modules and such)
# From: Lubomir Rintel <lkundrak@v3.sk>
-0 regex \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'
+0 regex/4096 \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+'
>0 regex def\ [a-z]|\ do$
->>&0 regex \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
+>>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
!:strength + 30
!:mime text/x-ruby
-0 regex \^[[:space:]]*(class|module)[[:space:]][A-Z]
+0 regex/4096 \^[[:space:]]*(class|module)[[:space:]][A-Z]
>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
->>&0 regex \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
+>>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
!:strength + 30
!:mime text/x-ruby
# Classes with no modules or defs, beats simple ASCII
-0 regex \^[[:space:]]*(class|module)[[:space:]][A-Z]
->&0 regex \^[[:space:]]*end([[:space:]]+[;#if].*)?$ Ruby script text
+0 regex/4096 \^[[:space:]]*(class|module)[[:space:]][A-Z]
+>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#if].*)?$ Ruby script text
!:strength + 10
!:mime text/x-ruby
# Looks for function definition to balance python magic
# def name (args)
# end
-0 regex \^[[:space:]]*def\ [a-z]|def\ [[:alpha:]]+::[a-z]
->&0 regex \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
+0 regex/4096 \^[[:space:]]*def\ [a-z]|def\ [[:alpha:]]+::[a-z]
+>&0 regex/4096 \^[[:space:]]*end([[:space:]]+[;#].*)?$ Ruby script text
!:strength + 10
!:mime text/x-ruby
-0 regex \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+' Ruby script text
+0 regex/4096 \^[[:space:]]*require[[:space:]]'[A-Za-z_/]+' Ruby script text
!:mime text/x-ruby
-0 regex \^[[:space:]]*include\ ([A-Z]+[a-z]*(::))+ Ruby script text
+0 regex/4096 \^[[:space:]]*include\ ([A-Z]+[a-z]*(::))+ Ruby script text
!:mime text/x-ruby
diff -u a/magic/Magdir/securitycerts b/magic/Magdir/securitycerts
--- a/magic/Magdir/securitycerts 2009-09-19 18:28:12.000000000 +0200
+++ b/magic/Magdir/securitycerts 2018-11-10 12:27:29.858619326 +0100
@@ -4,3 +4,5 @@
0 search/1 -----BEGIN\ CERTIFICATE------ RFC1421 Security Certificate text
0 search/1 -----BEGIN\ NEW\ CERTIFICATE RFC1421 Security Certificate Signing Request text
0 belong 0xedfeedfe Sun 'jks' Java Keystore File data
+
+0 string \0volume_key volume_key escrow packet