mirror of
https://github.com/php/php-src.git
synced 2026-04-29 03:03:26 +02:00
3e7acf901d
mbstring had an 'identify filter' for almost every supported text encoding which was used when auto-detecting the most likely encoding for a string. It would run over the string and set a 'flag' if it saw anything which did not appear likely to be the encoding in question. One problem with this scheme was that encodings which merely appeared less likely to be the correct one were completely rejected, even if there was no better candidate. Another problem was that the 'identify filters' had a huge amount of code duplication with the 'conversion filters'. Eliminate the identify filters. Instead, when auto-detecting text encoding, use conversion filters to see whether the input string is valid in candidate encodings or not. At the same time, watch the type of codepoints which the string decodes to and mark it as less likely if non-printable characters (ESC, form feed, bell, etc.) or 'private use area' codepoints are seen. Interestingly, one old test case in which JIS text was misidentified as UTF-8 (and this wrong behavior was enshrined in the test) was 'fixed' and the JIS string is now auto-detected as JIS.
73 lines
3.6 KiB
JavaScript
73 lines
3.6 KiB
JavaScript
// vim:ft=javascript

// Build configuration for the mbstring extension.
//
// Declares the "mbstring" and "mbregex" configure switches. When mbstring is
// requested and mbstring.h can be located, the extension is registered along
// with the bundled libmbfl sources, its compiler flags and the headers to
// install. When mbregex is also requested and oniguruma is found, the regex
// sources and headers are added as well.

ARG_ENABLE("mbstring", "multibyte string functions", "no");
ARG_ENABLE("mbregex", "multibyte regex support", "no");

if (PHP_MBSTRING != "no") {

	if (CHECK_HEADER_ADD_INCLUDE("mbstring.h", "CFLAGS_MBSTRING", PHP_MBSTRING + ";" + PHP_PHP_BUILD + "\\include")) {
		EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", PHP_MBSTRING_SHARED);

		STDOUT.WriteLine("Using bundled libmbfl...");

		// Include paths for the bundled libmbfl plus the defines it is built with.
		ADD_FLAG("CFLAGS_MBSTRING", "-Iext/mbstring -Iext/mbstring/libmbfl -Iext/mbstring/libmbfl/mbfl \
			/D HAVE_STRICMP /D MBFL_DLL_EXPORT=1 /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");

		// Copy the .w32 variant of libmbfl's config.h into place; the final
		// argument (true) allows overwriting an existing config.h.
		FSO.CopyFile("ext\\mbstring\\libmbfl\\config.h.w32",
			"ext\\mbstring\\libmbfl\\config.h", true);

		// Per-encoding conversion filters.
		ADD_SOURCES("ext/mbstring/libmbfl/filters", "html_entities.c \
			mbfilter_7bit.c mbfilter_ascii.c mbfilter_base64.c mbfilter_big5.c \
			mbfilter_cp1251.c mbfilter_cp1252.c \
			mbfilter_cp866.c mbfilter_cp932.c mbfilter_cp936.c mbfilter_cp51932.c \
			mbfilter_euc_cn.c mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \
			mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \
			mbfilter_iso8859_1.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c \
			mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c \
			mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \
			mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \
			mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \
			mbfilter_iso2022_jp_ms.c mbfilter_gb18030.c mbfilter_sjis_2004.c \
			mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
			mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
			mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c mbfilter_utf8_mobile.c \
			mbfilter_koi8u.c mbfilter_cp1254.c mbfilter_euc_jp_2004.c \
			mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c \
			mbfilter_cp5022x.c mbfilter_sjis_open.c mbfilter_sjis_mobile.c \
			mbfilter_sjis_mac.c \
			mbfilter_iso2022jp_2004.c mbfilter_iso2022jp_mobile.c \
			mbfilter_tl_jisx0201_jisx0208.c", "mbstring");

		// libmbfl core sources.
		ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
			mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
			mbfl_filter_output.c mbfl_language.c mbfl_memory_device.c \
			mbfl_string.c", "mbstring");

		// libmbfl nls (per-language) sources.
		ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \
			nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \
			nls_ua.c nls_tr.c", "mbstring");

		PHP_INSTALL_HEADERS("ext/mbstring", "mbstring.h libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h");

		AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support');

		if (PHP_MBREGEX != "no") {
			// mbregex needs both the oniguruma header and one of its static libraries.
			if (CHECK_HEADER_ADD_INCLUDE("oniguruma.h", "CFLAGS_MBSTRING", PHP_MBREGEX) &&
				CHECK_LIB("onig_a.lib;libonig_a.lib", "mbstring", PHP_MBSTRING)) {
				// Description supplied to match the HAVE_MBSTRING define above.
				AC_DEFINE('HAVE_MBREGEX', 1, 'Have mbregex support');

				/* XXX libonig is only usable as a static library ATM, code change required to link with a DLL. */
				ADD_FLAG("CFLAGS_MBSTRING", "/DONIG_EXTERN=extern /DPHP_ONIG_BAD_KOI8_ENTRY=1 /DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");

				ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring");
				PHP_INSTALL_HEADERS("ext/mbstring", "php_mbregex.h php_onig_compat.h");
			} else {
				WARNING("mbregex not enabled; libraries and headers not found");
			}
		}

	} else {
		WARNING("mbstring not enabled; libraries and headers not found");
	}
}