mirror of
https://github.com/php/php-src.git
synced 2026-04-27 18:23:26 +02:00
This commit was manufactured by cvs2svn to create branch 'PHP_5_0'.
This commit is contained in:
+136
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
* "streamable kanji code filter and converter"
|
||||
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
|
||||
*
|
||||
* LICENSE NOTICES
|
||||
*
|
||||
* This file is part of "streamable kanji code filter and converter",
|
||||
* which is distributed under the terms of GNU Lesser General Public
|
||||
* License (version 2) as published by the Free Software Foundation.
|
||||
*
|
||||
* This software is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with "streamable kanji code filter and converter";
|
||||
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
|
||||
* Suite 330, Boston, MA 02111-1307 USA
|
||||
*
|
||||
* The author of this file:
|
||||
*
|
||||
*/
|
||||
/*
|
||||
* The source code included in this files was separated from mbfilter.c
|
||||
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "mbfilter.h"
|
||||
#include "mbfilter_iso8859_16.h"
|
||||
#include "unicode_table_iso8859_16.h"
|
||||
|
||||
static const char *mbfl_encoding_8859_16_aliases[] = {"ISO_8859-16", NULL};
|
||||
|
||||
const mbfl_encoding mbfl_encoding_8859_16 = {
|
||||
mbfl_no_encoding_8859_16,
|
||||
"ISO-8859-16",
|
||||
"ISO-8859-16",
|
||||
(const char *(*)[])&mbfl_encoding_8859_16_aliases,
|
||||
NULL,
|
||||
MBFL_ENCTYPE_SBCS
|
||||
};
|
||||
|
||||
const struct mbfl_identify_vtbl vtbl_identify_8859_16 = {
|
||||
mbfl_no_encoding_8859_16,
|
||||
mbfl_filt_ident_common_ctor,
|
||||
mbfl_filt_ident_common_dtor,
|
||||
mbfl_filt_ident_true
|
||||
};
|
||||
|
||||
const struct mbfl_convert_vtbl vtbl_8859_16_wchar = {
|
||||
mbfl_no_encoding_8859_16,
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_8859_16_wchar,
|
||||
mbfl_filt_conv_common_flush
|
||||
};
|
||||
|
||||
const struct mbfl_convert_vtbl vtbl_wchar_8859_16 = {
|
||||
mbfl_no_encoding_wchar,
|
||||
mbfl_no_encoding_8859_16,
|
||||
mbfl_filt_conv_common_ctor,
|
||||
mbfl_filt_conv_common_dtor,
|
||||
mbfl_filt_conv_wchar_8859_16,
|
||||
mbfl_filt_conv_common_flush
|
||||
};
|
||||
|
||||
/*
 * Evaluate `statement` once; if the result is negative, make the
 * enclosing function return -1. Only usable inside functions that
 * return an int-compatible type.
 */
#define CK(statement) \
	do { \
		if ((statement) < 0) { \
			return (-1); \
		} \
	} while (0)
|
||||
|
||||
/*
|
||||
* ISO-8859-16 => wchar
|
||||
*/
|
||||
int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter)
|
||||
{
|
||||
int s;
|
||||
|
||||
if (c >= 0 && c < 0xa0) {
|
||||
s = c;
|
||||
} else if (c >= 0xa0 && c < 0x100) {
|
||||
s = iso8859_16_ucs_table[c - 0xa0];
|
||||
if (s <= 0) {
|
||||
s = c;
|
||||
s &= MBFL_WCSPLANE_MASK;
|
||||
s |= MBFL_WCSPLANE_8859_16;
|
||||
}
|
||||
} else {
|
||||
s = c;
|
||||
s &= MBFL_WCSGROUP_MASK;
|
||||
s |= MBFL_WCSGROUP_THROUGH;
|
||||
}
|
||||
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* wchar => ISO-8859-16
|
||||
*/
|
||||
int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter)
|
||||
{
|
||||
int s, n;
|
||||
|
||||
if (c >= 0 && c < 0xa0) {
|
||||
s = c;
|
||||
} else {
|
||||
s = -1;
|
||||
n = 95;
|
||||
while (n >= 0) {
|
||||
if (c == iso8859_16_ucs_table[n]) {
|
||||
s = 0xa0 + n;
|
||||
break;
|
||||
}
|
||||
n--;
|
||||
}
|
||||
if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_16) {
|
||||
s = c & MBFL_WCSPLANE_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
if (s >= 0) {
|
||||
CK((*filter->output_function)(s, filter->data));
|
||||
} else {
|
||||
if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
|
||||
CK(mbfl_filt_conv_illegal_output(c, filter));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
* COPYRIGHT NOTICE
|
||||
*
|
||||
* This file is a portion of "streamable kanji code filter and converter"
|
||||
* library, which is distributed under GNU Lesser General Public License
|
||||
* version 2.1.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MBFL_MBFILTER_ISO8859_16_H
|
||||
#define MBFL_MBFILTER_ISO8859_16_H
|
||||
|
||||
#include "mbfilter.h"
|
||||
|
||||
extern const mbfl_encoding mbfl_encoding_8859_16;
|
||||
extern const struct mbfl_identify_vtbl vtbl_identify_8859_16;
|
||||
extern const struct mbfl_convert_vtbl vtbl_8859_16_wchar;
|
||||
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_16;
|
||||
|
||||
int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter);
|
||||
int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter);
|
||||
|
||||
#endif /* MBFL_MBFILTER_ISO8859_16_H */
|
||||
Executable
+42
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/awk -f
#
# $Id$
#
# Description: a script that generates a single byte code set to Unicode
# mapping table.
#
# Variables read by the script (set them with -v on the command line):
#   TABLE_NAME   identifier of the generated C array
#   IFNDEF_NAME  optional include-guard macro; when set, the output is
#                wrapped in #ifndef / #define / #endif
#
# Output: a C array holding the entries for indexes 160..255, eight per row.
#
# NOTE(review): each input record is presumably "<byte value> <code point>"
# (field 1 indexes the table, field 2 is stored as-is) -- confirm against
# the mapping files fed to this script.
#
BEGIN {
	FS="[ \t#]"
}

# Comment lines carry no mapping: skip them so they never reach the
# table-filling rule below.
/^#/ {
	next
}

{
	tbl[$1 + 0] = $2
}

END {
	print "/* This file is automatically generated. Do not edit! */"
	if (IFNDEF_NAME) {
		print "#ifndef " IFNDEF_NAME
		# Without the matching #define the guard never takes effect.
		print "#define " IFNDEF_NAME
	}

	print "static const unsigned int " TABLE_NAME "[] = {"
	# Index explicitly instead of using several i++ in one argument list,
	# so the output does not depend on argument evaluation order.
	for (row = 160; row < 256; row += 8) {
		line = "\t"
		for (col = 0; col < 8; col++) {
			line = line sprintf("0x%04x", tbl[row + col])
			if (col < 7) {
				line = line ", "
			}
		}
		if (row + 8 < 256) {
			line = line ","
		}
		# A bare "print" here would emit $0 (the last input record)
		# rather than just ending the line.
		print line
	}
	print "};"

	if (IFNDEF_NAME) {
		print "#endif /* " IFNDEF_NAME " */"
	}
}
|
||||
@@ -0,0 +1,17 @@
|
||||
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLEISO8859_16_H
#define UNICODE_TABLEISO8859_16_H
/*
 * ISO-8859-16 bytes 0xa0..0xff mapped to Unicode code points;
 * entry [b - 0xa0] is the code point for byte b (96 entries).
 */
static const unsigned int iso8859_16_ucs_table[] = {
	0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
	0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
	0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
	0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
	0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
	0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
	0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
	0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
	0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
	0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff
};
#endif /* UNICODE_TABLEISO8859_16_H */
|
||||
Executable
+21
@@ -0,0 +1,21 @@
|
||||
Microsoft Visual Studio Solution File, Format Version 7.00
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfiguration) = preSolution
|
||||
ConfigName.0 = Debug
|
||||
ConfigName.1 = Release
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectDependencies) = postSolution
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfiguration) = postSolution
|
||||
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
|
||||
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
|
||||
{B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
|
||||
{B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityAddIns) = postSolution
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
Executable
+650
@@ -0,0 +1,650 @@
|
||||
<?xml version="1.0" encoding = "shift_jis"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="7.00"
|
||||
Name="libmbfl"
|
||||
ProjectGUID="{B3636594-A785-4270-A765-8EAE922B5207}"
|
||||
SccProjectName=""
|
||||
SccLocalPath="">
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"/>
|
||||
</Platforms>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory=".\Debug"
|
||||
IntermediateDirectory=".\Debug"
|
||||
ConfigurationType="2"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="mbfl,."
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;MBFL_DLL_EXPORT;HAVE_CONFIG_H=1"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="1"
|
||||
UsePrecompiledHeader="2"
|
||||
PrecompiledHeaderFile=".\Debug/mbfl.pch"
|
||||
AssemblerListingLocation=".\Debug/"
|
||||
ObjectFile=".\Debug/"
|
||||
ProgramDataBaseFileName=".\Debug/"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
DebugInformationFormat="4"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalOptions="/MACHINE:I386"
|
||||
AdditionalDependencies="odbc32.lib odbccp32.lib"
|
||||
OutputFile=".\Debug/mbfl.dll"
|
||||
LinkIncremental="2"
|
||||
SuppressStartupBanner="TRUE"
|
||||
ModuleDefinitionFile=""
|
||||
GenerateDebugInformation="TRUE"
|
||||
ProgramDatabaseFile=".\Debug/mbfl.pdb"
|
||||
ImportLibrary=".\Debug/mbfl.lib"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
MkTypLibCompatible="TRUE"
|
||||
SuppressStartupBanner="TRUE"
|
||||
TargetEnvironment="1"
|
||||
TypeLibraryName=".\Debug/mbfl.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
Culture="1041"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory=".\Release"
|
||||
IntermediateDirectory=".\Release"
|
||||
ConfigurationType="2"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
InlineFunctionExpansion="1"
|
||||
AdditionalIncludeDirectories="mbfl,."
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;HAVE_CONFIG_H"
|
||||
StringPooling="TRUE"
|
||||
RuntimeLibrary="0"
|
||||
EnableFunctionLevelLinking="TRUE"
|
||||
UsePrecompiledHeader="2"
|
||||
PrecompiledHeaderFile=".\Release/mbfl.pch"
|
||||
AssemblerListingLocation=".\Release/"
|
||||
ObjectFile=".\Release/"
|
||||
ProgramDataBaseFileName=".\Release/"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
AdditionalOptions="/MACHINE:I386"
|
||||
AdditionalDependencies="odbc32.lib odbccp32.lib"
|
||||
OutputFile=".\Release/mbfl.dll"
|
||||
LinkIncremental="1"
|
||||
SuppressStartupBanner="TRUE"
|
||||
ModuleDefinitionFile=""
|
||||
ProgramDatabaseFile=".\Release/mbfl.pdb"
|
||||
ImportLibrary=".\Release/mbfl.lib"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
MkTypLibCompatible="TRUE"
|
||||
SuppressStartupBanner="TRUE"
|
||||
TargetEnvironment="1"
|
||||
TypeLibraryName=".\Release/mbfl.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="vc6">
|
||||
<File
|
||||
RelativePath=".\filters\html_entities.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_7bit.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter_8bit.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_ascii.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_base64.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_big5.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_byte2.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_byte4.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp1251.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp1252.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp866.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp932.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp936.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_cn.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_jp.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_jp_win.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_kr.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_tw.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_htmlent.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_hz.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso2022_kr.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_1.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_10.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_13.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_14.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_15.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_16.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_2.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_3.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_4.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_5.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_6.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_7.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_8.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_9.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_jis.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_koi8r.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter_pass.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_qprint.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_sjis.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_ucs2.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_ucs4.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_uhc.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf16.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf32.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf7.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf7imap.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf8.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_uuencode.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter_wchar.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_allocators.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_convert.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_encoding.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_filter_output.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_ident.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_language.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_memory_device.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_string.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_de.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_en.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_ja.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_kr.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_neutral.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_ru.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_uni.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_zh.c">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl">
|
||||
<File
|
||||
RelativePath=".\config.h.vc6">
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32">
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;&#x0D;&#x0A;"
|
||||
Outputs="$(InputDir)\config.h"/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32">
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;&#x0D;&#x0A;"
|
||||
Outputs="$(InputDir)\config.h"/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\cp932_table.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\html_entities.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_7bit.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter_8bit.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_ascii.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_base64.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_big5.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_byte2.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_byte4.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp1251.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp1252.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp866.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp932.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_cp936.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_cn.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_jp.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_jp_win.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_kr.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_euc_tw.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_htmlent.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_hz.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso2022_kr.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_1.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_10.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_13.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_14.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_15.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_16.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_2.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_3.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_4.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_5.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_6.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_7.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_8.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_iso8859_9.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_jis.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_koi8r.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter_pass.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_qprint.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_sjis.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_ucs2.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_ucs4.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_uhc.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf16.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf32.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf7.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf7imap.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_utf8.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\mbfilter_uuencode.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfilter_wchar.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_allocators.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_consts.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_convert.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_encoding.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_filter_output.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_ident.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_language.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_memory_device.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbfl\mbfl_string.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_de.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_en.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_ja.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_kr.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_neutral.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_ru.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_uni.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\nls\nls_zh.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_prop.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_big5.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_cns11643.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_cp1251.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_cp1252.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_cp866.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_cp932_ext.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_cp936.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_10.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_13.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_14.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_15.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_16.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_2.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_3.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_4.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_5.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_6.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_7.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_8.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_iso8859_9.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_jis.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_koi8r.h">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filters\unicode_table_uhc.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Resource Files"
|
||||
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
|
||||
<File
|
||||
RelativePath=".\mbfl.rc">
|
||||
</File>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
||||
@@ -0,0 +1,177 @@
|
||||
README.ja 2005/02/04
|
||||
|
||||
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
|
||||
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
|
||||
|
||||
鬼車は正規表現ライブラリである。
|
||||
このライブラリの特長は、それぞれの正規表現オブジェクトごとに
|
||||
文字エンコーディングを指定できることである。
|
||||
|
||||
サポートしている文字エンコーディング:
|
||||
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
|
||||
Shift_JIS, Big5, KOI8-R, KOI8 (*),
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
|
||||
|
||||
* KOI8はデフォルトのセットアップではライブラリの中に含まれない。
|
||||
(必要であればMakefileを編集すること)
|
||||
------------------------------------------------------------
|
||||
|
||||
インストール
|
||||
|
||||
ケース1: UnixとCygwin環境
|
||||
|
||||
1. ./configure
|
||||
2. make
|
||||
3. make install
|
||||
|
||||
ライブラリファイル: libonig.a
|
||||
|
||||
動作テスト (ASCII/EUC-JP)
|
||||
|
||||
make ctest
|
||||
|
||||
アンインストール
|
||||
|
||||
make uninstall
|
||||
|
||||
|
||||
|
||||
ケース2: Win32(VC++)環境
|
||||
|
||||
1. copy win32\Makefile Makefile
|
||||
2. copy win32\config.h config.h
|
||||
3. nmake
|
||||
|
||||
onig_s.lib: static link library
|
||||
onig.dll: dynamic link library
|
||||
|
||||
* 動作テスト (ASCII/Shift_JIS)
|
||||
4. copy win32\testc.c testc.c
|
||||
5. nmake ctest
|
||||
|
||||
|
||||
ライセンス
|
||||
|
||||
このソフトウェアがRubyと一緒に使用または配布される場合には、
|
||||
Rubyのライセンスに従う。
|
||||
それ以外の場合には、BSDライセンスに従う。
|
||||
|
||||
|
||||
正規表現
|
||||
|
||||
doc/RE.jaを参照
|
||||
|
||||
|
||||
使用方法
|
||||
|
||||
使用するプログラムで、oniguruma.hをインクルードする(Native APIの場合)。
|
||||
Native APIについては、doc/API.jaを参照。
|
||||
|
||||
Win32でスタティックリンクライブラリ(onig_s.lib)をリンクする場合には、
|
||||
コンパイルするときに -DONIG_EXTERN=extern をコンパイル引数に追加すること。
|
||||
|
||||
|
||||
使用例プログラム
|
||||
|
||||
sample/simple.c 最小例 (native API)
|
||||
sample/names.c 名前付きグループコールバック使用例
|
||||
sample/encode.c 幾つかの文字エンコーディング使用例
|
||||
sample/listcap.c 捕獲履歴機能の使用例
|
||||
sample/posix.c POSIX API使用例
|
||||
sample/sql.c 可変メタ文字機能使用例 (SQL-like パターン)
|
||||
sample/syntax.c PerlとJava文法のテスト
|
||||
|
||||
|
||||
ソースファイル
|
||||
|
||||
oniguruma.h 鬼車APIヘッダ (公開)
|
||||
|
||||
regenc.h 文字エンコーディング枠組みヘッダ
|
||||
regint.h 内部宣言
|
||||
regparse.h regparse.cとregcomp.cのための内部宣言
|
||||
regcomp.c コンパイル、最適化関数
|
||||
regenc.c 文字エンコーディング枠組み
|
||||
regerror.c エラーメッセージ関数
|
||||
regext.c 拡張API関数
|
||||
regexec.c 検索、照合関数
|
||||
regparse.c 正規表現パターン解析関数
|
||||
regsyntax.c 正規表現パターン文法関数、組込み文法定義
|
||||
regtrav.c 捕獲履歴木巡回関数
|
||||
regversion.c 版情報関数
|
||||
st.h ハッシュテーブル関数宣言
|
||||
st.c ハッシュテーブル関数
|
||||
|
||||
oniggnu.h GNU regex APIヘッダ (公開)
|
||||
reggnu.c GNU regex API関数
|
||||
|
||||
onigposix.h POSIX APIヘッダ (公開)
|
||||
regposerr.c POSIX APIエラーメッセージ関数
|
||||
regposix.c POSIX API関数
|
||||
|
||||
enc/mktable.c 文字タイプテーブル生成プログラム
|
||||
enc/ascii.c ASCII エンコーディング
|
||||
enc/euc_jp.c EUC-JP エンコーディング
|
||||
enc/euc_tw.c EUC-TW エンコーディング
|
||||
enc/euc_kr.c EUC-KR, EUC-CN エンコーディング
|
||||
enc/sjis.c Shift_JIS エンコーディング
|
||||
enc/big5.c Big5 エンコーディング
|
||||
enc/koi8.c KOI8 エンコーディング
|
||||
enc/koi8_r.c KOI8-R エンコーディング
|
||||
enc/iso8859_1.c ISO-8859-1 (Latin-1)
|
||||
enc/iso8859_2.c ISO-8859-2 (Latin-2)
|
||||
enc/iso8859_3.c ISO-8859-3 (Latin-3)
|
||||
enc/iso8859_4.c ISO-8859-4 (Latin-4)
|
||||
enc/iso8859_5.c ISO-8859-5 (Cyrillic)
|
||||
enc/iso8859_6.c ISO-8859-6 (Arabic)
|
||||
enc/iso8859_7.c ISO-8859-7 (Greek)
|
||||
enc/iso8859_8.c ISO-8859-8 (Hebrew)
|
||||
enc/iso8859_9.c ISO-8859-9 (Latin-5 または Turkish)
|
||||
enc/iso8859_10.c ISO-8859-10 (Latin-6 または Nordic)
|
||||
enc/iso8859_11.c ISO-8859-11 (Thai)
|
||||
enc/iso8859_13.c ISO-8859-13 (Latin-7 または Baltic Rim)
|
||||
enc/iso8859_14.c ISO-8859-14 (Latin-8 または Celtic)
|
||||
enc/iso8859_15.c ISO-8859-15 (Latin-9 または West European with Euro)
|
||||
enc/iso8859_16.c ISO-8859-16
|
||||
(Latin-10 または South-Eastern European with Euro)
|
||||
enc/utf8.c UTF-8 エンコーディング
|
||||
enc/utf16_be.c UTF-16BE エンコーディング
|
||||
enc/utf16_le.c UTF-16LE エンコーディング
|
||||
enc/utf32_be.c UTF-32BE エンコーディング
|
||||
enc/utf32_le.c UTF-32LE エンコーディング
|
||||
enc/unicode.c Unicode情報
|
||||
|
||||
win32/Makefile Win32用 Makefile (for VC++)
|
||||
win32/config.h Win32用 config.h
|
||||
|
||||
|
||||
|
||||
Ruby 1.8/1.6の日本語化GNU regexとのAPIの違い
|
||||
|
||||
+ re_compile_fastmap() は削除された。
|
||||
+ re_recompile_pattern() が追加された。
|
||||
+ re_alloc_pattern() が追加された。
|
||||
|
||||
|
||||
残件
|
||||
|
||||
? Unicode全コードポイント領域での大文字小文字照合
|
||||
? Unicodeプロパティ
|
||||
? ambig-flag Katakana <-> Hiragana
|
||||
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
|
||||
? ONIG_SYNTAX_ASIS追加
|
||||
?? \X (== \PM\pM*)
|
||||
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
|
||||
?? 改行文字(文字列)を変更できる
|
||||
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
|
||||
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
|
||||
|
||||
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
File diff suppressed because it is too large
Load Diff
Executable
+253
@@ -0,0 +1,253 @@
|
||||
/**********************************************************************
|
||||
utf16_be.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Classify the most-significant byte of a UTF-16 code unit:
 * 0xd8..0xdb marks a high (first) surrogate, 0xdc..0xdf a low (second) one.
 * The argument is parenthesized so that a compound expression is classified
 * as a whole; note that it is still evaluated twice. */
#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
|
||||
|
||||
/* Byte length of a UTF-16 character, indexed by the most-significant byte of
 * its first code unit: 4 for a high-surrogate byte (0xd8..0xdb), otherwise 2.
 * The table is never written, so it is declared const. */
static const int EncLen_UTF16[256] = {
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x00 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x10 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x20 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x30 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x40 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x50 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x60 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x70 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x80 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x90 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xa0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xb0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xc0 */
  2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,  /* 0xd0: 0xd8..0xdb lead a surrogate pair */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xe0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2   /* 0xf0 */
};
|
||||
|
||||
static int
|
||||
utf16be_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_UTF16[*p];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 1 < end) {
|
||||
if (*(p+1) == 0x0a && *p == 0x00)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16be_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
|
||||
if (UTF16_IS_SURROGATE_FIRST(*p)) {
|
||||
code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
|
||||
+ ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
|
||||
+ p[3];
|
||||
}
|
||||
else {
|
||||
code = p[0] * 256 + p[1];
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return (code > 0xffff ? 4 : 2);
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
if (code > 0xffff) {
|
||||
unsigned int plane, high;
|
||||
|
||||
plane = code >> 16;
|
||||
*p++ = (plane >> 2) + 0xd8;
|
||||
high = (code & 0xff00) >> 8;
|
||||
*p++ = ((plane & 0x03) << 6) + (high >> 2);
|
||||
*p++ = (high & 0x02) + 0xdc;
|
||||
*p = (UChar )(code & 0xff);
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*p == 0) {
|
||||
p++;
|
||||
if (end > p + 2 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+1) == 0) {
|
||||
*lower++ = '\0';
|
||||
*lower = 0xdf;
|
||||
(*pp) += 4;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*lower++ = '\0';
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
|
||||
}
|
||||
else {
|
||||
*lower = *p;
|
||||
}
|
||||
|
||||
(*pp) += 2;
|
||||
return 2; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
int len;
|
||||
len = EncLen_UTF16[*p];
|
||||
if (lower != p) {
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
(*pp) += EncLen_UTF16[*p];
|
||||
|
||||
if (*p == 0) {
|
||||
int c, v;
|
||||
|
||||
p++;
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 2 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+1) == 0) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
else if (*p == 0xdf) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
c = *p;
|
||||
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
|
||||
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
|
||||
|
||||
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
|
||||
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
|
||||
if (c >= 0xaa && c <= 0xba)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
return (v != 0 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
if ((s - start) % 2 == 1) {
|
||||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
|
||||
s -= 2;
|
||||
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_BE = {
|
||||
utf16be_mbc_enc_len,
|
||||
"UTF-16BE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
},
|
||||
utf16be_is_mbc_newline,
|
||||
utf16be_mbc_to_code,
|
||||
utf16be_code_to_mbclen,
|
||||
utf16be_code_to_mbc,
|
||||
utf16be_mbc_to_normalize,
|
||||
utf16be_is_mbc_ambiguous,
|
||||
onigenc_iso_8859_1_get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_unicode_get_ctype_code_range,
|
||||
utf16be_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match
|
||||
};
|
||||
Executable
+248
@@ -0,0 +1,248 @@
|
||||
/**********************************************************************
|
||||
utf16_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
/* Classify the most-significant byte of a UTF-16 code unit:
 * 0xd8..0xdb marks a high (first) surrogate, 0xdc..0xdf a low (second) one.
 * The argument is parenthesized so that a compound expression is classified
 * as a whole; note that it is still evaluated twice. */
#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
|
||||
|
||||
/* Byte length of a UTF-16 character, indexed by the most-significant byte of
 * its first code unit: 4 for a high-surrogate byte (0xd8..0xdb), otherwise 2.
 * The table is never written, so it is declared const. */
static const int EncLen_UTF16[256] = {
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x00 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x10 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x20 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x30 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x40 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x50 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x60 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x70 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x80 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0x90 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xa0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xb0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xc0 */
  2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,  /* 0xd0: 0xd8..0xdb lead a surrogate pair */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xe0 */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2   /* 0xf0 */
};
|
||||
|
||||
static int
|
||||
utf16le_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return (code > 0xffff ? 4 : 2);
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return EncLen_UTF16[*(p+1)];
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 1 < end) {
|
||||
if (*p == 0x0a && *(p+1) == 0x00)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf16le_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
UChar c0 = *p;
|
||||
UChar c1 = *(p+1);
|
||||
|
||||
if (UTF16_IS_SURROGATE_FIRST(c1)) {
|
||||
code = ((((c1 - 0xd8) << 2) + ((c0 & 0xc0) >> 6) + 1) << 16)
|
||||
+ ((((c0 & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
|
||||
+ p[2];
|
||||
}
|
||||
else {
|
||||
code = c1 * 256 + p[0];
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
if (code > 0xffff) {
|
||||
unsigned int plane, high;
|
||||
|
||||
plane = code >> 16;
|
||||
high = (code & 0xff00) >> 8;
|
||||
|
||||
*p++ = ((plane & 0x03) << 6) + (high >> 2);
|
||||
*p++ = (plane >> 2) + 0xd8;
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
*p = (high & 0x02) + 0xdc;
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*(p+1) == 0) {
|
||||
if (end > p + 3 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+3) == 0) {
|
||||
*lower++ = 0xdf;
|
||||
*lower = '\0';
|
||||
(*pp) += 4;
|
||||
return 2;
|
||||
}
|
||||
|
||||
*(lower+1) = '\0';
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
|
||||
}
|
||||
else {
|
||||
*lower = *p;
|
||||
}
|
||||
(*pp) += 2;
|
||||
return 2; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
int len = EncLen_UTF16[*(p+1)];
|
||||
if (lower != p) {
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
(*pp) += EncLen_UTF16[*(p+1)];
|
||||
|
||||
if (*(p+1) == 0) {
|
||||
int c, v;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 3 &&
|
||||
((*p == 's' && *(p+2) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+2) == 'S'))) &&
|
||||
*(p+3) == 0) {
|
||||
(*pp) += 2;
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
c = *p;
|
||||
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
|
||||
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
|
||||
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
|
||||
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
|
||||
if (c >= 0xaa && c <= 0xba)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
return (v != 0 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
if ((s - start) % 2 == 1) {
|
||||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
|
||||
s -= 2;
|
||||
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_LE = {
|
||||
utf16le_mbc_enc_len,
|
||||
"UTF-16LE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
},
|
||||
utf16le_is_mbc_newline,
|
||||
utf16le_mbc_to_code,
|
||||
utf16le_code_to_mbclen,
|
||||
utf16le_code_to_mbc,
|
||||
utf16le_mbc_to_normalize,
|
||||
utf16le_is_mbc_ambiguous,
|
||||
onigenc_iso_8859_1_get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_unicode_get_ctype_code_range,
|
||||
utf16le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match
|
||||
};
|
||||
Executable
+208
@@ -0,0 +1,208 @@
|
||||
/**********************************************************************
|
||||
utf32_be.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
static int
|
||||
utf32be_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 3 < end) {
|
||||
if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32be_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
*p++ = (UChar )((code & 0xff000000) >>24);
|
||||
*p++ = (UChar )((code & 0xff0000) >>16);
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
*p++ = (UChar ) (code & 0xff);
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
|
||||
p += 3;
|
||||
if (end > p + 4 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower = 0xdf;
|
||||
(*pp) += 8;
|
||||
return 4;
|
||||
}
|
||||
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
|
||||
}
|
||||
else {
|
||||
*lower = *p;
|
||||
}
|
||||
|
||||
(*pp) += 4;
|
||||
return 4; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
int len = 4;
|
||||
if (lower != p) {
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
(*pp) += 4;
|
||||
|
||||
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
|
||||
int c, v;
|
||||
|
||||
p += 3;
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 4 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
|
||||
(*pp) += 4;
|
||||
return TRUE;
|
||||
}
|
||||
else if (*p == 0xdf) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
c = *p;
|
||||
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
|
||||
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
|
||||
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
|
||||
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
|
||||
if (c >= 0xaa && c <= 0xba)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
return (v != 0 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
int rem;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
rem = (s - start) % 4;
|
||||
return (UChar* )(s - rem);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF32_BE = {
|
||||
utf32be_mbc_enc_len,
|
||||
"UTF-32BE", /* name */
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
},
|
||||
utf32be_is_mbc_newline,
|
||||
utf32be_mbc_to_code,
|
||||
utf32be_code_to_mbclen,
|
||||
utf32be_code_to_mbc,
|
||||
utf32be_mbc_to_normalize,
|
||||
utf32be_is_mbc_ambiguous,
|
||||
onigenc_iso_8859_1_get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_unicode_get_ctype_code_range,
|
||||
utf32be_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match
|
||||
};
|
||||
Executable
+206
@@ -0,0 +1,206 @@
|
||||
/**********************************************************************
|
||||
utf32_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
static int
|
||||
utf32le_mbc_enc_len(const UChar* p)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
if (p + 3 < end) {
|
||||
if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OnigCodePoint
|
||||
utf32le_mbc_to_code(const UChar* p, const UChar* end)
|
||||
{
|
||||
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
||||
{
|
||||
UChar* p = buf;
|
||||
|
||||
*p++ = (UChar ) (code & 0xff);
|
||||
*p++ = (UChar )((code & 0xff00) >> 8);
|
||||
*p++ = (UChar )((code & 0xff0000) >>16);
|
||||
*p++ = (UChar )((code & 0xff000000) >>24);
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
|
||||
UChar* lower)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
|
||||
if (end > p + 7 &&
|
||||
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
|
||||
*lower++ = 0xdf;
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower = '\0';
|
||||
(*pp) += 8;
|
||||
return 4;
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
*lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
|
||||
}
|
||||
else {
|
||||
*lower++ = *p;
|
||||
}
|
||||
*lower++ = '\0';
|
||||
*lower++ = '\0';
|
||||
*lower = '\0';
|
||||
|
||||
(*pp) += 4;
|
||||
return 4; /* return byte length of converted char to lower */
|
||||
}
|
||||
else {
|
||||
int len = 4;
|
||||
if (lower != p) {
|
||||
int i;
|
||||
for (i = 0; i < len; i++) {
|
||||
*lower++ = *p++;
|
||||
}
|
||||
}
|
||||
(*pp) += len;
|
||||
return len; /* return byte length of converted char to lower */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
(*pp) += 4;
|
||||
|
||||
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
|
||||
int c, v;
|
||||
|
||||
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
|
||||
if (end > p + 7 &&
|
||||
((*p == 's' && *(p+4) == 's') ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
(*p == 'S' && *(p+4) == 'S'))) &&
|
||||
*(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
|
||||
(*pp) += 4;
|
||||
return TRUE;
|
||||
}
|
||||
else if (*p == 0xdf) {
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
|
||||
ONIGENC_IS_MBC_ASCII(p)) ||
|
||||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
|
||||
!ONIGENC_IS_MBC_ASCII(p))) {
|
||||
c = *p;
|
||||
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
|
||||
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
|
||||
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
|
||||
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
|
||||
if (c >= 0xaa && c <= 0xba)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
return (v != 0 ? TRUE : FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static UChar*
|
||||
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
|
||||
{
|
||||
int rem;
|
||||
|
||||
if (s <= start) return (UChar* )s;
|
||||
|
||||
rem = (s - start) % 4;
|
||||
return (UChar* )(s - rem);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF32_LE = {
|
||||
utf32le_mbc_enc_len,
|
||||
"UTF-32LE", /* name */
|
||||
4, /* max byte length */
|
||||
4, /* min byte length */
|
||||
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
|
||||
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
|
||||
{
|
||||
(OnigCodePoint )'\\' /* esc */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
|
||||
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
|
||||
},
|
||||
utf32le_is_mbc_newline,
|
||||
utf32le_mbc_to_code,
|
||||
utf32le_code_to_mbclen,
|
||||
utf32le_code_to_mbc,
|
||||
utf32le_mbc_to_normalize,
|
||||
utf32le_is_mbc_ambiguous,
|
||||
onigenc_iso_8859_1_get_all_pair_ambig_codes,
|
||||
onigenc_ess_tsett_get_all_comp_ambig_codes,
|
||||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_unicode_get_ctype_code_range,
|
||||
utf32le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match
|
||||
};
|
||||
Executable
+173
@@ -0,0 +1,173 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
|
||||
<title>Oniguruma</title>
|
||||
</head>
|
||||
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
|
||||
|
||||
<h2>Oniguruma</h2>
|
||||
<p>
|
||||
2005/02/19 (C) K.Kosako
|
||||
</p>
|
||||
<p>
|
||||
<a href="http://miuras.net/matsushita.html">
|
||||
<img src="anti_matsushita.PNG" height="46" width="266">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Oniguruma is a regular expressions library.<br>
|
||||
A distinguishing feature of this library is that a different character encoding
|
||||
<br>can be specified for each regular expression object.
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<dt><b>Supported character encodings:</b><br>
|
||||
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
|
||||
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
|
||||
Shift_JIS, Big5, KOI8-R, KOI8,<br>
|
||||
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
|
||||
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
|
||||
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
|
||||
</dt>
|
||||
</dl>
|
||||
<p>
|
||||
|
||||
<dl>
|
||||
<font color="orange">
|
||||
<dt><b>What's new</b>
|
||||
</font>
|
||||
<ul>
|
||||
<li>Character types (\w, \s, \d and POSIX brackets) are supported over the full code point range, following Version 4.0.1 of the <a href="http://www.unicode.org/ucd/">Unicode Standard</a>. (since Version 3.5.0)
|
||||
</ul>
|
||||
</dl>
|
||||
|
||||
<hr>
|
||||
|
||||
<dl>
|
||||
<dt>This software can be used in two ways:
|
||||
<ul>
|
||||
<li> (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
|
||||
<li> (2) Built-in regular expressions engine of <a href="http://www.ruby-lang.org/">Ruby</a> 1.6/1.8/1.9 <br>
|
||||
In Ruby 1.9, Oniguruma is already incorporated by Kazuo Saito.
|
||||
</ul>
|
||||
</dl>
|
||||
|
||||
<dl>
|
||||
<dt><b>Platform:</b>
|
||||
<ul>
|
||||
<li> Unix (including Mac OS X)
|
||||
<li> Cygwin
|
||||
<li> Win32
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>License:</b><br>
|
||||
When this software is used as part of Ruby or is distributed with Ruby,
|
||||
it follows the Ruby license.<br>
|
||||
In all other cases it follows the BSD license.
|
||||
</p>
|
||||
|
||||
<dt><b>Download:</b>
|
||||
<ul>
|
||||
<li> <a href="archive/onigd20050219.tar.gz">Latest release version 3.7.0</a> (2005/02/19) <a href="HISTORY_3X.txt">Change Log</a>
|
||||
<li> <a href="archive/onigd20050204.tar.gz">3.6.0</a> (2005/02/04)
|
||||
<li> <a href="archive/onigd20050119.tar.gz">3.5.4</a> (2005/01/19)
|
||||
<li> <a href="archive/onigd2_4_1.tar.gz">Latest release version 2.4.1</a> (2005/01/05) <a href="HISTORY_2X.txt">Change Log</a>
|
||||
<li> <a href="archive/onigd2_4_0.tar.gz">2.4.0</a> (2004/12/01)
|
||||
<li> <a href="archive/onigd2_3_3.tar.gz">2.3.3</a> (2004/10/30)
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<font color="red">
|
||||
* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
|
||||
* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
|
||||
</font>
|
||||
|
||||
<br>
|
||||
<br>
|
||||
<dt><b>Documents:</b> (version 3.7.0)
|
||||
<ul>
|
||||
<li> <a href="doc/RE.txt">Regular Expressions</a>
|
||||
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
|
||||
<li> <a href="doc/API.txt">Oniguruma API</a>
|
||||
<a href="doc/API.ja.txt">(Japanese: EUC-JP)</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Sample Programs:</b>
|
||||
<ul>
|
||||
<li><a href="sample/simple.c">minimal example</a>
|
||||
<li><a href="sample/sql.c">example of the variable syntax and meta character (SQL-like pattern match)</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>Links:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version)
|
||||
<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
|
||||
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
|
||||
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin</a> (Japanese page)
|
||||
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
|
||||
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail</a>
|
||||
<li> <a href="http://www.artman21.net/">Jedit X</a>
|
||||
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page)
|
||||
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
|
||||
<li> <a href="http://www.trinity-site.net/wiki/index.php?MultiFind">MultiFind</a> (Japanese page)
|
||||
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
|
||||
<li> <a href="http://www-gauge.scphys.kyoto-u.ac.jp/~sonobe/OgreKit/index.html">OgreKit</a> Regular Expression Framework for Cocoa (Japanese page)
|
||||
<li> <a href ="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
|
||||
<li> <a href ="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
|
||||
<li> <a href ="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
|
||||
<li> <a href ="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
|
||||
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
|
||||
<li> <a href="http://www.ruby-lang.org/">Ruby</a>
|
||||
<li> <a href="http://quux.s74.xrea.com/">SevenFour</a> (Japanese page)
|
||||
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<dt><b>References:</b>
|
||||
<ul>
|
||||
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
|
||||
<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a>
|
||||
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
|
||||
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
|
||||
<li> <a href="http://www.pcre.org/">PCRE</a>
|
||||
<!--
|
||||
<li> <a href="http://www.jajakarta.org/regexp/">Jakarta Project Regexp</a> (Japanese page)
|
||||
<li> <a href="http://www.jajakarta.org/oro/">Jakarta Project ORO</a> (Japanese page)
|
||||
-->
|
||||
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
|
||||
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
|
||||
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
|
||||
</ul>
|
||||
|
||||
<br>
|
||||
<!--
|
||||
<dt><b>ToDo:</b>
|
||||
<ul>
|
||||
<li> support character types for all code point range.
|
||||
</ul>
|
||||
-->
|
||||
</dl>
|
||||
<p>
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
</p>
|
||||
|
||||
<!--
|
||||
<hr>
|
||||
<font color="red">
|
||||
2004-06-14<br>
|
||||
To: "Greg A. Woods"<br>
|
||||
I can't send mail to you. (rejected)<br>
|
||||
Please set the nmatch argument of regexec() to 1,
|
||||
and use Oniguruma 3.6.0 or 2.4.1.<br>
|
||||
The nmatch argument should be array size of a pmatch.<br>
|
||||
But I don't know whether this problem is related to the crash
|
||||
that you reported.
|
||||
</font>
|
||||
-->
|
||||
<hr>
|
||||
</body>
|
||||
</html>
|
||||
Executable
+213
@@ -0,0 +1,213 @@
|
||||
/**********************************************************************
|
||||
regext.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
static void
|
||||
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
*conv++ = *s++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = *s++;
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
*conv++ = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = '\0';
|
||||
*conv++ = *s++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = *s++;
|
||||
*conv++ = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = s[3];
|
||||
*conv++ = s[2];
|
||||
*conv++ = s[1];
|
||||
*conv++ = s[0];
|
||||
s += 4;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
|
||||
{
|
||||
while (s < end) {
|
||||
*conv++ = s[1];
|
||||
*conv++ = s[0];
|
||||
s += 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
|
||||
UChar** conv, UChar** conv_end)
|
||||
{
|
||||
int len = end - s;
|
||||
|
||||
if (to == ONIG_ENCODING_UTF16_BE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 2);
|
||||
CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
|
||||
*conv_end = *conv + (len * 2);
|
||||
conv_ext0be(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF16_LE) {
|
||||
swap16:
|
||||
*conv = (UChar* )xmalloc(len);
|
||||
CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
|
||||
*conv_end = *conv + len;
|
||||
conv_swap2bytes(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (to == ONIG_ENCODING_UTF16_LE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 2);
|
||||
CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
|
||||
*conv_end = *conv + (len * 2);
|
||||
conv_ext0le(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF16_BE) {
|
||||
goto swap16;
|
||||
}
|
||||
}
|
||||
if (to == ONIG_ENCODING_UTF32_BE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 4);
|
||||
CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
|
||||
*conv_end = *conv + (len * 4);
|
||||
conv_ext0be32(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF32_LE) {
|
||||
swap32:
|
||||
*conv = (UChar* )xmalloc(len);
|
||||
CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
|
||||
*conv_end = *conv + len;
|
||||
conv_swap4bytes(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else if (to == ONIG_ENCODING_UTF32_LE) {
|
||||
if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
|
||||
*conv = (UChar* )xmalloc(len * 4);
|
||||
CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
|
||||
*conv_end = *conv + (len * 4);
|
||||
conv_ext0le32(s, end, *conv);
|
||||
return 0;
|
||||
}
|
||||
else if (from == ONIG_ENCODING_UTF32_BE) {
|
||||
goto swap32;
|
||||
}
|
||||
}
|
||||
|
||||
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigCompileInfo* ci, OnigErrorInfo* einfo)
|
||||
{
|
||||
int r;
|
||||
UChar *cpat, *cpat_end;
|
||||
|
||||
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
|
||||
|
||||
if (ci->pattern_enc != ci->target_enc) {
|
||||
r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
|
||||
&cpat, &cpat_end);
|
||||
if (r) return r;
|
||||
}
|
||||
else {
|
||||
cpat = (UChar* )pattern;
|
||||
cpat_end = (UChar* )pattern_end;
|
||||
}
|
||||
|
||||
r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
|
||||
ci->syntax);
|
||||
if (r) goto err;
|
||||
|
||||
r = onig_compile(*reg, cpat, cpat_end, einfo);
|
||||
if (r) {
|
||||
onig_free(*reg);
|
||||
*reg = NULL;
|
||||
}
|
||||
|
||||
err:
|
||||
if (cpat != pattern) xfree(cpat);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
extern int
|
||||
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
||||
OnigCompileInfo* ci, OnigErrorInfo* einfo)
|
||||
{
|
||||
int r;
|
||||
regex_t *new_reg;
|
||||
|
||||
r = onig_new_deluxe(&new_reg, pattern, pattern_end, ci, einfo);
|
||||
if (r) return r;
|
||||
if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
||||
onig_transfer(reg, new_reg);
|
||||
}
|
||||
else {
|
||||
onig_chain_link_add(reg, new_reg);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,207 @@
|
||||
/**********************************************************************
|
||||
regsyntax.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
OnigSyntaxType OnigSyntaxPosixBasic = {
|
||||
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
|
||||
, 0
|
||||
, 0
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxPosixExtended = {
|
||||
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_BRACE_INTERVAL |
|
||||
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
|
||||
, 0
|
||||
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
|
||||
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
|
||||
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
|
||||
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxEmacs = {
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
|
||||
ONIG_SYN_OP_ESC_BRACE_INTERVAL |
|
||||
ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
|
||||
ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
|
||||
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
|
||||
, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
|
||||
, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
|
||||
, ONIG_OPTION_NONE
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxGrep = {
|
||||
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
|
||||
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
|
||||
ONIG_SYN_OP_ESC_VBAR_ALT |
|
||||
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
|
||||
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
|
||||
ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
|
||||
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
|
||||
, 0
|
||||
, ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
|
||||
, ONIG_OPTION_NONE
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxGnuRegex = {
|
||||
SYN_GNU_REGEX_OP
|
||||
, 0
|
||||
, SYN_GNU_REGEX_BV
|
||||
, ONIG_OPTION_NONE
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxJava = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
|
||||
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
|
||||
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
|
||||
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
};
|
||||
|
||||
OnigSyntaxType OnigSyntaxPerl = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
|
||||
, SYN_GNU_REGEX_BV
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
};
|
||||
|
||||
|
||||
extern int
|
||||
onig_set_default_syntax(OnigSyntaxType* syntax)
|
||||
{
|
||||
if (IS_NULL(syntax))
|
||||
syntax = ONIG_SYNTAX_RUBY;
|
||||
|
||||
OnigDefaultSyntax = syntax;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
|
||||
{
|
||||
*to = *from;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
|
||||
{
|
||||
syntax->op = op;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
|
||||
{
|
||||
syntax->op2 = op2;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
|
||||
{
|
||||
syntax->behavior = behavior;
|
||||
}
|
||||
|
||||
extern void
|
||||
onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
|
||||
{
|
||||
syntax->options = options;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_op(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->op;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_op2(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->op2;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_behavior(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->behavior;
|
||||
}
|
||||
|
||||
extern OnigOptionType
|
||||
onig_get_syntax_options(OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->options;
|
||||
}
|
||||
|
||||
#ifdef USE_VARIABLE_META_CHARS
/*
 * Set one entry of enc's meta character table.  `what` selects the entry
 * (one of the ONIG_META_CHAR_* selectors) and `code` is the code point to
 * store.  Returns 0 on success, ONIGERR_INVALID_ARGUMENT for an unknown
 * selector.
 */
extern int onig_set_meta_char(OnigEncoding enc,
                              unsigned int what, OnigCodePoint code)
{
  if (what == ONIG_META_CHAR_ESCAPE)
    enc->meta_char_table.esc = code;
  else if (what == ONIG_META_CHAR_ANYCHAR)
    enc->meta_char_table.anychar = code;
  else if (what == ONIG_META_CHAR_ANYTIME)
    enc->meta_char_table.anytime = code;
  else if (what == ONIG_META_CHAR_ZERO_OR_ONE_TIME)
    enc->meta_char_table.zero_or_one_time = code;
  else if (what == ONIG_META_CHAR_ONE_OR_MORE_TIME)
    enc->meta_char_table.one_or_more_time = code;
  else if (what == ONIG_META_CHAR_ANYCHAR_ANYTIME)
    enc->meta_char_table.anychar_anytime = code;
  else
    return ONIGERR_INVALID_ARGUMENT;

  return 0;
}
#endif /* USE_VARIABLE_META_CHARS */
|
||||
@@ -0,0 +1,76 @@
|
||||
/**********************************************************************
|
||||
regtrav.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
|
||||
static int
|
||||
capture_tree_traverse(OnigCaptureTreeNode* node, int at,
|
||||
int(*callback_func)(int,int,int,int,int,void*),
|
||||
int level, void* arg)
|
||||
{
|
||||
int r, i;
|
||||
|
||||
if (node == (OnigCaptureTreeNode* )0)
|
||||
return 0;
|
||||
|
||||
if ((at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) != 0) {
|
||||
r = (*callback_func)(node->group, node->beg, node->end,
|
||||
level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
for (i = 0; i < node->num_childs; i++) {
|
||||
r = capture_tree_traverse(node->childs[i], at,
|
||||
callback_func, level + 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
if ((at & ONIG_TRAVERSE_CALLBACK_AT_LAST) != 0) {
|
||||
r = (*callback_func)(node->group, node->beg, node->end,
|
||||
level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* USE_CAPTURE_HISTORY */
|
||||
|
||||
extern int
|
||||
onig_capture_tree_traverse(OnigRegion* region, int at,
|
||||
int(*callback_func)(int,int,int,int,int,void*), void* arg)
|
||||
{
|
||||
#ifdef USE_CAPTURE_HISTORY
|
||||
return capture_tree_traverse(region->history_root, at,
|
||||
callback_func, 0, arg);
|
||||
#else
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
/**********************************************************************
|
||||
regversion.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "oniguruma.h"
|
||||
#include <stdio.h>
|
||||
|
||||
extern const char*
|
||||
onig_version(void)
|
||||
{
|
||||
static char s[12];
|
||||
|
||||
sprintf(s, "%d.%d.%d",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
return s;
|
||||
}
|
||||
|
||||
extern const char*
|
||||
onig_copyright(void)
|
||||
{
|
||||
static char s[58];
|
||||
|
||||
sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
return s;
|
||||
}
|
||||
@@ -0,0 +1,717 @@
|
||||
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
|
||||
|
||||
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
|
||||
|
||||
#include "config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#ifdef NOT_RUBY
|
||||
#include "regint.h"
|
||||
#else
|
||||
#ifdef RUBY_PLATFORM
|
||||
#define xmalloc ruby_xmalloc
|
||||
#define xcalloc ruby_xcalloc
|
||||
#define xrealloc ruby_xrealloc
|
||||
#define xfree ruby_xfree
|
||||
|
||||
void *xmalloc(long);
|
||||
void *xcalloc(long, long);
|
||||
void *xrealloc(void *, long);
|
||||
void xfree(void *);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "st.h"
|
||||
|
||||
typedef struct st_table_entry st_table_entry;
|
||||
|
||||
struct st_table_entry {
|
||||
unsigned int hash;
|
||||
st_data_t key;
|
||||
st_data_t record;
|
||||
st_table_entry *next;
|
||||
};
|
||||
|
||||
/*
 * ST_DEFAULT_MAX_DENSITY is the default for the largest average number
 * of items per bin that is allowed before the number of bins is
 * increased.
 */
#define ST_DEFAULT_MAX_DENSITY 5

/*
 * ST_DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
 * allocated initially.
 */
#define ST_DEFAULT_INIT_TABLE_SIZE 11
|
||||
|
||||
static int numcmp(long, long);
|
||||
static int numhash(long);
|
||||
static struct st_hash_type type_numhash = {
|
||||
numcmp,
|
||||
numhash,
|
||||
st_nothing_key_free,
|
||||
st_nothing_key_clone
|
||||
};
|
||||
|
||||
/* extern int strcmp(const char *, const char *); */
|
||||
static int strhash(const char *);
|
||||
static struct st_hash_type type_strhash = {
|
||||
strcmp,
|
||||
strhash,
|
||||
st_nothing_key_free,
|
||||
st_nothing_key_clone
|
||||
};
|
||||
|
||||
static int strend_cmp(st_strend_key*, st_strend_key*);
|
||||
static int strend_hash(st_strend_key*);
|
||||
static int strend_key_free(st_data_t key);
|
||||
static st_data_t strend_key_clone(st_data_t x);
|
||||
|
||||
static struct st_hash_type type_strend_hash = {
|
||||
strend_cmp,
|
||||
strend_hash,
|
||||
strend_key_free,
|
||||
strend_key_clone
|
||||
};
|
||||
|
||||
/* Forward declaration of the table-growing helper. */
static void rehash(st_table *);
|
||||
|
||||
/* Allocate storage for one object of `type` with xmalloc. */
#define alloc(type) (type*)xmalloc((unsigned)sizeof(type))
|
||||
/* Allocate n zero-filled elements of size s with xcalloc. */
#define Calloc(n,s) (char*)xcalloc((n),(s))
|
||||
|
||||
/* Keys are equal when they are identical values or when the table's
   compare callback returns 0. */
#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
|
||||
|
||||
/* Hash a key with the table's hash callback. */
#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
|
||||
/* Index of the bin a key belongs to: its hash modulo the bin count. */
#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
|
||||
|
||||
/*
 * MINSIZE is the minimum size of a dictionary.
 */
#define MINSIZE 8

/*
 * Table of prime numbers 2^n+a, 3<=n<=30, terminated by 0.
 * Entry i is the bin count chosen for requested sizes below (MINSIZE << i).
 */
static long primes[] = {
    8 + 3,
    16 + 3,
    32 + 5,
    64 + 3,
    128 + 3,
    256 + 27,
    512 + 9,
    1024 + 9,
    2048 + 5,
    4096 + 3,
    8192 + 27,
    16384 + 43,
    32768 + 3,
    65536 + 45,
    131072 + 29,
    262144 + 3,
    524288 + 21,
    1048576 + 7,
    2097152 + 17,
    4194304 + 15,
    8388608 + 9,
    16777216 + 43,
    33554432 + 35,
    67108864 + 15,
    134217728 + 29,
    268435456 + 3,
    536870912 + 11,
    1073741824 + 85,
    0
};

/*
 * Round a requested size up to a bin count taken from primes[].
 *
 * Returns the prime paired with the smallest power of two (starting at
 * MINSIZE) that is strictly greater than size, or -1 when size is too
 * large for the table.
 */
static int
new_size(int size)
{
    int i;
    int limit = MINSIZE;

    for (i = 0; primes[i] != 0; i++) {
        if (limit > size) return (int )primes[i];
        /* Double the limit only while another prime follows, so it never
           exceeds 2^30 and the shift cannot overflow a signed int. */
        if (primes[i + 1] != 0) limit <<= 1;
    }
    /* Ran out of primes */
    return -1;			/* should raise exception */
}
|
||||
|
||||
#ifdef HASH_LOG
static int collision = 0;
static int init_st = 0;

/*
 * Write the collision counter to /tmp/col.  Registered with atexit by
 * st_init_table_with_size when HASH_LOG is defined.  If the file cannot
 * be opened the statistic is silently dropped.
 */
static void
stat_col(void)
{
    FILE *f = fopen("/tmp/col", "w");

    if (f == 0) return;		/* nothing to write to */
    fprintf(f, "collision: %d\n", collision);
    fclose(f);
}
#endif
|
||||
|
||||
st_table*
|
||||
st_init_table_with_size(type, size)
|
||||
struct st_hash_type *type;
|
||||
int size;
|
||||
{
|
||||
st_table *tbl;
|
||||
|
||||
#ifdef HASH_LOG
|
||||
if (init_st == 0) {
|
||||
init_st = 1;
|
||||
atexit(stat_col);
|
||||
}
|
||||
#endif
|
||||
|
||||
size = new_size(size); /* round up to prime number */
|
||||
|
||||
tbl = alloc(st_table);
|
||||
tbl->type = type;
|
||||
tbl->num_entries = 0;
|
||||
tbl->num_bins = size;
|
||||
tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
|
||||
|
||||
return tbl;
|
||||
}
|
||||
|
||||
/* Create a table for `type`, passing 0 as the size hint. */
st_table*
st_init_table(type)
    struct st_hash_type *type;
{
    st_table *table = st_init_table_with_size(type, 0);

    return table;
}
|
||||
|
||||
st_table*
|
||||
st_init_numtable(void)
|
||||
{
|
||||
return st_init_table(&type_numhash);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_numtable_with_size(size)
|
||||
int size;
|
||||
{
|
||||
return st_init_table_with_size(&type_numhash, size);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_strtable(void)
|
||||
{
|
||||
return st_init_table(&type_strhash);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_strtable_with_size(size)
|
||||
int size;
|
||||
{
|
||||
return st_init_table_with_size(&type_strhash, size);
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_init_strend_table_with_size(size)
|
||||
int size;
|
||||
{
|
||||
return st_init_table_with_size(&type_strend_hash, size);
|
||||
}
|
||||
|
||||
void
|
||||
st_free_table(table)
|
||||
st_table *table;
|
||||
{
|
||||
register st_table_entry *ptr, *next;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < table->num_bins; i++) {
|
||||
ptr = table->bins[i];
|
||||
while (ptr != 0) {
|
||||
next = ptr->next;
|
||||
table->type->key_free(ptr->key);
|
||||
free(ptr);
|
||||
ptr = next;
|
||||
}
|
||||
}
|
||||
free(table->bins);
|
||||
free(table);
|
||||
}
|
||||
|
||||
/*
 * True when `ptr` is a live entry that does NOT match: either its cached
 * hash differs from `hash_val` or its key is not EQUAL to `k`.
 * The key parameter is named `k` (not `key`) so that the member access
 * `(ptr)->key` is never rewritten by macro substitution.
 */
#define PTR_NOT_EQUAL(table, ptr, hash_val, k) \
((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (k), (ptr)->key)))

/* Counts first-entry mismatches when HASH_LOG is enabled; expands to nothing otherwise. */
#ifdef HASH_LOG
#define COLLISION collision++
#else
#define COLLISION
#endif

/*
 * Locate the entry for the key being looked up.
 *
 * On exit `bin_pos` holds hash_val modulo the bin count and `ptr` is the
 * matching entry, or 0 when the chain has no match.
 *
 * NOTE: the key is not a parameter; the macro reads a variable named
 * `key` from the scope of the expansion site.  `ptr` and `bin_pos` must
 * be assignable lvalues.
 */
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
    bin_pos = (hash_val)%(table)->num_bins;\
    ptr = (table)->bins[bin_pos];\
    if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
        COLLISION;\
        while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
            ptr = ptr->next;\
        }\
        ptr = ptr->next;\
    }\
} while (0)
|
||||
|
||||
int
|
||||
st_lookup(table, key, value)
|
||||
st_table *table;
|
||||
register st_data_t key;
|
||||
st_data_t *value;
|
||||
{
|
||||
unsigned int hash_val, bin_pos;
|
||||
register st_table_entry *ptr;
|
||||
|
||||
hash_val = do_hash(key, table);
|
||||
FIND_ENTRY(table, ptr, hash_val, bin_pos);
|
||||
|
||||
if (ptr == 0) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
if (value != 0) *value = ptr->record;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
st_lookup_strend(table, str_key, end_key, value)
|
||||
st_table *table;
|
||||
const unsigned char* str_key;
|
||||
const unsigned char* end_key;
|
||||
st_data_t *value;
|
||||
{
|
||||
st_strend_key key;
|
||||
|
||||
key.s = (unsigned char* )str_key;
|
||||
key.end = (unsigned char* )end_key;
|
||||
|
||||
return st_lookup(table, (st_data_t )(&key), value);
|
||||
}
|
||||
|
||||
/*
 * Link a new entry for (k, value) at the head of bin `bin_pos`.
 *
 * When the entries-per-bin ratio exceeds ST_DEFAULT_MAX_DENSITY the table
 * is rehashed first and `bin_pos` is recomputed from `hash_val`, so
 * `bin_pos` must be an assignable lvalue.
 *
 * The key parameter is named `k` (not `key`) so that the member access
 * `entry->key` is never rewritten by macro substitution.  Arguments must
 * not refer to an identifier named `entry`, which the macro declares
 * locally.
 */
#define ADD_DIRECT(table, k, value, hash_val, bin_pos)\
do {\
    st_table_entry *entry;\
    if ((table)->num_entries/((table)->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
        rehash(table);\
        bin_pos = (hash_val) % (table)->num_bins;\
    }\
    \
    entry = alloc(st_table_entry);\
    \
    entry->hash = (hash_val);\
    entry->key = (k);\
    entry->record = (value);\
    entry->next = (table)->bins[bin_pos];\
    (table)->bins[bin_pos] = entry;\
    (table)->num_entries++;\
} while (0)
|
||||
|
||||
int
|
||||
st_insert(table, key, value)
|
||||
register st_table *table;
|
||||
register st_data_t key;
|
||||
st_data_t value;
|
||||
{
|
||||
unsigned int hash_val, bin_pos;
|
||||
register st_table_entry *ptr;
|
||||
|
||||
hash_val = do_hash(key, table);
|
||||
FIND_ENTRY(table, ptr, hash_val, bin_pos);
|
||||
|
||||
if (ptr == 0) {
|
||||
ADD_DIRECT(table, key, value, hash_val, bin_pos);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
ptr->record = value;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
st_insert_strend(table, str_key, end_key, value)
|
||||
st_table *table;
|
||||
const unsigned char* str_key;
|
||||
const unsigned char* end_key;
|
||||
st_data_t value;
|
||||
{
|
||||
st_strend_key* key;
|
||||
|
||||
key = alloc(st_strend_key);
|
||||
key->s = (unsigned char* )str_key;
|
||||
key->end = (unsigned char* )end_key;
|
||||
|
||||
return st_insert(table, (st_data_t )key, value);
|
||||
}
|
||||
|
||||
void
|
||||
st_add_direct(table, key, value)
|
||||
st_table *table;
|
||||
st_data_t key;
|
||||
st_data_t value;
|
||||
{
|
||||
unsigned int hash_val, bin_pos;
|
||||
|
||||
hash_val = do_hash(key, table);
|
||||
bin_pos = hash_val % table->num_bins;
|
||||
ADD_DIRECT(table, key, value, hash_val, bin_pos);
|
||||
}
|
||||
|
||||
void
|
||||
st_add_direct_strend(table, str_key, end_key, value)
|
||||
st_table *table;
|
||||
const unsigned char* str_key;
|
||||
const unsigned char* end_key;
|
||||
st_data_t value;
|
||||
{
|
||||
st_strend_key* key;
|
||||
|
||||
key = alloc(st_strend_key);
|
||||
key->s = (unsigned char* )str_key;
|
||||
key->end = (unsigned char* )end_key;
|
||||
st_add_direct(table, (st_data_t )key, value);
|
||||
}
|
||||
|
||||
static void
|
||||
rehash(table)
|
||||
register st_table *table;
|
||||
{
|
||||
register st_table_entry *ptr, *next, **new_bins;
|
||||
int i, old_num_bins = table->num_bins, new_num_bins;
|
||||
unsigned int hash_val;
|
||||
|
||||
new_num_bins = new_size(old_num_bins+1);
|
||||
new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
|
||||
|
||||
for(i = 0; i < old_num_bins; i++) {
|
||||
ptr = table->bins[i];
|
||||
while (ptr != 0) {
|
||||
next = ptr->next;
|
||||
hash_val = ptr->hash % new_num_bins;
|
||||
ptr->next = new_bins[hash_val];
|
||||
new_bins[hash_val] = ptr;
|
||||
ptr = next;
|
||||
}
|
||||
}
|
||||
free(table->bins);
|
||||
table->num_bins = new_num_bins;
|
||||
table->bins = new_bins;
|
||||
}
|
||||
|
||||
st_table*
|
||||
st_copy(old_table)
|
||||
st_table *old_table;
|
||||
{
|
||||
st_table *new_table;
|
||||
st_table_entry *ptr, *entry;
|
||||
int i, num_bins = old_table->num_bins;
|
||||
|
||||
new_table = alloc(st_table);
|
||||
if (new_table == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
*new_table = *old_table;
|
||||
new_table->bins = (st_table_entry**)
|
||||
Calloc((unsigned)num_bins, sizeof(st_table_entry*));
|
||||
|
||||
if (new_table->bins == 0) {
|
||||
free(new_table);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for(i = 0; i < num_bins; i++) {
|
||||
new_table->bins[i] = 0;
|
||||
ptr = old_table->bins[i];
|
||||
while (ptr != 0) {
|
||||
entry = alloc(st_table_entry);
|
||||
if (entry == 0) {
|
||||
free(new_table->bins);
|
||||
free(new_table);
|
||||
return 0;
|
||||
}
|
||||
*entry = *ptr;
|
||||
entry->key = old_table->type->key_clone(ptr->key);
|
||||
entry->next = new_table->bins[i];
|
||||
new_table->bins[i] = entry;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
}
|
||||
return new_table;
|
||||
}
|
||||
|
||||
int
|
||||
st_delete(table, key, value)
|
||||
register st_table *table;
|
||||
register st_data_t *key;
|
||||
st_data_t *value;
|
||||
{
|
||||
unsigned int hash_val;
|
||||
st_table_entry *tmp;
|
||||
register st_table_entry *ptr;
|
||||
|
||||
hash_val = do_hash_bin(*key, table);
|
||||
ptr = table->bins[hash_val];
|
||||
|
||||
if (ptr == 0) {
|
||||
if (value != 0) *value = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (EQUAL(table, *key, ptr->key)) {
|
||||
table->bins[hash_val] = ptr->next;
|
||||
table->num_entries--;
|
||||
if (value != 0) *value = ptr->record;
|
||||
*key = ptr->key;
|
||||
free(ptr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
for(; ptr->next != 0; ptr = ptr->next) {
|
||||
if (EQUAL(table, ptr->next->key, *key)) {
|
||||
tmp = ptr->next;
|
||||
ptr->next = ptr->next->next;
|
||||
table->num_entries--;
|
||||
if (value != 0) *value = tmp->record;
|
||||
*key = tmp->key;
|
||||
free(tmp);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
st_delete_safe(table, key, value, never)
|
||||
register st_table *table;
|
||||
register st_data_t *key;
|
||||
st_data_t *value;
|
||||
st_data_t never;
|
||||
{
|
||||
unsigned int hash_val;
|
||||
register st_table_entry *ptr;
|
||||
|
||||
hash_val = do_hash_bin(*key, table);
|
||||
ptr = table->bins[hash_val];
|
||||
|
||||
if (ptr == 0) {
|
||||
if (value != 0) *value = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
for(; ptr != 0; ptr = ptr->next) {
|
||||
if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
|
||||
table->num_entries--;
|
||||
*key = ptr->key;
|
||||
if (value != 0) *value = ptr->record;
|
||||
ptr->key = ptr->record = never;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
delete_never(key, value, never)
|
||||
st_data_t key, value, never;
|
||||
{
|
||||
if (value == never) return ST_DELETE;
|
||||
return ST_CONTINUE;
|
||||
}
|
||||
|
||||
void
|
||||
st_cleanup_safe(table, never)
|
||||
st_table *table;
|
||||
st_data_t never;
|
||||
{
|
||||
int num_entries = table->num_entries;
|
||||
|
||||
st_foreach(table, delete_never, never);
|
||||
table->num_entries = num_entries;
|
||||
}
|
||||
|
||||
void
|
||||
st_foreach(table, func, arg)
|
||||
st_table *table;
|
||||
int (*func)();
|
||||
st_data_t arg;
|
||||
{
|
||||
st_table_entry *ptr, *last, *tmp;
|
||||
enum st_retval retval;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < table->num_bins; i++) {
|
||||
last = 0;
|
||||
for(ptr = table->bins[i]; ptr != 0;) {
|
||||
retval = (*func)(ptr->key, ptr->record, arg, 0);
|
||||
switch (retval) {
|
||||
case ST_CHECK: /* check if hash is modified during iteration */
|
||||
tmp = 0;
|
||||
if (i < table->num_bins) {
|
||||
for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
|
||||
if (tmp == ptr) break;
|
||||
}
|
||||
}
|
||||
if (!tmp) {
|
||||
/* call func with error notice */
|
||||
retval = (*func)(0, 0, arg, 1);
|
||||
return;
|
||||
}
|
||||
/* fall through */
|
||||
case ST_CONTINUE:
|
||||
last = ptr;
|
||||
ptr = ptr->next;
|
||||
break;
|
||||
case ST_STOP:
|
||||
return;
|
||||
case ST_DELETE:
|
||||
tmp = ptr;
|
||||
if (last == 0) {
|
||||
table->bins[i] = ptr->next;
|
||||
}
|
||||
else {
|
||||
last->next = ptr->next;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
table->type->key_free(tmp->key);
|
||||
free(tmp);
|
||||
table->num_entries--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Hash a NUL-terminated string.  The algorithm is chosen at compile time
 * by HASH_ELFHASH / HASH_PERL; otherwise a multiply-by-997 hash is used.
 *
 * All accumulation is done in unsigned int so that wrap-around is well
 * defined; the original signed arithmetic overflowed on longer strings.
 * Where the original shifted a signed value right, the value is converted
 * back to int for the shift so results match the wrapping behaviour the
 * signed code had in practice (right-shifting a negative int is
 * implementation-defined, not undefined).
 */
static int
strhash(string)
    register const char *string;
{
    register int c;

#ifdef HASH_ELFHASH
    register unsigned int h = 0, g;

    while ((c = *string++) != '\0') {
        h = ( h << 4 ) + c;
        if ( (g = h & 0xF0000000) != 0 )
            h ^= g >> 24;
        h &= ~g;
    }
    return h;
#elif HASH_PERL
    register unsigned int val = 0;

    while ((c = *string++) != '\0') {
        val += (unsigned int )c;
        val += (val << 10);
        val ^= (unsigned int )((int )val >> 6);
    }
    val += (val << 3);
    val ^= (unsigned int )((int )val >> 11);

    return (int )(val + (val << 15));
#else
    register unsigned int val = 0;

    while ((c = *string++) != '\0') {
        val = val*997u + (unsigned int )c;
    }

    return (int )(val + (unsigned int )((int )val >> 5));
#endif
}
|
||||
|
||||
/* Compare callback for numeric keys: 0 when x equals y, 1 otherwise. */
static int
numcmp(x, y)
    long x, y;
{
    return (x == y) ? 0 : 1;
}
|
||||
|
||||
/* Hash callback for numeric keys: the key itself, converted to int. */
static int
numhash(n)
    long n;
{
    int hashed = n;

    return hashed;
}
|
||||
|
||||
extern int
|
||||
st_nothing_key_free(st_data_t key) { return 0; }
|
||||
|
||||
extern st_data_t
|
||||
st_nothing_key_clone(st_data_t x) { return x; }
|
||||
|
||||
/*
 * Compare two byte ranges.
 * Returns 1 when their lengths differ; otherwise the difference of the
 * first pair of unequal bytes (x minus y), or 0 when all bytes match.
 */
static int strend_cmp(st_strend_key* x, st_strend_key* y)
{
    unsigned char *lhs, *rhs;
    int diff;

    if ((x->end - x->s) != (y->end - y->s))
        return 1;

    for (lhs = x->s, rhs = y->s; lhs < x->end; lhs++, rhs++) {
        diff = (int )*lhs - (int )*rhs;
        if (diff != 0) return diff;
    }

    return 0;
}
|
||||
|
||||
/*
 * Hash the bytes in [x->s, x->end) with a multiply-by-997 hash.
 *
 * Accumulation is done in unsigned int so that wrap-around is well
 * defined; the original signed arithmetic overflowed on longer ranges.
 * The final fold converts back to int before shifting so results match
 * the wrapping behaviour the signed code had in practice.
 */
static int strend_hash(st_strend_key* x)
{
    unsigned int val;
    unsigned char *p;

    val = 0;
    p = x->s;
    while (p < x->end) {
        val = val * 997u + (unsigned int )*p++;
    }

    return (int )(val + (unsigned int )((int )val >> 5));
}
|
||||
|
||||
/* key_free callback for byte-range keys: releases the key with xfree(); returns 0. */
static int strend_key_free(st_data_t x)
{
    void *mem = (void* )x;

    xfree(mem);
    return 0;
}
|
||||
|
||||
/*
 * key_clone callback for byte-range keys: allocates a new st_strend_key
 * holding the same s/end pointers.  The bytes they point at are shared,
 * not copied.
 */
static st_data_t strend_key_clone(st_data_t x)
{
    st_strend_key* src = (st_strend_key* )x;
    st_strend_key* dup = alloc(st_strend_key);

    *dup = *src;
    return (st_data_t )dup;
}
|
||||
@@ -0,0 +1,77 @@
|
||||
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */

/* @(#) st.h 5.1 89/12/14 */

#ifndef ST_INCLUDED
#define ST_INCLUDED

/* Generic word used for both keys and records. */
typedef unsigned long st_data_t;
#define ST_DATA_T_DEFINED

typedef struct st_table st_table;

/* Per-table callbacks. */
struct st_hash_type {
    int (*compare)();		/* returns 0 when two keys are equal */
    int (*hash)();		/* maps a key to its hash value */
    int (*key_free)();		/* releases a key */
    st_data_t (*key_clone)();	/* duplicates a key */
};

struct st_table {
    struct st_hash_type *type;
    int num_bins;
    int num_entries;
    struct st_table_entry **bins;
};

/* Key describing the byte range [s, end). */
typedef struct {
    unsigned char* s;
    unsigned char* end;
} st_strend_key;

/* Membership test: a lookup that discards the record. */
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)

/* Values an st_foreach() callback may return. */
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};

/* Prototype wrapper; expands to the argument list unless `_` is predefined. */
#ifndef _
# define _(args) args
#endif
#ifndef ANYARGS
# ifdef __cplusplus
#   define ANYARGS ...
# else
#   define ANYARGS
# endif
#endif

/* construction */
st_table *st_init_table _((struct st_hash_type *));
st_table *st_init_table_with_size _((struct st_hash_type *, int));
st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
st_table *st_init_strend_table_with_size _((int));

/* modification, lookup and iteration */
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*));
void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
void st_add_direct _((st_table *, st_data_t, st_data_t));
void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));

/* teardown and copying */
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));

/* key callbacks for keys that own no storage */
extern st_data_t st_nothing_key_clone _((st_data_t key));
extern int st_nothing_key_free _((st_data_t key));

#define ST_NUMCMP	((int (*)()) 0)
#define ST_NUMHASH	((int (*)()) -2)

#define st_numcmp	ST_NUMCMP
#define st_numhash	ST_NUMHASH

#endif /* ST_INCLUDED */
|
||||
@@ -0,0 +1,11 @@
|
||||
--TEST--
|
||||
Bug #31911 (mb_decode_mimeheader() is case-sensitive to hex escapes)
|
||||
--FILE--
|
||||
<?php
|
||||
echo mb_decode_mimeheader("Works: =?iso-8859-1?q?=3F=3F=3F?=");
|
||||
echo "\n";
|
||||
echo mb_decode_mimeheader("Fails: =?iso-8859-1?q?=3f=3f=3f?=")
|
||||
?>
|
||||
--EXPECT--
|
||||
Works: ???
|
||||
Fails: ???
|
||||
Reference in New Issue
Block a user