1
0
mirror of https://github.com/php/php-src.git synced 2026-04-27 18:23:26 +02:00

This commit was manufactured by cvs2svn to create branch 'PHP_5_0'.

This commit is contained in:
SVN Migration
2005-02-20 22:20:25 +00:00
parent 40aa1c6e73
commit 8190860776
20 changed files with 6910 additions and 0 deletions
+136
View File
@@ -0,0 +1,136 @@
/*
* "streamable kanji code filter and converter"
* Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
*
* LICENSE NOTICES
*
* This file is part of "streamable kanji code filter and converter",
* which is distributed under the terms of GNU Lesser General Public
* License (version 2) as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with "streamable kanji code filter and converter";
* if not, write to the Free Software Foundation, Inc., 59 Temple Place,
* Suite 330, Boston, MA 02111-1307 USA
*
* The author of this file:
*
*/
/*
* The source code included in this file was separated from mbfilter.c
* by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mbfilter.h"
#include "mbfilter_iso8859_16.h"
#include "unicode_table_iso8859_16.h"
/* NULL-terminated list of alternative names for ISO-8859-16. */
static const char *mbfl_encoding_8859_16_aliases[] = {"ISO_8859-16", NULL};
/*
 * Encoding descriptor for ISO-8859-16.
 *
 * The initializer is positional: encoding id, two name strings (both
 * "ISO-8859-16"), a pointer to the alias list above, a NULL entry, and the
 * MBFL_ENCTYPE_SBCS flag.
 * NOTE(review): the role of each slot (e.g. which string is the MIME name and
 * what the NULL slot holds) is defined by mbfl_encoding in the mbfl headers,
 * which are not visible here -- confirm before reordering.
 */
const mbfl_encoding mbfl_encoding_8859_16 = {
mbfl_no_encoding_8859_16,
"ISO-8859-16",
"ISO-8859-16",
(const char *(*)[])&mbfl_encoding_8859_16_aliases,
NULL,
MBFL_ENCTYPE_SBCS
};
/*
 * Identification vtable for ISO-8859-16: the encoding id followed by the
 * common constructor, the common destructor, and mbfl_filt_ident_true as the
 * per-character function.
 * NOTE(review): slot meanings are inferred from the initializer names; confirm
 * against the definition of struct mbfl_identify_vtbl.
 */
const struct mbfl_identify_vtbl vtbl_identify_8859_16 = {
mbfl_no_encoding_8859_16,
mbfl_filt_ident_common_ctor,
mbfl_filt_ident_common_dtor,
mbfl_filt_ident_true
};
/*
 * Conversion vtable ISO-8859-16 => wchar: the two encoding ids, the common
 * constructor/destructor, mbfl_filt_conv_8859_16_wchar as the per-character
 * filter, and the common flush function.
 * NOTE(review): slot order is dictated by struct mbfl_convert_vtbl, which is
 * declared outside this file -- confirm before reordering.
 */
const struct mbfl_convert_vtbl vtbl_8859_16_wchar = {
mbfl_no_encoding_8859_16,
mbfl_no_encoding_wchar,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_8859_16_wchar,
mbfl_filt_conv_common_flush
};
/*
 * Conversion vtable wchar => ISO-8859-16: the two encoding ids, the common
 * constructor/destructor, mbfl_filt_conv_wchar_8859_16 as the per-character
 * filter, and the common flush function.
 * NOTE(review): slot order is dictated by struct mbfl_convert_vtbl, which is
 * declared outside this file -- confirm before reordering.
 */
const struct mbfl_convert_vtbl vtbl_wchar_8859_16 = {
mbfl_no_encoding_wchar,
mbfl_no_encoding_8859_16,
mbfl_filt_conv_common_ctor,
mbfl_filt_conv_common_dtor,
mbfl_filt_conv_wchar_8859_16,
mbfl_filt_conv_common_flush
};
/*
 * Evaluate `statement` once; if its value is negative, return -1 from the
 * enclosing function. Only usable inside functions that return int.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
 * ISO-8859-16 => wchar
 *
 * Converts one input value `c` and passes the result to the filter's output
 * function.
 *   - 0x00..0x9f        : passed through unchanged.
 *   - 0xa0..0xff        : looked up in iso8859_16_ucs_table; when the table
 *                         entry is not positive, the byte is tagged with
 *                         MBFL_WCSPLANE_8859_16 instead.
 *   - anything else     : tagged with MBFL_WCSGROUP_THROUGH.
 *
 * Returns `c`, or -1 when the output function reports a negative result.
 */
int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter)
{
	int wc;

	if (c < 0 || c >= 0x100) {
		/* Outside the single-byte range. */
		wc = c;
		wc &= MBFL_WCSGROUP_MASK;
		wc |= MBFL_WCSGROUP_THROUGH;
	} else if (c < 0xa0) {
		wc = c;
	} else {
		wc = iso8859_16_ucs_table[c - 0xa0];
		if (wc <= 0) {
			wc = c;
			wc &= MBFL_WCSPLANE_MASK;
			wc |= MBFL_WCSPLANE_8859_16;
		}
	}

	if ((*filter->output_function)(wc, filter->data) < 0) {
		return -1;
	}
	return c;
}
/*
 * wchar => ISO-8859-16
 *
 * Converts one wide value `c` to an ISO-8859-16 byte and passes it to the
 * filter's output function.
 *   - 0x00..0x9f are emitted unchanged.
 *   - Other values are searched for in iso8859_16_ucs_table (scanning from the
 *     last entry down to the first); a hit at index i yields byte 0xa0 + i.
 *   - With no hit, a value tagged with MBFL_WCSPLANE_8859_16 is emitted with
 *     the tag masked off.
 *   - Otherwise nothing is emitted, unless the filter's illegal_mode is not
 *     MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE, in which case
 *     mbfl_filt_conv_illegal_output() is called with `c`.
 *
 * Returns `c`, or -1 when a callee reports a negative result.
 */
int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter)
{
	int out = -1;
	int idx;

	if (c >= 0 && c < 0xa0) {
		out = c;
	} else {
		for (idx = 95; idx >= 0; idx--) {
			if (c == iso8859_16_ucs_table[idx]) {
				out = 0xa0 + idx;
				break;
			}
		}
		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_8859_16) {
			out = c & MBFL_WCSPLANE_MASK;
		}
	}

	if (out >= 0) {
		if ((*filter->output_function)(out, filter->data) < 0) {
			return -1;
		}
	} else if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
		if (mbfl_filt_conv_illegal_output(c, filter) < 0) {
			return -1;
		}
	}
	return c;
}
+23
View File
@@ -0,0 +1,23 @@
/*
* COPYRIGHT NOTICE
*
* This file is a portion of "streamable kanji code filter and converter"
* library, which is distributed under GNU Lesser General Public License
* version 2.1.
*
*/
#ifndef MBFL_MBFILTER_ISO8859_16_H
#define MBFL_MBFILTER_ISO8859_16_H
#include "mbfilter.h"
/* Encoding descriptor for ISO-8859-16. */
extern const mbfl_encoding mbfl_encoding_8859_16;
/* Identification vtable for ISO-8859-16. */
extern const struct mbfl_identify_vtbl vtbl_identify_8859_16;
/* Conversion vtables: ISO-8859-16 => wchar and wchar => ISO-8859-16. */
extern const struct mbfl_convert_vtbl vtbl_8859_16_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_8859_16;
/*
 * Per-character conversion filters. Each takes one input value `c` and the
 * filter whose output function receives the converted value.
 */
int mbfl_filt_conv_8859_16_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_8859_16(int c, mbfl_convert_filter *filter);
#endif /* MBFL_MBFILTER_ISO8859_16_H */
+42
View File
@@ -0,0 +1,42 @@
#!/usr/bin/awk -f
#
# $Id$
#
# Description: a script that generates a single byte code set to Unicode
# mapping table.
#
# Fields are separated by a single space, tab, or '#'.
BEGIN {
	FS="[ \t#]"
}

# Comment lines carry no mapping: skip them so they never reach the data rule
# below (previously they fell through and stored a junk entry in tbl[0]).
/^#/ {
	next
}

# Data line: field 1 is the byte value, field 2 the Unicode code point.
# "$1 + 0" forces a numeric array index.
# NOTE(review): if the input uses hexadecimal fields (e.g. 0xA0), this and the
# %x conversion below depend on the awk implementation parsing hex strings
# numerically -- confirm with the awk used by the build.
{
	tbl[$1 + 0] = $2
}

# Emit the C table for byte values 160..255, eight entries per line.
# Variables IFNDEF_NAME (optional include-guard macro) and TABLE_NAME (array
# identifier) are expected to be supplied on the command line.
END {
	print "/* This file is automatically generated. Do not edit! */"
	if (IFNDEF_NAME) {
		print "#ifndef " IFNDEF_NAME
		# Define the guard macro so the #ifndef actually prevents re-inclusion.
		print "#define " IFNDEF_NAME
	}

	print "static const unsigned int " TABLE_NAME "[] = {"

	# One entry per iteration: the order of evaluation of several i++
	# arguments inside a single printf call is not something awk guarantees.
	for (i = 160; i < 256; i++) {
		if (i % 8 == 0) {
			printf("\t");
		}
		printf("0x%04x", tbl[i]);
		if (i == 255) {
			# A bare "print" would output $0, which in END may still hold the
			# last input record; emit an explicit newline instead.
			printf("\n");
		} else if ((i + 1) % 8 == 0) {
			printf(",\n");
		} else {
			printf(", ");
		}
	}
	print "};"

	if (IFNDEF_NAME) {
		print "#endif /* " IFNDEF_NAME " */"
	}
}
@@ -0,0 +1,17 @@
/* This file is automatically generated. Do not edit! */
#ifndef UNICODE_TABLEISO8859_16_H
/* Define the guard macro; without it the #ifndef never suppresses a second
 * inclusion and the table would be defined twice. */
#define UNICODE_TABLEISO8859_16_H
/*
 * Unicode code points for ISO-8859-16 bytes 0xa0..0xff.
 * Entry [i] is the code point of byte 0xa0 + i (96 entries, 8 per row).
 */
static const unsigned int iso8859_16_ucs_table[] = {
	0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
	0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
	0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
	0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
	0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
	0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
	0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
	0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
	0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
	0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff
};
#endif /* UNICODE_TABLEISO8859_16_H */
+21
View File
@@ -0,0 +1,21 @@
Microsoft Visual Studio Solution File, Format Version 7.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
ConfigName.0 = Debug
ConfigName.1 = Release
EndGlobalSection
GlobalSection(ProjectDependencies) = postSolution
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
{B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
EndGlobalSection
EndGlobal
+650
View File
@@ -0,0 +1,650 @@
<?xml version="1.0" encoding = "shift_jis"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.00"
Name="libmbfl"
ProjectGUID="{B3636594-A785-4270-A765-8EAE922B5207}"
SccProjectName=""
SccLocalPath="">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="2"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="mbfl,."
PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;MBFL_DLL_EXPORT;HAVE_CONFIG_H=1"
BasicRuntimeChecks="3"
RuntimeLibrary="1"
UsePrecompiledHeader="2"
PrecompiledHeaderFile=".\Debug/mbfl.pch"
AssemblerListingLocation=".\Debug/"
ObjectFile=".\Debug/"
ProgramDataBaseFileName=".\Debug/"
WarningLevel="3"
SuppressStartupBanner="TRUE"
DebugInformationFormat="4"
CompileAs="0"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="odbc32.lib odbccp32.lib"
OutputFile=".\Debug/mbfl.dll"
LinkIncremental="2"
SuppressStartupBanner="TRUE"
ModuleDefinitionFile=""
GenerateDebugInformation="TRUE"
ProgramDatabaseFile=".\Debug/mbfl.pdb"
ImportLibrary=".\Debug/mbfl.lib"/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="TRUE"
SuppressStartupBanner="TRUE"
TargetEnvironment="1"
TypeLibraryName=".\Debug/mbfl.tlb"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1041"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="2"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories="mbfl,."
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;HAVE_CONFIG_H"
StringPooling="TRUE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="TRUE"
UsePrecompiledHeader="2"
PrecompiledHeaderFile=".\Release/mbfl.pch"
AssemblerListingLocation=".\Release/"
ObjectFile=".\Release/"
ProgramDataBaseFileName=".\Release/"
WarningLevel="3"
SuppressStartupBanner="TRUE"
CompileAs="0"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
AdditionalDependencies="odbc32.lib odbccp32.lib"
OutputFile=".\Release/mbfl.dll"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
ModuleDefinitionFile=""
ProgramDatabaseFile=".\Release/mbfl.pdb"
ImportLibrary=".\Release/mbfl.lib"/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="NDEBUG"
MkTypLibCompatible="TRUE"
SuppressStartupBanner="TRUE"
TargetEnvironment="1"
TypeLibraryName=".\Release/mbfl.tlb"/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
</Configuration>
</Configurations>
<Files>
<Filter
Name="Source Files"
Filter="vc6">
<File
RelativePath=".\filters\html_entities.c">
</File>
<File
RelativePath=".\mbfl\mbfilter.c">
</File>
<File
RelativePath=".\filters\mbfilter_7bit.c">
</File>
<File
RelativePath=".\mbfl\mbfilter_8bit.c">
</File>
<File
RelativePath=".\filters\mbfilter_ascii.c">
</File>
<File
RelativePath=".\filters\mbfilter_base64.c">
</File>
<File
RelativePath=".\filters\mbfilter_big5.c">
</File>
<File
RelativePath=".\filters\mbfilter_byte2.c">
</File>
<File
RelativePath=".\filters\mbfilter_byte4.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp1251.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp1252.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp866.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp932.c">
</File>
<File
RelativePath=".\filters\mbfilter_cp936.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_cn.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp_win.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_kr.c">
</File>
<File
RelativePath=".\filters\mbfilter_euc_tw.c">
</File>
<File
RelativePath=".\filters\mbfilter_htmlent.c">
</File>
<File
RelativePath=".\filters\mbfilter_hz.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso2022_kr.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_1.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_10.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_13.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_14.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_15.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_16.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_2.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_3.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_4.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_5.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_6.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_7.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_8.c">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_9.c">
</File>
<File
RelativePath=".\filters\mbfilter_jis.c">
</File>
<File
RelativePath=".\filters\mbfilter_koi8r.c">
</File>
<File
RelativePath=".\mbfl\mbfilter_pass.c">
</File>
<File
RelativePath=".\filters\mbfilter_qprint.c">
</File>
<File
RelativePath=".\filters\mbfilter_sjis.c">
</File>
<File
RelativePath=".\filters\mbfilter_ucs2.c">
</File>
<File
RelativePath=".\filters\mbfilter_ucs4.c">
</File>
<File
RelativePath=".\filters\mbfilter_uhc.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf16.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf32.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf7.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf7imap.c">
</File>
<File
RelativePath=".\filters\mbfilter_utf8.c">
</File>
<File
RelativePath=".\filters\mbfilter_uuencode.c">
</File>
<File
RelativePath=".\mbfl\mbfilter_wchar.c">
</File>
<File
RelativePath=".\mbfl\mbfl_allocators.c">
</File>
<File
RelativePath=".\mbfl\mbfl_convert.c">
</File>
<File
RelativePath=".\mbfl\mbfl_encoding.c">
</File>
<File
RelativePath=".\mbfl\mbfl_filter_output.c">
</File>
<File
RelativePath=".\mbfl\mbfl_ident.c">
</File>
<File
RelativePath=".\mbfl\mbfl_language.c">
</File>
<File
RelativePath=".\mbfl\mbfl_memory_device.c">
</File>
<File
RelativePath=".\mbfl\mbfl_string.c">
</File>
<File
RelativePath=".\nls\nls_de.c">
</File>
<File
RelativePath=".\nls\nls_en.c">
</File>
<File
RelativePath=".\nls\nls_ja.c">
</File>
<File
RelativePath=".\nls\nls_kr.c">
</File>
<File
RelativePath=".\nls\nls_neutral.c">
</File>
<File
RelativePath=".\nls\nls_ru.c">
</File>
<File
RelativePath=".\nls\nls_uni.c">
</File>
<File
RelativePath=".\nls\nls_zh.c">
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl">
<File
RelativePath=".\config.h.vc6">
<FileConfiguration
Name="Debug|Win32">
<Tool
Name="VCCustomBuildTool"
CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;
"
Outputs="$(InputDir)\config.h"/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32">
<Tool
Name="VCCustomBuildTool"
CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;
"
Outputs="$(InputDir)\config.h"/>
</FileConfiguration>
</File>
<File
RelativePath=".\filters\cp932_table.h">
</File>
<File
RelativePath=".\filters\html_entities.h">
</File>
<File
RelativePath=".\mbfl\mbfilter.h">
</File>
<File
RelativePath=".\filters\mbfilter_7bit.h">
</File>
<File
RelativePath=".\mbfl\mbfilter_8bit.h">
</File>
<File
RelativePath=".\filters\mbfilter_ascii.h">
</File>
<File
RelativePath=".\filters\mbfilter_base64.h">
</File>
<File
RelativePath=".\filters\mbfilter_big5.h">
</File>
<File
RelativePath=".\filters\mbfilter_byte2.h">
</File>
<File
RelativePath=".\filters\mbfilter_byte4.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp1251.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp1252.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp866.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp932.h">
</File>
<File
RelativePath=".\filters\mbfilter_cp936.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_cn.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_jp_win.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_kr.h">
</File>
<File
RelativePath=".\filters\mbfilter_euc_tw.h">
</File>
<File
RelativePath=".\filters\mbfilter_htmlent.h">
</File>
<File
RelativePath=".\filters\mbfilter_hz.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso2022_kr.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_1.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_10.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_13.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_14.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_15.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_16.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_2.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_3.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_4.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_5.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_6.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_7.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_8.h">
</File>
<File
RelativePath=".\filters\mbfilter_iso8859_9.h">
</File>
<File
RelativePath=".\filters\mbfilter_jis.h">
</File>
<File
RelativePath=".\filters\mbfilter_koi8r.h">
</File>
<File
RelativePath=".\mbfl\mbfilter_pass.h">
</File>
<File
RelativePath=".\filters\mbfilter_qprint.h">
</File>
<File
RelativePath=".\filters\mbfilter_sjis.h">
</File>
<File
RelativePath=".\filters\mbfilter_ucs2.h">
</File>
<File
RelativePath=".\filters\mbfilter_ucs4.h">
</File>
<File
RelativePath=".\filters\mbfilter_uhc.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf16.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf32.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf7.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf7imap.h">
</File>
<File
RelativePath=".\filters\mbfilter_utf8.h">
</File>
<File
RelativePath=".\filters\mbfilter_uuencode.h">
</File>
<File
RelativePath=".\mbfl\mbfilter_wchar.h">
</File>
<File
RelativePath=".\mbfl\mbfl_allocators.h">
</File>
<File
RelativePath=".\mbfl\mbfl_consts.h">
</File>
<File
RelativePath=".\mbfl\mbfl_convert.h">
</File>
<File
RelativePath=".\mbfl\mbfl_encoding.h">
</File>
<File
RelativePath=".\mbfl\mbfl_filter_output.h">
</File>
<File
RelativePath=".\mbfl\mbfl_ident.h">
</File>
<File
RelativePath=".\mbfl\mbfl_language.h">
</File>
<File
RelativePath=".\mbfl\mbfl_memory_device.h">
</File>
<File
RelativePath=".\mbfl\mbfl_string.h">
</File>
<File
RelativePath=".\nls\nls_de.h">
</File>
<File
RelativePath=".\nls\nls_en.h">
</File>
<File
RelativePath=".\nls\nls_ja.h">
</File>
<File
RelativePath=".\nls\nls_kr.h">
</File>
<File
RelativePath=".\nls\nls_neutral.h">
</File>
<File
RelativePath=".\nls\nls_ru.h">
</File>
<File
RelativePath=".\nls\nls_uni.h">
</File>
<File
RelativePath=".\nls\nls_zh.h">
</File>
<File
RelativePath=".\filters\unicode_prop.h">
</File>
<File
RelativePath=".\filters\unicode_table_big5.h">
</File>
<File
RelativePath=".\filters\unicode_table_cns11643.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp1251.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp1252.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp866.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp932_ext.h">
</File>
<File
RelativePath=".\filters\unicode_table_cp936.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_10.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_13.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_14.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_15.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_16.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_2.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_3.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_4.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_5.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_6.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_7.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_8.h">
</File>
<File
RelativePath=".\filters\unicode_table_iso8859_9.h">
</File>
<File
RelativePath=".\filters\unicode_table_jis.h">
</File>
<File
RelativePath=".\filters\unicode_table_koi8r.h">
</File>
<File
RelativePath=".\filters\unicode_table_uhc.h">
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
<File
RelativePath=".\mbfl.rc">
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>
+177
View File
@@ -0,0 +1,177 @@
README.ja 2005/02/04
鬼車 ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
http://www.geocities.jp/kosako3/oniguruma/
http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
鬼車は正規表現ライブラリである。
このライブラリの特長は、それぞれの正規表現オブジェクトごとに
文字エンコーディングを指定できることである。
サポートしている文字エンコーディング:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
Shift_JIS, Big5, KOI8-R, KOI8 (*),
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
* KOI8はデフォルトのセットアップではライブラリの中に含まれない。
(必要であればMakefileを編集すること)
------------------------------------------------------------
インストール
ケース1: UnixとCygwin環境
1. ./configure
2. make
3. make install
ライブラリファイル: libonig.a
動作テスト (ASCII/EUC-JP)
make ctest
アンインストール
make uninstall
ケース2: Win32(VC++)環境
1. copy win32\Makefile Makefile
2. copy win32\config.h config.h
3. nmake
onig_s.lib: static link library
onig.dll: dynamic link library
* 動作テスト (ASCII/Shift_JIS)
4. copy win32\testc.c testc.c
5. nmake ctest
ライセンス
このソフトウェアがRubyと一緒に使用または配布される場合には、
Rubyのライセンスに従う。
それ以外の場合には、BSDライセンスに従う。
正規表現
doc/RE.jaを参照
使用方法
使用するプログラムで、oniguruma.hをインクルードする(Native APIの場合)。
Native APIについては、doc/API.jaを参照。
Win32でスタティックリンクライブラリ(onig_s.lib)をリンクする場合には、
コンパイルするときに -DONIG_EXTERN=extern をコンパイル引数に追加すること。
使用例プログラム
sample/simple.c 最小例 (native API)
sample/names.c 名前付きグループコールバック使用例
sample/encode.c 幾つかの文字エンコーディング使用例
sample/listcap.c 捕獲履歴機能の使用例
sample/posix.c POSIX API使用例
sample/sql.c 可変メタ文字機能使用例 (SQL-like パターン)
sample/syntax.c PerlとJava文法のテスト
ソースファイル
oniguruma.h 鬼車APIヘッダ (公開)
regenc.h 文字エンコーディング枠組みヘッダ
regint.h 内部宣言
regparse.h regparse.cとregcomp.cのための内部宣言
regcomp.c コンパイル、最適化関数
regenc.c 文字エンコーディング枠組み
regerror.c エラーメッセージ関数
regext.c 拡張API関数
regexec.c 検索、照合関数
regparse.c 正規表現パターン解析関数
regsyntax.c 正規表現パターン文法関数、組込み文法定義
regtrav.c 捕獲履歴木巡回関数
regversion.c 版情報関数
st.h ハッシュテーブル関数宣言
st.c ハッシュテーブル関数
oniggnu.h GNU regex APIヘッダ (公開)
reggnu.c GNU regex API関数
onigposix.h POSIX APIヘッダ (公開)
regposerr.c POSIX APIエラーメッセージ関数
regposix.c POSIX API関数
enc/mktable.c 文字タイプテーブル生成プログラム
enc/ascii.c ASCII エンコーディング
enc/euc_jp.c EUC-JP エンコーディング
enc/euc_tw.c EUC-TW エンコーディング
enc/euc_kr.c EUC-KR, EUC-CN エンコーディング
enc/sjis.c Shift_JIS エンコーディング
enc/big5.c Big5 エンコーディング
enc/koi8.c KOI8 エンコーディング
enc/koi8_r.c KOI8-R エンコーディング
enc/iso8859_1.c ISO-8859-1 (Latin-1)
enc/iso8859_2.c ISO-8859-2 (Latin-2)
enc/iso8859_3.c ISO-8859-3 (Latin-3)
enc/iso8859_4.c ISO-8859-4 (Latin-4)
enc/iso8859_5.c ISO-8859-5 (Cyrillic)
enc/iso8859_6.c ISO-8859-6 (Arabic)
enc/iso8859_7.c ISO-8859-7 (Greek)
enc/iso8859_8.c ISO-8859-8 (Hebrew)
enc/iso8859_9.c ISO-8859-9 (Latin-5 または Turkish)
enc/iso8859_10.c ISO-8859-10 (Latin-6 または Nordic)
enc/iso8859_11.c ISO-8859-11 (Thai)
enc/iso8859_13.c ISO-8859-13 (Latin-7 または Baltic Rim)
enc/iso8859_14.c ISO-8859-14 (Latin-8 または Celtic)
enc/iso8859_15.c ISO-8859-15 (Latin-9 または West European with Euro)
enc/iso8859_16.c ISO-8859-16
(Latin-10 または South-Eastern European with Euro)
enc/utf8.c UTF-8 エンコーディング
enc/utf16_be.c UTF-16BE エンコーディング
enc/utf16_le.c UTF-16LE エンコーディング
enc/utf32_be.c UTF-32BE エンコーディング
enc/utf32_le.c UTF-32LE エンコーディング
enc/unicode.c Unicode情報
win32/Makefile Win32用 Makefile (for VC++)
win32/config.h Win32用 config.h
Ruby 1.8/1.6の日本語化GNU regexとのAPIの違い
+ re_compile_fastmap() は削除された。
+ re_recompile_pattern() が追加された。
+ re_alloc_pattern() が追加された。
残件
? Unicode全コードポイント領域での大文字小文字照合
? Unicodeプロパティ
? ambig-flag Katakana <-> Hiragana
? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
? ONIG_SYNTAX_ASIS追加
?? \X (== \PM\pM*)
?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
?? 改行文字(文字列)を変更できる
?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
and I'm thankful to Akinori MUSHA.
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
File diff suppressed because it is too large Load Diff
+253
View File
@@ -0,0 +1,253 @@
/**********************************************************************
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Classify the high-order byte of a UTF-16 code unit:
 *   FIRST  -- 0xd8..0xdb, the unit is a high (leading) surrogate;
 *   SECOND -- 0xdc..0xdf, the unit is a low (trailing) surrogate.
 * The argument is parenthesized so that expressions such as (x & 0xff) are
 * classified correctly. It is evaluated twice: do not pass an expression with
 * side effects.
 */
#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
/*
 * Byte length of a UTF-16 character, indexed by the high-order byte of its
 * first code unit: 4 for 0xd8..0xdb (high surrogate), 2 for everything else.
 * Declared const: the table is only ever read.
 */
static const int EncLen_UTF16[] = {
  /* 0x00 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x10 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x20 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x30 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x40 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x50 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x60 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x80 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0x90 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xa0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/*
 * Byte length of the UTF-16BE character starting at p, taken from
 * EncLen_UTF16 using the first byte (the high-order byte of the first unit).
 */
static int
utf16be_mbc_enc_len(const UChar* p)
{
  const UChar lead = p[0];

  return EncLen_UTF16[lead];
}
/*
 * Return 1 when the two bytes at p are 0x00 0x0a and both lie before `end`;
 * return 0 otherwise (including when fewer than two bytes remain).
 */
static int
utf16be_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (p + 1 >= end) {
    return 0;
  }
  return (p[0] == 0x00 && p[1] == 0x0a) ? 1 : 0;
}
static OnigCodePoint
utf16be_mbc_to_code(const UChar* p, const UChar* end)
{
OnigCodePoint code;
if (UTF16_IS_SURROGATE_FIRST(*p)) {
code = ((((p[0] - 0xd8) << 2) + ((p[1] & 0xc0) >> 6) + 1) << 16)
+ ((((p[1] & 0x3f) << 2) + (p[2] - 0xdc)) << 8)
+ p[3];
}
else {
code = p[0] * 256 + p[1];
}
return code;
}
/*
 * Number of bytes needed to encode `code` in UTF-16BE: 4 when it is above
 * 0xffff, 2 otherwise.
 */
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
  if (code > 0xffff) {
    return 4;
  }
  return 2;
}
/*
 * Encode `code` as UTF-16BE into buf and return the number of bytes written
 * (4 for code > 0xffff, otherwise 2). buf must have room for that many bytes.
 *
 * The 4-byte form is the exact inverse of utf16be_mbc_to_code():
 *   byte 0 = 0xd8 + top two bits of (plane - 1)
 *   byte 1 = low two bits of (plane - 1), then the top six bits of bits 8..15
 *   byte 2 = 0xdc + the low two bits of bits 8..15
 *   byte 3 = bits 0..7
 * Two defects of the earlier version are fixed here: the plane was not
 * reduced by 1 (surrogate pairs store code - 0x10000), and the low two bits
 * for byte 2 were masked with 0x02, dropping bit 8 of the code point.
 *
 * Values above 0x10ffff are not representable in UTF-16 and are not checked.
 */
static int
utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  UChar* p = buf;

  if (code > 0xffff) {
    unsigned int plane, high;

    plane = (unsigned int )(code >> 16) - 1;
    high  = (unsigned int )((code & 0xff00) >> 8);

    *p++ = (UChar )((plane >> 2) + 0xd8);
    *p++ = (UChar )(((plane & 0x03) << 6) + (high >> 2));
    *p++ = (UChar )((high & 0x03) + 0xdc);
    *p   = (UChar )(code & 0xff);
    return 4;
  }
  else {
    *p++ = (UChar )((code & 0xff00) >> 8);
    *p++ = (UChar )(code & 0xff);
    return 2;
  }
}
/*
 * Write the normalized (case-folded) form of the character at *pp into
 * `lower`, advance *pp past the consumed input, and return the number of
 * bytes written to `lower`.
 *
 * Characters whose first byte is 0x00:
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, the two-unit sequence "ss"
 *     (or "SS" when ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) is
 *     replaced by 0x00 0xdf; *pp advances by 4 and 2 is returned.
 *   - Otherwise the second byte is mapped through
 *     ONIGENC_ISO_8859_1_TO_LOWER_CASE when the flag matching its
 *     ASCII/non-ASCII class is set, or copied unchanged; *pp advances by 2
 *     and 2 is returned.
 * All other characters are copied verbatim (2 or 4 bytes per EncLen_UTF16).
 */
static int
utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
const UChar* p = *pp;
if (*p == 0) {
/* p now points at the low-order byte of the first code unit. */
p++;
/* p, p+1 and p+2 must all lie before end: low byte of unit 1, then both
   bytes of unit 2. */
if (end > p + 2 &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
((*p == 's' && *(p+2) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+2) == 'S'))) &&
*(p+1) == 0) {
*lower++ = '\0';
*lower = 0xdf;
(*pp) += 4;
return 2;
}
*lower++ = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp) += 2;
return 2; /* return byte length of converted char to lower */
}
else {
int len;
len = EncLen_UTF16[*p];
/* Skip the copy when the output buffer is the input position itself. */
if (lower != p) {
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Report whether the character at *pp is ambiguous under the matching modes
 * selected by `flag`, and advance *pp past it.
 *
 * *pp is first advanced by the character's own length (EncLen_UTF16). Only
 * characters whose first byte is 0x00 can be reported as ambiguous:
 *   - With ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) returns TRUE and
 *     advances *pp by a further 2 bytes; a low byte of 0xdf returns TRUE.
 *   - Otherwise, when the flag matching the byte's ASCII/non-ASCII class is
 *     set, the byte is classified through ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE.
 * Everything else returns FALSE.
 */
static int
utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += EncLen_UTF16[*p];
if (*p == 0) {
int c, v;
/* p now points at the low-order byte of the first code unit. */
p++;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 2 &&
((*p == 's' && *(p+2) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+2) == 'S'))) &&
*(p+1) == 0) {
(*pp) += 2;
return TRUE;
}
else if (*p == 0xdf) {
return TRUE;
}
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): OR-ing with ONIGENC_CTYPE_LOWER makes this condition true
   whenever that constant is non-zero, regardless of v, which would leave
   the final "return (v != 0 ...)" unreachable. Possibly '&' or a plain
   test of v was intended -- confirm against the macro definitions in
   regenc.h before changing. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
/* The range test below rejects every byte in 0xaa..0xba, not only those
   three values. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
}
return FALSE;
}
/*
 * Move `s` back to the first byte of the character that contains it.
 *
 * Positions at or before `start` are returned as-is. Otherwise an odd byte
 * offset from `start` is rounded down to the code-unit boundary, and if that
 * unit is a low surrogate (and a preceding unit exists) the result steps back
 * one more unit to the high surrogate.
 */
static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar* head = s;

  if (head > start) {
    if ((head - start) % 2 == 1) {
      head--;
    }

    if (UTF16_IS_SURROGATE_SECOND(*head) && head > start + 1) {
      head -= 2;
    }
  }

  return (UChar* )head;
}
/*
 * Encoding descriptor for UTF-16BE.
 *
 * Positional initializer: length function, name, max/min byte length, the
 * supported ambiguity flags, the meta-character table (only the escape
 * character is effective), then the per-encoding callbacks. Callbacks
 * prefixed utf16be_ are defined in this file; the onigenc_* ones are shared
 * helpers declared elsewhere.
 * NOTE(review): slot order is fixed by OnigEncodingType, which is declared
 * outside this file -- confirm before reordering.
 */
OnigEncodingType OnigEncodingUTF16_BE = {
utf16be_mbc_enc_len,
"UTF-16BE", /* name */
4, /* max byte length */
2, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
utf16be_is_mbc_newline,
utf16be_mbc_to_code,
utf16be_code_to_mbclen,
utf16be_code_to_mbc,
utf16be_mbc_to_normalize,
utf16be_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};
+248
View File
@@ -0,0 +1,248 @@
/**********************************************************************
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/*
 * Classify the high-order byte of a UTF-16 code unit:
 *   0xD8..0xDB  -> first (leading) half of a surrogate pair
 *   0xDC..0xDF  -> second (trailing) half of a surrogate pair
 * The argument is parenthesized so expression arguments (e.g. `x & 0xff`)
 * expand correctly.  Note that it is still evaluated twice.
 */
#define UTF16_IS_SURROGATE_FIRST(c)    ((c) >= 0xd8 && (c) <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c)   ((c) >= 0xdc && (c) <= 0xdf)
/*
 * Byte length of a UTF-16 character, indexed by a single byte value
 * (256 entries, 16 per row).  Only entries 0xD8..0xDB hold 4; every other
 * entry holds 2.  In this file the table is indexed with *(p+1), i.e. the
 * second byte of the little-endian code unit.
 */
static int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* row 0xD0: entries 0xD8..0xDB are the 4-byte (surrogate pair) lead bytes */
2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
};
/*
 * Number of bytes needed to store `code`:
 * 2 for values up to 0xFFFF, 4 for anything larger.
 */
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
  if (code <= 0xffff) {
    return 2;
  }
  return 4;
}
/*
 * Byte length (2 or 4) of the character starting at p, looked up in
 * EncLen_UTF16 by the second byte of the little-endian code unit.
 */
static int
utf16le_mbc_enc_len(const UChar* p)
{
  const UChar second_byte = p[1];

  return EncLen_UTF16[second_byte];
}
/*
 * Return 1 when the two bytes at p are 0x0A 0x00 and both lie before `end`;
 * return 0 otherwise.
 */
static int
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (!(p + 1 < end)) {
    return 0;
  }
  return (p[0] == 0x0a && p[1] == 0x00) ? 1 : 0;
}
/*
 * Decode the character starting at p into a code point.
 * A unit whose second byte is in 0xD8..0xDB is combined with the following
 * two bytes (p[2], p[3]) as a surrogate pair; otherwise the value is
 * p[1] * 256 + p[0].  `end` is not consulted.
 */
static OnigCodePoint
utf16le_mbc_to_code(const UChar* p, const UChar* end)
{
  const UChar low_byte  = p[0];
  const UChar high_byte = p[1];
  OnigCodePoint result;

  if (!UTF16_IS_SURROGATE_FIRST(high_byte)) {
    result = high_byte * 256 + p[0];
    return result;
  }

  /* bits 16 and up, then bits 8..15, then the low 8 bits */
  result = ((((high_byte - 0xd8) << 2) + ((low_byte & 0xc0) >> 6) + 1) << 16)
         + ((((low_byte & 0x3f) << 2) + (p[3] - 0xdc)) << 8)
         + p[2];
  return result;
}
/*
 * Encode `code` as UTF-16LE into buf and return the number of bytes written
 * (2 or 4).  buf must have room for 4 bytes.
 *
 * Values above 0xFFFF are written as a surrogate pair, each 16-bit unit
 * stored low byte first:
 *   v     = code - 0x10000
 *   lead  = 0xD800 + (v >> 10)
 *   trail = 0xDC00 + (v & 0x3FF)
 * This is the inverse of utf16le_mbc_to_code.  The previous implementation
 * did not subtract 0x10000 and kept only one of the two low-surrogate bits
 * that live in the high byte, so its output did not round-trip.
 */
static int
utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  if (code > 0xffff) {
    const OnigCodePoint v = code - 0x10000;
    const unsigned int lead  = 0xd800 + (unsigned int )((v >> 10) & 0x3ff);
    const unsigned int trail = 0xdc00 + (unsigned int )(v & 0x3ff);

    buf[0] = (UChar )(lead & 0xff);
    buf[1] = (UChar )(lead >> 8);
    buf[2] = (UChar )(trail & 0xff);
    buf[3] = (UChar )(trail >> 8);
    return 4;
  }

  buf[0] = (UChar )(code & 0xff);
  buf[1] = (UChar )((code & 0xff00) >> 8);
  return 2;
}
/*
 * Write a normalized copy of the character at *pp into `lower`, advance *pp
 * past the input consumed, and return the number of bytes written.
 *
 * When the second byte of the unit is 0:
 *   - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, the pair "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) is replaced by the
 *     single unit 0xDF 0x00; four input bytes are consumed, two are written;
 *   - otherwise the first byte is passed through
 *     ONIGENC_ISO_8859_1_TO_LOWER_CASE when the flag matching the result of
 *     ONIGENC_IS_MBC_ASCII(p) is set, or copied unchanged.
 * Any other character is copied as is (length taken from EncLen_UTF16),
 * unless `lower` already points at the input.
 */
static int
utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
const UChar* p = *pp;
if (*(p+1) == 0) {
/* compound case: two consecutive units, both with a zero second byte */
if (end > p + 3 &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
((*p == 's' && *(p+2) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+2) == 'S'))) &&
*(p+3) == 0) {
*lower++ = 0xdf;
*lower = '\0';
(*pp) += 4;
return 2;
}
/* single unit: second output byte is always zero */
*(lower+1) = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp) += 2;
return 2; /* return byte length of converted char to lower */
}
else {
int len = EncLen_UTF16[*(p+1)];
/* skip the copy when the output buffer is the input itself */
if (lower != p) {
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Report (TRUE/FALSE) whether the character at *pp is ambiguous under the
 * matching modes in `flag`.  *pp is always advanced past the character
 * examined, and by a further 2 bytes when an "ss"/"SS" pair is recognized.
 * Only units whose second byte is 0 can yield TRUE.
 */
static int
utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += EncLen_UTF16[*(p+1)];
if (*(p+1) == 0) {
int c, v;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 3 &&
((*p == 's' && *(p+2) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+2) == 'S'))) &&
*(p+3) == 0) {
(*pp) += 2;
return TRUE;
}
/* NOTE(review): the UTF-32 variants in this commit also return TRUE here
   when *p == 0xdf; this function has no such branch -- confirm intent. */
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): `v | ONIGENC_CTYPE_LOWER` is non-zero whenever
   ONIGENC_CTYPE_LOWER is non-zero, which would make this test always true
   and the final `return (v != 0 ...)` unreachable.  A bitwise AND may have
   been intended -- confirm against the macro definitions. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
}
return FALSE;
}
/*
 * Move s back to the first byte of the character that contains it.
 * s is first aligned down to an even offset from `start`; if the unit found
 * there has a second byte in 0xDC..0xDF and is not the first unit of the
 * buffer, s is moved back one more unit.  Positions at or before `start`
 * are returned unchanged.
 */
static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
{
  const UChar* head = s;

  if (head > start) {
    if ((head - start) % 2 == 1) {
      head--;
    }
    if (UTF16_IS_SURROGATE_SECOND(*(head+1)) && head > start + 1) {
      head -= 2;
    }
  }
  return (UChar* )head;
}
/*
 * Encoding descriptor for "UTF-16LE".  Characters are 2 to 4 bytes long,
 * all three ambiguity modes are listed, '\\' is the escape meta character
 * and the remaining meta characters are marked ONIG_INEFFECTIVE_META_CHAR.
 * The callback slots are filled with the utf16le_* helpers of this file
 * and with shared onigenc_* routines.
 */
OnigEncodingType OnigEncodingUTF16_LE = {
utf16le_mbc_enc_len,
"UTF-16LE", /* name */
4, /* max byte length */
2, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
utf16le_is_mbc_newline,
utf16le_mbc_to_code,
utf16le_code_to_mbclen,
utf16le_code_to_mbc,
utf16le_mbc_to_normalize,
utf16le_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};
+208
View File
@@ -0,0 +1,208 @@
/**********************************************************************
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Character length in bytes: always 4, whatever the bytes at p are. */
static int
utf32be_mbc_enc_len(const UChar* p)
{
  (void )p;  /* the length does not depend on the content */
  return 4;
}
/*
 * Return 1 when the four bytes at p are 00 00 00 0A and all lie before
 * `end`; return 0 otherwise.
 */
static int
utf32be_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (!(p + 3 < end)) {
    return 0;
  }
  return (p[3] == 0x0a && p[2] == 0 && p[1] == 0 && p[0] == 0) ? 1 : 0;
}
static OnigCodePoint
utf32be_mbc_to_code(const UChar* p, const UChar* end)
{
return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
}
/* Encoded length in bytes of any code point: always 4. */
static int
utf32be_code_to_mbclen(OnigCodePoint code)
{
  (void )code;  /* the length does not depend on the value */
  return 4;
}
/*
 * Store `code` into buf as four bytes, most significant byte first,
 * and return 4.
 */
static int
utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (UChar )((code & 0xff000000) >> 24);
  buf[1] = (UChar )((code & 0xff0000)   >> 16);
  buf[2] = (UChar )((code & 0xff00)     >>  8);
  buf[3] = (UChar ) (code & 0xff);
  return 4;
}
/*
 * Write a normalized copy of the character at *pp into `lower`, advance *pp
 * past the input consumed, and return the number of bytes written (4).
 *
 * When the first three bytes are 0, p is moved to the fourth (least
 * significant) byte and:
 *   - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, the pair "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) is replaced by the
 *     single character 00 00 00 DF; eight input bytes are consumed;
 *   - otherwise the byte is passed through ONIGENC_ISO_8859_1_TO_LOWER_CASE
 *     when the flag matching the result of ONIGENC_IS_MBC_ASCII(p) is set,
 *     or copied unchanged.
 * Any other character is copied as is, unless `lower` already points at the
 * input.
 */
static int
utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
const UChar* p = *pp;
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
/* from here on p addresses the low byte; p+1..p+4 is the next character */
p += 3;
if (end > p + 4 &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
((*p == 's' && *(p+4) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+4) == 'S'))) &&
*(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
*lower++ = '\0';
*lower++ = '\0';
*lower++ = '\0';
*lower = 0xdf;
(*pp) += 8;
return 4;
}
*lower++ = '\0';
*lower++ = '\0';
*lower++ = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower = *p;
}
(*pp) += 4;
return 4; /* return byte length of converted char to lower */
}
else {
int len = 4;
/* skip the copy when the output buffer is the input itself */
if (lower != p) {
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Report (TRUE/FALSE) whether the character at *pp is ambiguous under the
 * matching modes in `flag`.  *pp is always advanced by 4, and by a further
 * 4 when an "ss"/"SS" pair is recognized.  Only characters whose first
 * three bytes are 0 can yield TRUE.
 */
static int
utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += 4;
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
int c, v;
/* from here on p addresses the low byte; p+1..p+4 is the next character */
p += 3;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
if (end > p + 4 &&
((*p == 's' && *(p+4) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+4) == 'S'))) &&
*(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
(*pp) += 4;
return TRUE;
}
else if (*p == 0xdf) {
return TRUE;
}
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): `v | ONIGENC_CTYPE_LOWER` is non-zero whenever
   ONIGENC_CTYPE_LOWER is non-zero, which would make this test always true
   and the final `return (v != 0 ...)` unreachable.  A bitwise AND may have
   been intended -- confirm against the macro definitions. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
}
return FALSE;
}
/*
 * Move s back to the nearest 4-byte boundary counted from `start`.
 * Positions at or before `start` are returned unchanged.
 */
static UChar*
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s > start) {
    const int excess = (s - start) % 4;
    return (UChar* )(s - excess);
  }
  return (UChar* )s;
}
/*
 * Encoding descriptor for "UTF-32BE".  Every character is exactly 4 bytes,
 * all three ambiguity modes are listed, '\\' is the escape meta character
 * and the remaining meta characters are marked ONIG_INEFFECTIVE_META_CHAR.
 * The callback slots are filled with the utf32be_* helpers of this file
 * and with shared onigenc_* routines.
 */
OnigEncodingType OnigEncodingUTF32_BE = {
utf32be_mbc_enc_len,
"UTF-32BE", /* name */
4, /* max byte length */
4, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
utf32be_is_mbc_newline,
utf32be_mbc_to_code,
utf32be_code_to_mbclen,
utf32be_code_to_mbc,
utf32be_mbc_to_normalize,
utf32be_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};
+206
View File
@@ -0,0 +1,206 @@
/**********************************************************************
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regenc.h"
/* Character length in bytes: always 4, whatever the bytes at p are. */
static int
utf32le_mbc_enc_len(const UChar* p)
{
  (void )p;  /* the length does not depend on the content */
  return 4;
}
/*
 * Return 1 when the four bytes at p are 0A 00 00 00 and all lie before
 * `end`; return 0 otherwise.
 */
static int
utf32le_is_mbc_newline(const UChar* p, const UChar* end)
{
  if (!(p + 3 < end)) {
    return 0;
  }
  return (p[0] == 0x0a && p[1] == 0 && p[2] == 0 && p[3] == 0) ? 1 : 0;
}
static OnigCodePoint
utf32le_mbc_to_code(const UChar* p, const UChar* end)
{
return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
}
/* Encoded length in bytes of any code point: always 4. */
static int
utf32le_code_to_mbclen(OnigCodePoint code)
{
  (void )code;  /* the length does not depend on the value */
  return 4;
}
/*
 * Store `code` into buf as four bytes, least significant byte first,
 * and return 4.
 */
static int
utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
{
  buf[0] = (UChar ) (code & 0xff);
  buf[1] = (UChar )((code & 0xff00)     >>  8);
  buf[2] = (UChar )((code & 0xff0000)   >> 16);
  buf[3] = (UChar )((code & 0xff000000) >> 24);
  return 4;
}
/*
 * Write a normalized copy of the character at *pp into `lower`, advance *pp
 * past the input consumed, and return the number of bytes written (4).
 *
 * When bytes 1..3 of the character are 0:
 *   - with ONIGENC_AMBIGUOUS_MATCH_COMPOUND set, the pair "ss" (or "SS" when
 *     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE is also set) is replaced by the
 *     single character DF 00 00 00; eight input bytes are consumed;
 *   - otherwise the first byte is passed through
 *     ONIGENC_ISO_8859_1_TO_LOWER_CASE when the flag matching the result of
 *     ONIGENC_IS_MBC_ASCII(p) is set, or copied unchanged, and three zero
 *     bytes follow it.
 * Any other character is copied as is, unless `lower` already points at the
 * input.
 */
static int
utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
UChar* lower)
{
const UChar* p = *pp;
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
/* compound case: the next character occupies p+4..p+7 */
if (end > p + 7 &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
((*p == 's' && *(p+4) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+4) == 'S'))) &&
*(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
*lower++ = 0xdf;
*lower++ = '\0';
*lower++ = '\0';
*lower = '\0';
(*pp) += 8;
return 4;
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
*lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
}
else {
*lower++ = *p;
}
*lower++ = '\0';
*lower++ = '\0';
*lower = '\0';
(*pp) += 4;
return 4; /* return byte length of converted char to lower */
}
else {
int len = 4;
/* skip the copy when the output buffer is the input itself */
if (lower != p) {
int i;
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
}
(*pp) += len;
return len; /* return byte length of converted char to lower */
}
}
/*
 * Report (TRUE/FALSE) whether the character at *pp is ambiguous under the
 * matching modes in `flag`.  *pp is always advanced by 4, and by a further
 * 4 when an "ss"/"SS" pair is recognized.  Only characters whose bytes 1..3
 * are 0 can yield TRUE.
 */
static int
utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
(*pp) += 4;
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
int c, v;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
/* the next character occupies p+4..p+7 */
if (end > p + 7 &&
((*p == 's' && *(p+4) == 's') ||
((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
(*p == 'S' && *(p+4) == 'S'))) &&
*(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
(*pp) += 4;
return TRUE;
}
else if (*p == 0xdf) {
return TRUE;
}
}
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
!ONIGENC_IS_MBC_ASCII(p))) {
c = *p;
v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
(ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
/* NOTE(review): `v | ONIGENC_CTYPE_LOWER` is non-zero whenever
   ONIGENC_CTYPE_LOWER is non-zero, which would make this test always true
   and the final `return (v != 0 ...)` unreachable.  A bitwise AND may have
   been intended -- confirm against the macro definitions. */
if ((v | ONIGENC_CTYPE_LOWER) != 0) {
/* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
if (c >= 0xaa && c <= 0xba)
return FALSE;
else
return TRUE;
}
return (v != 0 ? TRUE : FALSE);
}
}
return FALSE;
}
/*
 * Move s back to the nearest 4-byte boundary counted from `start`.
 * Positions at or before `start` are returned unchanged.
 */
static UChar*
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
{
  if (s > start) {
    const int excess = (s - start) % 4;
    return (UChar* )(s - excess);
  }
  return (UChar* )s;
}
/*
 * Encoding descriptor for "UTF-32LE".  Every character is exactly 4 bytes,
 * all three ambiguity modes are listed, '\\' is the escape meta character
 * and the remaining meta characters are marked ONIG_INEFFECTIVE_META_CHAR.
 * The callback slots are filled with the utf32le_* helpers of this file
 * and with shared onigenc_* routines.
 */
OnigEncodingType OnigEncodingUTF32_LE = {
utf32le_mbc_enc_len,
"UTF-32LE", /* name */
4, /* max byte length */
4, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
utf32le_is_mbc_newline,
utf32le_mbc_to_code,
utf32le_code_to_mbclen,
utf32le_code_to_mbc,
utf32le_mbc_to_normalize,
utf32le_is_mbc_ambiguous,
onigenc_iso_8859_1_get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
onigenc_unicode_is_code_ctype,
onigenc_unicode_get_ctype_code_range,
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match
};
+173
View File
@@ -0,0 +1,173 @@
<html>
<head>
<meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=x-sjis">
<title>Oniguruma</title>
</head>
<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
<h2>Oniguruma</h2>
<p>
2005/02/19 (C) K.Kosako
</p>
<p>
<a href="http://miuras.net/matsushita.html">
<img src="anti_matsushita.PNG" height="46" width="266">
</a>
</p>
<p>
Oniguruma is a regular expression library.<br>
Its distinguishing characteristic is that a different character encoding
<br>can be specified for every regular expression object.
</p>
<dl>
<dt><b>Supported character encodings:</b><br>
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
Shift_JIS, Big5, KOI8-R, KOI8,<br>
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
</p>
</dl>
<p>
<dl>
<font color="orange">
<dt><b>What's new</b>
</font>
<ul>
<li>Character types (\w, \s, \d and POSIX bracket) are supported over the full code point range, based on Version 4.0.1 of the <a href="http://www.unicode.org/ucd/">Unicode Standard</a>. (since Version 3.5.0)
</ul>
</dl>
<hr>
<dl>
<dt>There are two ways of using this program.
<ul>
<li> (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
<li> (2) Built-in regular expressions engine of <a href="http://www.ruby-lang.org/">Ruby</a> 1.6/1.8/1.9 <br>
In Ruby 1.9, Oniguruma is already incorporated by Kazuo Saito.
</ul>
</dl>
<dl>
<dt><b>Platform:</b>
<ul>
<li> Unix (including Mac OS X)
<li> Cygwin
<li> Win32
</ul>
<br>
<dt><b>License:</b><br>
When this software is used as part of Ruby or distributed with Ruby,
it follows the license of Ruby.<br>
In all other cases it follows the BSD license.
</p>
<dt><b>Download:</b>
<ul>
<li> <a href="archive/onigd20050219.tar.gz">Latest release version 3.7.0</a> (2005/02/19) <a href="HISTORY_3X.txt">Change Log</a>
<li> <a href="archive/onigd20050204.tar.gz">3.6.0</a> (2005/02/04)
<li> <a href="archive/onigd20050119.tar.gz">3.5.4</a> (2005/01/19)
<li> <a href="archive/onigd2_4_1.tar.gz">Latest release version 2.4.1</a> (2005/01/05) <a href="HISTORY_2X.txt">Change Log</a>
<li> <a href="archive/onigd2_4_0.tar.gz">2.4.0</a> (2004/12/01)
<li> <a href="archive/onigd2_3_3.tar.gz">2.3.3</a> (2004/10/30)
</ul>
<br>
<font color="red">
* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
</font>
<br>
<br>
<dt><b>Documents:</b> (version 3.7.0)
<ul>
<li> <a href="doc/RE.txt">Regular Expressions</a>
<a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
<li> <a href="doc/API.txt">Oniguruma API</a>
<a href="doc/API.ja.txt">(Japanese: EUC-JP)</a>
</ul>
<br>
<dt><b>Sample Programs:</b>
<ul>
<li><a href="sample/simple.c">example of the minimum</a>
<li><a href="sample/sql.c">example of the variable syntax and meta character (SQL-like pattern match)</a>
</ul>
<br>
<dt><b>Links:</b>
<ul>
<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version)
<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin</a> (Japanese page)
<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail</a>
<li> <a href="http://www.artman21.net/">Jedit X</a>
<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page)
<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
<li> <a href="http://www.trinity-site.net/wiki/index.php?MultiFind">MultiFind</a> (Japanese page)
<li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
<li> <a href="http://www-gauge.scphys.kyoto-u.ac.jp/~sonobe/OgreKit/index.html">OgreKit</a> Regular Expression Framework for Cocoa (Japanese page)
<li> <a href ="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
<li> <a href ="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
<li> <a href ="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
<li> <a href ="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
<li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
<li> <a href="http://www.ruby-lang.org/">Ruby</a>
<li> <a href="http://quux.s74.xrea.com/">SevenFour</a> (Japanese page)
<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod</a>
</ul>
<br>
<dt><b>References:</b>
<ul>
<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a>
<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
<li> <a href="http://www.pcre.org/">PCRE</a>
<!--
<li> <a href="http://www.jajakarta.org/regexp/">Jakarta Project Regexp</a> (Japanese page)
<li> <a href="http://www.jajakarta.org/oro/">Jakarta Project ORO</a> (Japanese page)
-->
<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
</ul>
<br>
<!--
<dt><b>ToDo:</b>
<ul>
<li> support character types for all code point range.
</ul>
-->
</dl>
<p>
and I'm thankful to Akinori MUSHA.
</p>
<!--
<hr>
<font color="red">
2004-06-14<br>
To: "Greg A. Woods"<br>
I can't send mail to you. (rejected)<br>
Please set the nmatch argument of regexec() to 1,
and use Oniguruma 3.6.0 or 2.4.1.<br>
The nmatch argument should be array size of a pmatch.<br>
But I don't know whether this problem is related to the crash
that you reported.
</font>
-->
<hr>
</body>
</html>
+213
View File
@@ -0,0 +1,213 @@
/**********************************************************************
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/*
 * Expand every byte of [s, end) into four output bytes: three zero bytes
 * followed by the input byte.  Writes 4 * (end - s) bytes to conv.
 */
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 4) {
    dst[0] = '\0';
    dst[1] = '\0';
    dst[2] = '\0';
    dst[3] = *src;
  }
}
/*
 * Expand every byte of [s, end) into four output bytes: the input byte
 * followed by three zero bytes.  Writes 4 * (end - s) bytes to conv.
 */
static void
conv_ext0le32(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 4) {
    dst[0] = *src;
    dst[1] = '\0';
    dst[2] = '\0';
    dst[3] = '\0';
  }
}
/*
 * Expand every byte of [s, end) into two output bytes: a zero byte followed
 * by the input byte.  Writes 2 * (end - s) bytes to conv.
 */
static void
conv_ext0be(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 2) {
    dst[0] = '\0';
    dst[1] = *src;
  }
}
/*
 * Expand every byte of [s, end) into two output bytes: the input byte
 * followed by a zero byte.  Writes 2 * (end - s) bytes to conv.
 */
static void
conv_ext0le(const UChar* s, const UChar* end, UChar* conv)
{
  const UChar* src;
  UChar* dst = conv;

  for (src = s; src < end; src++, dst += 2) {
    dst[0] = *src;
    dst[1] = '\0';
  }
}
/*
 * Copy [s, end) to conv, reversing the byte order of every complete 4-byte
 * group.  Exactly (end - s) bytes are written.
 *
 * When the length is not a multiple of 4, the trailing 1..3 bytes are
 * copied unchanged.  The previous loop treated them as a full group, reading
 * past `end` and writing past a destination sized to the input length.
 */
static void
conv_swap4bytes(const UChar* s, const UChar* end, UChar* conv)
{
  /* complete groups */
  while (end - s >= 4) {
    conv[0] = s[3];
    conv[1] = s[2];
    conv[2] = s[1];
    conv[3] = s[0];
    s    += 4;
    conv += 4;
  }

  /* incomplete trailing group, if any: keep it as is */
  while (s < end) {
    *conv++ = *s++;
  }
}
/*
 * Copy [s, end) to conv, swapping the two bytes of every complete pair.
 * Exactly (end - s) bytes are written.
 *
 * When the length is odd, the final byte is copied unchanged.  The previous
 * loop treated it as a full pair, reading past `end` and writing past a
 * destination sized to the input length.
 */
static void
conv_swap2bytes(const UChar* s, const UChar* end, UChar* conv)
{
  /* complete pairs */
  while (end - s >= 2) {
    conv[0] = s[1];
    conv[1] = s[0];
    s    += 2;
    conv += 2;
  }

  /* odd trailing byte, if any: keep it as is */
  if (s < end) {
    *conv = *s;
  }
}
/*
 * Convert the byte string [s, end) from encoding `from` to encoding `to`
 * into a newly allocated buffer.
 *
 * Supported combinations:
 *   ASCII / ISO-8859-1 -> UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE
 *   UTF-16LE <-> UTF-16BE, UTF-32LE <-> UTF-32BE (byte swap)
 *
 * On success returns 0, stores the buffer start in *conv and the position
 * one past its last byte in *conv_end; the buffer comes from xmalloc.
 * Returns ONIGERR_MEMORY when the allocation check fails and
 * ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION for any other pair.
 */
static int
conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* end,
              UChar** conv, UChar** conv_end)
{
  int len = end - s;
  int out_len;
  void (*convert)(const UChar*, const UChar*, UChar*);

  /* Step 1: choose the conversion routine and the output size. */
  if (to == ONIG_ENCODING_UTF16_BE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      convert = conv_ext0be;
      out_len = len * 2;
    }
    else if (from == ONIG_ENCODING_UTF16_LE) {
      convert = conv_swap2bytes;
      out_len = len;
    }
    else {
      return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
    }
  }
  else if (to == ONIG_ENCODING_UTF16_LE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      convert = conv_ext0le;
      out_len = len * 2;
    }
    else if (from == ONIG_ENCODING_UTF16_BE) {
      convert = conv_swap2bytes;
      out_len = len;
    }
    else {
      return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
    }
  }
  else if (to == ONIG_ENCODING_UTF32_BE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      convert = conv_ext0be32;
      out_len = len * 4;
    }
    else if (from == ONIG_ENCODING_UTF32_LE) {
      convert = conv_swap4bytes;
      out_len = len;
    }
    else {
      return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
    }
  }
  else if (to == ONIG_ENCODING_UTF32_LE) {
    if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
      convert = conv_ext0le32;
      out_len = len * 4;
    }
    else if (from == ONIG_ENCODING_UTF32_BE) {
      convert = conv_swap4bytes;
      out_len = len;
    }
    else {
      return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
    }
  }
  else {
    return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
  }

  /* Step 2: allocate, publish the bounds, convert. */
  *conv = (UChar* )xmalloc(out_len);
  CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
  *conv_end = *conv + out_len;
  convert(s, end, *conv);
  return 0;
}
/*
 * Create and compile a regex from `pattern`, converting the pattern first
 * when ci->pattern_enc differs from ci->target_enc.
 *
 * Returns 0 on success or the non-zero code of the step that failed
 * (conversion, onig_alloc_init or onig_compile).  When onig_compile fails
 * the regex is freed and *reg is set to NULL.  A converted copy of the
 * pattern is always released before returning.  If einfo is non-NULL its
 * `par` member is cleared up front.
 */
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
                OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  int r;
  UChar *cpat, *cpat_end;

  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;

  if (ci->pattern_enc == ci->target_enc) {
    /* same encoding: compile the caller's bytes directly */
    cpat     = (UChar* )pattern;
    cpat_end = (UChar* )pattern_end;
  }
  else {
    r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
                      &cpat, &cpat_end);
    if (r) return r;
  }

  r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
                      ci->syntax);
  if (!r) {
    r = onig_compile(*reg, cpat, cpat_end, einfo);
    if (r) {
      onig_free(*reg);
      *reg = NULL;
    }
  }

  /* cpat differs from pattern only when a converted copy was allocated */
  if (cpat != pattern) xfree(cpat);
  return r;
}
/*
 * Build a fresh regex with onig_new_deluxe and hand it over to `reg`:
 * via onig_transfer when ONIG_STATE(reg) is ONIG_STATE_NORMAL, via
 * onig_chain_link_add otherwise.  Returns 0 on success, or the error code
 * from onig_new_deluxe, in which case `reg` is left untouched.
 */
extern int
onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
                      OnigCompileInfo* ci, OnigErrorInfo* einfo)
{
  regex_t *fresh;
  int status = onig_new_deluxe(&fresh, pattern, pattern_end, ci, einfo);

  if (status) return status;

  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL) {
    onig_chain_link_add(reg, fresh);
  }
  else {
    onig_transfer(reg, fresh);
  }
  return 0;
}
+207
View File
@@ -0,0 +1,207 @@
/**********************************************************************
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
/*
 * Syntax table OnigSyntaxPosixBasic.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options (judging by the constant prefixes and the accessor names in this
 * file) -- confirm against the OnigSyntaxType declaration.
 * Here: the common POSIX operator set plus the ESC_LPAREN_SUBEXP and
 * ESC_BRACE_INTERVAL operator flags; no op2 or behavior flags;
 * SINGLELINE and MULTILINE options.
 */
OnigSyntaxType OnigSyntaxPosixBasic = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_BRACE_INTERVAL )
, 0
, 0
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};
/*
 * Syntax table OnigSyntaxPosixExtended.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options -- confirm against the OnigSyntaxType declaration.
 * Here: the common POSIX operator set plus the unescaped LPAREN_SUBEXP,
 * BRACE_INTERVAL, PLUS_ONE_INF, QMARK_ZERO_ONE and VBAR_ALT operator flags;
 * no op2 flags; five behavior flags; SINGLELINE and MULTILINE options.
 */
OnigSyntaxType OnigSyntaxPosixExtended = {
( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_LPAREN_SUBEXP |
ONIG_SYN_OP_BRACE_INTERVAL |
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
};
/*
 * Syntax table OnigSyntaxEmacs.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options -- confirm against the OnigSyntaxType declaration.
 * Here: an explicit operator flag list; ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR as
 * the only op2 flag; ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC as the only behavior
 * flag; no options.
 */
OnigSyntaxType OnigSyntaxEmacs = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC |
ONIG_SYN_OP_ESC_BRACE_INTERVAL |
ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF |
ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_DECIMAL_BACKREF |
ONIG_SYN_OP_LINE_ANCHOR | ONIG_SYN_OP_ESC_CONTROL_CHARS )
, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
, ONIG_OPTION_NONE
};
/*
 * Syntax table OnigSyntaxGrep.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options -- confirm against the OnigSyntaxType declaration.
 * Here: an explicit operator flag list; no op2 flags; the
 * ALLOW_EMPTY_RANGE_IN_CC and NOT_NEWLINE_IN_NEGATIVE_CC behavior flags;
 * no options.
 */
OnigSyntaxType OnigSyntaxGrep = {
( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
ONIG_SYN_OP_ESC_VBAR_ALT |
ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
ONIG_SYN_OP_ESC_W_WORD | ONIG_SYN_OP_ESC_B_WORD_BOUND |
ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | ONIG_SYN_OP_DECIMAL_BACKREF )
, 0
, ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
, ONIG_OPTION_NONE
};
/*
 * Syntax table OnigSyntaxGnuRegex.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options -- confirm against the OnigSyntaxType declaration.
 * Here: SYN_GNU_REGEX_OP as operators, no op2 flags, SYN_GNU_REGEX_BV as
 * behavior, no options.  The Java and Perl tables build on the same two
 * SYN_GNU_REGEX_* sets.
 */
OnigSyntaxType OnigSyntaxGnuRegex = {
SYN_GNU_REGEX_OP
, 0
, SYN_GNU_REGEX_BV
, ONIG_OPTION_NONE
};
/*
 * Syntax table OnigSyntaxJava.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options -- confirm against the OnigSyntaxType declaration.
 * Here: SYN_GNU_REGEX_OP plus extra operator flags, with
 * ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out; an explicit op2 list;
 * SYN_GNU_REGEX_BV plus ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND as behavior;
 * ONIG_OPTION_SINGLELINE as option.
 */
OnigSyntaxType OnigSyntaxJava = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL | ONIG_SYN_OP2_CCLASS_SET_OP |
ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_U_HEX4 |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
, ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
, ONIG_OPTION_SINGLELINE
};
/*
 * Syntax table OnigSyntaxPerl.
 * The four initializers appear to be, in order, op / op2 / behavior /
 * options -- confirm against the OnigSyntaxType declaration.
 * Here: SYN_GNU_REGEX_OP plus extra operator flags, with
 * ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out; an explicit op2 list;
 * SYN_GNU_REGEX_BV as behavior; ONIG_OPTION_SINGLELINE as option.
 */
OnigSyntaxType OnigSyntaxPerl = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
, SYN_GNU_REGEX_BV
, ONIG_OPTION_SINGLELINE
};
/*
 * Install `syntax` as OnigDefaultSyntax; a NULL argument selects
 * ONIG_SYNTAX_RUBY.  Always returns 0.
 */
extern int
onig_set_default_syntax(OnigSyntaxType* syntax)
{
  OnigSyntaxType* chosen = syntax;

  if (IS_NULL(chosen)) {
    chosen = ONIG_SYNTAX_RUBY;
  }
  OnigDefaultSyntax = chosen;
  return 0;
}
/* Copy the whole syntax structure `from` into `to` by struct assignment. */
extern void
onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from)
{
  to[0] = from[0];
}
/* Replace the `op` member of `syntax` with the given value. */
extern void
onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op)
{
  OnigSyntaxType* const target = syntax;

  target->op = op;
}
/* Replace the `op2` member of `syntax` with the given value. */
extern void
onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2)
{
  OnigSyntaxType* const target = syntax;

  target->op2 = op2;
}
/* Replace the `behavior` member of `syntax` with the given value. */
extern void
onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior)
{
  OnigSyntaxType* const target = syntax;

  target->behavior = behavior;
}
/* Replace the `options` member of `syntax` with the given value. */
extern void
onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
{
  OnigSyntaxType* const target = syntax;

  target->options = options;
}
/* Return the `op` member of `syntax`. */
extern unsigned int
onig_get_syntax_op(OnigSyntaxType* syntax)
{
  const unsigned int value = syntax->op;

  return value;
}
/* Return the `op2` member of `syntax`. */
extern unsigned int
onig_get_syntax_op2(OnigSyntaxType* syntax)
{
  const unsigned int value = syntax->op2;

  return value;
}
/* Return the `behavior` member of `syntax`. */
extern unsigned int
onig_get_syntax_behavior(OnigSyntaxType* syntax)
{
  const unsigned int value = syntax->behavior;

  return value;
}
/* Return the `options` member of `syntax`. */
extern OnigOptionType
onig_get_syntax_options(OnigSyntaxType* syntax)
{
  const OnigOptionType value = syntax->options;

  return value;
}
#ifdef USE_VARIABLE_META_CHARS
/*
 * Store `code` into the slot of enc->meta_char_table selected by `what`
 * (one of the ONIG_META_CHAR_* selectors).
 *
 * Returns 0 on success and ONIGERR_INVALID_ARGUMENT when `what` is not a
 * known selector; in that case the table is left untouched.
 */
extern int onig_set_meta_char(OnigEncoding enc,
                              unsigned int what, OnigCodePoint code)
{
  switch (what) {
  case ONIG_META_CHAR_ESCAPE:
    enc->meta_char_table.esc = code;
    return 0;

  case ONIG_META_CHAR_ANYCHAR:
    enc->meta_char_table.anychar = code;
    return 0;

  case ONIG_META_CHAR_ANYTIME:
    enc->meta_char_table.anytime = code;
    return 0;

  case ONIG_META_CHAR_ZERO_OR_ONE_TIME:
    enc->meta_char_table.zero_or_one_time = code;
    return 0;

  case ONIG_META_CHAR_ONE_OR_MORE_TIME:
    enc->meta_char_table.one_or_more_time = code;
    return 0;

  case ONIG_META_CHAR_ANYCHAR_ANYTIME:
    enc->meta_char_table.anychar_anytime = code;
    return 0;

  default:
    break;
  }

  return ONIGERR_INVALID_ARGUMENT;
}
#endif /* USE_VARIABLE_META_CHARS */
+76
View File
@@ -0,0 +1,76 @@
/**********************************************************************
regtrav.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "regint.h"
#ifdef USE_CAPTURE_HISTORY
/*
 * Depth-first walk over the capture tree rooted at `node`.
 *
 * `at` is a bit mask: with ONIG_TRAVERSE_CALLBACK_AT_FIRST the callback
 * runs before a node's children are visited, with
 * ONIG_TRAVERSE_CALLBACK_AT_LAST it runs after them (both may be set).
 * The callback receives the node's group, beg and end members, the
 * nesting `level`, the phase flag and the opaque `arg`.
 *
 * The first non-zero callback result aborts the walk and is returned;
 * otherwise (including for a null node) the result is 0.
 */
static int
capture_tree_traverse(OnigCaptureTreeNode* node, int at,
                      int(*callback_func)(int,int,int,int,int,void*),
                      int level, void* arg)
{
  int status;
  int child;

  if (node == (OnigCaptureTreeNode* )0) return 0;

  if (at & ONIG_TRAVERSE_CALLBACK_AT_FIRST) {
    status = callback_func(node->group, node->beg, node->end,
                           level, ONIG_TRAVERSE_CALLBACK_AT_FIRST, arg);
    if (status) return status;
  }

  child = 0;
  while (child < node->num_childs) {
    status = capture_tree_traverse(node->childs[child], at,
                                   callback_func, level + 1, arg);
    if (status) return status;
    child++;
  }

  if (at & ONIG_TRAVERSE_CALLBACK_AT_LAST) {
    status = callback_func(node->group, node->beg, node->end,
                           level, ONIG_TRAVERSE_CALLBACK_AT_LAST, arg);
    if (status) return status;
  }

  return 0;
}
#endif /* USE_CAPTURE_HISTORY */
/*
 * Public entry point for walking the capture history stored in `region`.
 *
 * When USE_CAPTURE_HISTORY is defined this traverses region->history_root
 * starting at level 0 and returns the traversal result; otherwise it
 * returns ONIG_NO_SUPPORT_CONFIG without touching any argument.
 */
extern int
onig_capture_tree_traverse(OnigRegion* region, int at,
int(*callback_func)(int,int,int,int,int,void*), void* arg)
{
#ifdef USE_CAPTURE_HISTORY
return capture_tree_traverse(region->history_root, at,
callback_func, 0, arg);
#else
return ONIG_NO_SUPPORT_CONFIG;
#endif
}
+55
View File
@@ -0,0 +1,55 @@
/**********************************************************************
regversion.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "oniguruma.h"
#include <stdio.h>
/*
 * Return the library version as "MAJOR.MINOR.TEENY".
 *
 * The string lives in a static buffer that is rewritten on every call.
 * Formatting is bounded by the buffer size, so an unexpectedly long
 * version component is truncated instead of overrunning the buffer.
 */
extern const char*
onig_version(void)
{
  static char s[12];

  snprintf(s, sizeof(s), "%d.%d.%d",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
  return s;
}
/*
 * Return the copyright banner, including the library version.
 *
 * The string lives in a static buffer that is rewritten on every call.
 * Formatting is bounded by the buffer size, so an unexpectedly long
 * version component is truncated instead of overrunning the buffer.
 */
extern const char*
onig_copyright(void)
{
  static char s[58];

  snprintf(s, sizeof(s),
           "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
  return s;
}
+717
View File
@@ -0,0 +1,717 @@
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <malloc.h>
#endif
#ifdef NOT_RUBY
#include "regint.h"
#else
#ifdef RUBY_PLATFORM
#define xmalloc ruby_xmalloc
#define xcalloc ruby_xcalloc
#define xrealloc ruby_xrealloc
#define xfree ruby_xfree
void *xmalloc(long);
void *xcalloc(long, long);
void *xrealloc(void *, long);
void xfree(void *);
#endif
#endif
#include "st.h"
/* One node of a bin's singly linked collision chain. */
typedef struct st_table_entry st_table_entry;
struct st_table_entry {
unsigned int hash; /* full hash of the key, before reduction modulo num_bins */
st_data_t key;
st_data_t record; /* value stored for the key */
st_table_entry *next; /* next entry in the same bin, or 0 at the end */
};
#define ST_DEFAULT_MAX_DENSITY 5
#define ST_DEFAULT_INIT_TABLE_SIZE 11
/*
 * ST_DEFAULT_MAX_DENSITY is the largest average number of entries per
 * bin that is tolerated before the number of bins is increased.
 *
 * ST_DEFAULT_INIT_TABLE_SIZE is the default number of bins allocated
 * initially.  It is not referenced by the code in this file;
 * st_init_table() arrives at the same value through new_size(0).
 *
 */
static int numcmp(long, long);
static int numhash(long);
/* Callback set for tables whose keys are plain numbers: keys are compared
 * and hashed by value and need neither freeing nor cloning. */
static struct st_hash_type type_numhash = {
numcmp,
numhash,
st_nothing_key_free,
st_nothing_key_clone
};
/* extern int strcmp(const char *, const char *); */
static int strhash(const char *);
/* Callback set for tables keyed by NUL-terminated strings: strcmp() for
 * comparison, strhash() for hashing.  Keys are neither freed nor cloned
 * by the table. */
static struct st_hash_type type_strhash = {
strcmp,
strhash,
st_nothing_key_free,
st_nothing_key_clone
};
static int strend_cmp(st_strend_key*, st_strend_key*);
static int strend_hash(st_strend_key*);
static int strend_key_free(st_data_t key);
static st_data_t strend_key_clone(st_data_t x);
/* Callback set for tables keyed by a [s, end) byte range (st_strend_key).
 * The key structs are heap objects: the table frees them with
 * strend_key_free() and duplicates them with strend_key_clone(). */
static struct st_hash_type type_strend_hash = {
strend_cmp,
strend_hash,
strend_key_free,
strend_key_clone
};
static void rehash(st_table *);
/* Allocate one object of `type` through xmalloc(); contents are not initialised. */
#define alloc(type) (type*)xmalloc((unsigned)sizeof(type))
/* Allocate an array of n elements of s bytes each through xcalloc(). */
#define Calloc(n,s) (char*)xcalloc((n),(s))
/* True when x and y are the same value, or the table's compare callback
 * returns 0 for them.  `table` is used unparenthesised, so pass a plain
 * identifier. */
#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
/* Full hash of key as produced by the table's hash callback. */
#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key))
/* Bin index of key: its full hash reduced modulo the number of bins. */
#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
/*
 * MINSIZE is the minimum size of a dictionary.
 */
#define MINSIZE 8
/*
Table of prime numbers 2^n+a, 3<=n<=30, terminated by a 0 sentinel.
*/
static const long primes[] = {
  8 + 3,
  16 + 3,
  32 + 5,
  64 + 3,
  128 + 3,
  256 + 27,
  512 + 9,
  1024 + 9,
  2048 + 5,
  4096 + 3,
  8192 + 27,
  16384 + 43,
  32768 + 3,
  65536 + 45,
  131072 + 29,
  262144 + 3,
  524288 + 21,
  1048576 + 7,
  2097152 + 17,
  4194304 + 15,
  8388608 + 9,
  16777216 + 43,
  33554432 + 35,
  67108864 + 15,
  134217728 + 29,
  268435456 + 3,
  536870912 + 11,
  1073741824 + 85,
  0
};

/*
 * Map a requested bin count to the prime used as the real bin count.
 *
 * Entry i of primes[] is paired with the power of two MINSIZE << i; the
 * first entry whose power of two is strictly greater than `size` is
 * returned.  Returns -1 when `size` is too large for the table.
 *
 * The scan stops before the trailing 0 sentinel, so 0 can never be handed
 * back as a bin count, and the largest shift performed is MINSIZE << 27,
 * which still fits in a 32-bit int.
 */
static int
new_size(int size)
{
  const int nprimes = (int )(sizeof(primes) / sizeof(primes[0])) - 1;
  int i;

  for (i = 0; i < nprimes; i++) {
    if (size < (MINSIZE << i)) return (int )primes[i];
  }

  /* Ran out of primes */
  return -1; /* should raise exception */
}
#ifdef HASH_LOG
static int collision = 0;   /* incremented through the COLLISION macro */
static int init_st = 0;     /* set once stat_col() has been registered with atexit() */

/*
 * atexit() hook: write the collision counter to /tmp/col.
 * If the file cannot be opened nothing is written.
 */
static void
stat_col(void)
{
  FILE *f = fopen("/tmp/col", "w");

  if (f == NULL) return;
  fprintf(f, "collision: %d\n", collision);
  fclose(f);
}
#endif
/*
 * Create an empty table that uses the callbacks in `type`.
 *
 * `size` is a hint; the actual number of bins is the prime chosen by
 * new_size().  Returns the new table, or 0 when the requested size is out
 * of range or an allocation fails (nothing is leaked in that case).
 */
st_table*
st_init_table_with_size(struct st_hash_type *type, int size)
{
  st_table *tbl;

#ifdef HASH_LOG
  if (init_st == 0) {
    init_st = 1;
    atexit(stat_col);
  }
#endif

  size = new_size(size); /* round up to prime number */
  if (size <= 0) return 0;

  tbl = alloc(st_table);
  if (tbl == 0) return 0;

  tbl->type = type;
  tbl->num_entries = 0;
  tbl->num_bins = size;
  tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
  if (tbl->bins == 0) {
    xfree(tbl);
    return 0;
  }

  return tbl;
}
/* Create an empty table for `type` with the default number of bins
 * (size hint 0). */
st_table*
st_init_table(struct st_hash_type *type)
{
  st_table *created = st_init_table_with_size(type, 0);

  return created;
}
/* Create an empty, default-sized table keyed by numbers (type_numhash). */
st_table*
st_init_numtable(void)
{
return st_init_table(&type_numhash);
}
/* Create an empty table keyed by numbers (type_numhash), sized from the
 * hint `size`. */
st_table*
st_init_numtable_with_size(int size)
{
  st_table *created = st_init_table_with_size(&type_numhash, size);

  return created;
}
/* Create an empty, default-sized table keyed by NUL-terminated strings
 * (type_strhash). */
st_table*
st_init_strtable(void)
{
return st_init_table(&type_strhash);
}
/* Create an empty table keyed by NUL-terminated strings (type_strhash),
 * sized from the hint `size`. */
st_table*
st_init_strtable_with_size(int size)
{
  st_table *created = st_init_table_with_size(&type_strhash, size);

  return created;
}
/* Create an empty table keyed by [s, end) byte ranges (type_strend_hash),
 * sized from the hint `size`. */
st_table*
st_init_strend_table_with_size(int size)
{
  st_table *created = st_init_table_with_size(&type_strend_hash, size);

  return created;
}
/*
 * Destroy `table`: every key is handed to the type's key_free callback,
 * then all entries, the bin array and the table itself are released.
 *
 * Entries, bins and the table are obtained through xmalloc()/xcalloc(),
 * so they are released with the matching xfree().
 */
void
st_free_table(st_table *table)
{
  register st_table_entry *ptr, *next;
  int i;

  for (i = 0; i < table->num_bins; i++) {
    ptr = table->bins[i];
    while (ptr != 0) {
      next = ptr->next;   /* read the link before the node is released */
      table->type->key_free(ptr->key);
      xfree(ptr);
      ptr = next;
    }
  }

  xfree(table->bins);
  xfree(table);
}
/*
 * True when `ptr` is a live entry that does NOT match: either its stored
 * hash differs from hash_val or the table's comparison rejects the key.
 * A null `ptr` yields false, which is what terminates the chain walk in
 * FIND_ENTRY.
 *
 * NOTE: the parameter name `key` also appears as the member name in
 * `(ptr)->key`, so the argument passed for `key` must itself be the plain
 * identifier `key`.
 */
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
/* COLLISION counts chain walks when HASH_LOG is defined and expands to
 * nothing otherwise. */
#ifdef HASH_LOG
#define COLLISION collision++
#else
#define COLLISION
#endif
/*
 * Locate the entry for the caller's variable `key` (captured from the
 * enclosing scope, not passed as a parameter).
 *
 * On exit bin_pos holds hash_val modulo the bin count and ptr is the
 * matching entry, or 0 when the chain holds no match.
 */
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
bin_pos = hash_val%(table)->num_bins;\
ptr = (table)->bins[bin_pos];\
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
COLLISION;\
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
ptr = ptr->next;\
}\
ptr = ptr->next;\
}\
} while (0)
/*
 * Look `key` up in `table`.
 *
 * Returns 1 and, when `value` is non-null, stores the associated record in
 * *value if the key is present; returns 0 and leaves *value untouched
 * otherwise.
 */
int
st_lookup(st_table *table, register st_data_t key, st_data_t *value)
{
  unsigned int hv, slot;
  register st_table_entry *hit;

  hv = do_hash(key, table);
  FIND_ENTRY(table, hit, hv, slot);   /* reads the parameter `key` implicitly */

  if (hit == 0) return 0;

  if (value != 0) *value = hit->record;
  return 1;
}
/*
 * Look up the byte range [str_key, end_key) in a strend table.
 *
 * A temporary key on the stack is used for the probe, so nothing is
 * allocated.  Return value and `value` handling are those of st_lookup().
 */
int
st_lookup_strend(st_table *table, const unsigned char* str_key,
                 const unsigned char* end_key, st_data_t *value)
{
  st_strend_key probe;

  probe.s   = (unsigned char* )str_key;
  probe.end = (unsigned char* )end_key;

  return st_lookup(table, (st_data_t )(&probe), value);
}
/*
 * Unconditionally link a new entry for (key, value) at the head of bin
 * bin_pos; no check for an existing entry with the same key is made.
 *
 * When the average number of entries per bin exceeds
 * ST_DEFAULT_MAX_DENSITY the table is rehashed first and bin_pos is
 * recomputed for the new bin count.
 *
 * NOTE: `key` also appears as the member name in `entry->key`, so the
 * argument passed for `key` must be the plain identifier `key`.  The
 * result of alloc() is not checked here.
 */
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
do {\
st_table_entry *entry;\
if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
rehash(table);\
bin_pos = hash_val % table->num_bins;\
}\
\
entry = alloc(st_table_entry);\
\
entry->hash = hash_val;\
entry->key = key;\
entry->record = value;\
entry->next = table->bins[bin_pos];\
table->bins[bin_pos] = entry;\
table->num_entries++;\
} while (0)
/*
 * Associate `value` with `key`.
 *
 * If the key is already present only its record is overwritten (the
 * stored key is kept) and 1 is returned.  Otherwise a new entry is added
 * and 0 is returned.
 */
int
st_insert(register st_table *table, register st_data_t key, st_data_t value)
{
  unsigned int hv, slot;
  register st_table_entry *hit;

  hv = do_hash(key, table);
  FIND_ENTRY(table, hit, hv, slot);   /* reads the parameter `key` implicitly */

  if (hit != 0) {
    hit->record = value;
    return 1;
  }

  ADD_DIRECT(table, key, value, hv, slot);
  return 0;
}
/*
 * Associate `value` with the byte range [str_key, end_key) in a strend
 * table.  Returns what st_insert() returns: 0 when a new entry was added,
 * 1 when an existing entry's record was replaced.
 *
 * A key struct is allocated for the insertion.  When the key already
 * exists st_insert() keeps the stored key and only updates the record, so
 * the freshly allocated struct is released here instead of being leaked.
 *
 * NOTE(review): the result of alloc() is not checked.
 */
int
st_insert_strend(st_table *table, const unsigned char* str_key,
                 const unsigned char* end_key, st_data_t value)
{
  st_strend_key* key;
  int result;

  key = alloc(st_strend_key);
  key->s = (unsigned char* )str_key;
  key->end = (unsigned char* )end_key;

  result = st_insert(table, (st_data_t )key, value);
  if (result) {
    xfree(key);   /* table did not take ownership of the new key */
  }
  return result;
}
/*
 * Add (key, value) without checking whether the key is already present;
 * the new entry is linked at the head of its bin.
 */
void
st_add_direct(st_table *table, st_data_t key, st_data_t value)
{
  unsigned int hv, slot;

  hv = do_hash(key, table);
  slot = hv % table->num_bins;
  ADD_DIRECT(table, key, value, hv, slot);
}
/*
 * Add an entry for the byte range [str_key, end_key) to a strend table
 * without checking for an existing entry.  A key struct is allocated and
 * handed to the table.
 */
void
st_add_direct_strend(st_table *table, const unsigned char* str_key,
                     const unsigned char* end_key, st_data_t value)
{
  st_strend_key* range = alloc(st_strend_key);

  range->s   = (unsigned char* )str_key;
  range->end = (unsigned char* )end_key;

  st_add_direct(table, (st_data_t )range, value);
}
/*
 * Grow the bin array to the next size given by new_size() and relink every
 * entry into its new bin using the hash stored in the entry (the hash
 * callback is not invoked again).
 *
 * If no larger size is available or the new bin array cannot be
 * allocated, the table is left exactly as it was: still valid, only
 * denser than ST_DEFAULT_MAX_DENSITY.
 */
static void
rehash(register st_table *table)
{
  register st_table_entry *ptr, *next, **new_bins;
  int i, old_num_bins = table->num_bins, new_num_bins;
  unsigned int hash_val;

  new_num_bins = new_size(old_num_bins+1);
  if (new_num_bins <= 0) return;

  new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
  if (new_bins == 0) return;

  for (i = 0; i < old_num_bins; i++) {
    ptr = table->bins[i];
    while (ptr != 0) {
      next = ptr->next;
      hash_val = ptr->hash % new_num_bins;
      ptr->next = new_bins[hash_val];
      new_bins[hash_val] = ptr;
      ptr = next;
    }
  }

  xfree(table->bins);   /* bins come from xcalloc(), release with xfree() */
  table->num_bins = new_num_bins;
  table->bins = new_bins;
}
/*
 * Return a copy of `old_table` with the same bin count.  Every entry is
 * duplicated and its key is passed through the type's key_clone callback;
 * records are copied as-is.  Within a bin the copied chain ends up in
 * reverse order because each copy is linked at the head.
 *
 * Returns 0 when an allocation fails.  In that case everything built so
 * far -- including the entries and cloned keys already copied -- is
 * released again.
 */
st_table*
st_copy(st_table *old_table)
{
  st_table *new_table;
  st_table_entry *ptr, *entry, *next;
  int i, num_bins = old_table->num_bins;

  new_table = alloc(st_table);
  if (new_table == 0) {
    return 0;
  }

  *new_table = *old_table;
  new_table->bins = (st_table_entry**)
    Calloc((unsigned)num_bins, sizeof(st_table_entry*));
  if (new_table->bins == 0) {
    xfree(new_table);
    return 0;
  }

  for (i = 0; i < num_bins; i++) {
    new_table->bins[i] = 0;
    ptr = old_table->bins[i];
    while (ptr != 0) {
      entry = alloc(st_table_entry);
      if (entry == 0) {
        /* undo bins 0..i, the only ones initialised so far */
        for (; i >= 0; i--) {
          for (ptr = new_table->bins[i]; ptr != 0; ptr = next) {
            next = ptr->next;
            new_table->type->key_free(ptr->key);
            xfree(ptr);
          }
        }
        xfree(new_table->bins);
        xfree(new_table);
        return 0;
      }
      *entry = *ptr;
      entry->key = old_table->type->key_clone(ptr->key);
      entry->next = new_table->bins[i];
      new_table->bins[i] = entry;
      ptr = ptr->next;
    }
  }
  return new_table;
}
/*
 * Remove the entry matching *key.
 *
 * On success returns 1, stores the removed record in *value (when `value`
 * is non-null) and overwrites *key with the key that was stored in the
 * table; the key_free callback is NOT invoked, so the stored key is handed
 * back to the caller.  Returns 0 when no entry matches; *value is set to 0
 * only when the key's bin is completely empty.
 *
 * Entries are obtained through xmalloc(), so they are released with
 * xfree().
 */
int
st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
{
  unsigned int hash_val;
  st_table_entry *tmp;
  register st_table_entry *ptr;

  hash_val = do_hash_bin(*key, table);
  ptr = table->bins[hash_val];

  if (ptr == 0) {
    if (value != 0) *value = 0;
    return 0;
  }

  /* match at the head of the chain */
  if (EQUAL(table, *key, ptr->key)) {
    table->bins[hash_val] = ptr->next;
    table->num_entries--;
    if (value != 0) *value = ptr->record;
    *key = ptr->key;
    xfree(ptr);
    return 1;
  }

  /* match further down: unlink the successor of ptr */
  for (; ptr->next != 0; ptr = ptr->next) {
    if (EQUAL(table, ptr->next->key, *key)) {
      tmp = ptr->next;
      ptr->next = ptr->next->next;
      table->num_entries--;
      if (value != 0) *value = tmp->record;
      *key = tmp->key;
      xfree(tmp);
      return 1;
    }
  }

  return 0;
}
/*
 * Logically delete the entry matching *key without unlinking it.
 *
 * The entry stays in its chain but its key and record are overwritten
 * with the marker `never`, and num_entries is decremented.  Entries whose
 * key already equals `never` are skipped.  On success returns 1, writes
 * the stored key back to *key and the record to *value (when non-null).
 * Returns 0 when nothing matches; *value is set to 0 only when the bin is
 * completely empty.
 */
int
st_delete_safe(register st_table *table, register st_data_t *key,
               st_data_t *value, st_data_t never)
{
  unsigned int bin;
  register st_table_entry *node;

  bin = do_hash_bin(*key, table);
  node = table->bins[bin];

  if (node == 0) {
    if (value != 0) *value = 0;
    return 0;
  }

  while (node != 0) {
    if ((node->key != never) && EQUAL(table, node->key, *key)) {
      table->num_entries--;
      *key = node->key;
      if (value != 0) *value = node->record;
      node->key = node->record = never;
      return 1;
    }
    node = node->next;
  }

  return 0;
}
/* st_foreach() callback: request deletion of entries whose record equals
 * the marker `never`, keep everything else. */
static int
delete_never(st_data_t key, st_data_t value, st_data_t never)
{
  return (value == never) ? ST_DELETE : ST_CONTINUE;
}
/*
 * Physically remove the entries that st_delete_safe() marked with `never`.
 *
 * st_foreach() decrements num_entries for each entry it deletes, but
 * st_delete_safe() already did so when marking, so the count is saved
 * before the sweep and restored afterwards.
 */
void
st_cleanup_safe(st_table *table, st_data_t never)
{
  const int saved_count = table->num_entries;

  st_foreach(table, delete_never, never);
  table->num_entries = saved_count;
}
/*
 * Call func(key, record, arg, 0) for every entry and act on its result:
 *
 *   ST_CONTINUE  move on to the next entry
 *   ST_STOP      end the iteration immediately
 *   ST_DELETE    unlink the entry, pass its key to the type's key_free
 *                callback, release the entry and move on
 *   ST_CHECK     verify that the current entry is still reachable from
 *                its bin; if not, call func(0, 0, arg, 1) once as an error
 *                notice and end the iteration, otherwise behave like
 *                ST_CONTINUE
 *
 * Entries are obtained through xmalloc(), so deleted entries are released
 * with xfree().
 */
void
st_foreach(st_table *table, int (*func)(), st_data_t arg)
{
  st_table_entry *ptr, *last, *tmp;
  enum st_retval retval;
  int i;

  for (i = 0; i < table->num_bins; i++) {
    last = 0;   /* predecessor of ptr in the chain, 0 while ptr is the head */
    for (ptr = table->bins[i]; ptr != 0;) {
      retval = (*func)(ptr->key, ptr->record, arg, 0);
      switch (retval) {
      case ST_CHECK: /* check if hash is modified during iteration */
        tmp = 0;
        if (i < table->num_bins) {
          for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
            if (tmp == ptr) break;
          }
        }
        if (!tmp) {
          /* call func with error notice */
          retval = (*func)(0, 0, arg, 1);
          return;
        }
        /* fall through */
      case ST_CONTINUE:
        last = ptr;
        ptr = ptr->next;
        break;
      case ST_STOP:
        return;
      case ST_DELETE:
        tmp = ptr;
        if (last == 0) {
          table->bins[i] = ptr->next;
        }
        else {
          last->next = ptr->next;
        }
        ptr = ptr->next;
        table->type->key_free(tmp->key);
        xfree(tmp);
        table->num_entries--;
        break;
      }
    }
  }
}
/*
 * Hash a NUL-terminated string.  The algorithm is chosen at compile time:
 * HASH_ELFHASH, HASH_PERL, or (default) a multiply-by-997 polynomial.
 *
 * All arithmetic is carried out in unsigned int so that wrap-around is
 * well defined; the original signed accumulator overflowed (undefined
 * behaviour) on longer strings.  Where the original shifted a signed
 * value right, the value is converted back to int for that shift, so the
 * results are the same as those of a wrapping two's-complement build of
 * the original.
 */
static int
strhash(register const char *string)
{
  register int c;
  register unsigned int h = 0;

#ifdef HASH_ELFHASH
  register unsigned int g;

  while ((c = *string++) != '\0') {
    h = ( h << 4 ) + c;
    if ( (g = h & 0xF0000000) != 0 )
      h ^= g >> 24;
    h &= ~g;
  }
  return h;
#elif HASH_PERL
  while ((c = *string++) != '\0') {
    h += c;
    h += (h << 10);
    h ^= (unsigned int )((int )h >> 6);
  }
  h += (h << 3);
  h ^= (unsigned int )((int )h >> 11);
  return (int )(h + (h << 15));
#else
  while ((c = *string++) != '\0') {
    h = h*997 + c;
  }
  return (int )(h + (unsigned int )((int )h >> 5));
#endif
}
/* Compare callback for numeric keys: 0 when x and y are equal, 1 otherwise. */
static int
numcmp(long x, long y)
{
  return (x == y) ? 0 : 1;
}
/* Hash callback for numeric keys: the key itself, converted to int. */
static int
numhash(long n)
{
  int hashed = n;

  return hashed;
}
/* key_free callback for key types the table does not own: does nothing
 * and returns 0. */
extern int
st_nothing_key_free(st_data_t key) { return 0; }
/* key_clone callback for key types that are copied by value: returns the
 * key unchanged. */
extern st_data_t
st_nothing_key_clone(st_data_t x) { return x; }
/*
 * Compare callback for [s, end) keys.  Returns 0 when both ranges have the
 * same length and the same bytes.  Returns 1 when the lengths differ, and
 * otherwise the difference of the first pair of mismatching bytes.
 */
static int strend_cmp(st_strend_key* x, st_strend_key* y)
{
  unsigned char *a, *b;

  if ((x->end - x->s) != (y->end - y->s))
    return 1;

  for (a = x->s, b = y->s; a < x->end; a++, b++) {
    int diff = (int )*a - (int )*b;
    if (diff != 0) return diff;
  }

  return 0;
}
/*
 * Hash callback for [s, end) keys: multiply-by-997 polynomial over the
 * bytes of the range, finished with a fold of the value shifted right
 * by 5.
 *
 * The accumulator is unsigned so that wrap-around on longer keys is well
 * defined (the original signed accumulator overflowed).  The final shift
 * is done on the value converted back to int, which keeps the results of
 * a wrapping two's-complement build of the original.
 */
static int strend_hash(st_strend_key* x)
{
  unsigned int val = 0;
  unsigned char *p;

  for (p = x->s; p < x->end; p++) {
    val = val * 997 + *p;
  }

  return (int )(val + (unsigned int )((int )val >> 5));
}
/* key_free callback for strend tables: release the key struct itself with
 * xfree().  The bytes it points at are not touched.  Always returns 0. */
static int strend_key_free(st_data_t x)
{
xfree((void* )x);
return 0;
}
/*
 * key_clone callback for strend tables: allocate a new key struct holding
 * the same s/end pointers as the original.  Only the struct is duplicated;
 * both keys refer to the same bytes.
 */
static st_data_t strend_key_clone(st_data_t x)
{
  st_strend_key* source = (st_strend_key* )x;
  st_strend_key* duplicate = alloc(st_strend_key);

  duplicate->s   = source->s;
  duplicate->end = source->end;

  return (st_data_t )duplicate;
}
+77
View File
@@ -0,0 +1,77 @@
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* @(#) st.h 5.1 89/12/14 */
#ifndef ST_INCLUDED
#define ST_INCLUDED
/* Generic slot type for keys and records; pointer keys are stored by
 * casting them to st_data_t. */
typedef unsigned long st_data_t;
#define ST_DATA_T_DEFINED
typedef struct st_table st_table;
/* Per-key-type callbacks.  They are declared without parameter lists, so
 * calls through them are not type-checked by the compiler. */
struct st_hash_type {
int (*compare)(); /* returns 0 when two keys are equal */
int (*hash)(); /* full hash of a key */
int (*key_free)(); /* called on a key when its entry is destroyed */
st_data_t (*key_clone)(); /* called on each key when a table is copied */
};
/* A chained hash table. */
struct st_table {
struct st_hash_type *type; /* callbacks for this table's key type */
int num_bins; /* length of the bins array */
int num_entries; /* number of live entries */
struct st_table_entry **bins; /* heads of the per-bin collision chains */
};
/* Key describing the byte range [s, end); the range needs no terminator. */
typedef struct {
unsigned char* s; /* first byte of the key */
unsigned char* end; /* one past the last byte of the key */
} st_strend_key;
/* Membership test: st_lookup() with a null value pointer. */
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
/* Values an st_foreach() callback returns to steer the iteration. */
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
/* _(args) wraps prototype parameter lists; unless already defined it
 * expands to the list unchanged. */
#ifndef _
# define _(args) args
#endif
/* ANYARGS stands for "unspecified parameters": `...` in C++, an empty
 * parameter list in C. */
#ifndef ANYARGS
# ifdef __cplusplus
# define ANYARGS ...
# else
# define ANYARGS
# endif
#endif
/* Table construction. */
st_table *st_init_table _((struct st_hash_type *));
st_table *st_init_table_with_size _((struct st_hash_type *, int));
st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
st_table *st_init_strend_table_with_size _((int));
/* Removal, insertion and lookup.  The *_strend variants take the key as a
 * [start, end) byte range. */
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*));
/* Iteration, unchecked insertion, destruction, cleanup and copying. */
void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
void st_add_direct _((st_table *, st_data_t, st_data_t));
void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
/* Default key callbacks for key types that need no cloning or freeing. */
extern st_data_t st_nothing_key_clone _((st_data_t key));
extern int st_nothing_key_free _((st_data_t key));
/* Constant function-pointer values (0 and -2 cast to a function pointer).
 * NOTE(review): presumably sentinels meaning "numeric compare/hash"; the
 * accompanying st.c does not reference them -- confirm before relying on
 * them. */
#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
#define st_numcmp ST_NUMCMP
#define st_numhash ST_NUMHASH
#endif /* ST_INCLUDED */
+11
View File
@@ -0,0 +1,11 @@
--TEST--
Bug #31911 (mb_decode_mimeheader() is case-sensitive to hex escapes)
--FILE--
<?php
// Both headers Q-encode "???" as hex escapes; only the case of the hex
// digits differs (=3F vs =3f), and both must decode identically.
echo mb_decode_mimeheader("Works: =?iso-8859-1?q?=3F=3F=3F?=");
echo "\n";
echo mb_decode_mimeheader("Fails: =?iso-8859-1?q?=3f=3f=3f?=")
?>
--EXPECT--
Works: ???
Fails: ???