1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Update uriparser to version 0.9.9 (#19711)

This commit is contained in:
Máté Kocsis
2025-09-04 23:58:08 +02:00
committed by GitHub
parent a13dc047ac
commit 01ae278c47
30 changed files with 4788 additions and 116 deletions

View File

@@ -17,7 +17,11 @@ URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.
$URIPARSER_DIR/src/UriEscape.c $URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
$URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/src/UriNormalizeBase.c \
$URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \
$URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c"
$URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriSetFragment.c \
$URIPARSER_DIR/src/UriSetHostAuto.c $URIPARSER_DIR/src/UriSetHostCommon.c $URIPARSER_DIR/src/UriSetHostIp4.c \
$URIPARSER_DIR/src/UriSetHostIp6.c $URIPARSER_DIR/src/UriSetHostIpFuture.c $URIPARSER_DIR/src/UriSetHostRegName.c \
$URIPARSER_DIR/src/UriSetPath.c $URIPARSER_DIR/src/UriSetPort.c $URIPARSER_DIR/src/UriSetQuery.c \
$URIPARSER_DIR/src/UriSetScheme.c $URIPARSER_DIR/src/UriSetUserInfo.c $URIPARSER_DIR/src/UriShorten.c $URIPARSER_DIR/src/UriVersion.c"
PHP_NEW_EXTENSION(uri, [php_uri.c php_uri_common.c uri_parser_rfc3986.c uri_parser_whatwg.c uri_parser_php_parse_url.c $URIPARSER_SOURCES], [no],,[-I$ext_srcdir/$URIPARSER_DIR/include -DURI_STATIC_BUILD -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1])
PHP_ADD_EXTENSION_DEP(uri, lexbor)

View File

@@ -5,5 +5,8 @@ AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uri
ADD_FLAG("CFLAGS_URI", "/D URI_STATIC_BUILD");
ADD_EXTENSION_DEP('uri', 'lexbor');
ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriCopy.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c UriShorten.c", "uri");
ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriCopy.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c \
UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c \
UriSetFragment.c UriSetHostAuto.c UriSetHostCommon.c UriSetHostIp4.c UriSetHostIp6.c UriSetHostIpFuture.c UriSetHostRegName.c \
UriSetPath.c UriSetPort.c UriSetQuery.c UriSetScheme.c UriSetUserInfo.c UriShorten.c UriVersion.c", "uri");
PHP_INSTALL_HEADERS("ext/uri", "php_uri.h php_uri_common.h uri_parser_rfc3986.h uri_parser_whatwg.h uri_parser_php_parse_url.h uriparser/src uriparser/include");

View File

@@ -0,0 +1,36 @@
uriparser - RFC 3986 URI parsing library
Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
3. Neither the name of the copyright holder nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large Load Diff

View File

@@ -48,14 +48,15 @@
/* Version helper macro */
#define URI_ANSI_TO_UNICODE(x) L##x
#define URI_ANSI_TO_UNICODE_HELPER(x) L ## x
#define URI_ANSI_TO_UNICODE(x) URI_ANSI_TO_UNICODE_HELPER(x)
/* Version */
#define URI_VER_MAJOR 0
#define URI_VER_MINOR 9
#define URI_VER_RELEASE 8
#define URI_VER_RELEASE 9
#define URI_VER_SUFFIX_ANSI ""
#define URI_VER_SUFFIX_UNICODE URI_ANSI_TO_UNICODE(URI_VER_SUFFIX_ANSI)
@@ -148,6 +149,17 @@ typedef int UriBool; /**< Boolean type */
/* Error specific to uriTestMemoryManager */
#define URI_ERROR_MEMORY_MANAGER_FAULTY 11 /* [>=0.9.0] The UriMemoryManager given did not pass the test suite */
/* Error specific to uriSetUserInfo */
#define URI_ERROR_SETUSERINFO_HOST_NOT_SET 12 /* [>=0.9.9] The %URI given does not have the host set */
/* Error specific to uriSetPort */
#define URI_ERROR_SETPORT_HOST_NOT_SET 13 /* [>=0.9.9] The %URI given does not have the host set */
/* Error specific to uriSetHost* */
#define URI_ERROR_SETHOST_USERINFO_SET 14 /* [>=0.9.9] The %URI given does have user info set */
#define URI_ERROR_SETHOST_PORT_SET 15 /* [>=0.9.9] The %URI given does have a port set */
#ifndef URI_DOXYGEN
# include <stdio.h> /* For NULL, snprintf */

View File

@@ -88,12 +88,15 @@ extern "C" {
/**
* Converts a IPv4 text representation into four bytes.
* Converts an IPv4 text representation into four bytes.
*
* @param octetOutput Output destination
* @param first First character of IPv4 text to parse
* @param afterLast Position to stop parsing at
* @return Error code or 0 on success
*
* @see uriParseIpSixAddressA
* @see uriParseIpSixAddressMmA
*/
URI_PUBLIC int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput,
const URI_CHAR * first, const URI_CHAR * afterLast);

View File

@@ -0,0 +1,36 @@
uriparser - RFC 3986 URI parsing library
Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
3. Neither the name of the copyright holder nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -68,6 +68,10 @@
#include <assert.h>
/*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X");
/*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT(".");
/*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT("..");
@@ -83,6 +87,32 @@ void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) {
/* Releases every path segment of the given URI and leaves the path empty.
 * The text of a segment is released only when uri->owner is URI_TRUE and
 * the segment's text range is non-empty (first != afterLast).
 * Always returns URI_SUCCESS. */
int URI_FUNC(FreeUriPath)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
	URI_TYPE(PathSegment) * current;
	URI_TYPE(PathSegment) * following;

	assert(uri != NULL);
	assert(memory != NULL);

	if (uri->pathHead == NULL) {
		return URI_SUCCESS; /* i.e. nothing to do; .pathTail is left untouched */
	}

	for (current = uri->pathHead; current != NULL; current = following) {
		/* Remember the successor before this node gets released */
		following = current->next;

		if ((uri->owner == URI_TRUE)
				&& (current->text.first != current->text.afterLast)) {
			memory->free(memory, (URI_CHAR *)current->text.first);
		}

		/* Wipe the node's members prior to handing it back */
		current->text.first = NULL;
		current->text.afterLast = NULL;
		current->next = NULL;
		memory->free(memory, current);
	}

	uri->pathHead = NULL;
	uri->pathTail = NULL;

	return URI_SUCCESS;
}
/* Compares two text ranges for equal text content */
int URI_FUNC(CompareRange)(
const URI_TYPE(TextRange) * a,
@@ -137,11 +167,11 @@ UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange,
UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange,
const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory) {
const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory) {
if (sourceRange->first == NULL) {
destRange->first = NULL;
destRange->afterLast = NULL;
} else if (sourceRange->first == sourceRange->afterLast && useSafe) {
} else if (sourceRange->first == sourceRange->afterLast) {
destRange->first = URI_FUNC(SafeToPointTo);
destRange->afterLast = URI_FUNC(SafeToPointTo);
} else {
@@ -464,14 +494,6 @@ unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) {
URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) {
/* Uppercase recommended in section 2.1. of RFC 3986 *
* https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
return URI_FUNC(HexToLetterEx)(value, URI_TRUE);
}
URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
switch (value) {
case 0: return _UT('0');
@@ -498,11 +520,15 @@ URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
/* Checks if a URI has the host component set.
 *
 * Returns URI_FALSE for a NULL URI; otherwise reports whether any of
 * .hostText.first, .hostData.ip4 or .hostData.ip6 is non-NULL. */
UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri) {
	/* NOTE: .hostData.ipFuture.first is not being checked, *
	 *       because we do check .hostText.first and        *
	 *       .hostData.ipFuture.first has to be identical to *
	 *       .hostText.first if set, and hence there is     *
	 *       no more information to be gained.              */
	return (uri != NULL)
			&& ((uri->hostText.first != NULL)
				|| (uri->hostData.ip4 != NULL)
				|| (uri->hostData.ip6 != NULL)
			);
}
@@ -631,6 +657,130 @@ UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri,
/* Allocates a new "." path segment and makes it the new head of the path
 * of the given URI.  For a URI with uri->owner == URI_TRUE the segment text
 * is a fresh copy made via CopyRange; otherwise the text range points at
 * the shared ConstPwd constant.
 *
 * Returns URI_TRUE on success, URI_FALSE if an allocation failed
 * (in which case the path is left unchanged). */
static UriBool URI_FUNC(PrependNewDotSegment)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
	URI_TYPE(PathSegment) * dotSegment;
	URI_TYPE(TextRange) dotText;

	assert(uri != NULL);
	assert(memory != NULL);

	dotSegment = memory->malloc(memory, 1 * sizeof(URI_TYPE(PathSegment)));
	if (dotSegment == NULL) {
		return URI_FALSE; /* i.e. raise malloc error */
	}

	/* Range covering exactly the single character "." */
	dotText.first = URI_FUNC(ConstPwd);
	dotText.afterLast = URI_FUNC(ConstPwd) + 1;

	if (uri->owner != URI_TRUE) {
		dotSegment->text = dotText; /* copies all members */
	} else if (URI_FUNC(CopyRange)(&(dotSegment->text), &dotText, memory) == URI_FALSE) {
		memory->free(memory, dotSegment);
		return URI_FALSE; /* i.e. raise malloc error */
	}

	/* Link in front of the current head only once nothing can fail any more */
	dotSegment->next = uri->pathHead;
	uri->pathHead = dotSegment;

	return URI_TRUE;
}
/* When dropping a scheme from a URI without a host and with a colon (":")
* in the first path segment, a consecutive reparse would rightfully
* mis-classify the first path segment as a scheme due to the colon.
* To protect against this case, we prepend an artificial "." segment
* to the path in here; the function is called after the scheme has
* just been dropped.
*
* 0. We start with parsed URI "scheme:path1:/path2/path3".
* 1. We drop the scheme naively and yield "path1:/path2/path3".
* 2. We prepend "." and yield unambiguous "./path1:/path2/path3".
*
* From the view of the RFC 3986 grammar, this is replacing rule path-rootless
* by path-noscheme content.
*
* Returns URI_TRUE for (a) nothing to do or (b) successful changes.
* Returns URI_FALSE to signal out-of-memory.
*/
UriBool URI_FUNC(FixPathNoScheme)(URI_TYPE(Uri) * uri,
		UriMemoryManager * memory) {
	assert(uri != NULL);
	assert(memory != NULL);

	/* Only a relative, non-empty path of a URI that has neither
	 * a scheme nor a host can be affected */
	if (uri->absolutePath == URI_TRUE) {
		return URI_TRUE; /* i.e. nothing to do */
	}
	if (uri->pathHead == NULL) {
		return URI_TRUE; /* i.e. nothing to do */
	}
	if (uri->scheme.first != NULL) {
		return URI_TRUE; /* i.e. nothing to do */
	}
	if (URI_FUNC(HasHost)(uri)) {
		return URI_TRUE; /* i.e. nothing to do */
	}

	/* Scan the first path segment for a colon */
	{
		const URI_TYPE(PathSegment) * const firstSegment = uri->pathHead;
		const URI_CHAR * cursor = firstSegment->text.first;
		UriBool hasColon = URI_FALSE;

		for (; cursor < firstSegment->text.afterLast; cursor++) {
			if (*cursor == _UT(':')) {
				hasColon = URI_TRUE;
				break;
			}
		}

		/* Either a colon was hit or the whole segment has been scanned */
		assert((hasColon == URI_TRUE) || (cursor == firstSegment->text.afterLast));

		if (hasColon == URI_FALSE) {
			return URI_TRUE; /* i.e. nothing to do */
		}
	}

	/* Troublesome colon present: insert "." segment in front */
	return URI_FUNC(PrependNewDotSegment)(uri, memory);
}
/* When dropping a host from a URI without a scheme, an absolute path
* and an empty first path segment, a consecutive reparse would rightfully
* mis-classify the first path segment as a host marker due to the "//".
* To protect against this case, we prepend an artificial "." segment
* to the path in here; the function is called after the host has
* just been dropped.
*
* 0. We start with parsed URI "//host//path1/path2".
* 1. We drop the host naively and yield "//path1/path2".
* 2. We insert "./" and yield unambiguous "/.//path1/path2".
*
* Returns URI_TRUE for (a) nothing to do or (b) successful changes.
* Returns URI_FALSE to signal out-of-memory.
*/
UriBool URI_FUNC(EnsureThatPathIsNotMistakenForHost)(URI_TYPE(Uri) * uri,
		UriMemoryManager * memory) {
	assert(uri != NULL);
	assert(memory != NULL);

	/* A host is present, so the path cannot be taken for one */
	if (URI_FUNC(HasHost)(uri) == URI_TRUE) {
		return URI_TRUE; /* i.e. nothing to do */
	}

	/* Without a leading slash there cannot be a leading "//" */
	if (uri->absolutePath == URI_FALSE) {
		return URI_TRUE; /* i.e. nothing to do */
	}

	/* No path at all */
	if (uri->pathHead == NULL) {
		return URI_TRUE; /* i.e. nothing to do */
	}

	/* A single segment, i.e. no second slash */
	if (uri->pathHead == uri->pathTail) {
		return URI_TRUE; /* i.e. nothing to do */
	}

	/* A non-empty first segment separates the two slashes */
	if (uri->pathHead->text.first != uri->pathHead->text.afterLast) {
		return URI_TRUE; /* i.e. nothing to do */
	}

	/* Insert "." segment in front */
	return URI_FUNC(PrependNewDotSegment)(uri, memory);
}
void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory) {
/* Fix path if only one empty segment */

View File

@@ -78,6 +78,8 @@ extern const URI_CHAR * const URI_FUNC(ConstParent);
void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri);
int URI_FUNC(FreeUriPath)(URI_TYPE(Uri) * uri, UriMemoryManager * memory);
int URI_FUNC(CompareRange)(
const URI_TYPE(TextRange) * a,
const URI_TYPE(TextRange) * b);
@@ -85,7 +87,7 @@ int URI_FUNC(CompareRange)(
UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange,
const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory);
UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange,
const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory);
const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory);
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory);
@@ -93,7 +95,6 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned, UriMemoryManager * memory);
unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig);
URI_CHAR URI_FUNC(HexToLetter)(unsigned int value);
URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase);
UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source,
@@ -102,6 +103,8 @@ UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest,
const URI_TYPE(Uri) * source, UriMemoryManager * memory);
UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri, UriMemoryManager * memory);
UriBool URI_FUNC(FixPathNoScheme)(URI_TYPE(Uri) * uri, UriMemoryManager * memory);
UriBool URI_FUNC(EnsureThatPathIsNotMistakenForHost)(URI_TYPE(Uri) * uri, UriMemoryManager * memory);
void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory);

View File

@@ -41,7 +41,7 @@
#define PACKAGE_VERSION "0.9.8"
#define PACKAGE_VERSION "@PROJECT_VERSION@"
/*
#define HAVE_WPRINTF*

View File

@@ -103,7 +103,7 @@ static void URI_FUNC(PreventLeakageAfterCopy)(URI_TYPE(Uri) * uri,
int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory) {
unsigned int doneMask = URI_NORMALIZED;
unsigned int revertMask = URI_NORMALIZED;
if (sourceUri == NULL || destUri == NULL) {
return URI_ERROR_NULL;
@@ -113,32 +113,32 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
URI_FUNC(ResetUri)(destUri);
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->scheme, &sourceUri->scheme, URI_FALSE, memory) == URI_FALSE) {
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->scheme, &sourceUri->scheme, memory) == URI_FALSE) {
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_SCHEME;
revertMask |= URI_NORMALIZE_SCHEME;
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->userInfo, &sourceUri->userInfo, URI_FALSE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->userInfo, &sourceUri->userInfo, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_USER_INFO;
revertMask |= URI_NORMALIZE_USER_INFO;
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostText, &sourceUri->hostText, URI_TRUE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostText, &sourceUri->hostText, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_HOST;
revertMask |= URI_NORMALIZE_HOST;
if (sourceUri->hostData.ip4 == NULL) {
destUri->hostData.ip4 = NULL;
} else {
destUri->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4));
if (destUri->hostData.ip4 == NULL) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
*(destUri->hostData.ip4) = *(sourceUri->hostData.ip4);
@@ -149,26 +149,26 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
} else {
destUri->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6));
if (destUri->hostData.ip6 == NULL) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
*(destUri->hostData.ip6) = *(sourceUri->hostData.ip6);
}
if (sourceUri->hostData.ipFuture.first != NULL && sourceUri->hostText.first == sourceUri->hostData.ipFuture.first) {
if (sourceUri->hostData.ipFuture.first != NULL) {
destUri->hostData.ipFuture.first = destUri->hostText.first;
destUri->hostData.ipFuture.afterLast = destUri->hostText.afterLast;
} else if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostData.ipFuture, &sourceUri->hostData.ipFuture, URI_FALSE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
} else if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostData.ipFuture, &sourceUri->hostData.ipFuture, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->portText, &sourceUri->portText, URI_FALSE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->portText, &sourceUri->portText, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_PORT;
revertMask |= URI_NORMALIZE_PORT;
destUri->pathHead = NULL;
destUri->pathTail = NULL;
@@ -180,7 +180,7 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
while (sourceWalker != NULL) {
URI_TYPE(PathSegment) * destWalker = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
if (destWalker == NULL) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
@@ -191,11 +191,11 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
if (destUri->pathHead == NULL) {
destUri->pathHead = destWalker;
doneMask |= URI_NORMALIZE_PATH;
revertMask |= URI_NORMALIZE_PATH;
}
if (URI_FUNC(CopyRangeAsNeeded)(&destWalker->text, &sourceWalker->text, URI_TRUE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
if (URI_FUNC(CopyRangeAsNeeded)(&destWalker->text, &sourceWalker->text, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
@@ -210,15 +210,15 @@ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
}
}
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->query, &sourceUri->query, URI_FALSE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->query, &sourceUri->query, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_QUERY;
revertMask |= URI_NORMALIZE_QUERY;
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->fragment, &sourceUri->fragment, URI_FALSE, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
if (URI_FUNC(CopyRangeAsNeeded)(&destUri->fragment, &sourceUri->fragment, memory) == URI_FALSE) {
URI_FUNC(PreventLeakageAfterCopy)(destUri, revertMask, memory);
return URI_ERROR_MALLOC;
}

View File

@@ -229,9 +229,12 @@ URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst,
/* Percent encode */
{
const unsigned char code = (unsigned char)read[0];
/* Uppercase recommended in (last sentence of) section 2.1 *
* of RFC 3986: *
* https://datatracker.ietf.org/doc/html/rfc3986#section-2.1 */
write[0] = _UT('%');
write[1] = URI_FUNC(HexToLetter)(code >> 4);
write[2] = URI_FUNC(HexToLetter)(code & 0x0f);
write[1] = URI_FUNC(HexToLetterEx)(code >> 4, URI_TRUE);
write[2] = URI_FUNC(HexToLetterEx)(code & 0x0f, URI_TRUE);
write += 3;
}
prevWasCr = URI_FALSE;

View File

@@ -83,11 +83,11 @@
static int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask,
unsigned int * outMask, UriMemoryManager * memory);
static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * revertMask,
unsigned int maskTest, URI_TYPE(TextRange) * range,
UriMemoryManager * memory);
static UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri,
unsigned int * doneMask, UriMemoryManager * memory);
unsigned int * revertMask, UriMemoryManager * memory);
static void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first,
const URI_CHAR ** afterLast);
@@ -329,10 +329,10 @@ static URI_INLINE void URI_FUNC(FixPercentEncodingEngine)(
write++;
} else {
/* 6.2.2.1 Case Normalization: *
* lowercase percent-encodings */
* uppercase percent-encodings */
write[0] = _UT('%');
write[1] = URI_FUNC(HexToLetter)(left);
write[2] = URI_FUNC(HexToLetter)(right);
write[1] = URI_FUNC(HexToLetterEx)(left, URI_TRUE);
write[2] = URI_FUNC(HexToLetterEx)(right, URI_TRUE);
write += 3;
}
@@ -397,17 +397,17 @@ static URI_INLINE UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** f
static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * revertMask,
unsigned int maskTest, URI_TYPE(TextRange) * range,
UriMemoryManager * memory) {
if (((*doneMask & maskTest) == 0)
if (((*revertMask & maskTest) == 0)
&& (range->first != NULL)
&& (range->afterLast != NULL)
&& (range->afterLast > range->first)) {
if (URI_FUNC(CopyRange)(range, range, memory) == URI_FALSE) {
return URI_FALSE;
}
*doneMask |= maskTest;
*revertMask |= maskTest;
}
return URI_TRUE;
}
@@ -415,24 +415,24 @@ static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
static URI_INLINE UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri,
unsigned int * doneMask, UriMemoryManager * memory) {
unsigned int * revertMask, UriMemoryManager * memory) {
URI_TYPE(PathSegment) * walker = uri->pathHead;
if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_SCHEME,
if (!URI_FUNC(MakeRangeOwner)(revertMask, URI_NORMALIZE_SCHEME,
&(uri->scheme), memory)
|| !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_USER_INFO,
|| !URI_FUNC(MakeRangeOwner)(revertMask, URI_NORMALIZE_USER_INFO,
&(uri->userInfo), memory)
|| !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_QUERY,
|| !URI_FUNC(MakeRangeOwner)(revertMask, URI_NORMALIZE_QUERY,
&(uri->query), memory)
|| !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_FRAGMENT,
|| !URI_FUNC(MakeRangeOwner)(revertMask, URI_NORMALIZE_FRAGMENT,
&(uri->fragment), memory)) {
return URI_FALSE; /* Raises malloc error */
}
/* Host */
if ((*doneMask & URI_NORMALIZE_HOST) == 0) {
if ((*revertMask & URI_NORMALIZE_HOST) == 0) {
if (uri->hostData.ipFuture.first != NULL) {
/* IPvFuture */
if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST,
if (!URI_FUNC(MakeRangeOwner)(revertMask, URI_NORMALIZE_HOST,
&(uri->hostData.ipFuture), memory)) {
return URI_FALSE; /* Raises malloc error */
}
@@ -440,7 +440,7 @@ static URI_INLINE UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri,
uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
} else if (uri->hostText.first != NULL) {
/* Regname */
if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST,
if (!URI_FUNC(MakeRangeOwner)(revertMask, URI_NORMALIZE_HOST,
&(uri->hostText), memory)) {
return URI_FALSE; /* Raises malloc error */
}
@@ -448,9 +448,9 @@ static URI_INLINE UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri,
}
/* Path */
if ((*doneMask & URI_NORMALIZE_PATH) == 0) {
if ((*revertMask & URI_NORMALIZE_PATH) == 0) {
while (walker != NULL) {
if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(walker->text), memory)) {
if (!URI_FUNC(MakeRangeOwner)(revertMask, 0, &(walker->text), memory)) {
/* Free allocations done so far and kill path */
/* Kill path to one before walker (if any) */
@@ -479,13 +479,13 @@ static URI_INLINE UriBool URI_FUNC(MakeOwnerEngine)(URI_TYPE(Uri) * uri,
}
walker = walker->next;
}
*doneMask |= URI_NORMALIZE_PATH;
*revertMask |= URI_NORMALIZE_PATH;
}
/* Port text, must come last so we don't have to undo that one if it fails. *
* Otherwise we would need an extra enum flag for it although the port *
* cannot go unnormalized... */
if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(uri->portText), memory)) {
if (!URI_FUNC(MakeRangeOwner)(revertMask, 0, &(uri->portText), memory)) {
return URI_FALSE; /* Raises malloc error */
}
@@ -621,7 +621,7 @@ static void URI_FUNC(AdvancePastLeadingZeros)(
static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
unsigned int inMask, unsigned int * outMask,
UriMemoryManager * memory) {
unsigned int doneMask = URI_NORMALIZED;
unsigned int revertMask = URI_NORMALIZED;
/* Not just doing inspection? -> memory manager required! */
if (outMask == NULL) {
@@ -671,10 +671,10 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast);
} else {
if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first), &(uri->scheme.afterLast), memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_SCHEME;
revertMask |= URI_NORMALIZE_SCHEME;
}
}
@@ -688,10 +688,10 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
} else {
if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first),
&(uri->hostData.ipFuture.afterLast), memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_HOST;
revertMask |= URI_NORMALIZE_HOST;
}
uri->hostText.first = uri->hostData.ipFuture.first;
uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
@@ -706,10 +706,10 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
&(uri->hostText.first),
&(uri->hostText.afterLast),
memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_HOST;
revertMask |= URI_NORMALIZE_HOST;
}
URI_FUNC(LowercaseInplaceExceptPercentEncoding)(uri->hostText.first,
@@ -753,10 +753,10 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first),
&(uri->userInfo.afterLast), memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_USER_INFO;
revertMask |= URI_NORMALIZE_USER_INFO;
}
}
}
@@ -801,20 +801,20 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
while (walker != NULL) {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(walker->text.first),
&(walker->text.afterLast), memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
walker = walker->next;
}
doneMask |= URI_NORMALIZE_PATH;
revertMask |= URI_NORMALIZE_PATH;
}
/* 6.2.2.3 Path Segment Normalization */
if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative,
(uri->owner == URI_TRUE)
|| ((doneMask & URI_NORMALIZE_PATH) != 0),
|| ((revertMask & URI_NORMALIZE_PATH) != 0),
memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
URI_FUNC(FixEmptyTrailSegment)(uri, memory);
@@ -841,10 +841,10 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first),
&(uri->query.afterLast), memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_QUERY;
revertMask |= URI_NORMALIZE_QUERY;
}
}
@@ -855,18 +855,18 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first),
&(uri->fragment.afterLast), memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_FRAGMENT;
revertMask |= URI_NORMALIZE_FRAGMENT;
}
}
}
/* Dup all not duped yet */
if ((outMask == NULL) && !uri->owner) {
if (!URI_FUNC(MakeOwnerEngine)(uri, &doneMask, memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
if (!URI_FUNC(MakeOwnerEngine)(uri, &revertMask, memory)) {
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}
uri->owner = URI_TRUE;
@@ -878,7 +878,7 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
int URI_FUNC(MakeOwnerMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
unsigned int doneMask = URI_NORMALIZED;
unsigned int revertMask = URI_NORMALIZED;
URI_CHECK_MEMORY_MANAGER(memory); /* may return */
@@ -890,8 +890,8 @@ int URI_FUNC(MakeOwnerMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
return URI_SUCCESS;
}
if (! URI_FUNC(MakeOwnerEngine)(uri, &doneMask, memory)) {
URI_FUNC(PreventLeakage)(uri, doneMask, memory);
if (! URI_FUNC(MakeOwnerEngine)(uri, &revertMask, memory)) {
URI_FUNC(PreventLeakage)(uri, revertMask, memory);
return URI_ERROR_MALLOC;
}

View File

@@ -480,7 +480,7 @@ static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state,
case _UT('v'):
case _UT('V'):
*/
if (first + 1 >= afterLast) {
if (afterLast - first < 2) {
URI_FUNC(StopSyntax)(state, afterLast, memory);
return NULL;
}
@@ -755,7 +755,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(
}
/* "::"? */
if (first + 1 >= afterLast) {
if (afterLast - first < 2) {
URI_FUNC(StopSyntax)(state, afterLast, memory);
return NULL;
}
@@ -773,7 +773,7 @@ static const URI_CHAR * URI_FUNC(ParseIPv6address2)(
setZipper = 1;
/* ":::+"? */
if (first + 1 >= afterLast) {
if (afterLast - first < 2) {
URI_FUNC(StopSyntax)(state, afterLast, memory);
return NULL; /* No ']' yet */
}
@@ -1583,14 +1583,14 @@ static const URI_CHAR * URI_FUNC(ParsePctEncoded)(
switch (*first) {
case _UT('%'):
*/
if (first + 1 >= afterLast) {
if (afterLast - first < 2) {
URI_FUNC(StopSyntax)(state, afterLast, memory);
return NULL;
}
switch (first[1]) {
case URI_SET_HEXDIG:
if (first + 2 >= afterLast) {
if (afterLast - first < 3) {
URI_FUNC(StopSyntax)(state, afterLast, memory);
return NULL;
}
@@ -2286,12 +2286,10 @@ int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
/* Host data - IPvFuture (may affect host text) */
if (uri->hostData.ipFuture.first != NULL) {
/* NOTE: .hostData.ipFuture may hold the very same range pointers
* as .hostText; then we need to prevent freeing memory twice. */
if (uri->hostText.first == uri->hostData.ipFuture.first) {
uri->hostText.first = NULL;
uri->hostText.afterLast = NULL;
}
/* NOTE: .hostData.ipFuture holds the very same range pointers
* as .hostText; we must not free memory twice. */
uri->hostText.first = NULL;
uri->hostText.afterLast = NULL;
if (uri->hostData.ipFuture.first != uri->hostData.ipFuture.afterLast) {
memory->free(memory, (URI_CHAR *)uri->hostData.ipFuture.first);
@@ -2332,20 +2330,7 @@ int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
}
/* Path */
if (uri->pathHead != NULL) {
URI_TYPE(PathSegment) * segWalk = uri->pathHead;
while (segWalk != NULL) {
URI_TYPE(PathSegment) * const next = segWalk->next;
if (uri->owner && (segWalk->text.first != NULL)
&& (segWalk->text.first < segWalk->text.afterLast)) {
memory->free(memory, (URI_CHAR *)segWalk->text.first);
}
memory->free(memory, segWalk);
segWalk = next;
}
uri->pathHead = NULL;
uri->pathTail = NULL;
}
URI_FUNC(FreeUriPath)(uri, memory);
if (uri->owner) {
/* Query */

View File

@@ -0,0 +1,306 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetFragment.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetFragment.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
/* Case-label list for the ASCII decimal digits '0'..'9'. The expansion
 * omits the leading `case` and the trailing colon, so it is meant to be
 * written as `case URI_SET_DIGIT:` inside a switch. */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Case-label list for the upper-case hexadecimal letters 'A'..'F';
 * use as `case URI_SET_HEX_LETTER_UPPER:`. */
#define URI_SET_HEX_LETTER_UPPER \
_UT('A'): \
case _UT('B'): \
case _UT('C'): \
case _UT('D'): \
case _UT('E'): \
case _UT('F')
/* Case-label list for the lower-case hexadecimal letters 'a'..'f';
 * use as `case URI_SET_HEX_LETTER_LOWER:`. */
#define URI_SET_HEX_LETTER_LOWER \
_UT('a'): \
case _UT('b'): \
case _UT('c'): \
case _UT('d'): \
case _UT('e'): \
case _UT('f')
/* Case-label list for any hexadecimal digit: decimal digits plus the
 * upper- and lower-case letters A-F; use as `case URI_SET_HEXDIG:`. */
#define URI_SET_HEXDIG \
URI_SET_DIGIT: \
case URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER
/* Case-label list for the ASCII letters a-z and A-Z. The letters A-F/a-f
 * come from the two hex-letter lists, the rest are spelled out below;
 * use as `case URI_SET_ALPHA:`. */
#define URI_SET_ALPHA \
URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER: \
case _UT('g'): \
case _UT('G'): \
case _UT('h'): \
case _UT('H'): \
case _UT('i'): \
case _UT('I'): \
case _UT('j'): \
case _UT('J'): \
case _UT('k'): \
case _UT('K'): \
case _UT('l'): \
case _UT('L'): \
case _UT('m'): \
case _UT('M'): \
case _UT('n'): \
case _UT('N'): \
case _UT('o'): \
case _UT('O'): \
case _UT('p'): \
case _UT('P'): \
case _UT('q'): \
case _UT('Q'): \
case _UT('r'): \
case _UT('R'): \
case _UT('s'): \
case _UT('S'): \
case _UT('t'): \
case _UT('T'): \
case _UT('u'): \
case _UT('U'): \
case _UT('v'): \
case _UT('V'): \
case _UT('w'): \
case _UT('W'): \
case _UT('x'): \
case _UT('X'): \
case _UT('y'): \
case _UT('Y'): \
case _UT('z'): \
case _UT('Z')
/* Case-label list for the eleven characters ! $ & ' ( ) * + , ; =
 * (the set is named after the "sub-delims" rule that the fragment grammar
 * comment in this file refers to); use as `case URI_SET_SUB_DELIMS:`. */
#define URI_SET_SUB_DELIMS \
_UT('!'): \
case _UT('$'): \
case _UT('&'): \
case _UT('\''): \
case _UT('('): \
case _UT(')'): \
case _UT('*'): \
case _UT('+'): \
case _UT(','): \
case _UT(';'): \
case _UT('=')
/* Case-label list for letters, decimal digits and the four characters
 * - . _ ~ ; use as `case URI_SET_UNRESERVED:`. */
#define URI_SET_UNRESERVED \
URI_SET_ALPHA: \
case URI_SET_DIGIT: \
case _UT('-'): \
case _UT('.'): \
case _UT('_'): \
case _UT('~')
UriBool URI_FUNC(IsWellFormedFragment)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
}
/* The related part of the grammar in RFC 3986 reads:
*
* fragment = *( pchar / "/" / "?" )
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
*/
while (first < afterLast) {
switch (first[0]) {
case URI_SET_UNRESERVED:
break;
/* pct-encoded */
case _UT('%'):
if (afterLast - first < 3) {
return URI_FALSE;
}
switch (first[1]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
switch (first[2]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
first += 2;
break;
case URI_SET_SUB_DELIMS:
break;
/* ":" / "@" and "/" / "?" */
case _UT(':'):
case _UT('@'):
case _UT('/'):
case _UT('?'):
break;
default:
return URI_FALSE;
}
first++;
}
return URI_TRUE;
}
/*
 * Replaces (or removes) the fragment of a URI, using the given memory
 * manager.
 *
 * uri        URI to modify; must not be NULL.
 * first      Start of the new fragment text, or NULL to remove the fragment.
 * afterLast  End of the new fragment text; must be NULL exactly when
 *            first is NULL.
 * memory     Memory manager, vetted by URI_CHECK_MEMORY_MANAGER.
 *
 * Returns URI_SUCCESS on success, URI_ERROR_NULL for bad pointer
 * combinations, URI_ERROR_SYNTAX when the text is not a well-formed
 * fragment, URI_ERROR_MALLOC when storing the new text fails, or the
 * error reported by MakeOwnerMm. URI_CHECK_MEMORY_MANAGER may also
 * return early.
 *
 * NOTE: the old fragment is dropped before the URI is made an owner and
 * before the new text is stored, so a failure in either of those steps
 * leaves the URI without a fragment.
 */
int URI_FUNC(SetFragmentMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    /* Validate all input up front so that rejected calls change nothing */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    if (first != NULL) {
        if (URI_FUNC(IsWellFormedFragment)(first, afterLast) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Release the previous fragment; only owned, non-empty text is freed */
    if (uri->owner == URI_TRUE) {
        if (uri->fragment.first != uri->fragment.afterLast) {
            memory->free(memory, (URI_CHAR *)uri->fragment.first);
        }
    }
    uri->fragment.afterLast = NULL;
    uri->fragment.first = NULL;

    /* Removing the fragment was all that was asked for? */
    if (first == NULL) {
        return URI_SUCCESS;
    }
    assert(first != NULL);

    /* The URI needs to own its memory before new text is attached */
    if (uri->owner == URI_FALSE) {
        const int ownerRes = URI_FUNC(MakeOwnerMm)(uri, memory);
        if (ownerRes != URI_SUCCESS) {
            return ownerRes;
        }
    }
    assert(uri->owner == URI_TRUE);

    /* Store the new fragment text */
    {
        URI_TYPE(TextRange) replacement;
        replacement.first = first;
        replacement.afterLast = afterLast;

        return (URI_FUNC(CopyRangeAsNeeded)(&uri->fragment, &replacement, memory) == URI_FALSE)
                ? URI_ERROR_MALLOC
                : URI_SUCCESS;
    }
}
/*
 * Convenience variant of SetFragmentMm: forwards uri, first and afterLast
 * unchanged and passes NULL as the memory manager; the return value is
 * whatever SetFragmentMm returns.
 * NOTE(review): NULL presumably selects the library's default memory
 * manager inside URI_CHECK_MEMORY_MANAGER -- confirm in UriMemory.h.
 */
int URI_FUNC(SetFragment)(URI_TYPE(Uri) * uri,
const URI_CHAR * first,
const URI_CHAR * afterLast) {
return URI_FUNC(SetFragmentMm)(uri, first, afterLast, NULL);
}
#endif

View File

@@ -0,0 +1,139 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostAuto.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostAuto.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
/*
 * Sets the host of a URI, detecting the host type from the text itself.
 *
 * Detection order:
 *   - NULL or empty input is handed to SetHostRegNameMm as is;
 *   - text starting with "[" must end with "]" and is treated as
 *     IPvFuture when the first character inside the brackets is 'v' or
 *     'V', and as IPv6 otherwise (the brackets are not passed on);
 *   - text accepted by IsWellFormedHostIp4 is treated as IPv4;
 *   - everything else is treated as a registered name.
 *
 * uri        URI to modify; must not be NULL.
 * first      Start of the host text, or NULL.
 * afterLast  End of the host text; must be NULL exactly when first is.
 * memory     Memory manager, vetted by URI_CHECK_MEMORY_MANAGER.
 *
 * Returns URI_ERROR_NULL for bad pointer combinations, URI_ERROR_SYNTAX
 * for a malformed or empty bracket wrap, and otherwise the result of
 * SetHostRegNameMm or InternalSetHostMm. URI_CHECK_MEMORY_MANAGER may
 * also return early.
 */
int URI_FUNC(SetHostAutoMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    UriHostType detectedType = URI_HOST_TYPE_REGNAME;
    const URI_CHAR * hostFirst = first;
    const URI_CHAR * hostAfterLast = afterLast;

    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    /* Nothing to inspect: unset or empty hosts go the reg-name route */
    if ((first == NULL) || (first >= afterLast)) {
        return URI_FUNC(SetHostRegNameMm)(uri, first, afterLast, memory);
    }

    if (first[0] != _UT('[')) {
        /* IPv4 if it parses as such, registered name otherwise */
        if (URI_FUNC(IsWellFormedHostIp4)(first, afterLast)) {
            detectedType = URI_HOST_TYPE_IP4;
        }
    } else {
        /* IP literal: needs a closing bracket and some content */
        if ((afterLast - first < 2) || (afterLast[-1] != _UT(']'))) {
            return URI_ERROR_SYNTAX;
        }

        /* Strip the bracket wrap; InternalSetHostMm gets the bare text */
        hostFirst = first + 1;
        hostAfterLast = afterLast - 1;
        if (hostFirst >= hostAfterLast) {
            return URI_ERROR_SYNTAX;
        }

        if ((hostFirst[0] == _UT('v')) || (hostFirst[0] == _UT('V'))) {
            detectedType = URI_HOST_TYPE_IPFUTURE;
        } else {
            detectedType = URI_HOST_TYPE_IP6;
        }
    }

    return URI_FUNC(InternalSetHostMm)(uri, detectedType, hostFirst, hostAfterLast, memory);
}
/*
 * Convenience variant of SetHostAutoMm: forwards uri, first and afterLast
 * unchanged and passes NULL as the memory manager; the return value is
 * whatever SetHostAutoMm returns.
 * NOTE(review): NULL presumably selects the library's default memory
 * manager inside URI_CHECK_MEMORY_MANAGER -- confirm in UriMemory.h.
 */
int URI_FUNC(SetHostAuto)(URI_TYPE(Uri) * uri,
const URI_CHAR * first,
const URI_CHAR * afterLast) {
return URI_FUNC(SetHostAutoMm)(uri, first, afterLast, NULL);
}
#endif

View File

@@ -0,0 +1,53 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef URI_SET_HOST_BASE_H
#define URI_SET_HOST_BASE_H 1
/* Tells host-setting code which kind of host text it has been given, and
 * hence how that text is to be validated and stored. */
typedef enum UriHostTypeEnum {
URI_HOST_TYPE_IP4, /* IPv4 address */
URI_HOST_TYPE_IP6, /* IPv6 address */
URI_HOST_TYPE_IPFUTURE, /* IPvFuture literal */
URI_HOST_TYPE_REGNAME /* registered name */
} UriHostType;
#endif /* URI_SET_HOST_BASE_H */

View File

@@ -0,0 +1,266 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriSetHostCommon.c
* Holds code used by multiple SetHost* functions.
* NOTE: This source file includes itself twice.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostCommon.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostCommon.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include <uriparser/UriIp4.h>
# include "UriCommon.h"
# include "UriMemory.h"
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
#endif
#include <assert.h>
/*
 * Shared implementation behind the SetHost* functions: validates the new
 * host text for the given host type, releases the previous host and,
 * unless the host is being removed, stores [first, afterLast) as the new
 * host text together with any type-specific .hostData.
 *
 * uri        URI to modify; must not be NULL.
 * hostType   How to validate and store the text (one of URI_HOST_TYPE_*).
 * first      Start of the new host text, or NULL to remove the host.
 * afterLast  End of the new host text; must be NULL exactly when first is.
 * memory     Memory manager, vetted by URI_CHECK_MEMORY_MANAGER.
 *
 * Returns URI_SUCCESS, or URI_ERROR_NULL, URI_ERROR_SETHOST_USERINFO_SET,
 * URI_ERROR_SETHOST_PORT_SET, URI_ERROR_SYNTAX, URI_ERROR_MALLOC, or the
 * non-success code reported by MakeOwnerMm. URI_CHECK_MEMORY_MANAGER may
 * also return early.
 *
 * NOTE(review): the old host is released before MakeOwnerMm,
 * CopyRangeAsNeeded and the .hostData allocations run, so a failure in any
 * of those later steps returns with the previous host already gone.
 */
int URI_FUNC(InternalSetHostMm)(URI_TYPE(Uri) * uri,
UriHostType hostType,
const URI_CHAR * first,
const URI_CHAR * afterLast,
UriMemoryManager * memory) {
/* Superficial input validation (before making any changes) */
if ((uri == NULL) || ((first == NULL) != (afterLast == NULL))) {
return URI_ERROR_NULL;
}
URI_CHECK_MEMORY_MANAGER(memory); /* may return */
/* The RFC 3986 grammar reads:
* authority = [ userinfo "@" ] host [ ":" port ]
* So no user info or port without a host. */
if (first == NULL) {
/* Removing the host is refused while user info or a port is present */
if (uri->userInfo.first != NULL) {
return URI_ERROR_SETHOST_USERINFO_SET;
} else if (uri->portText.first != NULL) {
return URI_ERROR_SETHOST_PORT_SET;
}
}
/* Syntax-check the new value */
if (first != NULL) {
switch (hostType) {
case URI_HOST_TYPE_IP4:
if (URI_FUNC(IsWellFormedHostIp4)(first, afterLast) == URI_FALSE) {
return URI_ERROR_SYNTAX;
}
break;
case URI_HOST_TYPE_IP6:
{
/* This check returns an error code rather than a boolean */
const int res = URI_FUNC(IsWellFormedHostIp6Mm)(first, afterLast, memory);
assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));
if (res != URI_SUCCESS) {
return res;
}
}
break;
case URI_HOST_TYPE_IPFUTURE:
{
/* This check returns an error code rather than a boolean */
const int res = URI_FUNC(IsWellFormedHostIpFutureMm)(first, afterLast, memory);
assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));
if (res != URI_SUCCESS) {
return res;
}
}
break;
case URI_HOST_TYPE_REGNAME:
if (URI_FUNC(IsWellFormedHostRegName)(first, afterLast) == URI_FALSE) {
return URI_ERROR_SYNTAX;
}
break;
default:
/* NOTE(review): with assertions compiled out, an unknown host type
* passes through here without any syntax check. */
assert(0 && "Unsupported URI host type");
}
}
{
/* Clear old value */
/* Remember whether there was a host before it gets wiped below */
const UriBool hadHostBefore = URI_FUNC(HasHost)(uri);
if (uri->hostData.ipFuture.first != NULL) {
/* NOTE: .hostData.ipFuture holds the very same range pointers
* as .hostText; we must not free memory twice. */
uri->hostText.first = NULL;
uri->hostText.afterLast = NULL;
/* Text is only freed when the URI owns it and it is non-empty */
if ((uri->owner == URI_TRUE) && (uri->hostData.ipFuture.first != uri->hostData.ipFuture.afterLast)) {
memory->free(memory, (URI_CHAR *)uri->hostData.ipFuture.first);
}
uri->hostData.ipFuture.first = NULL;
uri->hostData.ipFuture.afterLast = NULL;
} else if (uri->hostText.first != NULL) {
/* Text is only freed when the URI owns it and it is non-empty */
if ((uri->owner == URI_TRUE) && (uri->hostText.first != uri->hostText.afterLast)) {
memory->free(memory, (URI_CHAR *)uri->hostText.first);
}
uri->hostText.first = NULL;
uri->hostText.afterLast = NULL;
}
/* The parsed address structs are freed regardless of uri->owner */
if (uri->hostData.ip4 != NULL) {
memory->free(memory, uri->hostData.ip4);
uri->hostData.ip4 = NULL;
} else if (uri->hostData.ip6 != NULL) {
memory->free(memory, uri->hostData.ip6);
uri->hostData.ip6 = NULL;
}
/* Already done setting? */
if (first == NULL) {
/* Yes, but disambiguate as needed */
if (hadHostBefore == URI_TRUE) {
/* A host was removed: mark the path absolute and let the helper
* adjust the path so that it cannot be misread as a host */
uri->absolutePath = URI_TRUE;
{
const UriBool success = URI_FUNC(EnsureThatPathIsNotMistakenForHost)(uri, memory);
return (success == URI_TRUE)
? URI_SUCCESS
: URI_ERROR_MALLOC;
}
}
return URI_SUCCESS;
}
}
assert(first != NULL);
/* Ensure owned */
if (uri->owner == URI_FALSE) {
const int res = URI_FUNC(MakeOwnerMm)(uri, memory);
if (res != URI_SUCCESS) {
return res;
}
}
assert(uri->owner == URI_TRUE);
/* Apply new value; NOTE that .hostText is set for all four host types */
{
URI_TYPE(TextRange) sourceRange;
sourceRange.first = first;
sourceRange.afterLast = afterLast;
if (URI_FUNC(CopyRangeAsNeeded)(&uri->hostText, &sourceRange, memory) == URI_FALSE) {
return URI_ERROR_MALLOC;
}
uri->absolutePath = URI_FALSE; /* always URI_FALSE for URIs with host */
/* Fill .hostData as needed */
switch (hostType) {
case URI_HOST_TYPE_IP4:
{
/* On allocation failure .hostText stays set while .hostData.ip4 is NULL */
uri->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4));
if (uri->hostData.ip4 == NULL) {
return URI_ERROR_MALLOC;
}
{
const int res = URI_FUNC(ParseIpFourAddress)(uri->hostData.ip4->data, first, afterLast);
#if defined(NDEBUG)
(void)res; /* i.e. mark as unused */
#else
assert(res == URI_SUCCESS); /* because checked for well-formedness earlier */
#endif
}
}
break;
case URI_HOST_TYPE_IP6:
{
uri->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6));
if (uri->hostData.ip6 == NULL) {
return URI_ERROR_MALLOC;
}
{
/* NOTE(review): if this parse fails, the freshly allocated
* .hostData.ip6 stays attached to the URI with whatever contents
* the allocation came with. */
const int res = URI_FUNC(ParseIpSixAddressMm)(uri->hostData.ip6, first, afterLast, memory);
assert((res == URI_SUCCESS) || (res == URI_ERROR_MALLOC)); /* because checked for well-formedness earlier */
if (res != URI_SUCCESS) {
return res;
}
}
}
break;
case URI_HOST_TYPE_IPFUTURE:
/* Alias the stored host text rather than keeping a second copy */
uri->hostData.ipFuture.first = uri->hostText.first;
uri->hostData.ipFuture.afterLast = uri->hostText.afterLast;
break;
case URI_HOST_TYPE_REGNAME:
/* Registered names need nothing beyond .hostText */
break;
default:
assert(0 && "Unsupported URI host type");
}
}
return URI_SUCCESS;
}
#endif

View File

@@ -0,0 +1,79 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if (defined(URI_PASS_ANSI) && !defined(URI_SET_HOST_COMMON_H_ANSI)) \
|| (defined(URI_PASS_UNICODE) && !defined(URI_SET_HOST_COMMON_H_UNICODE)) \
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostCommon.h"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostCommon.h"
# undef URI_PASS_UNICODE
# endif
/* Only one pass for each encoding */
#elif (defined(URI_PASS_ANSI) && !defined(URI_SET_HOST_COMMON_H_ANSI) \
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
&& !defined(URI_SET_HOST_COMMON_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
# define URI_SET_HOST_COMMON_H_ANSI 1
# include <uriparser/UriDefsAnsi.h>
# else
# define URI_SET_HOST_COMMON_H_UNICODE 1
# include <uriparser/UriDefsUnicode.h>
# endif
/*
 * Sets (or, with first == NULL, removes) the host of a URI, treating the
 * text in [first, afterLast) as a host of kind hostType. Declared once per
 * enabled character encoding through URI_FUNC / URI_TYPE / URI_CHAR.
 * Returns an int result code.
 */
int URI_FUNC(InternalSetHostMm)(URI_TYPE(Uri) * uri,
UriHostType hostType,
const URI_CHAR * first,
const URI_CHAR * afterLast,
UriMemoryManager * memory);
#endif
#endif

View File

@@ -0,0 +1,105 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostIp4.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostIp4.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include <uriparser/UriIp4.h>
# include "UriMemory.h"
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
#endif
/*
 * Checks whether the text in [first, afterLast) is a well-formed IPv4
 * address, as judged by ParseIpFourAddress.
 *
 * first      Start of the candidate text.
 * afterLast  One past the last character of the candidate text.
 *
 * Returns URI_TRUE when ParseIpFourAddress reports URI_SUCCESS, URI_FALSE
 * otherwise or when either pointer is NULL.
 */
UriBool URI_FUNC(IsWellFormedHostIp4)(const URI_CHAR * first, const URI_CHAR * afterLast) {
    /* Scratch space only; the parsed octets are discarded */
    unsigned char scratchOctets[4];

    if ((first == NULL) || (afterLast == NULL)) {
        return URI_FALSE;
    }

    if (URI_FUNC(ParseIpFourAddress)(scratchOctets, first, afterLast) != URI_SUCCESS) {
        return URI_FALSE;
    }
    return URI_TRUE;
}
/*
 * Sets the host of a URI from IPv4 address text: forwards all arguments
 * to InternalSetHostMm with the host type fixed to URI_HOST_TYPE_IP4 and
 * returns its result.
 */
int URI_FUNC(SetHostIp4Mm)(URI_TYPE(Uri) * uri,
const URI_CHAR * first,
const URI_CHAR * afterLast,
UriMemoryManager * memory) {
return URI_FUNC(InternalSetHostMm)(uri, URI_HOST_TYPE_IP4, first, afterLast, memory);
}
/*
 * Convenience variant of SetHostIp4Mm: forwards uri, first and afterLast
 * unchanged and passes NULL as the memory manager; the return value is
 * whatever SetHostIp4Mm returns.
 * NOTE(review): NULL presumably selects the library's default memory
 * manager inside URI_CHECK_MEMORY_MANAGER -- confirm in UriMemory.h.
 */
int URI_FUNC(SetHostIp4)(URI_TYPE(Uri) * uri,
const URI_CHAR * first,
const URI_CHAR * afterLast) {
return URI_FUNC(SetHostIp4Mm)(uri, first, afterLast, NULL);
}
#endif

View File

@@ -0,0 +1,182 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostIp6.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostIp6.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriMemory.h"
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
#endif
#include <assert.h>
#include <string.h> /* for memcpy */
/* Longest textual IPv6 address permitted by the RFC 3986 grammar
 * (section 3.2.2): six full quads, six colon separators and a trailing
 * dotted-quad IPv4 address of up to 15 characters, e.g.
 * "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255" (45 characters).
 * The all-hex form (8 full quads plus 7 colons, 39 characters) is
 * shorter, so sizing for it alone would reject valid input. */
#define URI_MAX_IP6_LEN (6 * 4 + 6 * 1 + 15)

/*
 * Parses the text in [first, afterLast) as an IPv6 address (without the
 * enclosing square brackets) by wrapping it as "//[...]" and running it
 * through the regular URI parser.
 *
 * output     Receives the parsed address on success; may be NULL when
 *            only the well-formedness verdict is of interest.
 * first      Start of the address text; must not be NULL.
 * afterLast  One past the end of the address text; must not be NULL.
 * memory     Memory manager, vetted by URI_CHECK_MEMORY_MANAGER.
 *
 * Returns URI_SUCCESS when the text is a well-formed IPv6 address,
 * URI_ERROR_NULL when first or afterLast is NULL, URI_ERROR_SYNTAX when
 * the text starts with 'v'/'V' (IPvFuture territory), is longer than
 * URI_MAX_IP6_LEN or fails to parse, or another error code handed back by
 * the URI parser. URI_CHECK_MEMORY_MANAGER may also return early.
 */
int URI_FUNC(ParseIpSixAddressMm)(UriIp6 * output,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    /* NOTE: output is allowed to be NULL */
    if ((first == NULL) || (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    /* Are we dealing with potential IPvFuture input? */
    if (first < afterLast) {
        switch (first[0]) {
        case _UT('v'):
        case _UT('V'):
            return URI_ERROR_SYNTAX;

        default:
            break;
        }
    }

    /* Are we dealing with IPv6 input? */
    {
        /* Assemble "//[..]" input wrap for upcoming parse as a URI
         * NOTE: If the input contains closing "]" on its own, the resulting
         *       string will not be valid URI syntax, and hence there is
         *       no risk of false positives from "bracket injection". */
        URI_CHAR candidate[3 + URI_MAX_IP6_LEN + 1 + 1] = _UT("//[");
        const size_t inputLenChars = (afterLast - first);

        /* Detect overflow of the fixed-size buffer; a reversed range
         * (afterLast < first) wraps to a huge value and lands here, too */
        if (inputLenChars > URI_MAX_IP6_LEN) {
            return URI_ERROR_SYNTAX;
        }

        memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR));
        memcpy(candidate + 3 + inputLenChars, _UT("]"), 2 * sizeof(URI_CHAR)); /* includes zero terminator */

        /* Parse as an RFC 3986 URI */
        {
            const size_t candidateLenChars = 3 + inputLenChars + 1;
            URI_TYPE(Uri) uri;
            const int res = URI_FUNC(ParseSingleUriExMm)(&uri, candidate, candidate + candidateLenChars, NULL, memory);

            assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));

            if (res == URI_SUCCESS) {
                assert(uri.hostData.ip6 != NULL);

                if (output != NULL) {
                    memcpy(output->data, uri.hostData.ip6->data, sizeof(output->data));
                }

                /* The temporary URI is only torn down here on success */
                URI_FUNC(FreeUriMembersMm)(&uri, memory);
            }

            return res;
        }
    }
}
/*
 * Convenience variant of ParseIpSixAddressMm: forwards output, first and
 * afterLast unchanged and passes NULL as the memory manager; the return
 * value is whatever ParseIpSixAddressMm returns.
 * NOTE(review): NULL presumably selects the library's default memory
 * manager inside URI_CHECK_MEMORY_MANAGER -- confirm in UriMemory.h.
 */
int URI_FUNC(ParseIpSixAddress)(UriIp6 * output,
const URI_CHAR * first,
const URI_CHAR * afterLast) {
return URI_FUNC(ParseIpSixAddressMm)(output, first, afterLast, NULL);
}
/*
 * Checks whether [first, afterLast) is a well-formed IPv6 address by
 * running ParseIpSixAddressMm with a NULL output, i.e. discarding the
 * parsed address.
 * NOTE: the result is an int error code (URI_SUCCESS when well-formed),
 * not a UriBool, so it must not be tested for truthiness.
 */
int URI_FUNC(IsWellFormedHostIp6Mm)(const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory) {
return URI_FUNC(ParseIpSixAddressMm)(NULL, first, afterLast, memory);
}
/*
 * Convenience variant of IsWellFormedHostIp6Mm: forwards first and
 * afterLast unchanged and passes NULL as the memory manager. Like the
 * callee, it returns an int error code rather than a UriBool.
 * NOTE(review): NULL presumably selects the library's default memory
 * manager inside URI_CHECK_MEMORY_MANAGER -- confirm in UriMemory.h.
 */
int URI_FUNC(IsWellFormedHostIp6)(const URI_CHAR * first, const URI_CHAR * afterLast) {
return URI_FUNC(IsWellFormedHostIp6Mm)(first, afterLast, NULL);
}
/*
 * Sets the host of a URI from IPv6 address text: forwards all arguments
 * to InternalSetHostMm with the host type fixed to URI_HOST_TYPE_IP6 and
 * returns its result.
 */
int URI_FUNC(SetHostIp6Mm)(URI_TYPE(Uri) * uri,
const URI_CHAR * first,
const URI_CHAR * afterLast,
UriMemoryManager * memory) {
return URI_FUNC(InternalSetHostMm)(uri, URI_HOST_TYPE_IP6, first, afterLast, memory);
}
/*
 * Convenience variant of SetHostIp6Mm: forwards uri, first and afterLast
 * unchanged and passes NULL as the memory manager; the return value is
 * whatever SetHostIp6Mm returns.
 * NOTE(review): NULL presumably selects the library's default memory
 * manager inside URI_CHECK_MEMORY_MANAGER -- confirm in UriMemory.h.
 */
int URI_FUNC(SetHostIp6)(URI_TYPE(Uri) * uri,
const URI_CHAR * first,
const URI_CHAR * afterLast) {
return URI_FUNC(SetHostIp6Mm)(uri, first, afterLast, NULL);
}
#endif

View File

@@ -0,0 +1,174 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostIpFuture.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostIpFuture.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriMemory.h"
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
#endif
#include <assert.h>
#include <string.h> /* for memcpy */
/* Checks whether the characters in [first, afterLast) form a well-formed
 * IPvFuture host by wrapping them as "//[..]" and parsing that as a URI.
 *
 * Returns URI_ERROR_NULL if first or afterLast is NULL, URI_ERROR_SYNTAX if
 * non-empty input does not start with 'v' or 'V', URI_ERROR_MALLOC if the
 * wrapped length would overflow or the allocation fails, and otherwise the
 * result of the parse (URI_SUCCESS, URI_ERROR_SYNTAX or URI_ERROR_MALLOC).
 * URI_CHECK_MEMORY_MANAGER may also return early. */
int URI_FUNC(IsWellFormedHostIpFutureMm)(const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory) {
    const size_t maxSizeT = (size_t)-1;
    size_t inputLenChars;
    size_t candidateLenChars;
    URI_CHAR * candidate;
    URI_TYPE(Uri) uri;
    int res;

    if ((first == NULL) || (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    /* Cheap pre-check: non-empty IPvFuture input has to start with "v" or "V" */
    if ((first < afterLast)
            && (first[0] != _UT('v'))
            && (first[0] != _UT('V'))) {
        return URI_ERROR_SYNTAX;
    }

    /* Size of the "//[" + input + "]" wrap (plus zero terminator), with
     * overflow detection for both the character and the byte count */
    inputLenChars = (size_t)(afterLast - first);
    if (maxSizeT - inputLenChars < 3 + 1 + 1) {
        return URI_ERROR_MALLOC;
    }

    candidateLenChars = 3 + inputLenChars + 1;
    if (maxSizeT / sizeof(URI_CHAR) < candidateLenChars + 1) {
        return URI_ERROR_MALLOC;
    }

    candidate = memory->malloc(memory, (candidateLenChars + 1) * sizeof(URI_CHAR));
    if (candidate == NULL) {
        return URI_ERROR_MALLOC;
    }

    /* NOTE: If the input contains a closing "]" on its own, the resulting
     *       string will not be valid URI syntax, and hence there is
     *       no risk of false positives from "bracket injection". */
    memcpy(candidate, _UT("//["), 3 * sizeof(URI_CHAR));
    memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR));
    memcpy(candidate + 3 + inputLenChars, _UT("]"), 2 * sizeof(URI_CHAR)); /* includes zero terminator */

    /* Let the RFC 3986 parser decide */
    res = URI_FUNC(ParseSingleUriExMm)(&uri, candidate, candidate + candidateLenChars, NULL, memory);
    assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));

    /* The parsed URI is only released when the parse succeeded */
    if (res == URI_SUCCESS) {
        assert(uri.hostData.ipFuture.first != NULL);
        URI_FUNC(FreeUriMembersMm)(&uri, memory);
    }

    memory->free(memory, candidate);
    return res;
}
int URI_FUNC(IsWellFormedHostIpFuture)(const URI_CHAR * first, const URI_CHAR * afterLast) {
return URI_FUNC(IsWellFormedHostIpFutureMm)(first, afterLast, NULL);
}
/* Sets the host of the given URI from [first, afterLast) by delegating to
 * InternalSetHostMm with host type URI_HOST_TYPE_IPFUTURE; the delegate's
 * return value is passed through as is. */
int URI_FUNC(SetHostIpFutureMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    const int res = URI_FUNC(InternalSetHostMm)(uri, URI_HOST_TYPE_IPFUTURE, first, afterLast, memory);

    return res;
}
/* Convenience variant of SetHostIpFutureMm that passes NULL as the memory
 * manager and hands back that call's result unchanged. */
int URI_FUNC(SetHostIpFuture)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const noExplicitMemory = NULL;
    const int res = URI_FUNC(SetHostIpFutureMm)(uri, first, afterLast, noExplicitMemory);

    return res;
}
#endif

View File

@@ -0,0 +1,246 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetHostRegName.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetHostRegName.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriMemory.h"
# include "UriSetHostBase.h"
# include "UriSetHostCommon.h"
#endif
/* Each URI_SET_* macro below expands to a list of case labels with the
 * leading "case" and the trailing ":" left off, so that a whole character
 * set is matched inside a switch by writing "case URI_SET_xxx:". */
/* Decimal digits '0' through '9' */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Uppercase hexadecimal letters 'A' through 'F' */
#define URI_SET_HEX_LETTER_UPPER \
_UT('A'): \
case _UT('B'): \
case _UT('C'): \
case _UT('D'): \
case _UT('E'): \
case _UT('F')
/* Lowercase hexadecimal letters 'a' through 'f' */
#define URI_SET_HEX_LETTER_LOWER \
_UT('a'): \
case _UT('b'): \
case _UT('c'): \
case _UT('d'): \
case _UT('e'): \
case _UT('f')
/* Any hexadecimal digit: decimal digits plus hex letters of either case */
#define URI_SET_HEXDIG \
URI_SET_DIGIT: \
case URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER
/* Any letter 'a'..'z' / 'A'..'Z': the hex letters of either case plus
 * 'g'..'z' and 'G'..'Z' */
#define URI_SET_ALPHA \
URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER: \
case _UT('g'): \
case _UT('G'): \
case _UT('h'): \
case _UT('H'): \
case _UT('i'): \
case _UT('I'): \
case _UT('j'): \
case _UT('J'): \
case _UT('k'): \
case _UT('K'): \
case _UT('l'): \
case _UT('L'): \
case _UT('m'): \
case _UT('M'): \
case _UT('n'): \
case _UT('N'): \
case _UT('o'): \
case _UT('O'): \
case _UT('p'): \
case _UT('P'): \
case _UT('q'): \
case _UT('Q'): \
case _UT('r'): \
case _UT('R'): \
case _UT('s'): \
case _UT('S'): \
case _UT('t'): \
case _UT('T'): \
case _UT('u'): \
case _UT('U'): \
case _UT('v'): \
case _UT('V'): \
case _UT('w'): \
case _UT('W'): \
case _UT('x'): \
case _UT('X'): \
case _UT('y'): \
case _UT('Y'): \
case _UT('z'): \
case _UT('Z')
/* The characters ! $ & ' ( ) * + , ; = */
#define URI_SET_SUB_DELIMS \
_UT('!'): \
case _UT('$'): \
case _UT('&'): \
case _UT('\''): \
case _UT('('): \
case _UT(')'): \
case _UT('*'): \
case _UT('+'): \
case _UT(','): \
case _UT(';'): \
case _UT('=')
/* Letters, decimal digits and the characters - . _ ~ */
#define URI_SET_UNRESERVED \
URI_SET_ALPHA: \
case URI_SET_DIGIT: \
case _UT('-'): \
case _UT('.'): \
case _UT('_'): \
case _UT('~')
UriBool URI_FUNC(IsWellFormedHostRegName)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
}
/* reg-name = *( unreserved / pct-encoded / sub-delims ) */
while (first < afterLast) {
switch (first[0]) {
case URI_SET_UNRESERVED:
break;
/* pct-encoded */
case _UT('%'):
if (afterLast - first < 3) {
return URI_FALSE;
}
switch (first[1]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
switch (first[2]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
first += 2;
break;
case URI_SET_SUB_DELIMS:
break;
default:
return URI_FALSE;
}
first++;
}
return URI_TRUE;
}
/* Sets the host of the given URI from [first, afterLast) by delegating to
 * InternalSetHostMm with host type URI_HOST_TYPE_REGNAME; the delegate's
 * return value is passed through as is. */
int URI_FUNC(SetHostRegNameMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    const int res = URI_FUNC(InternalSetHostMm)(uri, URI_HOST_TYPE_REGNAME, first, afterLast, memory);

    return res;
}
/* Convenience variant of SetHostRegNameMm that passes NULL as the memory
 * manager and hands back that call's result unchanged. */
int URI_FUNC(SetHostRegName)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const noExplicitMemory = NULL;
    const int res = URI_FUNC(SetHostRegNameMm)(uri, first, afterLast, noExplicitMemory);

    return res;
}
#endif

View File

@@ -0,0 +1,495 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetPath.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetPath.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
#include <string.h> /* for memcpy */
/* Each URI_SET_* macro below expands to a list of case labels with the
 * leading "case" and the trailing ":" left off, so that a whole character
 * set is matched inside a switch by writing "case URI_SET_xxx:". */
/* Decimal digits '0' through '9' */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Uppercase hexadecimal letters 'A' through 'F' */
#define URI_SET_HEX_LETTER_UPPER \
_UT('A'): \
case _UT('B'): \
case _UT('C'): \
case _UT('D'): \
case _UT('E'): \
case _UT('F')
/* Lowercase hexadecimal letters 'a' through 'f' */
#define URI_SET_HEX_LETTER_LOWER \
_UT('a'): \
case _UT('b'): \
case _UT('c'): \
case _UT('d'): \
case _UT('e'): \
case _UT('f')
/* Any hexadecimal digit: decimal digits plus hex letters of either case */
#define URI_SET_HEXDIG \
URI_SET_DIGIT: \
case URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER
/* Any letter 'a'..'z' / 'A'..'Z': the hex letters of either case plus
 * 'g'..'z' and 'G'..'Z' */
#define URI_SET_ALPHA \
URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER: \
case _UT('g'): \
case _UT('G'): \
case _UT('h'): \
case _UT('H'): \
case _UT('i'): \
case _UT('I'): \
case _UT('j'): \
case _UT('J'): \
case _UT('k'): \
case _UT('K'): \
case _UT('l'): \
case _UT('L'): \
case _UT('m'): \
case _UT('M'): \
case _UT('n'): \
case _UT('N'): \
case _UT('o'): \
case _UT('O'): \
case _UT('p'): \
case _UT('P'): \
case _UT('q'): \
case _UT('Q'): \
case _UT('r'): \
case _UT('R'): \
case _UT('s'): \
case _UT('S'): \
case _UT('t'): \
case _UT('T'): \
case _UT('u'): \
case _UT('U'): \
case _UT('v'): \
case _UT('V'): \
case _UT('w'): \
case _UT('W'): \
case _UT('x'): \
case _UT('X'): \
case _UT('y'): \
case _UT('Y'): \
case _UT('z'): \
case _UT('Z')
/* The characters ! $ & ' ( ) * + , ; = */
#define URI_SET_SUB_DELIMS \
_UT('!'): \
case _UT('$'): \
case _UT('&'): \
case _UT('\''): \
case _UT('('): \
case _UT(')'): \
case _UT('*'): \
case _UT('+'): \
case _UT(','): \
case _UT(';'): \
case _UT('=')
/* Letters, decimal digits and the characters - . _ ~ */
#define URI_SET_UNRESERVED \
URI_SET_ALPHA: \
case URI_SET_DIGIT: \
case _UT('-'): \
case _UT('.'): \
case _UT('_'): \
case _UT('~')
/* Tells whether the characters in [first, afterLast) are acceptable as a
 * URI path. With hasHost == URI_TRUE the path must additionally be
 * non-empty and start with "/".
 *
 * Returns URI_FALSE if first or afterLast is NULL, if the host-related
 * requirement is violated, or if a character (or incomplete/invalid "%XX"
 * triplet) outside of the accepted set is found; URI_TRUE otherwise. */
UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast, UriBool hasHost) {
    const URI_CHAR * cursor;

    if ((first == NULL) || (afterLast == NULL)) {
        return URI_FALSE;
    }

    if (hasHost == URI_TRUE) {
        if (first >= afterLast) {
            return URI_FALSE;
        }
        if (first[0] != _UT('/')) {
            return URI_FALSE;
        }
    }

    /* The related part of the grammar in RFC 3986 (section 3.3) reads:
     *
     *   path          = path-abempty    ; begins with "/" or is empty
     *                 / path-absolute   ; begins with "/" but not "//"
     *                 / path-noscheme   ; begins with a non-colon segment
     *                 / path-rootless   ; begins with a segment
     *                 / path-empty      ; zero characters
     *
     *   path-abempty  = *( "/" segment )
     *   path-absolute = "/" [ segment-nz *( "/" segment ) ]
     *   path-noscheme = segment-nz-nc *( "/" segment )
     *   path-rootless = segment-nz *( "/" segment )
     *   path-empty    = 0<pchar>
     *
     *   segment       = *pchar
     *   segment-nz    = 1*pchar
     *   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
     *                 ; non-zero-length segment without any colon ":"
     *
     *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
     *
     * The loop below simplifies this to ..
     *
     *   path          = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" )
     *
     * .. and leaves the rest to pre-return removal of ambiguity
     * from cases like "path1:/path2" and "//path1/path2" inside SetPath.
     */
    for (cursor = first; cursor < afterLast; cursor++) {
        switch (cursor[0]) {
        /* unreserved, sub-delims, ":" / "@" and "/" are accepted as is */
        case URI_SET_UNRESERVED:
        case URI_SET_SUB_DELIMS:
        case _UT(':'):
        case _UT('@'):
        case _UT('/'):
            break;

        /* pct-encoded: "%" followed by exactly two hex digits */
        case _UT('%'):
            if (afterLast - cursor < 3) {
                return URI_FALSE;
            }

            switch (cursor[1]) {
            case URI_SET_HEXDIG:
                break;
            default:
                return URI_FALSE;
            }

            switch (cursor[2]) {
            case URI_SET_HEXDIG:
                break;
            default:
                return URI_FALSE;
            }

            /* Skip the two hex digits; the loop step skips the "%" */
            cursor += 2;
            break;

        default:
            return URI_FALSE;
        }
    }

    return URI_TRUE;
}
/* Unlinks the first path segment of the given URI and releases the segment
 * node through the given memory manager. The segment must exist and its
 * text range must be empty (both asserted); the text itself is not freed.
 *
 * If the dropped segment was the only one, the tail pointer is reset too,
 * so that it does not keep pointing at the node that was just freed. */
static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
    assert(uri != NULL);
    assert(memory != NULL);
    assert(uri->pathHead != NULL);
    assert(uri->pathHead->text.first == uri->pathHead->text.afterLast);

    {
        URI_TYPE(PathSegment) * const originalHead = uri->pathHead;

        uri->pathHead = originalHead->next;

        /* Keep head and tail consistent once the list has become empty */
        if (uri->pathHead == NULL) {
            uri->pathTail = NULL;
        }

        originalHead->text.first = NULL;
        originalHead->text.afterLast = NULL;
        memory->free(memory, originalHead);
    }
}
/* For URIs without a host, a leading slash in the path is represented as
 * .absolutePath == URI_TRUE. A leading empty path segment would look like
 * a leading slash once stringified, so if the URI has no host and its
 * first path segment is empty, that segment is dropped and
 * .absolutePath is set to URI_TRUE instead. In all other cases the URI is
 * left untouched. */
static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
    assert(uri != NULL);
    assert(memory != NULL);

    /* Nothing to do for URIs with a host .. */
    if (URI_FUNC(HasHost)(uri) == URI_TRUE) {
        return;
    }

    /* .. or without any path segments .. */
    if (uri->pathHead == NULL) {
        return;
    }

    /* .. or with a non-empty first path segment */
    if (uri->pathHead->text.first != uri->pathHead->text.afterLast) {
        return;
    }

    assert(uri->absolutePath == URI_FALSE);

    URI_FUNC(DropEmptyFirstPathSegment)(uri, memory);
    uri->absolutePath = URI_TRUE;
}
/* Installs the path given by [first, afterLast) in destUri, which is
 * expected to come with an empty path and .absolutePath == URI_FALSE
 * (asserted). The input is wrapped as "///.." and parsed as a URI; the
 * path segment list of that temporary URI is then moved over to destUri.
 *
 * Returns URI_SUCCESS, or URI_ERROR_MALLOC if the wrapped length would
 * overflow, an allocation fails or one of the disambiguation helpers
 * reports failure, or the error of the parse / MakeOwnerMm call. */
static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    const size_t maxSizeT = (size_t)-1;
    size_t inputLenChars;
    size_t candidateLenChars;
    URI_CHAR * candidate;
    URI_TYPE(Uri) tempUri;
    int res;

    assert(destUri != NULL);
    assert(first != NULL);
    assert(afterLast != NULL);
    assert(memory != NULL);
    assert(destUri->pathHead == NULL); /* set by SetPathMm right before */
    assert(destUri->pathTail == NULL); /* set by SetPathMm right before */
    assert(destUri->absolutePath == URI_FALSE); /* set by SetPathMm right before */

    if (URI_FUNC(HasHost)(destUri) == URI_TRUE) {
        /* Skip the leading slash for target URIs with a host (so that the
         * path can be transferred 1:1 further down)
         * NOTE: The slash is known to be there because SetPathMm called
         *       IsWellFormedPath earlier: */
        assert((afterLast - first >= 1) && (first[0] == _UT('/')));
        first++;
    } else if (first == afterLast) {
        /* This avoids (1) all the expensive but unnecessary work below
         * and also (2) mis-encoding as single empty path segment
         * that would need (detection and) repair further down otherwise */
        return URI_SUCCESS;
    }

    /* Size of the "///" + input wrap (plus zero terminator), with overflow
     * detection for both the character and the byte count */
    inputLenChars = (size_t)(afterLast - first);
    if (maxSizeT - inputLenChars < 3 + 1) {
        return URI_ERROR_MALLOC;
    }

    candidateLenChars = 3 + inputLenChars;
    if (maxSizeT / sizeof(URI_CHAR) < candidateLenChars + 1) {
        return URI_ERROR_MALLOC;
    }

    candidate = memory->malloc(memory, (candidateLenChars + 1) * sizeof(URI_CHAR));
    if (candidate == NULL) {
        return URI_ERROR_MALLOC;
    }

    memcpy(candidate, _UT("///"), 3 * sizeof(URI_CHAR));
    memcpy(candidate + 3, first, inputLenChars * sizeof(URI_CHAR));
    candidate[candidateLenChars] = _UT('\0');

    /* Parse as an RFC 3986 URI */
    res = URI_FUNC(ParseSingleUriExMm)(&tempUri,
            candidate,
            candidate + candidateLenChars,
            NULL,
            memory);
    assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));
    if (res != URI_SUCCESS) {
        memory->free(memory, candidate);
        return res;
    }

    /* Nothing but path and host is supposed to be set by the parse, in particular not: */
    assert(tempUri.query.first == NULL);
    assert(tempUri.fragment.first == NULL);

    /* Ensure that the strings in the path segments are all owned by `tempUri`
     * because we want to (1) rip out and keep the full path list further down
     * and (2) be able to free the parsed string (`candidate`) also. */
    res = URI_FUNC(MakeOwnerMm)(&tempUri, memory);
    assert((res == URI_SUCCESS) || (res == URI_ERROR_MALLOC));
    if (res != URI_SUCCESS) {
        URI_FUNC(FreeUriMembersMm)(&tempUri, memory);
        memory->free(memory, candidate);
        return res;
    }
    assert(tempUri.owner == URI_TRUE);

    /* Move path to destination URI */
    assert(tempUri.absolutePath == URI_FALSE); /* always URI_FALSE for URIs with host */
    destUri->pathHead = tempUri.pathHead;
    destUri->pathTail = tempUri.pathTail;
    destUri->absolutePath = URI_FALSE;
    tempUri.pathHead = NULL;
    tempUri.pathTail = NULL;

    /* Free the rest of the temp URI */
    URI_FUNC(FreeUriMembersMm)(&tempUri, memory);
    memory->free(memory, candidate);

    /* Restore use of .absolutePath as needed */
    URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory);

    /* Disambiguate as needed */
    if (URI_FUNC(FixPathNoScheme)(destUri, memory) == URI_FALSE) {
        return URI_ERROR_MALLOC;
    }

    if (URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory) == URI_FALSE) {
        return URI_ERROR_MALLOC;
    }

    return URI_SUCCESS;
}
/* Replaces the path of the given URI by [first, afterLast); passing NULL
 * for both pointers only clears the existing path.
 *
 * Returns URI_ERROR_NULL if uri is NULL or exactly one of first/afterLast
 * is NULL, URI_ERROR_SYNTAX if IsWellFormedPath rejects the input, any
 * error reported by FreeUriPath, MakeOwnerMm or InternalSetPath, and
 * URI_SUCCESS otherwise. URI_CHECK_MEMORY_MANAGER may also return early.
 * All input validation happens before the URI is modified. */
int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    int res;

    /* Input validation (before making any changes) */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    if (first != NULL) {
        const UriBool hasHost = URI_FUNC(HasHost)(uri);

        if (URI_FUNC(IsWellFormedPath)(first, afterLast, hasHost) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Clear old value */
    res = URI_FUNC(FreeUriPath)(uri, memory);
    if (res != URI_SUCCESS) {
        return res;
    }
    uri->absolutePath = URI_FALSE;

    /* Already done? */
    if (first == NULL) {
        return URI_SUCCESS;
    }

    assert(first != NULL);

    /* Ensure owned */
    if (uri->owner == URI_FALSE) {
        res = URI_FUNC(MakeOwnerMm)(uri, memory);
        if (res != URI_SUCCESS) {
            return res;
        }
    }

    assert(uri->owner == URI_TRUE);

    /* Apply new value */
    res = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory);
    assert((res == URI_SUCCESS) || (res == URI_ERROR_SYNTAX) || (res == URI_ERROR_MALLOC));

    return res;
}
/* Convenience variant of SetPathMm that passes NULL as the memory manager
 * and hands back that call's result unchanged. */
int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const noExplicitMemory = NULL;
    const int res = URI_FUNC(SetPathMm)(uri, first, afterLast, noExplicitMemory);

    return res;
}
#endif

View File

@@ -0,0 +1,179 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetPort.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetPort.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
/* Decimal digits '0' through '9'. The macro expands to a list of case
 * labels with the leading "case" and the trailing ":" left off, so the set
 * is matched inside a switch by writing "case URI_SET_DIGIT:". */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Tells whether the characters in [first, afterLast) consist of decimal
 * digits only. Returns URI_FALSE if first or afterLast is NULL or if a
 * non-digit is found, URI_TRUE otherwise.
 *
 * NOTE: Grammar reads "port = *DIGIT" which includes the empty string. */
UriBool URI_FUNC(IsWellFormedPort)(const URI_CHAR * first, const URI_CHAR * afterLast) {
    const URI_CHAR * cursor;

    if ((first == NULL) || (afterLast == NULL)) {
        return URI_FALSE;
    }

    for (cursor = first; cursor < afterLast; cursor++) {
        switch (*cursor) {
        case URI_SET_DIGIT:
            break;
        default:
            return URI_FALSE;
        }
    }

    return URI_TRUE;
}
/* Replaces the port text of the given URI by a copy of [first, afterLast);
 * passing NULL for both pointers only clears the existing port text.
 *
 * Returns URI_ERROR_NULL if uri is NULL or exactly one of first/afterLast
 * is NULL, URI_ERROR_SETPORT_HOST_NOT_SET if a port is to be set on a URI
 * without a host, URI_ERROR_SYNTAX if IsWellFormedPort rejects the input,
 * any error reported by MakeOwnerMm, URI_ERROR_MALLOC if copying fails, and
 * URI_SUCCESS otherwise. URI_CHECK_MEMORY_MANAGER may also return early.
 * All input validation happens before the URI is modified. */
int URI_FUNC(SetPortTextMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    URI_TYPE(TextRange) sourceRange;

    /* Input validation (before making any changes) */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    if (first != NULL) {
        /* The RFC 3986 grammar reads:
         *   authority = [ userinfo "@" ] host [ ":" port ]
         * So no port without a host. */
        if (URI_FUNC(HasHost)(uri) == URI_FALSE) {
            return URI_ERROR_SETPORT_HOST_NOT_SET;
        }

        if (URI_FUNC(IsWellFormedPort)(first, afterLast) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Clear old value; only non-empty text of an owning URI gets freed */
    if ((uri->owner == URI_TRUE) && (uri->portText.first != uri->portText.afterLast)) {
        memory->free(memory, (URI_CHAR *)uri->portText.first);
    }
    uri->portText.first = NULL;
    uri->portText.afterLast = NULL;

    /* Already done? */
    if (first == NULL) {
        return URI_SUCCESS;
    }

    assert(first != NULL);

    /* Ensure owned */
    if (uri->owner == URI_FALSE) {
        const int res = URI_FUNC(MakeOwnerMm)(uri, memory);

        if (res != URI_SUCCESS) {
            return res;
        }
    }

    assert(uri->owner == URI_TRUE);

    /* Apply new value */
    sourceRange.first = first;
    sourceRange.afterLast = afterLast;
    if (URI_FUNC(CopyRangeAsNeeded)(&uri->portText, &sourceRange, memory) == URI_FALSE) {
        return URI_ERROR_MALLOC;
    }

    return URI_SUCCESS;
}
/* Convenience variant of SetPortTextMm that passes NULL as the memory
 * manager and hands back that call's result unchanged. */
int URI_FUNC(SetPortText)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const noExplicitMemory = NULL;
    const int res = URI_FUNC(SetPortTextMm)(uri, first, afterLast, noExplicitMemory);

    return res;
}
#endif

View File

@@ -0,0 +1,306 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetQuery.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetQuery.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
/* Each URI_SET_* macro below expands to a list of case labels with the
 * leading "case" and the trailing ":" left off, so that a whole character
 * set is matched inside a switch by writing "case URI_SET_xxx:". */
/* Decimal digits '0' through '9' */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Uppercase hexadecimal letters 'A' through 'F' */
#define URI_SET_HEX_LETTER_UPPER \
_UT('A'): \
case _UT('B'): \
case _UT('C'): \
case _UT('D'): \
case _UT('E'): \
case _UT('F')
/* Lowercase hexadecimal letters 'a' through 'f' */
#define URI_SET_HEX_LETTER_LOWER \
_UT('a'): \
case _UT('b'): \
case _UT('c'): \
case _UT('d'): \
case _UT('e'): \
case _UT('f')
/* Any hexadecimal digit: decimal digits plus hex letters of either case */
#define URI_SET_HEXDIG \
URI_SET_DIGIT: \
case URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER
/* Any letter 'a'..'z' / 'A'..'Z': the hex letters of either case plus
 * 'g'..'z' and 'G'..'Z' */
#define URI_SET_ALPHA \
URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER: \
case _UT('g'): \
case _UT('G'): \
case _UT('h'): \
case _UT('H'): \
case _UT('i'): \
case _UT('I'): \
case _UT('j'): \
case _UT('J'): \
case _UT('k'): \
case _UT('K'): \
case _UT('l'): \
case _UT('L'): \
case _UT('m'): \
case _UT('M'): \
case _UT('n'): \
case _UT('N'): \
case _UT('o'): \
case _UT('O'): \
case _UT('p'): \
case _UT('P'): \
case _UT('q'): \
case _UT('Q'): \
case _UT('r'): \
case _UT('R'): \
case _UT('s'): \
case _UT('S'): \
case _UT('t'): \
case _UT('T'): \
case _UT('u'): \
case _UT('U'): \
case _UT('v'): \
case _UT('V'): \
case _UT('w'): \
case _UT('W'): \
case _UT('x'): \
case _UT('X'): \
case _UT('y'): \
case _UT('Y'): \
case _UT('z'): \
case _UT('Z')
/* The characters ! $ & ' ( ) * + , ; = */
#define URI_SET_SUB_DELIMS \
_UT('!'): \
case _UT('$'): \
case _UT('&'): \
case _UT('\''): \
case _UT('('): \
case _UT(')'): \
case _UT('*'): \
case _UT('+'): \
case _UT(','): \
case _UT(';'): \
case _UT('=')
/* Letters, decimal digits and the characters - . _ ~ */
#define URI_SET_UNRESERVED \
URI_SET_ALPHA: \
case URI_SET_DIGIT: \
case _UT('-'): \
case _UT('.'): \
case _UT('_'): \
case _UT('~')
UriBool URI_FUNC(IsWellFormedQuery)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
}
/* The related part of the grammar in RFC 3986 reads:
*
* query = *( pchar / "/" / "?" )
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
*/
while (first < afterLast) {
switch (first[0]) {
case URI_SET_UNRESERVED:
break;
/* pct-encoded */
case _UT('%'):
if (afterLast - first < 3) {
return URI_FALSE;
}
switch (first[1]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
switch (first[2]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
first += 2;
break;
case URI_SET_SUB_DELIMS:
break;
/* ":" / "@" and "/" / "?" */
case _UT(':'):
case _UT('@'):
case _UT('/'):
case _UT('?'):
break;
default:
return URI_FALSE;
}
first++;
}
return URI_TRUE;
}
/* Replaces the query of the given URI by a copy of [first, afterLast);
 * passing NULL for both pointers only clears the existing query.
 *
 * Returns URI_ERROR_NULL if uri is NULL or exactly one of first/afterLast
 * is NULL, URI_ERROR_SYNTAX if IsWellFormedQuery rejects the input, any
 * error reported by MakeOwnerMm, URI_ERROR_MALLOC if copying fails, and
 * URI_SUCCESS otherwise. URI_CHECK_MEMORY_MANAGER may also return early.
 * All input validation happens before the URI is modified. */
int URI_FUNC(SetQueryMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    URI_TYPE(TextRange) sourceRange;

    /* Input validation (before making any changes) */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    if (first != NULL) {
        if (URI_FUNC(IsWellFormedQuery)(first, afterLast) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Clear old value; only non-empty text of an owning URI gets freed */
    if ((uri->owner == URI_TRUE) && (uri->query.first != uri->query.afterLast)) {
        memory->free(memory, (URI_CHAR *)uri->query.first);
    }
    uri->query.first = NULL;
    uri->query.afterLast = NULL;

    /* Already done? */
    if (first == NULL) {
        return URI_SUCCESS;
    }

    assert(first != NULL);

    /* Ensure owned */
    if (uri->owner == URI_FALSE) {
        const int res = URI_FUNC(MakeOwnerMm)(uri, memory);

        if (res != URI_SUCCESS) {
            return res;
        }
    }

    assert(uri->owner == URI_TRUE);

    /* Apply new value */
    sourceRange.first = first;
    sourceRange.afterLast = afterLast;
    if (URI_FUNC(CopyRangeAsNeeded)(&uri->query, &sourceRange, memory) == URI_FALSE) {
        return URI_ERROR_MALLOC;
    }

    return URI_SUCCESS;
}
/* Convenience variant of SetQueryMm that passes NULL as the memory manager
 * and hands back that call's result unchanged. */
int URI_FUNC(SetQuery)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const noExplicitMemory = NULL;
    const int res = URI_FUNC(SetQueryMm)(uri, first, afterLast, noExplicitMemory);

    return res;
}
#endif

View File

@@ -0,0 +1,272 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetScheme.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetScheme.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
/* Case-label list matching the decimal digits '0'..'9'.
 * Intended to be written as `case URI_SET_DIGIT:` inside a switch: the
 * expansion omits the leading `case` and the trailing colon on purpose,
 * so the call site supplies both. */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Case-label list matching the uppercase hexadecimal letters 'A'..'F'.
 * Use as `case URI_SET_HEX_LETTER_UPPER:` (leading `case` and trailing
 * colon come from the call site). */
#define URI_SET_HEX_LETTER_UPPER \
_UT('A'): \
case _UT('B'): \
case _UT('C'): \
case _UT('D'): \
case _UT('E'): \
case _UT('F')
/* Case-label list matching the lowercase hexadecimal letters 'a'..'f'.
 * Use as `case URI_SET_HEX_LETTER_LOWER:` (leading `case` and trailing
 * colon come from the call site). */
#define URI_SET_HEX_LETTER_LOWER \
_UT('a'): \
case _UT('b'): \
case _UT('c'): \
case _UT('d'): \
case _UT('e'): \
case _UT('f')
/* Case-label list matching any hexadecimal digit: the decimal digits plus
 * 'A'..'F' and 'a'..'f', composed from the three sets above.
 * Use as `case URI_SET_HEXDIG:`. */
#define URI_SET_HEXDIG \
URI_SET_DIGIT: \
case URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER
/* Case-label list matching the letters 'A'..'Z' and 'a'..'z'.
 * 'A'..'F' and 'a'..'f' are pulled in through the hex-letter sets; the
 * remaining letters 'g'..'z' / 'G'..'Z' are listed explicitly.
 * Use as `case URI_SET_ALPHA:`. */
#define URI_SET_ALPHA \
URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER: \
case _UT('g'): \
case _UT('G'): \
case _UT('h'): \
case _UT('H'): \
case _UT('i'): \
case _UT('I'): \
case _UT('j'): \
case _UT('J'): \
case _UT('k'): \
case _UT('K'): \
case _UT('l'): \
case _UT('L'): \
case _UT('m'): \
case _UT('M'): \
case _UT('n'): \
case _UT('N'): \
case _UT('o'): \
case _UT('O'): \
case _UT('p'): \
case _UT('P'): \
case _UT('q'): \
case _UT('Q'): \
case _UT('r'): \
case _UT('R'): \
case _UT('s'): \
case _UT('S'): \
case _UT('t'): \
case _UT('T'): \
case _UT('u'): \
case _UT('U'): \
case _UT('v'): \
case _UT('V'): \
case _UT('w'): \
case _UT('W'): \
case _UT('x'): \
case _UT('X'): \
case _UT('y'): \
case _UT('Y'): \
case _UT('z'): \
case _UT('Z')
UriBool URI_FUNC(IsWellFormedScheme)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
}
/* The related part of the grammar in RFC 3986 reads:
*
* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
*/
if (first >= afterLast) {
return URI_FALSE;
}
switch (first[0]) {
case URI_SET_ALPHA:
break;
default:
return URI_FALSE;
}
first++;
while (first < afterLast) {
switch (first[0]) {
case URI_SET_ALPHA:
case URI_SET_DIGIT:
case _UT('+'):
case _UT('-'):
case _UT('.'):
break;
default:
return URI_FALSE;
}
first++;
}
return URI_TRUE;
}
/*
 * Replaces (or removes) the scheme of a URI using the given memory manager.
 *
 * uri        URI to modify
 * first      start of the new scheme text, or NULL to remove the scheme
 * afterLast  end (exclusive) of the new scheme text; must be NULL exactly
 *            when first is NULL
 * memory     memory manager; vetted by URI_CHECK_MEMORY_MANAGER, which may
 *            return early from this function
 *
 * Returns URI_SUCCESS on success, URI_ERROR_NULL if uri is NULL or only one
 * of first/afterLast is NULL, URI_ERROR_SYNTAX if the new text is not a
 * well-formed scheme, URI_ERROR_MALLOC if fixing the path or copying the
 * text fails, or the error code reported by MakeOwnerMm.
 *
 * Note that the previous scheme is cleared before ownership is taken and
 * before the new text is copied, so it is already gone when one of those
 * later steps reports an error.
 */
int URI_FUNC(SetSchemeMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    /* Validate all inputs up front, before touching the URI */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    if (first != NULL) {
        if (URI_FUNC(IsWellFormedScheme)(first, afterLast) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Drop the previous scheme; only a non-empty range of an owning URI is freed */
    if (uri->owner == URI_TRUE) {
        if (uri->scheme.first != uri->scheme.afterLast) {
            memory->free(memory, (URI_CHAR *)uri->scheme.first);
        }
    }
    uri->scheme.afterLast = NULL;
    uri->scheme.first = NULL;

    /* Removal requested: nothing to copy, but the path may need disambiguation */
    if (first == NULL) {
        if (URI_FUNC(FixPathNoScheme)(uri, memory) == URI_TRUE) {
            return URI_SUCCESS;
        }
        return URI_ERROR_MALLOC;
    }

    assert(first != NULL);

    /* The URI has to own its memory before a copy can be stored in it */
    if (uri->owner == URI_FALSE) {
        const int ownerResult = URI_FUNC(MakeOwnerMm)(uri, memory);
        if (ownerResult != URI_SUCCESS) {
            return ownerResult;
        }
    }

    assert(uri->owner == URI_TRUE);

    /* Store a copy of the new scheme text */
    {
        URI_TYPE(TextRange) newScheme;
        newScheme.first = first;
        newScheme.afterLast = afterLast;

        return (URI_FUNC(CopyRangeAsNeeded)(&uri->scheme, &newScheme, memory) == URI_FALSE)
                ? URI_ERROR_MALLOC
                : URI_SUCCESS;
    }
}
/*
 * Convenience wrapper around SetSchemeMm: forwards all arguments unchanged
 * and passes NULL as the memory manager.
 *
 * Returns whatever SetSchemeMm returns.
 */
int URI_FUNC(SetScheme)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const memory = NULL;

    return URI_FUNC(SetSchemeMm)(uri, first, afterLast, memory);
}
#endif

View File

@@ -0,0 +1,306 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriSetUserInfo.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriSetUserInfo.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
#endif
#include <assert.h>
/* Case-label list matching the decimal digits '0'..'9'.
 * Intended to be written as `case URI_SET_DIGIT:` inside a switch: the
 * expansion omits the leading `case` and the trailing colon on purpose,
 * so the call site supplies both. */
#define URI_SET_DIGIT \
_UT('0'): \
case _UT('1'): \
case _UT('2'): \
case _UT('3'): \
case _UT('4'): \
case _UT('5'): \
case _UT('6'): \
case _UT('7'): \
case _UT('8'): \
case _UT('9')
/* Case-label list matching the uppercase hexadecimal letters 'A'..'F'.
 * Use as `case URI_SET_HEX_LETTER_UPPER:` (leading `case` and trailing
 * colon come from the call site). */
#define URI_SET_HEX_LETTER_UPPER \
_UT('A'): \
case _UT('B'): \
case _UT('C'): \
case _UT('D'): \
case _UT('E'): \
case _UT('F')
/* Case-label list matching the lowercase hexadecimal letters 'a'..'f'.
 * Use as `case URI_SET_HEX_LETTER_LOWER:` (leading `case` and trailing
 * colon come from the call site). */
#define URI_SET_HEX_LETTER_LOWER \
_UT('a'): \
case _UT('b'): \
case _UT('c'): \
case _UT('d'): \
case _UT('e'): \
case _UT('f')
/* Case-label list matching any hexadecimal digit: the decimal digits plus
 * 'A'..'F' and 'a'..'f', composed from the three sets above.
 * Used below to validate the two characters following '%' in a
 * percent-encoded sequence. */
#define URI_SET_HEXDIG \
URI_SET_DIGIT: \
case URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER
/* Case-label list matching the letters 'A'..'Z' and 'a'..'z'.
 * 'A'..'F' and 'a'..'f' are pulled in through the hex-letter sets; the
 * remaining letters 'g'..'z' / 'G'..'Z' are listed explicitly.
 * Use as `case URI_SET_ALPHA:`. */
#define URI_SET_ALPHA \
URI_SET_HEX_LETTER_UPPER: \
case URI_SET_HEX_LETTER_LOWER: \
case _UT('g'): \
case _UT('G'): \
case _UT('h'): \
case _UT('H'): \
case _UT('i'): \
case _UT('I'): \
case _UT('j'): \
case _UT('J'): \
case _UT('k'): \
case _UT('K'): \
case _UT('l'): \
case _UT('L'): \
case _UT('m'): \
case _UT('M'): \
case _UT('n'): \
case _UT('N'): \
case _UT('o'): \
case _UT('O'): \
case _UT('p'): \
case _UT('P'): \
case _UT('q'): \
case _UT('Q'): \
case _UT('r'): \
case _UT('R'): \
case _UT('s'): \
case _UT('S'): \
case _UT('t'): \
case _UT('T'): \
case _UT('u'): \
case _UT('U'): \
case _UT('v'): \
case _UT('V'): \
case _UT('w'): \
case _UT('W'): \
case _UT('x'): \
case _UT('X'): \
case _UT('y'): \
case _UT('Y'): \
case _UT('z'): \
case _UT('Z')
/* Case-label list matching the "sub-delims" characters of the URI grammar:
 *   ! $ & ' ( ) * + , ; =
 * Use as `case URI_SET_SUB_DELIMS:` (leading `case` and trailing colon come
 * from the call site). */
#define URI_SET_SUB_DELIMS \
_UT('!'): \
case _UT('$'): \
case _UT('&'): \
case _UT('\''): \
case _UT('('): \
case _UT(')'): \
case _UT('*'): \
case _UT('+'): \
case _UT(','): \
case _UT(';'): \
case _UT('=')
/* Case-label list matching the "unreserved" characters of the URI grammar:
 * letters, decimal digits, and the four marks - . _ ~
 * Use as `case URI_SET_UNRESERVED:`. */
#define URI_SET_UNRESERVED \
URI_SET_ALPHA: \
case URI_SET_DIGIT: \
case _UT('-'): \
case _UT('.'): \
case _UT('_'): \
case _UT('~')
UriBool URI_FUNC(IsWellFormedUserInfo)(const URI_CHAR * first, const URI_CHAR * afterLast) {
if ((first == NULL) || (afterLast == NULL)) {
return URI_FALSE;
}
/* userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) */
while (first < afterLast) {
switch (first[0]) {
case URI_SET_UNRESERVED:
break;
/* pct-encoded */
case _UT('%'):
if (afterLast - first < 3) {
return URI_FALSE;
}
switch (first[1]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
switch (first[2]) {
case URI_SET_HEXDIG:
break;
default:
return URI_FALSE;
}
first += 2;
break;
case URI_SET_SUB_DELIMS:
break;
/* ":" */
case _UT(':'):
break;
default:
return URI_FALSE;
}
first++;
}
return URI_TRUE;
}
/*
 * Replaces (or removes) the user info of a URI using the given memory
 * manager.
 *
 * uri        URI to modify
 * first      start of the new user info text, or NULL to remove it
 * afterLast  end (exclusive) of the new user info text; must be NULL
 *            exactly when first is NULL
 * memory     memory manager; vetted by URI_CHECK_MEMORY_MANAGER, which may
 *            return early from this function
 *
 * Returns URI_SUCCESS on success, URI_ERROR_NULL if uri is NULL or only one
 * of first/afterLast is NULL, URI_ERROR_SETUSERINFO_HOST_NOT_SET if user
 * info is to be set on a URI without a host, URI_ERROR_SYNTAX if the new
 * text is not well-formed user info, URI_ERROR_MALLOC if copying the text
 * fails, or the error code reported by MakeOwnerMm.
 *
 * Note that the previous user info is cleared before ownership is taken and
 * before the new text is copied, so it is already gone when one of those
 * later steps reports an error.
 */
int URI_FUNC(SetUserInfoMm)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast,
        UriMemoryManager * memory) {
    /* Validate all inputs up front, before touching the URI */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    if (first != NULL) {
        /* The RFC 3986 grammar reads:
         *   authority = [ userinfo "@" ] host [ ":" port ]
         * So no user info without a host. */
        if (URI_FUNC(HasHost)(uri) == URI_FALSE) {
            return URI_ERROR_SETUSERINFO_HOST_NOT_SET;
        }
        if (URI_FUNC(IsWellFormedUserInfo)(first, afterLast) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Drop the previous user info; only a non-empty range of an owning URI is freed */
    if (uri->owner == URI_TRUE) {
        if (uri->userInfo.first != uri->userInfo.afterLast) {
            memory->free(memory, (URI_CHAR *)uri->userInfo.first);
        }
    }
    uri->userInfo.afterLast = NULL;
    uri->userInfo.first = NULL;

    /* Removal requested: nothing left to do */
    if (first == NULL) {
        return URI_SUCCESS;
    }

    assert(first != NULL);

    /* The URI has to own its memory before a copy can be stored in it */
    if (uri->owner == URI_FALSE) {
        const int ownerResult = URI_FUNC(MakeOwnerMm)(uri, memory);
        if (ownerResult != URI_SUCCESS) {
            return ownerResult;
        }
    }

    assert(uri->owner == URI_TRUE);

    /* Store a copy of the new user info text */
    {
        URI_TYPE(TextRange) newUserInfo;
        newUserInfo.first = first;
        newUserInfo.afterLast = afterLast;

        return (URI_FUNC(CopyRangeAsNeeded)(&uri->userInfo, &newUserInfo, memory) == URI_FALSE)
                ? URI_ERROR_MALLOC
                : URI_SUCCESS;
    }
}
/*
 * Convenience wrapper around SetUserInfoMm: forwards all arguments
 * unchanged and passes NULL as the memory manager.
 *
 * Returns whatever SetUserInfoMm returns.
 */
int URI_FUNC(SetUserInfo)(URI_TYPE(Uri) * uri,
        const URI_CHAR * first,
        const URI_CHAR * afterLast) {
    UriMemoryManager * const memory = NULL;

    return URI_FUNC(SetUserInfoMm)(uri, first, afterLast, memory);
}
#endif

View File

@@ -0,0 +1,87 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriVersion.c
* Implements a runtime version getter.
* NOTE: This source file includes itself twice.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriVersion.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriVersion.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
#endif
/*
 * Returns the library version string for the character type of the current
 * compilation pass: URI_VER_ANSI in the ANSI pass, URI_VER_UNICODE in the
 * Unicode pass. Compilation fails if neither pass macro is defined.
 */
const URI_CHAR * URI_FUNC(BaseRuntimeVersion)(void) {
#if !defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)
# error Either URI_PASS_ANSI or URI_PASS_UNICODE must be defined
#elif defined(URI_PASS_ANSI)
    return URI_VER_ANSI;
#else
    return URI_VER_UNICODE;
#endif
}
#endif