diff --git a/ext/uri/config.m4 b/ext/uri/config.m4 index 08dc044d8d2..3631ad3c5c0 100644 --- a/ext/uri/config.m4 +++ b/ext/uri/config.m4 @@ -11,8 +11,8 @@ AC_DEFINE([URI_ENABLE_ANSI], [1], [Define to 1 for enabling ANSI support of urip AC_DEFINE([URI_NO_UNICODE], [1], [Define to 1 for disabling unicode support of uriparser.]) URIPARSER_DIR="uriparser" -URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriEscape.c \ -$URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \ +URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriCopy.c \ +$URIPARSER_DIR/src/UriEscape.c $URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \ $URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/src/UriNormalizeBase.c \ $URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \ $URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c" diff --git a/ext/uri/config.w32 b/ext/uri/config.w32 index 9c6af0cc5fa..8086b4b9bfc 100644 --- a/ext/uri/config.w32 +++ b/ext/uri/config.w32 @@ -5,5 +5,5 @@ AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uri ADD_FLAG("CFLAGS_URI", "/D URI_STATIC_BUILD"); ADD_EXTENSION_DEP('uri', 'lexbor'); -ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriShorten.c", "uri"); +ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriCopy.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c UriShorten.c", "uri"); PHP_INSTALL_HEADERS("ext/uri", "php_lexbor.h php_uri.h php_uri_common.h uriparser/src 
uriparser/include"); diff --git a/ext/uri/uriparser/include/uriparser/Uri.h b/ext/uri/uriparser/include/uriparser/Uri.h index 44bc5acc54b..f0f2ad9a34b 100644 --- a/ext/uri/uriparser/include/uriparser/Uri.h +++ b/ext/uri/uriparser/include/uriparser/Uri.h @@ -201,6 +201,17 @@ typedef struct URI_TYPE(QueryListStruct) { } URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */ +/** + * Checks if a URI has the host component set. + * + * @param uri IN: %URI to check + * @return URI_TRUE when host is set, URI_FALSE otherwise + * + * @since 0.9.9 + */ +URI_PUBLIC UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri); + + /** * Parses a RFC 3986 %URI. @@ -644,6 +655,36 @@ URI_PUBLIC int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, +/** + * Copies a %URI structure. + * + * @param destUri OUT: Output destination + * @param sourceUri IN: %URI to copy + * @param memory IN: Memory manager to use, NULL for default libc + * @return Error code or 0 on success + * + * @see uriCopyUriA + * @since 0.9.9 + */ +URI_PUBLIC int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri, + const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory); + + + +/** + * Copies a %URI structure. + * + * @param destUri OUT: Output destination + * @param sourceUri IN: %URI to copy + * @return Error code or 0 on success + * + * @see uriCopyUriMmA + * @since 0.9.9 + */ +URI_PUBLIC int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri, const URI_TYPE(Uri) * sourceUri); + + + /** * Determines the components of a %URI that are not normalized. 
* diff --git a/ext/uri/uriparser/include/uriparser/UriBase.h b/ext/uri/uriparser/include/uriparser/UriBase.h index dc3883e6516..46c02135bb1 100644 --- a/ext/uri/uriparser/include/uriparser/UriBase.h +++ b/ext/uri/uriparser/include/uriparser/UriBase.h @@ -258,7 +258,8 @@ typedef enum UriNormalizationMaskEnum { URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */ URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */ URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */ - URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */ + URI_NORMALIZE_FRAGMENT = 1 << 5, /**< Normalize fragment (fix uppercase percent-encodings) */ + URI_NORMALIZE_PORT = 1 << 6 /**< Normalize port (drop leading zeros) @since 0.9.9 */ } UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */ diff --git a/ext/uri/uriparser/src/UriCommon.c b/ext/uri/uriparser/src/UriCommon.c index 88e2767d71c..ccec5d4d5c8 100644 --- a/ext/uri/uriparser/src/UriCommon.c +++ b/ext/uri/uriparser/src/UriCommon.c @@ -119,6 +119,40 @@ int URI_FUNC(CompareRange)( +/* Duplicates the characters of sourceRange into memory obtained from memory->malloc and points destRange at the copy (no terminator is appended). Returns URI_FALSE when the allocation returns NULL, URI_TRUE otherwise. The source is read completely before destRange is written, so both parameters may refer to the same range. NOTE(review): for an empty source the request is for 0 bytes, so the outcome depends on what memory->malloc returns for size 0 -- confirm. */ UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange, + const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory) { + const int lenInChars = (int)(sourceRange->afterLast - sourceRange->first); + const int lenInBytes = lenInChars * sizeof(URI_CHAR); + URI_CHAR * dup = memory->malloc(memory, lenInBytes); + if (dup == NULL) { + return URI_FALSE; + } + memcpy(dup, sourceRange->first, lenInBytes); + destRange->first = dup; + destRange->afterLast = dup + lenInChars; + + return URI_TRUE; +} + + + +/* Copies sourceRange into destRange, allocating only when needed: a NULL source gives a NULL range, an empty source with useSafe set points both ends at SafeToPointTo, and everything else is duplicated through CopyRange. Returns URI_FALSE only when that duplication fails. */ UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange, + const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory) { + if (sourceRange->first == NULL) { + destRange->first = NULL; + destRange->afterLast = NULL; + } else if (sourceRange->first == 
sourceRange->afterLast && useSafe) { + destRange->first = URI_FUNC(SafeToPointTo); + destRange->afterLast = URI_FUNC(SafeToPointTo); + } else { + return URI_FUNC(CopyRange)(destRange, sourceRange, memory); + } + + return URI_TRUE; +} + + + UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, UriBool relative, UriBool pathOwned, UriMemoryManager * memory) { URI_TYPE(PathSegment) * walker; @@ -189,7 +223,7 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, if (prev == NULL) { /* Last and first */ - if (URI_FUNC(IsHostSet)(uri)) { + if (URI_FUNC(HasHost)(uri)) { /* Replace "." with empty segment to represent trailing slash */ walker->text.first = URI_FUNC(SafeToPointTo); walker->text.afterLast = URI_FUNC(SafeToPointTo); @@ -463,7 +497,7 @@ URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) { /* Checks if a URI has the host component set. */ -UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) { +UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri) { return (uri != NULL) && ((uri->hostText.first != NULL) || (uri->hostData.ip4 != NULL) @@ -601,7 +635,7 @@ void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) { /* Fix path if only one empty segment */ if (!uri->absolutePath - && !URI_FUNC(IsHostSet)(uri) + && !URI_FUNC(HasHost)(uri) && (uri->pathHead != NULL) && (uri->pathHead->next == NULL) && (uri->pathHead->text.first == uri->pathHead->text.afterLast)) { diff --git a/ext/uri/uriparser/src/UriCommon.h b/ext/uri/uriparser/src/UriCommon.h index 42311ddc98b..8dffab9f9f6 100644 --- a/ext/uri/uriparser/src/UriCommon.h +++ b/ext/uri/uriparser/src/UriCommon.h @@ -82,6 +82,11 @@ int URI_FUNC(CompareRange)( const URI_TYPE(TextRange) * a, const URI_TYPE(TextRange) * b); +UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange, + const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory); +UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange, + const URI_TYPE(TextRange) * 
sourceRange, UriBool useSafe, UriMemoryManager * memory); + UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri, UriMemoryManager * memory); UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri, @@ -91,8 +96,6 @@ unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig); URI_CHAR URI_FUNC(HexToLetter)(unsigned int value); URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase); -UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri); - UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source, UriMemoryManager * memory); UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, diff --git a/ext/uri/uriparser/src/UriCopy.c b/ext/uri/uriparser/src/UriCopy.c new file mode 100644 index 00000000000..0974ec5c040 --- /dev/null +++ b/ext/uri/uriparser/src/UriCopy.c @@ -0,0 +1,234 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * Copyright (C) 2025, Máté Kocsis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file UriCopy.c + * Holds the %URI copy implementation (CopyUri, CopyUriMm). + * NOTE: This source file includes itself twice. + */ + +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCopy.c" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCopy.c" +# undef URI_PASS_UNICODE +# endif +#else +# ifdef URI_PASS_ANSI +# include +# else +# include +# include +# endif + + + +#ifndef URI_DOXYGEN +# include +# include "UriCommon.h" +# include "UriMemory.h" +# include "UriNormalize.h" +# include "UriCopy.h" +#endif + + + +/* Cleanup helper for the failure paths of CopyUriMm: calls PreventLeakage with revertMask, then frees hostData.ip4 or else hostData.ip6 if non-NULL, and, when URI_NORMALIZE_PORT is in revertMask, frees a non-empty port text and resets the port range to NULL. NOTE(review): hostData.ip4 and hostData.ip6 are read regardless of revertMask, while CopyUriMm can call this before it has assigned them (userInfo, hostText and ip4 failure paths) -- this looks like it relies on destUri being zero-initialized by the caller, confirm. NOTE(review): hostData.ipFuture is not released in this function and an empty port range is never freed although CopyUriMm copies it with useSafe set to URI_FALSE -- confirm neither can leak. */ static void URI_FUNC(PreventLeakageAfterCopy)(URI_TYPE(Uri) * uri, + unsigned int revertMask, UriMemoryManager * memory) { + URI_FUNC(PreventLeakage)(uri, revertMask, memory); + + if (uri->hostData.ip4 != NULL) { + memory->free(memory, uri->hostData.ip4); + uri->hostData.ip4 = NULL; + } else if (uri->hostData.ip6 != NULL) { + memory->free(memory, uri->hostData.ip6); + uri->hostData.ip6 = NULL; + } + + if (revertMask & URI_NORMALIZE_PORT) { + if 
(uri->portText.first != uri->portText.afterLast) { + memory->free(memory, (URI_CHAR *)uri->portText.first); + } + uri->portText.first = NULL; + uri->portText.afterLast = NULL; + } +} + + + +/* Deep-copies sourceUri into destUri component by component (scheme, userInfo, hostText, hostData, portText, path segments, query, fragment) using the given memory manager. Returns URI_ERROR_NULL when either URI pointer is NULL, URI_ERROR_MALLOC when a copy step fails, URI_SUCCESS otherwise; on success destUri->owner is URI_TRUE. doneMask records the components populated so far and is handed to PreventLeakageAfterCopy on failure (a scheme failure returns directly because nothing has been populated yet). */ int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri, + const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory) { + unsigned int doneMask = URI_NORMALIZED; + + if (sourceUri == NULL || destUri == NULL) { + return URI_ERROR_NULL; + } + + URI_CHECK_MEMORY_MANAGER(memory); /* may return */ + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->scheme, &sourceUri->scheme, URI_FALSE, memory) == URI_FALSE) { + return URI_ERROR_MALLOC; + } + + doneMask |= URI_NORMALIZE_SCHEME; + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->userInfo, &sourceUri->userInfo, URI_FALSE, memory) == URI_FALSE) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + doneMask |= URI_NORMALIZE_USER_INFO; + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostText, &sourceUri->hostText, URI_TRUE, memory) == URI_FALSE) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + doneMask |= URI_NORMALIZE_HOST; + + if (sourceUri->hostData.ip4 == NULL) { + destUri->hostData.ip4 = NULL; + } else { + destUri->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4)); + if (destUri->hostData.ip4 == NULL) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + *(destUri->hostData.ip4) = *(sourceUri->hostData.ip4); + } + + if (sourceUri->hostData.ip6 == NULL) { + destUri->hostData.ip6 = NULL; + } else { + destUri->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6)); + if (destUri->hostData.ip6 == NULL) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + *(destUri->hostData.ip6) = *(sourceUri->hostData.ip6); + } + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostData.ipFuture, &sourceUri->hostData.ipFuture, URI_FALSE, memory) == URI_FALSE) { + 
URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->portText, &sourceUri->portText, URI_FALSE, memory) == URI_FALSE) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + doneMask |= URI_NORMALIZE_PORT; + + destUri->pathHead = NULL; + destUri->pathTail = NULL; + + if (sourceUri->pathHead != NULL) { + URI_TYPE(PathSegment) * sourceWalker = sourceUri->pathHead; + URI_TYPE(PathSegment) * destPrev = NULL; + + while (sourceWalker != NULL) { + URI_TYPE(PathSegment) * destWalker = memory->malloc(memory, sizeof(URI_TYPE(PathSegment))); + if (destWalker == NULL) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + destWalker->text.first = NULL; + destWalker->text.afterLast = NULL; + destWalker->next = NULL; + destWalker->reserved = NULL; + + if (destUri->pathHead == NULL) { + destUri->pathHead = destWalker; + doneMask |= URI_NORMALIZE_PATH; + } + + /* NOTE(review): for any segment after the first, destWalker is not yet reachable from destUri->pathHead at this point (it is linked through destPrev->next only further down), so the failure path below cannot release it through destUri -- looks like a leak, confirm. */ if (URI_FUNC(CopyRangeAsNeeded)(&destWalker->text, &sourceWalker->text, URI_TRUE, memory) == URI_FALSE) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + if (destPrev != NULL) { + destPrev->next = destWalker; + } + + destPrev = destWalker; + sourceWalker = sourceWalker->next; + + destUri->pathTail = destWalker; + } + } + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->query, &sourceUri->query, URI_FALSE, memory) == URI_FALSE) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + doneMask |= URI_NORMALIZE_QUERY; + + if (URI_FUNC(CopyRangeAsNeeded)(&destUri->fragment, &sourceUri->fragment, URI_FALSE, memory) == URI_FALSE) { + URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory); + return URI_ERROR_MALLOC; + } + + destUri->absolutePath = sourceUri->absolutePath; + destUri->owner = URI_TRUE; + destUri->reserved = NULL; + + return URI_SUCCESS; +} + + + 
+/* Convenience wrapper around CopyUriMm that passes NULL as the memory manager. */ int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri, + const URI_TYPE(Uri) * sourceUri) { + return URI_FUNC(CopyUriMm)(destUri, sourceUri, NULL); +} + +#endif diff --git a/ext/uri/uriparser/src/UriCopy.h b/ext/uri/uriparser/src/UriCopy.h new file mode 100644 index 00000000000..952b1df4f9c --- /dev/null +++ b/ext/uri/uriparser/src/UriCopy.h @@ -0,0 +1,78 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2007, Weijia Song + * Copyright (C) 2007, Sebastian Pipping + * Copyright (C) 2025, Máté Kocsis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_COPY_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriCopy.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriCopy.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_COPY_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_COPY_H_ANSI 1 +# include +# else +# define URI_COPY_H_UNICODE 1 +# include +# endif + + + +int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri, + const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory); +int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri, + const URI_TYPE(Uri) * sourceUri); + +#endif +#endif diff --git a/ext/uri/uriparser/src/UriNormalize.c b/ext/uri/uriparser/src/UriNormalize.c index 0cf353f1111..56b19573665 100644 --- a/ext/uri/uriparser/src/UriNormalize.c +++ b/ext/uri/uriparser/src/UriNormalize.c @@ -109,12 +109,9 @@ static void URI_FUNC(LowercaseInplaceExceptPercentEncoding)(const URI_CHAR * fir static UriBool 
URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first, const URI_CHAR ** afterLast, UriMemoryManager * memory); -static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, - unsigned int revertMask, UriMemoryManager * memory); - -static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, +void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, unsigned int revertMask, UriMemoryManager * memory) { if (revertMask & URI_NORMALIZE_SCHEME) { /* NOTE: A scheme cannot be the empty string @@ -407,15 +404,9 @@ static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask, && (range->first != NULL) && (range->afterLast != NULL) && (range->afterLast > range->first)) { - const int lenInChars = (int)(range->afterLast - range->first); - const int lenInBytes = lenInChars * sizeof(URI_CHAR); - URI_CHAR * dup = memory->malloc(memory, lenInBytes); - if (dup == NULL) { - return URI_FALSE; /* Raises malloc error */ + /* Source and destination are the same range on purpose: CopyRange reads the source completely before it writes the destination. A URI_FALSE result signals a failed allocation. */ if (URI_FUNC(CopyRange)(range, range, memory) == URI_FALSE) { + return URI_FALSE; } - memcpy(dup, range->first, lenInBytes); - range->first = dup; - range->afterLast = dup + lenInChars; *doneMask |= maskTest; } return URI_TRUE; @@ -557,6 +548,75 @@ int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) { } +/* Returns a pointer to the first character in [first, afterLast) that is not a leading zero digit; when every character is a zero digit it returns the last one, so the result always addresses one remaining character. The range must be non-empty (asserted). */ static const URI_CHAR * URI_FUNC(PastLeadingZeros)(const URI_CHAR * first, const URI_CHAR * afterLast) { + assert(first != NULL); + assert(afterLast != NULL); + assert(first != afterLast); + + { + /* Find the first non-zero character */ + const URI_CHAR * remainderFirst = first; + while ((remainderFirst < afterLast) && (remainderFirst[0] == _UT('0'))) { + remainderFirst++; + } + + /* Is the string /all/ zeros? 
*/ + if (remainderFirst == afterLast) { + /* Yes, and length is >=1 because we ruled out the empty string earlier; + * pull back onto rightmost zero */ + assert(remainderFirst > first); + remainderFirst--; + assert(remainderFirst[0] == _UT('0')); + } + + return remainderFirst; + } +} + + + +/* Strips leading zeros from a writable buffer: moves the remaining characters to the front with memmove, writes a terminator after them and pulls *afterLast back accordingly. Does nothing for an empty range or when there is nothing to strip. */ static void URI_FUNC(DropLeadingZerosInplace)(URI_CHAR * first, const URI_CHAR ** afterLast) { + assert(first != NULL); + assert(afterLast != NULL); + assert(*afterLast != NULL); + + if (first == *afterLast) { + return; + } + + { + const URI_CHAR * const remainderFirst = URI_FUNC(PastLeadingZeros)(first, *afterLast); + + if (remainderFirst > first) { + const size_t remainderLen = *afterLast - remainderFirst; + memmove(first, remainderFirst, remainderLen * sizeof(URI_CHAR)); + /* The terminator lands inside the original range because at least one character was dropped */ first[remainderLen] = _UT('\0'); + *afterLast = first + remainderLen; + } + } +} + + + +/* Strips leading zeros without writing to the text: only *first is moved forward. Does nothing for an empty range. */ static void URI_FUNC(AdvancePastLeadingZeros)( + const URI_CHAR ** first, const URI_CHAR * afterLast) { + assert(first != NULL); + assert(*first != NULL); + assert(afterLast != NULL); + + if (*first == afterLast) { + return; + } + + { + const URI_CHAR * const remainderFirst = URI_FUNC(PastLeadingZeros)(*first, afterLast); + + /* Cut off leading zeros */ + *first = remainderFirst; + } +} + + static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, unsigned int * outMask, @@ -658,6 +718,27 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, } } + /* Port */ + if (outMask != NULL) { + /* Is there a port even? */ + if (uri->portText.first != NULL) { + /* Determine whether the port is already normalized, i.e. either "", "0" or no leading zeros */ + const size_t portLen = uri->portText.afterLast - uri->portText.first; + if ((portLen > 1) && (uri->portText.first[0] == _UT('0'))) { + *outMask |= URI_NORMALIZE_PORT; + } + } + } else { + /* Normalize the port, i.e. 
drop leading zeros (except for string "0") */ + if ((inMask & URI_NORMALIZE_PORT) && (uri->portText.first != NULL)) { + /* An owning URI is rewritten in place; otherwise only the start pointer is advanced and the characters are left untouched (presumably because a non-owned URI points into text it must not modify -- confirm). */ if (uri->owner) { + URI_FUNC(DropLeadingZerosInplace)((URI_CHAR *)uri->portText.first, &(uri->portText.afterLast)); + } else { + URI_FUNC(AdvancePastLeadingZeros)(&(uri->portText.first), uri->portText.afterLast); + } + } + } + /* User info */ if (outMask != NULL) { const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)( diff --git a/ext/uri/uriparser/src/UriNormalize.h b/ext/uri/uriparser/src/UriNormalize.h new file mode 100644 index 00000000000..cb58085b7d3 --- /dev/null +++ b/ext/uri/uriparser/src/UriNormalize.h @@ -0,0 +1,76 @@ +/* + * uriparser - RFC 3986 URI parsing library + * + * Copyright (C) 2018, Weijia Song + * Copyright (C) 2018, Sebastian Pipping + * Copyright (C) 2025, Máté Kocsis + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI)) \ + || (defined(URI_PASS_UNICODE) && !defined(URI_NORMALIZE_H_UNICODE)) \ + || (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* What encodings are enabled? */ +#include +#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE)) +/* Include SELF twice */ +# ifdef URI_ENABLE_ANSI +# define URI_PASS_ANSI 1 +# include "UriNormalize.h" +# undef URI_PASS_ANSI +# endif +# ifdef URI_ENABLE_UNICODE +# define URI_PASS_UNICODE 1 +# include "UriNormalize.h" +# undef URI_PASS_UNICODE +# endif +/* Only one pass for each encoding */ +#elif (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI) \ + && defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \ + && !defined(URI_NORMALIZE_H_UNICODE) && defined(URI_ENABLE_UNICODE)) +# ifdef URI_PASS_ANSI +# define URI_NORMALIZE_H_ANSI 1 +# include +# else +# define URI_NORMALIZE_H_UNICODE 1 +# include +# endif + + + +void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri, + unsigned int revertMask, UriMemoryManager * memory); + +#endif +#endif diff --git a/ext/uri/uriparser/src/UriRecompose.c b/ext/uri/uriparser/src/UriRecompose.c index 5027eca6cfa..1567efc81dc 100644 --- a/ext/uri/uriparser/src/UriRecompose.c +++ b/ext/uri/uriparser/src/UriRecompose.c @@ -152,7 +152,7 @@ static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest, /* [05/19] endif; */ } /* [06/19] if defined(authority) then */ - if 
(URI_FUNC(IsHostSet)(uri)) { + if (URI_FUNC(HasHost)(uri)) { /* [07/19] append "//" to result; */ if (dest != NULL) { if (written + 2 <= maxChars) { @@ -422,7 +422,7 @@ static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest, /* [10/19] append path to result; */ /* Slash needed here? */ if (uri->absolutePath || ((uri->pathHead != NULL) - && URI_FUNC(IsHostSet)(uri))) { + && URI_FUNC(HasHost)(uri))) { if (dest != NULL) { if (written + 1 <= maxChars) { memcpy(dest + written, _UT("/"), diff --git a/ext/uri/uriparser/src/UriResolve.c b/ext/uri/uriparser/src/UriResolve.c index 80031a894d4..8e47e6af8c6 100644 --- a/ext/uri/uriparser/src/UriResolve.c +++ b/ext/uri/uriparser/src/UriResolve.c @@ -128,7 +128,7 @@ static int URI_FUNC(ResolveAbsolutePathFlag)(URI_TYPE(Uri) * absWork, return URI_ERROR_NULL; } - if (URI_FUNC(IsHostSet)(absWork) && absWork->absolutePath) { + if (URI_FUNC(HasHost)(absWork) && absWork->absolutePath) { /* Empty segment needed, instead? */ if (absWork->pathHead == NULL) { URI_TYPE(PathSegment) * const segment = memory->malloc(memory, sizeof(URI_TYPE(PathSegment))); @@ -203,7 +203,7 @@ static int URI_FUNC(AddBaseUriImpl)(URI_TYPE(Uri) * absDest, /* [06/32] else */ } else { /* [07/32] if defined(R.authority) then */ - if (URI_FUNC(IsHostSet)(relSource)) { + if (URI_FUNC(HasHost)(relSource)) { /* [08/32] T.authority = R.authority; */ if (!URI_FUNC(CopyAuthority)(absDest, relSource, memory)) { return URI_ERROR_MALLOC;