1
0
mirror of https://github.com/php/php-src.git synced 2026-03-24 00:02:20 +01:00

Update uriparser to commit 5f7c6d88c50f548d0c7f499c22d36f51d34775b3

While there, fix Windows build by adding UriResolve.c to the sources.
This commit is contained in:
Máté Kocsis
2025-07-01 21:53:33 +02:00
parent 59dd0f8a48
commit 66376389fe
12 changed files with 573 additions and 25 deletions

View File

@@ -11,8 +11,8 @@ AC_DEFINE([URI_ENABLE_ANSI], [1], [Define to 1 for enabling ANSI support of urip
AC_DEFINE([URI_NO_UNICODE], [1], [Define to 1 for disabling unicode support of uriparser.])
URIPARSER_DIR="uriparser"
URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriEscape.c \
$URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
URIPARSER_SOURCES="$URIPARSER_DIR/src/UriCommon.c $URIPARSER_DIR/src/UriCompare.c $URIPARSER_DIR/src/UriCopy.c \
$URIPARSER_DIR/src/UriEscape.c $URIPARSER_DIR/src/UriFile.c $URIPARSER_DIR/src/UriIp4.c $URIPARSER_DIR/src/UriIp4Base.c \
$URIPARSER_DIR/src/UriMemory.c $URIPARSER_DIR/src/UriNormalize.c $URIPARSER_DIR/src/UriNormalizeBase.c \
$URIPARSER_DIR/src/UriParse.c $URIPARSER_DIR/src/UriParseBase.c $URIPARSER_DIR/src/UriQuery.c \
$URIPARSER_DIR/src/UriRecompose.c $URIPARSER_DIR/src/UriResolve.c $URIPARSER_DIR/src/UriShorten.c"

View File

@@ -5,5 +5,5 @@ AC_DEFINE("URI_NO_UNICODE", 1, "Define to 1 for disabling unicode support of uri
ADD_FLAG("CFLAGS_URI", "/D URI_STATIC_BUILD");
ADD_EXTENSION_DEP('uri', 'lexbor');
ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriShorten.c", "uri");
ADD_SOURCES("ext/uri/uriparser/src", "UriCommon.c UriCompare.c UriCopy.c UriEscape.c UriFile.c UriIp4.c UriIp4Base.c UriMemory.c UriNormalize.c UriNormalizeBase.c UriParse.c UriParseBase.c UriQuery.c UriRecompose.c UriResolve.c UriShorten.c", "uri");
PHP_INSTALL_HEADERS("ext/uri", "php_lexbor.h php_uri.h php_uri_common.h uriparser/src uriparser/include");

View File

@@ -201,6 +201,17 @@ typedef struct URI_TYPE(QueryListStruct) {
} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */
/**
* Checks if a URI has the host component set.
*
* @param uri <b>IN</b>: %URI to check
* @return <c>URI_TRUE</c> when host is set, <c>URI_FALSE</c> otherwise
*
* @since 0.9.9
*/
URI_PUBLIC UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri);
/**
* Parses a RFC 3986 %URI.
@@ -644,6 +655,36 @@ URI_PUBLIC int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri,
/**
* Copies a %URI structure.
*
* @param destUri <b>OUT</b>: Output destination
* @param sourceUri <b>IN</b>: %URI to copy
* @param memory <b>IN</b>: Memory manager to use, NULL for default libc
* @return Error code or 0 on success
*
* @see uriCopyUriA
* @since 0.9.9
*/
URI_PUBLIC int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory);
/**
* Copies a %URI structure.
*
* @param destUri <b>OUT</b>: Output destination
* @param sourceUri <b>IN</b>: %URI to copy
* @return Error code or 0 on success
*
* @see uriCopyUriMmA
* @since 0.9.9
*/
URI_PUBLIC int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri, const URI_TYPE(Uri) * sourceUri);
/**
* Determines the components of a %URI that are not normalized.
*

View File

@@ -258,7 +258,8 @@ typedef enum UriNormalizationMaskEnum {
URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */
URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */
URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */
URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */
URI_NORMALIZE_FRAGMENT = 1 << 5, /**< Normalize fragment (fix uppercase percent-encodings) */
URI_NORMALIZE_PORT = 1 << 6 /**< Normalize port (drop leading zeros) @since 0.9.9 */
} UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */

View File

@@ -119,6 +119,40 @@ int URI_FUNC(CompareRange)(
/*
 * Duplicates the characters delimited by sourceRange into a fresh buffer
 * obtained from memory->malloc and points destRange at that buffer.
 *
 * destRange and sourceRange may be the same object: the source pointers are
 * read completely before destRange is written.
 *
 * The buffer holds exactly the characters of the range; no terminator is
 * appended.
 *
 * Returns URI_TRUE on success. Returns URI_FALSE, leaving destRange
 * untouched, when the range is inverted, when its size in bytes cannot be
 * represented, or when the allocation fails.
 */
UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange,
		const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory) {
	size_t lenInChars;
	size_t lenInBytes;
	URI_CHAR * dup;

	/* An inverted range would turn into a huge unsigned length below */
	if (sourceRange->afterLast < sourceRange->first) {
		return URI_FALSE;
	}

	/* Lengths are kept in size_t: squeezing the pointer difference through
	 * int could truncate or overflow for very long ranges. */
	lenInChars = (size_t)(sourceRange->afterLast - sourceRange->first);
	if (lenInChars > ((size_t)-1) / sizeof(URI_CHAR)) {
		return URI_FALSE;
	}
	lenInBytes = lenInChars * sizeof(URI_CHAR);

	dup = memory->malloc(memory, lenInBytes);
	if (dup == NULL) {
		return URI_FALSE;
	}

	memcpy(dup, sourceRange->first, lenInBytes);
	destRange->first = dup;
	destRange->afterLast = dup + lenInChars;
	return URI_TRUE;
}
/*
 * Copies sourceRange into destRange, allocating only when there are
 * characters to copy:
 *
 *  - a NULL source range produces a NULL destination range;
 *  - an empty (but non-NULL) source range makes the destination point at the
 *    static SafeToPointTo sentinel, without allocating;
 *  - any other range is duplicated through CopyRange.
 *
 * useSafe is kept so existing callers compile unchanged, but empty ranges now
 * always use the sentinel. Previously an empty range with useSafe ==
 * URI_FALSE was handed to CopyRange, i.e. a zero-byte allocation was
 * requested. Depending on the allocator that either returned NULL (reported
 * as an allocation failure) or a block that release code guarded by
 * "first != afterLast" would never free.
 *
 * Returns URI_FALSE only when CopyRange fails, URI_TRUE otherwise.
 */
UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange,
		const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory) {
	(void)useSafe; /* retained for interface compatibility, see above */

	if (sourceRange->first == NULL) {
		destRange->first = NULL;
		destRange->afterLast = NULL;
		return URI_TRUE;
	}

	if (sourceRange->first == sourceRange->afterLast) {
		/* Empty range: nothing to own, so never allocate for it */
		destRange->first = URI_FUNC(SafeToPointTo);
		destRange->afterLast = URI_FUNC(SafeToPointTo);
		return URI_TRUE;
	}

	return URI_FUNC(CopyRange)(destRange, sourceRange, memory);
}
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned, UriMemoryManager * memory) {
URI_TYPE(PathSegment) * walker;
@@ -189,7 +223,7 @@ UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
if (prev == NULL) {
/* Last and first */
if (URI_FUNC(IsHostSet)(uri)) {
if (URI_FUNC(HasHost)(uri)) {
/* Replace "." with empty segment to represent trailing slash */
walker->text.first = URI_FUNC(SafeToPointTo);
walker->text.afterLast = URI_FUNC(SafeToPointTo);
@@ -463,7 +497,7 @@ URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
/* Checks if a URI has the host component set. */
UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) {
UriBool URI_FUNC(HasHost)(const URI_TYPE(Uri) * uri) {
return (uri != NULL)
&& ((uri->hostText.first != NULL)
|| (uri->hostData.ip4 != NULL)
@@ -601,7 +635,7 @@ void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory) {
/* Fix path if only one empty segment */
if (!uri->absolutePath
&& !URI_FUNC(IsHostSet)(uri)
&& !URI_FUNC(HasHost)(uri)
&& (uri->pathHead != NULL)
&& (uri->pathHead->next == NULL)
&& (uri->pathHead->text.first == uri->pathHead->text.afterLast)) {

View File

@@ -82,6 +82,11 @@ int URI_FUNC(CompareRange)(
const URI_TYPE(TextRange) * a,
const URI_TYPE(TextRange) * b);
/* Duplicates the characters of sourceRange into memory obtained from the
 * given memory manager and points destRange at the duplicate.
 * Returns URI_TRUE on success and URI_FALSE when the copy could not be made
 * (for example because the allocation failed). */
UriBool URI_FUNC(CopyRange)(URI_TYPE(TextRange) * destRange,
const URI_TYPE(TextRange) * sourceRange, UriMemoryManager * memory);
/* Copies sourceRange into destRange while avoiding needless allocations:
 * a NULL source range yields a NULL destination range, an empty source range
 * with useSafe set yields the shared SafeToPointTo sentinel, and non-empty
 * ranges are duplicated via CopyRange.
 * Returns URI_FALSE when duplicating fails, URI_TRUE otherwise. */
UriBool URI_FUNC(CopyRangeAsNeeded)(URI_TYPE(TextRange) * destRange,
const URI_TYPE(TextRange) * sourceRange, UriBool useSafe, UriMemoryManager * memory);
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
UriMemoryManager * memory);
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
@@ -91,8 +96,6 @@ unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig);
URI_CHAR URI_FUNC(HexToLetter)(unsigned int value);
URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase);
UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri);
UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source,
UriMemoryManager * memory);
UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest,

View File

@@ -0,0 +1,234 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
* Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriCopy.c
* Holds the %URI copying implementation (deep copy of %URI structures).
* NOTE: This source file includes itself twice.
*/
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriCopy.c"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriCopy.c"
# undef URI_PASS_UNICODE
# endif
#else
# ifdef URI_PASS_ANSI
# include <uriparser/UriDefsAnsi.h>
# else
# include <uriparser/UriDefsUnicode.h>
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include <uriparser/Uri.h>
# include "UriCommon.h"
# include "UriMemory.h"
# include "UriNormalize.h"
# include "UriCopy.h"
#endif
/*
 * Error-path cleanup for a partially assembled URI copy.
 *
 * First delegates to PreventLeakage with the same revertMask, then releases
 * what that call is not asked about here: the IPv4 or IPv6 host data block
 * (at most one of the two is released) and, when URI_NORMALIZE_PORT is set
 * in revertMask, the port text.
 *
 * The host data pointers are inspected regardless of revertMask, so the
 * caller must make sure they hold either NULL or a valid allocation.
 */
static void URI_FUNC(PreventLeakageAfterCopy)(URI_TYPE(Uri) * uri,
		unsigned int revertMask, UriMemoryManager * memory) {
	URI_FUNC(PreventLeakage)(uri, revertMask, memory);

	if (uri->hostData.ip4 == NULL) {
		if (uri->hostData.ip6 != NULL) {
			memory->free(memory, uri->hostData.ip6);
			uri->hostData.ip6 = NULL;
		}
	} else {
		memory->free(memory, uri->hostData.ip4);
		uri->hostData.ip4 = NULL;
	}

	if ((revertMask & URI_NORMALIZE_PORT) == 0) {
		return;
	}

	/* An empty port range owns no memory, only a non-empty one is freed */
	if (uri->portText.afterLast != uri->portText.first) {
		memory->free(memory, (URI_CHAR *)uri->portText.first);
	}
	uri->portText.first = NULL;
	uri->portText.afterLast = NULL;
}
/*
 * Deep-copies sourceUri into destUri using the given memory manager.
 *
 * destUri is treated as uninitialized output: its previous content is
 * overwritten, never released. Passing the same object as source and
 * destination is not supported.
 *
 * Returns URI_ERROR_NULL when sourceUri or destUri is NULL, URI_ERROR_MALLOC
 * when an allocation fails (everything allocated up to that point is handed
 * to PreventLeakageAfterCopy), and URI_SUCCESS otherwise.
 * URI_CHECK_MEMORY_MANAGER may additionally return early on its own.
 *
 * On success destUri->owner is set to URI_TRUE.
 */
int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
		const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory) {
	/* Tracks which components have been copied so far, for error cleanup */
	unsigned int doneMask = URI_NORMALIZED;

	if (sourceUri == NULL || destUri == NULL) {
		return URI_ERROR_NULL;
	}

	URI_CHECK_MEMORY_MANAGER(memory); /* may return */

	/* The cleanup helper inspects the host data pointers unconditionally and
	 * walks the path list, and a failed range copy leaves its destination
	 * untouched. Give every field the error paths may look at a defined
	 * value up front, so that cleanup never frees garbage taken from an
	 * uninitialized destUri. */
	destUri->hostData.ip4 = NULL;
	destUri->hostData.ip6 = NULL;
	destUri->hostData.ipFuture.first = NULL;
	destUri->hostData.ipFuture.afterLast = NULL;
	destUri->pathHead = NULL;
	destUri->pathTail = NULL;

	/* Scheme; nothing has been allocated yet, so no cleanup on failure */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->scheme, &sourceUri->scheme, URI_FALSE, memory) == URI_FALSE) {
		return URI_ERROR_MALLOC;
	}
	doneMask |= URI_NORMALIZE_SCHEME;

	/* User info */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->userInfo, &sourceUri->userInfo, URI_FALSE, memory) == URI_FALSE) {
		URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
		return URI_ERROR_MALLOC;
	}
	doneMask |= URI_NORMALIZE_USER_INFO;

	/* Host text */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostText, &sourceUri->hostText, URI_TRUE, memory) == URI_FALSE) {
		URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
		return URI_ERROR_MALLOC;
	}
	doneMask |= URI_NORMALIZE_HOST;

	/* Host data: IPv4 (destination already NULL when the source has none) */
	if (sourceUri->hostData.ip4 != NULL) {
		destUri->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4));
		if (destUri->hostData.ip4 == NULL) {
			URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
			return URI_ERROR_MALLOC;
		}
		*(destUri->hostData.ip4) = *(sourceUri->hostData.ip4);
	}

	/* Host data: IPv6 (destination already NULL when the source has none) */
	if (sourceUri->hostData.ip6 != NULL) {
		destUri->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6));
		if (destUri->hostData.ip6 == NULL) {
			URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
			return URI_ERROR_MALLOC;
		}
		*(destUri->hostData.ip6) = *(sourceUri->hostData.ip6);
	}

	/* Host data: IPvFuture */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->hostData.ipFuture, &sourceUri->hostData.ipFuture, URI_FALSE, memory) == URI_FALSE) {
		URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
		return URI_ERROR_MALLOC;
	}

	/* Port */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->portText, &sourceUri->portText, URI_FALSE, memory) == URI_FALSE) {
		URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
		return URI_ERROR_MALLOC;
	}
	doneMask |= URI_NORMALIZE_PORT;

	/* Path segments */
	if (sourceUri->pathHead != NULL) {
		URI_TYPE(PathSegment) * sourceWalker = sourceUri->pathHead;
		URI_TYPE(PathSegment) * destPrev = NULL;

		while (sourceWalker != NULL) {
			URI_TYPE(PathSegment) * destWalker = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
			if (destWalker == NULL) {
				URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
				return URI_ERROR_MALLOC;
			}

			destWalker->text.first = NULL;
			destWalker->text.afterLast = NULL;
			destWalker->next = NULL;
			destWalker->reserved = NULL;

			/* Link the new segment into the destination list BEFORE copying
			 * its text. Cleanup can only reach segments through pathHead, so
			 * a segment that was allocated but not yet linked would be
			 * leaked if the text copy below fails. */
			if (destUri->pathHead == NULL) {
				destUri->pathHead = destWalker;
				doneMask |= URI_NORMALIZE_PATH;
			} else {
				destPrev->next = destWalker;
			}
			destUri->pathTail = destWalker;

			if (URI_FUNC(CopyRangeAsNeeded)(&destWalker->text, &sourceWalker->text, URI_TRUE, memory) == URI_FALSE) {
				URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
				return URI_ERROR_MALLOC;
			}

			destPrev = destWalker;
			sourceWalker = sourceWalker->next;
		}
	}

	/* Query */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->query, &sourceUri->query, URI_FALSE, memory) == URI_FALSE) {
		URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
		return URI_ERROR_MALLOC;
	}
	doneMask |= URI_NORMALIZE_QUERY;

	/* Fragment */
	if (URI_FUNC(CopyRangeAsNeeded)(&destUri->fragment, &sourceUri->fragment, URI_FALSE, memory) == URI_FALSE) {
		URI_FUNC(PreventLeakageAfterCopy)(destUri, doneMask, memory);
		return URI_ERROR_MALLOC;
	}

	destUri->absolutePath = sourceUri->absolutePath;
	destUri->owner = URI_TRUE; /* every copied range lives in its own memory */
	destUri->reserved = NULL;

	return URI_SUCCESS;
}
/*
 * Convenience wrapper around CopyUriMm that passes a NULL memory manager,
 * which the public documentation of CopyUriMm describes as selecting the
 * default libc-based one. Returns whatever CopyUriMm returns.
 */
int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri,
		const URI_TYPE(Uri) * sourceUri) {
	UriMemoryManager * const defaultMemory = NULL;

	return URI_FUNC(CopyUriMm)(destUri, sourceUri, defaultMemory);
}
#endif

View File

@@ -0,0 +1,78 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
* Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI)) \
|| (defined(URI_PASS_UNICODE) && !defined(URI_COPY_H_UNICODE)) \
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriCopy.h"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriCopy.h"
# undef URI_PASS_UNICODE
# endif
/* Only one pass for each encoding */
#elif (defined(URI_PASS_ANSI) && !defined(URI_COPY_H_ANSI) \
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
&& !defined(URI_COPY_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
# define URI_COPY_H_ANSI 1
# include <uriparser/UriDefsAnsi.h>
# else
# define URI_COPY_H_UNICODE 1
# include <uriparser/UriDefsUnicode.h>
# endif
/* Copies sourceUri into destUri, allocating through the given memory manager.
 * Returns an error code, or 0 on success. */
int URI_FUNC(CopyUriMm)(URI_TYPE(Uri) * destUri,
const URI_TYPE(Uri) * sourceUri, UriMemoryManager * memory);
/* Copies sourceUri into destUri without an explicit memory manager
 * (forwards to CopyUriMm with a NULL memory manager).
 * Returns an error code, or 0 on success. */
int URI_FUNC(CopyUri)(URI_TYPE(Uri) * destUri,
const URI_TYPE(Uri) * sourceUri);
#endif
#endif

View File

@@ -109,12 +109,9 @@ static void URI_FUNC(LowercaseInplaceExceptPercentEncoding)(const URI_CHAR * fir
static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first,
const URI_CHAR ** afterLast, UriMemoryManager * memory);
static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
unsigned int revertMask, UriMemoryManager * memory);
static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
unsigned int revertMask, UriMemoryManager * memory) {
if (revertMask & URI_NORMALIZE_SCHEME) {
/* NOTE: A scheme cannot be the empty string
@@ -407,15 +404,9 @@ static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
&& (range->first != NULL)
&& (range->afterLast != NULL)
&& (range->afterLast > range->first)) {
const int lenInChars = (int)(range->afterLast - range->first);
const int lenInBytes = lenInChars * sizeof(URI_CHAR);
URI_CHAR * dup = memory->malloc(memory, lenInBytes);
if (dup == NULL) {
return URI_FALSE; /* Raises malloc error */
if (URI_FUNC(CopyRange)(range, range, memory) == URI_FALSE) {
return URI_FALSE;
}
memcpy(dup, range->first, lenInBytes);
range->first = dup;
range->afterLast = dup + lenInChars;
*doneMask |= maskTest;
}
return URI_TRUE;
@@ -557,6 +548,75 @@ int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) {
}
/*
 * Returns a pointer to the first character of [first, afterLast) that is not
 * the digit zero. When every character is a zero, the pointer to the
 * rightmost zero is returned instead, so that a single "0" survives.
 *
 * The range must be non-NULL and non-empty (checked by assertions only).
 */
static const URI_CHAR * URI_FUNC(PastLeadingZeros)(const URI_CHAR * first, const URI_CHAR * afterLast) {
	const URI_CHAR * scan;

	assert(first != NULL);
	assert(afterLast != NULL);
	assert(first != afterLast);

	/* Skip over the run of leading zeros */
	for (scan = first; scan < afterLast; scan++) {
		if (scan[0] != _UT('0')) {
			return scan;
		}
	}

	if (scan == afterLast) {
		/* Nothing but zeros; the range is non-empty, so stepping back by
		 * one lands on the rightmost zero */
		assert(scan > first);
		scan--;
		assert(scan[0] == _UT('0'));
	}

	return scan;
}
/*
 * Removes leading zeros from the writable text [first, *afterLast) by moving
 * the remaining characters to the front of the buffer, then writes a
 * terminator right behind them and updates *afterLast accordingly.
 * An empty range, or one without removable zeros, is left untouched.
 */
static void URI_FUNC(DropLeadingZerosInplace)(URI_CHAR * first, const URI_CHAR ** afterLast) {
	const URI_CHAR * keepFrom;
	size_t keepLen;

	assert(first != NULL);
	assert(afterLast != NULL);
	assert(*afterLast != NULL);

	if (first == *afterLast) {
		return;
	}

	keepFrom = URI_FUNC(PastLeadingZeros)(first, *afterLast);
	if (!(keepFrom > first)) {
		return; /* no leading zeros to drop */
	}

	keepLen = *afterLast - keepFrom;
	/* Source and destination overlap, hence memmove rather than memcpy */
	memmove(first, keepFrom, keepLen * sizeof(URI_CHAR));
	/* At least one character was dropped, so this slot is inside the text */
	first[keepLen] = _UT('\0');
	*afterLast = first + keepLen;
}
/*
 * Read-only counterpart of dropping leading zeros: instead of moving
 * characters, *first is advanced to the position that PastLeadingZeros
 * reports for [*first, afterLast). An empty range is left untouched.
 */
static void URI_FUNC(AdvancePastLeadingZeros)(
		const URI_CHAR ** first, const URI_CHAR * afterLast) {
	assert(first != NULL);
	assert(*first != NULL);
	assert(afterLast != NULL);

	if (*first != afterLast) {
		/* Cut off leading zeros by moving the start of the range forward */
		*first = URI_FUNC(PastLeadingZeros)(*first, afterLast);
	}
}
static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
unsigned int inMask, unsigned int * outMask,
@@ -658,6 +718,27 @@ static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
}
}
/* Port */
if (outMask != NULL) {
/* Is there a port even? */
if (uri->portText.first != NULL) {
/* Determine whether the port is already normalized, i.e. either "", "0" or no leading zeros */
const size_t portLen = uri->portText.afterLast - uri->portText.first;
if ((portLen > 1) && (uri->portText.first[0] == _UT('0'))) {
*outMask |= URI_NORMALIZE_PORT;
}
}
} else {
/* Normalize the port, i.e. drop leading zeros (except for string "0") */
if ((inMask & URI_NORMALIZE_PORT) && (uri->portText.first != NULL)) {
if (uri->owner) {
URI_FUNC(DropLeadingZerosInplace)((URI_CHAR *)uri->portText.first, &(uri->portText.afterLast));
} else {
URI_FUNC(AdvancePastLeadingZeros)(&(uri->portText.first), uri->portText.afterLast);
}
}
}
/* User info */
if (outMask != NULL) {
const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)(

View File

@@ -0,0 +1,76 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2018, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2018, Sebastian Pipping <sebastian@pipping.org>
* Copyright (C) 2025, Máté Kocsis <kocsismate@php.net>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI)) \
|| (defined(URI_PASS_UNICODE) && !defined(URI_NORMALIZE_H_UNICODE)) \
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# ifdef URI_ENABLE_ANSI
# define URI_PASS_ANSI 1
# include "UriNormalize.h"
# undef URI_PASS_ANSI
# endif
# ifdef URI_ENABLE_UNICODE
# define URI_PASS_UNICODE 1
# include "UriNormalize.h"
# undef URI_PASS_UNICODE
# endif
/* Only one pass for each encoding */
#elif (defined(URI_PASS_ANSI) && !defined(URI_NORMALIZE_H_ANSI) \
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
&& !defined(URI_NORMALIZE_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
# define URI_NORMALIZE_H_ANSI 1
# include <uriparser/UriDefsAnsi.h>
# else
# define URI_NORMALIZE_H_UNICODE 1
# include <uriparser/UriDefsUnicode.h>
# endif
/* Cleanup helper shared with the URI copying code: acts on the components of
 * uri selected by the bit flags in revertMask (URI_NORMALIZE_* values), using
 * the given memory manager.
 * NOTE(review): presumably frees the memory owned by each selected component;
 * the per-component details live in UriNormalize.c -- confirm there. */
void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
unsigned int revertMask, UriMemoryManager * memory);
#endif
#endif

View File

@@ -152,7 +152,7 @@ static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest,
/* [05/19] endif; */
}
/* [06/19] if defined(authority) then */
if (URI_FUNC(IsHostSet)(uri)) {
if (URI_FUNC(HasHost)(uri)) {
/* [07/19] append "//" to result; */
if (dest != NULL) {
if (written + 2 <= maxChars) {
@@ -422,7 +422,7 @@ static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest,
/* [10/19] append path to result; */
/* Slash needed here? */
if (uri->absolutePath || ((uri->pathHead != NULL)
&& URI_FUNC(IsHostSet)(uri))) {
&& URI_FUNC(HasHost)(uri))) {
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("/"),

View File

@@ -128,7 +128,7 @@ static int URI_FUNC(ResolveAbsolutePathFlag)(URI_TYPE(Uri) * absWork,
return URI_ERROR_NULL;
}
if (URI_FUNC(IsHostSet)(absWork) && absWork->absolutePath) {
if (URI_FUNC(HasHost)(absWork) && absWork->absolutePath) {
/* Empty segment needed, instead? */
if (absWork->pathHead == NULL) {
URI_TYPE(PathSegment) * const segment = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
@@ -203,7 +203,7 @@ static int URI_FUNC(AddBaseUriImpl)(URI_TYPE(Uri) * absDest,
/* [06/32] else */
} else {
/* [07/32] if defined(R.authority) then */
if (URI_FUNC(IsHostSet)(relSource)) {
if (URI_FUNC(HasHost)(relSource)) {
/* [08/32] T.authority = R.authority; */
if (!URI_FUNC(CopyAuthority)(absDest, relSource, memory)) {
return URI_ERROR_MALLOC;