mirror of
https://github.com/php/php-src.git
synced 2026-03-24 00:02:20 +01:00
5688 lines
155 KiB
C
5688 lines
155 KiB
C
/*
|
||
* Copyright (C) 2023-2024 Alexander Borisov
|
||
*
|
||
* Author: Alexander Borisov <borisov@lexbor.com>
|
||
*/
|
||
|
||
#include <math.h>
|
||
#include <float.h>
|
||
|
||
#include "lexbor/url/url.h"
|
||
#include "lexbor/core/conv.h"
|
||
#include "lexbor/core/utils.h"
|
||
#include "lexbor/core/serialize.h"
|
||
#include "lexbor/core/swar.h"
|
||
#include "lexbor/unicode/idna.h"
|
||
|
||
#define LEXBOR_STR_RES_MAP_LOWERCASE
|
||
#define LEXBOR_STR_RES_ALPHANUMERIC_CHARACTER
|
||
#define LEXBOR_STR_RES_ALPHA_CHARACTER
|
||
#define LEXBOR_STR_RES_CHAR_TO_TWO_HEX_VALUE
|
||
#define LEXBOR_STR_RES_MAP_HEX
|
||
#define LEXBOR_STR_RES_MAP_NUM
|
||
#include "lexbor/core/str_res.h"
|
||
|
||
|
||
#define LXB_URL_BUFFER_SIZE 4096
|
||
#define LXB_URL_BUFFER_NUM_SIZE 128
|
||
|
||
|
||
/*
 * Bit flags used as the per-byte values of lxb_url_map.
 *
 * Each flag occupies one bit, so several can be OR-ed together for a single
 * byte; LXB_URL_MAP_ALL has all eight bits set.  The flag names presumably
 * correspond to the percent-encode sets of the URL specification (a value of
 * this type is passed as `enmap` to the percent-encode helpers).
 */
typedef enum {
    LXB_URL_MAP_UNDEF         = 0,
    LXB_URL_MAP_C0            = 1 << 0,
    LXB_URL_MAP_FRAGMENT      = 1 << 1,
    LXB_URL_MAP_QUERY         = 1 << 2,
    LXB_URL_MAP_SPECIAL_QUERY = 1 << 3,
    LXB_URL_MAP_PATH          = 1 << 4,
    LXB_URL_MAP_USERINFO      = 1 << 5,
    LXB_URL_MAP_COMPONENT     = 1 << 6,
    LXB_URL_MAP_X_WWW_FORM    = 1 << 7,
    LXB_URL_MAP_ALL           = 0xff
}
lxb_url_map_type_t;
|
||
|
||
/*
 * Option bits for host parsing (the `opt` argument of lxb_url_host_parse()).
 * The values are independent bits and may be combined with `|`.
 */
typedef enum {
    LXB_URL_HOST_OPT_UNDEF       = 0x00,
    LXB_URL_HOST_OPT_NOT_SPECIAL = 0x01,
    LXB_URL_HOST_OPT_DECODE      = 0x02,
    LXB_URL_HOST_OPT_IDNA        = 0x04
}
lxb_url_host_opt_t;
|
||
|
||
typedef struct {
|
||
lexbor_mraw_t *mraw;
|
||
lexbor_str_t *str;
|
||
}
|
||
lxb_url_idna_ctx_t;
|
||
|
||
typedef struct {
|
||
lexbor_str_t **out_buf;
|
||
size_t out_length;
|
||
size_t out_size;
|
||
}
|
||
lxb_url_search_params_ctx_t;
|
||
|
||
|
||
static const uint8_t lxb_url_map[256] =
|
||
{
|
||
LXB_URL_MAP_ALL, /* 0x00 */
|
||
LXB_URL_MAP_ALL, /* 0x01 */
|
||
LXB_URL_MAP_ALL, /* 0x02 */
|
||
LXB_URL_MAP_ALL, /* 0x03 */
|
||
LXB_URL_MAP_ALL, /* 0x04 */
|
||
LXB_URL_MAP_ALL, /* 0x05 */
|
||
LXB_URL_MAP_ALL, /* 0x06 */
|
||
LXB_URL_MAP_ALL, /* 0x07 */
|
||
LXB_URL_MAP_ALL, /* 0x08 */
|
||
LXB_URL_MAP_ALL, /* 0x09 */
|
||
LXB_URL_MAP_ALL, /* 0x0a */
|
||
LXB_URL_MAP_ALL, /* 0x0b */
|
||
LXB_URL_MAP_ALL, /* 0x0c */
|
||
LXB_URL_MAP_ALL, /* 0x0d */
|
||
LXB_URL_MAP_ALL, /* 0x0e */
|
||
LXB_URL_MAP_ALL, /* 0x0f */
|
||
LXB_URL_MAP_ALL, /* 0x10 */
|
||
LXB_URL_MAP_ALL, /* 0x11 */
|
||
LXB_URL_MAP_ALL, /* 0x12 */
|
||
LXB_URL_MAP_ALL, /* 0x13 */
|
||
LXB_URL_MAP_ALL, /* 0x14 */
|
||
LXB_URL_MAP_ALL, /* 0x15 */
|
||
LXB_URL_MAP_ALL, /* 0x16 */
|
||
LXB_URL_MAP_ALL, /* 0x17 */
|
||
LXB_URL_MAP_ALL, /* 0x18 */
|
||
LXB_URL_MAP_ALL, /* 0x19 */
|
||
LXB_URL_MAP_ALL, /* 0x1a */
|
||
LXB_URL_MAP_ALL, /* 0x1b */
|
||
LXB_URL_MAP_ALL, /* 0x1c */
|
||
LXB_URL_MAP_ALL, /* 0x1d */
|
||
LXB_URL_MAP_ALL, /* 0x1e */
|
||
LXB_URL_MAP_ALL, /* 0x1f */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_FRAGMENT|LXB_URL_MAP_QUERY|LXB_URL_MAP_SPECIAL_QUERY|LXB_URL_MAP_PATH|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x20 ( ) */
|
||
LXB_URL_MAP_X_WWW_FORM, /* 0x21 (!) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_FRAGMENT|LXB_URL_MAP_QUERY|LXB_URL_MAP_SPECIAL_QUERY|LXB_URL_MAP_PATH|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x22 (") */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_QUERY|LXB_URL_MAP_SPECIAL_QUERY|LXB_URL_MAP_PATH|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x23 (#) */
|
||
LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x24 ($) */
|
||
LXB_URL_MAP_X_WWW_FORM, /* 0x25 (%) */
|
||
LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x26 (&) */
|
||
LXB_URL_MAP_SPECIAL_QUERY|LXB_URL_MAP_X_WWW_FORM, /* 0x27 (') */
|
||
LXB_URL_MAP_X_WWW_FORM, /* 0x28 (() */
|
||
LXB_URL_MAP_X_WWW_FORM, /* 0x29 ()) */
|
||
LXB_URL_MAP_UNDEF, /* 0x2a (*) */
|
||
LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x2b (+) */
|
||
LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x2c (,) */
|
||
LXB_URL_MAP_UNDEF, /* 0x2d (-) */
|
||
LXB_URL_MAP_UNDEF, /* 0x2e (.) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x2f (/) */
|
||
LXB_URL_MAP_UNDEF, /* 0x30 (0) */
|
||
LXB_URL_MAP_UNDEF, /* 0x31 (1) */
|
||
LXB_URL_MAP_UNDEF, /* 0x32 (2) */
|
||
LXB_URL_MAP_UNDEF, /* 0x33 (3) */
|
||
LXB_URL_MAP_UNDEF, /* 0x34 (4) */
|
||
LXB_URL_MAP_UNDEF, /* 0x35 (5) */
|
||
LXB_URL_MAP_UNDEF, /* 0x36 (6) */
|
||
LXB_URL_MAP_UNDEF, /* 0x37 (7) */
|
||
LXB_URL_MAP_UNDEF, /* 0x38 (8) */
|
||
LXB_URL_MAP_UNDEF, /* 0x39 (9) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x3a (:) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x3b (;) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_FRAGMENT|LXB_URL_MAP_QUERY|LXB_URL_MAP_SPECIAL_QUERY|LXB_URL_MAP_PATH|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x3c (<) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x3d (=) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_FRAGMENT|LXB_URL_MAP_QUERY|LXB_URL_MAP_SPECIAL_QUERY|LXB_URL_MAP_PATH|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x3e (>) */
|
||
LXB_URL_MAP_PATH|LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x3f (?) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x40 (@) */
|
||
LXB_URL_MAP_UNDEF, /* 0x41 (A) */
|
||
LXB_URL_MAP_UNDEF, /* 0x42 (B) */
|
||
LXB_URL_MAP_UNDEF, /* 0x43 (C) */
|
||
LXB_URL_MAP_UNDEF, /* 0x44 (D) */
|
||
LXB_URL_MAP_UNDEF, /* 0x45 (E) */
|
||
LXB_URL_MAP_UNDEF, /* 0x46 (F) */
|
||
LXB_URL_MAP_UNDEF, /* 0x47 (G) */
|
||
LXB_URL_MAP_UNDEF, /* 0x48 (H) */
|
||
LXB_URL_MAP_UNDEF, /* 0x49 (I) */
|
||
LXB_URL_MAP_UNDEF, /* 0x4a (J) */
|
||
LXB_URL_MAP_UNDEF, /* 0x4b (K) */
|
||
LXB_URL_MAP_UNDEF, /* 0x4c (L) */
|
||
LXB_URL_MAP_UNDEF, /* 0x4d (M) */
|
||
LXB_URL_MAP_UNDEF, /* 0x4e (N) */
|
||
LXB_URL_MAP_UNDEF, /* 0x4f (O) */
|
||
LXB_URL_MAP_UNDEF, /* 0x50 (P) */
|
||
LXB_URL_MAP_UNDEF, /* 0x51 (Q) */
|
||
LXB_URL_MAP_UNDEF, /* 0x52 (R) */
|
||
LXB_URL_MAP_UNDEF, /* 0x53 (S) */
|
||
LXB_URL_MAP_UNDEF, /* 0x54 (T) */
|
||
LXB_URL_MAP_UNDEF, /* 0x55 (U) */
|
||
LXB_URL_MAP_UNDEF, /* 0x56 (V) */
|
||
LXB_URL_MAP_UNDEF, /* 0x57 (W) */
|
||
LXB_URL_MAP_UNDEF, /* 0x58 (X) */
|
||
LXB_URL_MAP_UNDEF, /* 0x59 (Y) */
|
||
LXB_URL_MAP_UNDEF, /* 0x5a (Z) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x5b ([) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x5c (\) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x5d (]) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x5e (^) */
|
||
LXB_URL_MAP_UNDEF, /* 0x5f (_) */
|
||
LXB_URL_MAP_PATH|LXB_URL_MAP_FRAGMENT|LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x60 (`) */
|
||
LXB_URL_MAP_UNDEF, /* 0x61 (a) */
|
||
LXB_URL_MAP_UNDEF, /* 0x62 (b) */
|
||
LXB_URL_MAP_UNDEF, /* 0x63 (c) */
|
||
LXB_URL_MAP_UNDEF, /* 0x64 (d) */
|
||
LXB_URL_MAP_UNDEF, /* 0x65 (e) */
|
||
LXB_URL_MAP_UNDEF, /* 0x66 (f) */
|
||
LXB_URL_MAP_UNDEF, /* 0x67 (g) */
|
||
LXB_URL_MAP_UNDEF, /* 0x68 (h) */
|
||
LXB_URL_MAP_UNDEF, /* 0x69 (i) */
|
||
LXB_URL_MAP_UNDEF, /* 0x6a (j) */
|
||
LXB_URL_MAP_UNDEF, /* 0x6b (k) */
|
||
LXB_URL_MAP_UNDEF, /* 0x6c (l) */
|
||
LXB_URL_MAP_UNDEF, /* 0x6d (m) */
|
||
LXB_URL_MAP_UNDEF, /* 0x6e (n) */
|
||
LXB_URL_MAP_UNDEF, /* 0x6f (o) */
|
||
LXB_URL_MAP_UNDEF, /* 0x70 (p) */
|
||
LXB_URL_MAP_UNDEF, /* 0x71 (q) */
|
||
LXB_URL_MAP_UNDEF, /* 0x72 (r) */
|
||
LXB_URL_MAP_UNDEF, /* 0x73 (s) */
|
||
LXB_URL_MAP_UNDEF, /* 0x74 (t) */
|
||
LXB_URL_MAP_UNDEF, /* 0x75 (u) */
|
||
LXB_URL_MAP_UNDEF, /* 0x76 (v) */
|
||
LXB_URL_MAP_UNDEF, /* 0x77 (w) */
|
||
LXB_URL_MAP_UNDEF, /* 0x78 (x) */
|
||
LXB_URL_MAP_UNDEF, /* 0x79 (y) */
|
||
LXB_URL_MAP_UNDEF, /* 0x7a (z) */
|
||
LXB_URL_MAP_PATH|LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x7b ({) */
|
||
LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x7c (|) */
|
||
LXB_URL_MAP_PATH|LXB_URL_MAP_USERINFO|LXB_URL_MAP_COMPONENT|LXB_URL_MAP_X_WWW_FORM, /* 0x7d (}) */
|
||
LXB_URL_MAP_X_WWW_FORM, /* 0x7e (~) */
|
||
LXB_URL_MAP_ALL, /* 0x7f */
|
||
LXB_URL_MAP_ALL, /* 0x80 */
|
||
LXB_URL_MAP_ALL, /* 0x81 */
|
||
LXB_URL_MAP_ALL, /* 0x82 */
|
||
LXB_URL_MAP_ALL, /* 0x83 */
|
||
LXB_URL_MAP_ALL, /* 0x84 */
|
||
LXB_URL_MAP_ALL, /* 0x85 */
|
||
LXB_URL_MAP_ALL, /* 0x86 */
|
||
LXB_URL_MAP_ALL, /* 0x87 */
|
||
LXB_URL_MAP_ALL, /* 0x88 */
|
||
LXB_URL_MAP_ALL, /* 0x89 */
|
||
LXB_URL_MAP_ALL, /* 0x8a */
|
||
LXB_URL_MAP_ALL, /* 0x8b */
|
||
LXB_URL_MAP_ALL, /* 0x8c */
|
||
LXB_URL_MAP_ALL, /* 0x8d */
|
||
LXB_URL_MAP_ALL, /* 0x8e */
|
||
LXB_URL_MAP_ALL, /* 0x8f */
|
||
LXB_URL_MAP_ALL, /* 0x90 */
|
||
LXB_URL_MAP_ALL, /* 0x91 */
|
||
LXB_URL_MAP_ALL, /* 0x92 */
|
||
LXB_URL_MAP_ALL, /* 0x93 */
|
||
LXB_URL_MAP_ALL, /* 0x94 */
|
||
LXB_URL_MAP_ALL, /* 0x95 */
|
||
LXB_URL_MAP_ALL, /* 0x96 */
|
||
LXB_URL_MAP_ALL, /* 0x97 */
|
||
LXB_URL_MAP_ALL, /* 0x98 */
|
||
LXB_URL_MAP_ALL, /* 0x99 */
|
||
LXB_URL_MAP_ALL, /* 0x9a */
|
||
LXB_URL_MAP_ALL, /* 0x9b */
|
||
LXB_URL_MAP_ALL, /* 0x9c */
|
||
LXB_URL_MAP_ALL, /* 0x9d */
|
||
LXB_URL_MAP_ALL, /* 0x9e */
|
||
LXB_URL_MAP_ALL, /* 0x9f */
|
||
LXB_URL_MAP_ALL, /* 0xa0 */
|
||
LXB_URL_MAP_ALL, /* 0xa1 */
|
||
LXB_URL_MAP_ALL, /* 0xa2 */
|
||
LXB_URL_MAP_ALL, /* 0xa3 */
|
||
LXB_URL_MAP_ALL, /* 0xa4 */
|
||
LXB_URL_MAP_ALL, /* 0xa5 */
|
||
LXB_URL_MAP_ALL, /* 0xa6 */
|
||
LXB_URL_MAP_ALL, /* 0xa7 */
|
||
LXB_URL_MAP_ALL, /* 0xa8 */
|
||
LXB_URL_MAP_ALL, /* 0xa9 */
|
||
LXB_URL_MAP_ALL, /* 0xaa */
|
||
LXB_URL_MAP_ALL, /* 0xab */
|
||
LXB_URL_MAP_ALL, /* 0xac */
|
||
LXB_URL_MAP_ALL, /* 0xad */
|
||
LXB_URL_MAP_ALL, /* 0xae */
|
||
LXB_URL_MAP_ALL, /* 0xaf */
|
||
LXB_URL_MAP_ALL, /* 0xb0 */
|
||
LXB_URL_MAP_ALL, /* 0xb1 */
|
||
LXB_URL_MAP_ALL, /* 0xb2 */
|
||
LXB_URL_MAP_ALL, /* 0xb3 */
|
||
LXB_URL_MAP_ALL, /* 0xb4 */
|
||
LXB_URL_MAP_ALL, /* 0xb5 */
|
||
LXB_URL_MAP_ALL, /* 0xb6 */
|
||
LXB_URL_MAP_ALL, /* 0xb7 */
|
||
LXB_URL_MAP_ALL, /* 0xb8 */
|
||
LXB_URL_MAP_ALL, /* 0xb9 */
|
||
LXB_URL_MAP_ALL, /* 0xba */
|
||
LXB_URL_MAP_ALL, /* 0xbb */
|
||
LXB_URL_MAP_ALL, /* 0xbc */
|
||
LXB_URL_MAP_ALL, /* 0xbd */
|
||
LXB_URL_MAP_ALL, /* 0xbe */
|
||
LXB_URL_MAP_ALL, /* 0xbf */
|
||
LXB_URL_MAP_ALL, /* 0xc0 */
|
||
LXB_URL_MAP_ALL, /* 0xc1 */
|
||
LXB_URL_MAP_ALL, /* 0xc2 */
|
||
LXB_URL_MAP_ALL, /* 0xc3 */
|
||
LXB_URL_MAP_ALL, /* 0xc4 */
|
||
LXB_URL_MAP_ALL, /* 0xc5 */
|
||
LXB_URL_MAP_ALL, /* 0xc6 */
|
||
LXB_URL_MAP_ALL, /* 0xc7 */
|
||
LXB_URL_MAP_ALL, /* 0xc8 */
|
||
LXB_URL_MAP_ALL, /* 0xc9 */
|
||
LXB_URL_MAP_ALL, /* 0xca */
|
||
LXB_URL_MAP_ALL, /* 0xcb */
|
||
LXB_URL_MAP_ALL, /* 0xcc */
|
||
LXB_URL_MAP_ALL, /* 0xcd */
|
||
LXB_URL_MAP_ALL, /* 0xce */
|
||
LXB_URL_MAP_ALL, /* 0xcf */
|
||
LXB_URL_MAP_ALL, /* 0xd0 */
|
||
LXB_URL_MAP_ALL, /* 0xd1 */
|
||
LXB_URL_MAP_ALL, /* 0xd2 */
|
||
LXB_URL_MAP_ALL, /* 0xd3 */
|
||
LXB_URL_MAP_ALL, /* 0xd4 */
|
||
LXB_URL_MAP_ALL, /* 0xd5 */
|
||
LXB_URL_MAP_ALL, /* 0xd6 */
|
||
LXB_URL_MAP_ALL, /* 0xd7 */
|
||
LXB_URL_MAP_ALL, /* 0xd8 */
|
||
LXB_URL_MAP_ALL, /* 0xd9 */
|
||
LXB_URL_MAP_ALL, /* 0xda */
|
||
LXB_URL_MAP_ALL, /* 0xdb */
|
||
LXB_URL_MAP_ALL, /* 0xdc */
|
||
LXB_URL_MAP_ALL, /* 0xdd */
|
||
LXB_URL_MAP_ALL, /* 0xde */
|
||
LXB_URL_MAP_ALL, /* 0xdf */
|
||
LXB_URL_MAP_ALL, /* 0xe0 */
|
||
LXB_URL_MAP_ALL, /* 0xe1 */
|
||
LXB_URL_MAP_ALL, /* 0xe2 */
|
||
LXB_URL_MAP_ALL, /* 0xe3 */
|
||
LXB_URL_MAP_ALL, /* 0xe4 */
|
||
LXB_URL_MAP_ALL, /* 0xe5 */
|
||
LXB_URL_MAP_ALL, /* 0xe6 */
|
||
LXB_URL_MAP_ALL, /* 0xe7 */
|
||
LXB_URL_MAP_ALL, /* 0xe8 */
|
||
LXB_URL_MAP_ALL, /* 0xe9 */
|
||
LXB_URL_MAP_ALL, /* 0xea */
|
||
LXB_URL_MAP_ALL, /* 0xeb */
|
||
LXB_URL_MAP_ALL, /* 0xec */
|
||
LXB_URL_MAP_ALL, /* 0xed */
|
||
LXB_URL_MAP_ALL, /* 0xee */
|
||
LXB_URL_MAP_ALL, /* 0xef */
|
||
LXB_URL_MAP_ALL, /* 0xf0 */
|
||
LXB_URL_MAP_ALL, /* 0xf1 */
|
||
LXB_URL_MAP_ALL, /* 0xf2 */
|
||
LXB_URL_MAP_ALL, /* 0xf3 */
|
||
LXB_URL_MAP_ALL, /* 0xf4 */
|
||
LXB_URL_MAP_ALL, /* 0xf5 */
|
||
LXB_URL_MAP_ALL, /* 0xf6 */
|
||
LXB_URL_MAP_ALL, /* 0xf7 */
|
||
LXB_URL_MAP_ALL, /* 0xf8 */
|
||
LXB_URL_MAP_ALL, /* 0xf9 */
|
||
LXB_URL_MAP_ALL, /* 0xfa */
|
||
LXB_URL_MAP_ALL, /* 0xfb */
|
||
LXB_URL_MAP_ALL, /* 0xfc */
|
||
LXB_URL_MAP_ALL, /* 0xfd */
|
||
LXB_URL_MAP_ALL, /* 0xfe */
|
||
LXB_URL_MAP_ALL, /* 0xff */
|
||
};
|
||
|
||
/*
|
||
* U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, U+0020 SPACE, U+0023 (#),
|
||
* U+002F (/), U+003A (:), U+003C (<), U+003E (>), U+003F (?), U+0040 (@),
|
||
* U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|).
|
||
* U+0000 NULL to U+001F, U+0025 (%), or U+007F DELETE.
|
||
*/
|
||
static const lxb_char_t lxb_url_map_forbidden_domain_cp[128] =
|
||
{
|
||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
|
||
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
|
||
0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
|
||
0x1e, 0x1f, 0x20, 0xff, 0xff, 0x23, 0xff, 0x25, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2f, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3a, 0xff,
|
||
0x3c, 0xff, 0x3e, 0x3f, 0x40, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0x5b, 0x5c, 0x5d, 0x5e, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0x7c, 0xff, 0xff, 0x7f
|
||
};
|
||
|
||
/*
|
||
* U+0000 NULL, U+0009 TAB, U+000A LF, U+000D CR, U+0020 SPACE, U+0023 (#),
|
||
* U+002F (/), U+003A (:), U+003C (<), U+003E (>), U+003F (?), U+0040 (@),
|
||
* U+005B ([), U+005C (\), U+005D (]), U+005E (^), or U+007C (|).
|
||
*/
|
||
static const lxb_char_t lxb_url_map_forbidden_host_cp[128] =
|
||
{
|
||
0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x09,
|
||
0x0a, 0xff, 0xff, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0x20, 0xff, 0xff, 0x23, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2f, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3a, 0xff,
|
||
0x3c, 0xff, 0x3e, 0x3f, 0x40, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0x5b, 0x5c, 0x5d, 0x5e, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
||
0xff, 0xff, 0xff, 0xff, 0x7c, 0xff, 0xff, 0xff
|
||
};
|
||
|
||
/*
 * Octal digit lookup, indexed by byte value (256 entries).
 *
 * Bytes '0' - '7' (0x30 - 0x37) map to their numeric value 0 - 7; every
 * other byte maps to 0xff.
 */
static const lxb_char_t lxb_url_map_num_8[] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x20 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x28 */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x30: '0' - '7' */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x38 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x40 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x48 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x58 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x60 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x68 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x78 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x98 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa8 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb8 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc8 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd8 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe8 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xf0 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff  /* 0xf8 */
};
|
||
|
||
/*
 * Membership table for code points below U+00A0, indexed by code point.
 *
 * An entry equals its own index when the code point is accepted by
 * lxb_url_is_url_codepoint(), 0xff otherwise.  Accepted here: ASCII
 * letters and digits plus ! $ & ' ( ) * + , - . / : ; = ? @ _ ~
 */
static const lxb_char_t lxb_url_codepoint_alphanumeric[0xA0] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x00 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x18 */
    0xff, 0x21, 0xff, 0xff, 0x24, 0xff, 0x26, 0x27, /* 0x20 */
    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28 */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 */
    0x38, 0x39, 0x3a, 0x3b, 0xff, 0x3d, 0xff, 0x3f, /* 0x38 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40 */
    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48 */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50 */
    0x58, 0x59, 0x5a, 0xff, 0xff, 0xff, 0xff, 0x5f, /* 0x58 */
    0xff, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60 */
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70 */
    0x78, 0x79, 0x7a, 0xff, 0xff, 0xff, 0x7e, 0xff, /* 0x78 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x88 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90 */
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff  /* 0x98 */
};
|
||
|
||
/*
 * Per-byte classification used while scanning a path, indexed by byte value.
 *
 * Values present in the table: 0x00, 0x01, 0x02 and 0x04 (0x04 only for
 * '%').  Bytes 0x00 - 0x1f and 0x7f - 0xff are all 0x02.
 * NOTE(review): the meaning of each value is defined by the path parsing
 * code that reads this table, which is not part of this excerpt.
 */
static const lxb_char_t lxb_url_path_map[256] =
{
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x00 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x08 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x10 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x18 */
    0x01, 0x00, 0x01, 0x01, 0x00, 0x04, 0x00, 0x00, /* 0x20 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, /* 0x28 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30 */
    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x01, /* 0x38 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50 */
    0x00, 0x00, 0x00, 0x02, 0x01, 0x02, 0x02, 0x00, /* 0x58 */
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70 */
    0x00, 0x00, 0x00, 0x01, 0x02, 0x01, 0x00, 0x02, /* 0x78 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x80 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x88 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x90 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0x98 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xa0 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xa8 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xb0 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xb8 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xc0 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xc8 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xd0 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xd8 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xe0 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xe8 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 0xf0 */
    0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02  /* 0xf8 */
};
|
||
|
||
static const lxb_url_scheme_data_t
|
||
lxb_url_scheme_res[LXB_URL_SCHEMEL_TYPE__LAST_ENTRY] =
|
||
{
|
||
{.name = lexbor_str("#undef"), 0, LXB_URL_SCHEMEL_TYPE__UNDEF },
|
||
{.name = lexbor_str("#unknown"), 0, LXB_URL_SCHEMEL_TYPE__UNKNOWN},
|
||
{.name = lexbor_str("http"), 80, LXB_URL_SCHEMEL_TYPE_HTTP },
|
||
{.name = lexbor_str("https"), 443, LXB_URL_SCHEMEL_TYPE_HTTPS },
|
||
{.name = lexbor_str("ws"), 80, LXB_URL_SCHEMEL_TYPE_WS },
|
||
{.name = lexbor_str("wss"), 443, LXB_URL_SCHEMEL_TYPE_WSS },
|
||
{.name = lexbor_str("ftp"), 21, LXB_URL_SCHEMEL_TYPE_FTP },
|
||
{.name = lexbor_str("file"), 0, LXB_URL_SCHEMEL_TYPE_FILE }
|
||
};
|
||
|
||
static const size_t
|
||
lxb_url_scheme_length = sizeof(lxb_url_scheme_res) / sizeof(lxb_url_scheme_data_t);
|
||
|
||
|
||
/*
 * Return `status` from the enclosing function.
 *
 * When `buf` differs from `data`, `buf` is first stored in parser->buffer
 * (a variable named `parser` must be in scope at the expansion site);
 * parser->buffer is later released with lexbor_free() by
 * lxb_url_parser_clean() / lxb_url_parser_destroy().
 */
#define lxb_url_parse_return(data, buf, status)                               \
    do {                                                                      \
        if ((data) != (buf)) {                                                \
            parser->buffer = (lxb_char_t *) (buf);                            \
        }                                                                     \
                                                                              \
        return (status);                                                      \
    }                                                                         \
    while (false)
|
||
|
||
/*
 * Grow a scratch buffer to twice the number of bytes written so far.
 *
 *   sbuf        current write position inside the buffer (updated)
 *   sbuf_begin  start of the buffer (updated)
 *   sbuf_end    end of the buffer (updated)
 *   sbuffer     the initial buffer; while sbuf_begin still equals it the
 *               storage is not owned by the heap, so a fresh block is
 *               allocated instead of calling lexbor_realloc()
 *   last        another pointer into the buffer that must follow the move
 *               (updated)
 *
 * On allocation failure the enclosing function returns NULL; a heap buffer
 * that could not be grown is released first.
 *
 * Both `sbuf` and `last` keep their offset from the start of the buffer.
 * When switching from the initial buffer to heap storage the bytes written
 * so far are copied over, since a fresh allocation does not carry them.
 */
#define LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin, sbuf_end, sbuffer, last)       \
    do {                                                                      \
        size_t new_len, offset, lst;                                          \
        lxb_char_t *tmp;                                                      \
                                                                              \
        lst = (last) - (sbuf_begin);                                          \
        offset = (sbuf) - (sbuf_begin);                                       \
        new_len = offset << 1;                                                \
                                                                              \
        if ((sbuf_begin) == (sbuffer)) {                                      \
            tmp = lexbor_malloc(new_len);                                     \
            if (tmp == NULL) {                                                \
                return NULL;                                                  \
            }                                                                 \
                                                                              \
            /* Preserve what was already written to the initial buffer. */    \
            memcpy(tmp, (sbuf_begin), offset);                                \
        }                                                                     \
        else {                                                                \
            tmp = lexbor_realloc((sbuf_begin), new_len);                      \
            if (tmp == NULL) {                                                \
                lexbor_free(sbuf_begin);                                      \
                return NULL;                                                  \
            }                                                                 \
        }                                                                     \
                                                                              \
        (sbuf) = tmp + offset;                                                \
        /* `lst` was measured from the buffer start, so rebase from it. */    \
        (last) = tmp + lst;                                                   \
        (sbuf_begin) = tmp;                                                   \
        (sbuf_end) = tmp + new_len;                                           \
    }                                                                         \
    while (false)
|
||
|
||
/*
 * True when `data` is an ASCII letter ('A'-'Z' or 'a'-'z').
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define lxb_url_is_windows_letter(data)                                       \
    (((data) >= 'A' && (data) <= 'Z') || ((data) >= 'a' && (data) <= 'z'))
|
||
|
||
|
||
static lxb_status_t
|
||
lxb_url_leading_trailing(lxb_url_parser_t *parser,
|
||
const lxb_char_t **data, size_t *length);
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_remove_tab_newline(lxb_url_parser_t *parser,
|
||
const lxb_char_t *data, size_t *length);
|
||
|
||
static const lxb_url_scheme_data_t *
|
||
lxb_url_scheme_find(const lxb_char_t *data, size_t length);
|
||
|
||
static lxb_status_t
|
||
lxb_url_parse_basic_h(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_url_t *base_url,
|
||
const lxb_char_t *data, size_t length,
|
||
lxb_url_state_t override_state, lxb_encoding_t encoding);
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_path_fast_path(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_char_t *data, const lxb_char_t *end, bool bqs);
|
||
|
||
const lxb_char_t *
|
||
lxb_url_path_slow_path(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_char_t *data, const lxb_char_t *end, bool bqs);
|
||
|
||
static lxb_status_t
|
||
lxb_url_path_try_dot(lxb_url_t *url, const lxb_char_t **begin,
|
||
const lxb_char_t **last, const lxb_char_t **start,
|
||
const lxb_char_t *end, bool bqs);
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_path_dot_count(lxb_url_t *url, const lxb_char_t *p,
|
||
const lxb_char_t *end, const lxb_char_t *sbuf_begin,
|
||
lxb_char_t **sbuf, lxb_char_t **last, size_t *path_count,
|
||
bool bqs);
|
||
|
||
static void
|
||
lxb_url_path_fix_windows_drive(lxb_url_t *url, lxb_char_t *sbuf,
|
||
const lxb_char_t *last, size_t count);
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_encode_after_encoding(const lxb_char_t *data,
|
||
const lxb_char_t *end, lexbor_str_t *str,
|
||
lexbor_mraw_t *mraw,
|
||
const lxb_encoding_data_t *encoding,
|
||
lxb_url_map_type_t enmap,
|
||
bool space_as_plus);
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_encode_after_utf_8(const lxb_char_t *data,
|
||
const lxb_char_t *end, lexbor_str_t *str,
|
||
lexbor_mraw_t *mraw,
|
||
lxb_url_map_type_t enmap,
|
||
bool space_as_plus);
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, lxb_url_host_t *host,
|
||
lexbor_mraw_t *mraw, lxb_url_host_opt_t opt);
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_idna_cb(const lxb_char_t *data, size_t len, void *ctx);
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv4_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, uint32_t *ipv6);
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv4_number_parse(const lxb_char_t *data,
|
||
const lxb_char_t *end, uint64_t *num);
|
||
|
||
static bool
|
||
lxb_url_is_ipv4(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end);
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv6_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, uint16_t *ipv6);
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv4_in_ipv6_parse(lxb_url_parser_t *parser, const lxb_char_t **data,
|
||
const lxb_char_t *end, uint16_t **pieces);
|
||
|
||
static lxb_status_t
|
||
lxb_url_opaque_host_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, lxb_url_host_t *host,
|
||
lexbor_mraw_t *mraw);
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_decode(const lxb_char_t *data, const lxb_char_t *end,
|
||
lexbor_str_t *str, lexbor_mraw_t *mraw,
|
||
lxb_url_host_opt_t *opt);
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_decode_plus(const lxb_char_t *data, const lxb_char_t *end,
|
||
lexbor_str_t *str, lexbor_mraw_t *mraw);
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_path_part_by_index(const lxb_url_t *url, size_t index,
|
||
size_t *out_length);
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_set_h(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *host, size_t length,
|
||
lxb_url_state_t override_state);
|
||
|
||
static lxb_status_t
|
||
lxb_url_search_params_parse(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *query, size_t length);
|
||
|
||
|
||
lxb_url_parser_t *
|
||
lxb_url_parser_create(void)
|
||
{
|
||
return lexbor_calloc(1, sizeof(lxb_url_parser_t));
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_parser_init(lxb_url_parser_t *parser, lexbor_mraw_t *mraw)
|
||
{
|
||
bool itmy;
|
||
lxb_status_t status;
|
||
|
||
if (parser == NULL) {
|
||
return LXB_STATUS_ERROR_OBJECT_IS_NULL;
|
||
}
|
||
|
||
itmy = false;
|
||
|
||
if (mraw == NULL) {
|
||
mraw = lexbor_mraw_create();
|
||
status = lexbor_mraw_init(mraw, LXB_URL_BUFFER_SIZE);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
itmy = true;
|
||
}
|
||
|
||
parser->mraw = mraw;
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
|
||
return LXB_STATUS_OK;
|
||
|
||
failed:
|
||
|
||
if (itmy) {
|
||
(void) lexbor_mraw_destroy(mraw, true);
|
||
}
|
||
|
||
memset(parser, 0x00, sizeof(lxb_url_parser_t));
|
||
|
||
return status;
|
||
}
|
||
|
||
void
|
||
lxb_url_parser_clean(lxb_url_parser_t *parser)
|
||
{
|
||
parser->url = NULL;
|
||
|
||
if (parser->log != NULL) {
|
||
lexbor_plog_clean(parser->log);
|
||
}
|
||
|
||
if (parser->buffer != NULL) {
|
||
parser->buffer = lexbor_free(parser->buffer);
|
||
}
|
||
}
|
||
|
||
/*
 * Release the resources held by a parser.
 *
 * Destroys the log and the IDNA object, frees the temporary buffer and,
 * when `destroy_self` is true, frees the parser object itself.  The
 * allocator (`mraw`) is not touched here.
 *
 * Returns NULL when `parser` is NULL, the result of lexbor_free() when the
 * object itself was freed, and `parser` otherwise.
 */
lxb_url_parser_t *
lxb_url_parser_destroy(lxb_url_parser_t *parser, bool destroy_self)
{
    if (parser == NULL) {
        return NULL;
    }

    parser->log = lexbor_plog_destroy(parser->log, true);
    parser->idna = lxb_unicode_idna_destroy(parser->idna, true);

    if (parser->buffer != NULL) {
        parser->buffer = lexbor_free(parser->buffer);
    }

    if (!destroy_self) {
        return parser;
    }

    return lexbor_free(parser);
}
|
||
|
||
void
|
||
lxb_url_parser_memory_destroy(lxb_url_parser_t *parser)
|
||
{
|
||
parser->mraw = lexbor_mraw_destroy(parser->mraw, true);
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_log_append(lxb_url_parser_t *parser, const lxb_char_t *pos,
|
||
lxb_url_error_type_t type)
|
||
{
|
||
void *entry;
|
||
lxb_status_t status;
|
||
|
||
if (parser->log == NULL) {
|
||
parser->log = lexbor_plog_create();
|
||
status = lexbor_plog_init(parser->log, 5, sizeof(lexbor_plog_entry_t));
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
entry = lexbor_plog_push(parser->log, pos, NULL, type);
|
||
if (entry == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_str_init(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t length)
|
||
{
|
||
size_t size;
|
||
const lxb_char_t *p;
|
||
|
||
if (str->data == NULL) {
|
||
p = lexbor_str_init(str, mraw, length);
|
||
if (p == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
else {
|
||
size = str->length + length;
|
||
|
||
if (size > lexbor_str_size(str)) {
|
||
p = lexbor_str_realloc(str, mraw, size);
|
||
if (p == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_str_copy(const lexbor_str_t *src, lexbor_str_t *dst,
|
||
lexbor_mraw_t *dst_mraw)
|
||
{
|
||
if (src->data == NULL) {
|
||
*dst = *src;
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (dst->data != NULL) {
|
||
if (dst->length >= src->length) {
|
||
|
||
/* +1 == '\0' */
|
||
memcpy(dst->data, src->data, src->length + 1);
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
(void) lexbor_str_destroy(dst, dst_mraw, false);
|
||
}
|
||
|
||
(void) lexbor_str_init_append(dst, dst_mraw,
|
||
src->data, src->length);
|
||
|
||
return (dst->data != NULL) ? LXB_STATUS_OK
|
||
: LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_is_noncharacter(lxb_codepoint_t cp)
|
||
{
|
||
if (cp >= 0xFDD0 && cp <= 0xFDEF) {
|
||
return true;
|
||
}
|
||
|
||
switch (cp) {
|
||
case 0xFFFE:
|
||
case 0xFFFF:
|
||
case 0x1FFFE:
|
||
case 0x1FFFF:
|
||
case 0x2FFFE:
|
||
case 0x2FFFF:
|
||
case 0x3FFFE:
|
||
case 0x3FFFF:
|
||
case 0x4FFFE:
|
||
case 0x4FFFF:
|
||
case 0x5FFFE:
|
||
case 0x5FFFF:
|
||
case 0x6FFFE:
|
||
case 0x6FFFF:
|
||
case 0x7FFFE:
|
||
case 0x7FFFF:
|
||
case 0x8FFFE:
|
||
case 0x8FFFF:
|
||
case 0x9FFFE:
|
||
case 0x9FFFF:
|
||
case 0xAFFFE:
|
||
case 0xAFFFF:
|
||
case 0xBFFFE:
|
||
case 0xBFFFF:
|
||
case 0xCFFFE:
|
||
case 0xCFFFF:
|
||
case 0xDFFFE:
|
||
case 0xDFFFF:
|
||
case 0xEFFFE:
|
||
case 0xEFFFF:
|
||
case 0xFFFFE:
|
||
case 0xFFFFF:
|
||
case 0x10FFFE:
|
||
case 0x10FFFF:
|
||
return true;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_is_url_codepoint(lxb_codepoint_t cp)
|
||
{
|
||
if (cp >= 0x00A0 && cp <= 0x1FFFFF) {
|
||
/* Leading and trailing surrogate. */
|
||
if ((cp >= 0xD800 && cp <= 0xDFFF)) {
|
||
return false;
|
||
}
|
||
|
||
return !(cp > 0x10FFFF || lxb_url_is_noncharacter(cp));
|
||
}
|
||
|
||
return lxb_url_codepoint_alphanumeric[(lxb_char_t) cp] != 0xFF;
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_is_special(const lxb_url_t *url)
|
||
{
|
||
return url->scheme.type != LXB_URL_SCHEMEL_TYPE__UNKNOWN;
|
||
}
|
||
|
||
lxb_inline const lxb_url_scheme_data_t *
|
||
lxb_url_scheme_by_type(lxb_url_scheme_type_t type)
|
||
{
|
||
return &lxb_url_scheme_res[type];
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_scheme_is_special(const lxb_url_scheme_data_t *scheme)
|
||
{
|
||
return scheme->type != LXB_URL_SCHEMEL_TYPE__UNKNOWN;
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_scheme_is_equal(const lxb_url_scheme_t *first,
|
||
const lxb_url_scheme_t *second)
|
||
{
|
||
if (first->type != second->type) {
|
||
return false;
|
||
}
|
||
|
||
if (first->type == LXB_URL_SCHEMEL_TYPE__UNKNOWN) {
|
||
if (first->name.length != second->name.length) {
|
||
return false;
|
||
}
|
||
|
||
return memcmp(first->name.data, second->name.data,
|
||
first->name.length) == 0;
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_scheme_equal_port(lxb_url_scheme_type_t type, uint16_t port)
|
||
{
|
||
return lxb_url_scheme_res[type].port == port;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_scheme_copy(const lxb_url_scheme_t *src, lxb_url_scheme_t *dst,
|
||
lexbor_mraw_t *dst_mraw)
|
||
{
|
||
dst->type = src->type;
|
||
|
||
return lxb_url_str_copy(&src->name, &dst->name, dst_mraw);
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_scheme_copy_special(const lxb_url_scheme_data_t *src,
|
||
lxb_url_scheme_t *dst, lexbor_mraw_t *dst_mraw)
|
||
{
|
||
dst->type = src->type;
|
||
|
||
return lxb_url_str_copy(&src->name, &dst->name, dst_mraw);
|
||
}
|
||
|
||
static void
|
||
lxb_url_path_set_null(lxb_url_t *url)
|
||
{
|
||
if (url->path.str.data == NULL) {
|
||
return;
|
||
}
|
||
|
||
(void) lexbor_str_destroy(&url->path.str, url->mraw, false);
|
||
|
||
url->path.str.length = 0;
|
||
url->path.length = 0;
|
||
url->path.opaque = false;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_path_copy(const lxb_url_t *src, lxb_url_t *dst)
|
||
{
|
||
lexbor_str_t *to;
|
||
const lexbor_str_t *str;
|
||
|
||
if (dst->path.str.data != NULL) {
|
||
lxb_url_path_set_null(dst);
|
||
}
|
||
|
||
dst->path.opaque = src->path.opaque;
|
||
|
||
if (src->path.str.data == NULL) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
dst->path.length = src->path.length;
|
||
|
||
str = &src->path.str;
|
||
to = &dst->path.str;
|
||
|
||
to->data = lexbor_mraw_alloc(dst->mraw, (str->length + 1));
|
||
if (to->data == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
/* +1 == '\0' */
|
||
memcpy(to->data, str->data, str->length + 1);
|
||
|
||
to->length = str->length;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static void
|
||
lxb_url_path_shorten(lxb_url_t *url)
|
||
{
|
||
lexbor_str_t *str;
|
||
lxb_char_t *p, *begin;
|
||
|
||
str = &url->path.str;
|
||
|
||
if (url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE
|
||
&& url->path.length == 1)
|
||
{
|
||
if (str->length >= 3
|
||
&& lxb_url_is_windows_letter(str->data[1])
|
||
&& str->data[2] == ':')
|
||
{
|
||
return;
|
||
}
|
||
}
|
||
|
||
if (url->path.str.data != NULL) {
|
||
url->path.length -= 1;
|
||
|
||
begin = str->data;
|
||
p = begin + str->length;
|
||
|
||
while (p > begin) {
|
||
p -= 1;
|
||
|
||
if (*p == '/') {
|
||
*p = '\0';
|
||
break;
|
||
}
|
||
}
|
||
|
||
str->length = p - begin;
|
||
}
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_path_append_wo_slash(lxb_url_t *url,
|
||
const lxb_char_t *data, size_t length)
|
||
{
|
||
lxb_char_t *p;
|
||
|
||
if (url->path.str.data == NULL) {
|
||
p = lexbor_str_init(&url->path.str, url->mraw, length);
|
||
if (p == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
|
||
p = lexbor_str_append(&url->path.str, url->mraw, data, length);
|
||
|
||
return (p != NULL) ? LXB_STATUS_OK : LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_path_append(lxb_url_t *url, const lxb_char_t *data, size_t length)
|
||
{
|
||
size_t len;
|
||
lxb_char_t *p;
|
||
lexbor_str_t *str;
|
||
|
||
str = &url->path.str;
|
||
|
||
if (str->data == NULL) {
|
||
p = lexbor_str_init(str, url->mraw, length + 1);
|
||
if (p == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
|
||
len = str->length;
|
||
str->length += 1;
|
||
|
||
p = lexbor_str_append(&url->path.str, url->mraw, data, length);
|
||
|
||
str->data[len] = '/';
|
||
|
||
return (p != NULL) ? LXB_STATUS_OK : LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_path_list_push(lxb_url_t *url, lexbor_str_t *data)
|
||
{
|
||
return lxb_url_path_append_wo_slash(url, data->data, data->length);
|
||
}
|
||
|
||
lxb_inline lxb_status_t
|
||
lxb_url_query_copy(const lexbor_str_t *src, lexbor_str_t *dst,
|
||
lexbor_mraw_t *dst_mraw)
|
||
{
|
||
return lxb_url_str_copy(src, dst, dst_mraw);
|
||
}
|
||
|
||
lxb_inline lxb_status_t
|
||
lxb_url_username_copy(const lexbor_str_t *src, lexbor_str_t *dst,
|
||
lexbor_mraw_t *dst_mraw)
|
||
{
|
||
return lxb_url_str_copy(src, dst, dst_mraw);
|
||
}
|
||
|
||
lxb_inline lxb_status_t
|
||
lxb_url_password_copy(const lexbor_str_t *src, lexbor_str_t *dst,
|
||
lexbor_mraw_t *dst_mraw)
|
||
{
|
||
return lxb_url_str_copy(src, dst, dst_mraw);
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_copy(const lxb_url_host_t *src, lxb_url_host_t *dst,
|
||
lexbor_mraw_t *dst_mraw)
|
||
{
|
||
if (dst->type != LXB_URL_HOST_TYPE__UNDEF) {
|
||
if (src->type == LXB_URL_HOST_TYPE__UNDEF) {
|
||
if (dst->type <= LXB_URL_HOST_TYPE_OPAQUE) {
|
||
(void) lexbor_str_destroy(&dst->u.domain, dst_mraw, false);
|
||
}
|
||
|
||
dst->type = LXB_URL_HOST_TYPE__UNDEF;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (dst->type <= LXB_URL_HOST_TYPE_OPAQUE) {
|
||
if (src->type <= LXB_URL_HOST_TYPE_OPAQUE) {
|
||
dst->type = src->type;
|
||
|
||
return lxb_url_str_copy(&src->u.domain,
|
||
&dst->u.domain, dst_mraw);
|
||
}
|
||
|
||
(void) lexbor_str_destroy(&dst->u.domain, dst_mraw, false);
|
||
}
|
||
}
|
||
|
||
if (src->type <= LXB_URL_HOST_TYPE_OPAQUE) {
|
||
dst->type = src->type;
|
||
|
||
if (src->type == LXB_URL_HOST_TYPE__UNDEF) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
return lxb_url_str_copy(&src->u.domain,
|
||
&dst->u.domain, dst_mraw);
|
||
}
|
||
|
||
if (src->type == LXB_URL_HOST_TYPE_IPV6) {
|
||
memcpy(dst->u.ipv6, src->u.ipv6, sizeof(src->u.ipv6));
|
||
}
|
||
else {
|
||
dst->u.ipv4 = src->u.ipv4;
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_inline void
|
||
lxb_url_host_destroy(lxb_url_host_t *host, lexbor_mraw_t *mraw)
|
||
{
|
||
if (host->type != LXB_URL_HOST_TYPE__UNDEF) {
|
||
if (host->type <= LXB_URL_HOST_TYPE_OPAQUE) {
|
||
(void) lexbor_str_destroy(&host->u.domain, mraw, false);
|
||
}
|
||
}
|
||
}
|
||
|
||
static void
|
||
lxb_url_host_set_empty(lxb_url_host_t *host, lexbor_mraw_t *mraw)
|
||
{
|
||
lxb_url_host_destroy(host, mraw);
|
||
|
||
host->type = LXB_URL_HOST_TYPE_EMPTY;
|
||
}
|
||
|
||
static bool
|
||
lxb_url_host_eq(lxb_url_host_t *host, const lxb_char_t *data, size_t length)
|
||
{
|
||
lexbor_str_t *str;
|
||
|
||
if (host->type != LXB_URL_HOST_TYPE__UNDEF) {
|
||
if (host->type <= LXB_URL_HOST_TYPE_OPAQUE) {
|
||
str = &host->u.domain;
|
||
|
||
return str->length == length
|
||
&& memcmp(data, str->data, length) == 0;
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
lxb_inline void
|
||
lxb_url_port_set(lxb_url_t *url, uint16_t port)
|
||
{
|
||
url->port = port;
|
||
url->has_port = true;
|
||
}
|
||
|
||
static void
|
||
lxb_url_fragment_set_null(lxb_url_t *url)
|
||
{
|
||
if (url->fragment.data != NULL) {
|
||
(void) lexbor_str_destroy(&url->fragment, url->mraw, false);
|
||
}
|
||
}
|
||
|
||
lxb_inline bool
|
||
lxb_url_includes_credentials(const lxb_url_t *url)
|
||
{
|
||
return url->username.length != 0 || url->password.length != 0;
|
||
}
|
||
|
||
lxb_inline void
|
||
lxb_url_encoding_init(const lxb_encoding_data_t *encoding,
|
||
lxb_encoding_encode_t *encode)
|
||
{
|
||
(void) lxb_encoding_encode_init_single(encode, encoding);
|
||
}
|
||
|
||
static bool
|
||
lxb_url_start_windows_drive_letter(const lxb_char_t *data,
|
||
const lxb_char_t *end)
|
||
{
|
||
size_t length = end - data;
|
||
|
||
if (length < 2) {
|
||
return false;
|
||
}
|
||
|
||
if (!lxb_url_is_windows_letter(data[0])
|
||
|| (data[1] != ':' && data[1] != '|'))
|
||
{
|
||
return false;
|
||
}
|
||
|
||
if (length > 2 && !( data[2] == '/' || data[2] == '\\'
|
||
|| data[2] == '?' || data[2] == '#'))
|
||
{
|
||
return false;
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
static bool
|
||
lxb_url_windows_drive_letter(const lxb_char_t *data, const lxb_char_t *end)
|
||
{
|
||
size_t length = end - data;
|
||
|
||
if (length < 2) {
|
||
return false;
|
||
}
|
||
|
||
return lxb_url_is_windows_letter(data[0])
|
||
&& (data[1] == ':' || data[1] == '|');
|
||
}
|
||
|
||
static bool
|
||
lxb_url_normalized_windows_drive_letter(const lxb_char_t *data,
|
||
const lxb_char_t *end)
|
||
{
|
||
size_t length = end - data;
|
||
|
||
if (length < 2) {
|
||
return false;
|
||
}
|
||
|
||
return lxb_url_is_windows_letter(data[0]) && data[1] == ':';
|
||
}
|
||
|
||
static bool
|
||
lxb_url_cannot_have_user_pass_port(lxb_url_t *url)
|
||
{
|
||
return url->host.type == LXB_URL_HOST_TYPE_EMPTY
|
||
|| url->host.type == LXB_URL_HOST_TYPE__UNDEF
|
||
|| url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE;
|
||
}
|
||
|
||
lxb_url_t *
|
||
lxb_url_parse(lxb_url_parser_t *parser, const lxb_url_t *base_url,
|
||
const lxb_char_t *data, size_t length)
|
||
{
|
||
(void) lxb_url_parse_basic(parser, NULL, base_url, data, length,
|
||
LXB_URL_STATE__UNDEF, LXB_ENCODING_AUTO);
|
||
return parser->url;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_parse_basic(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_url_t *base_url,
|
||
const lxb_char_t *data, size_t length,
|
||
lxb_url_state_t override_state, lxb_encoding_t encoding)
|
||
{
|
||
lxb_status_t status;
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, base_url, data,
|
||
length, override_state, encoding);
|
||
if (status != LXB_STATUS_OK) {
|
||
if (parser->url != url) {
|
||
parser->url = lxb_url_destroy(parser->url);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_parse_basic_h(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_url_t *base_url,
|
||
const lxb_char_t *data, size_t length,
|
||
lxb_url_state_t override_state, lxb_encoding_t encoding)
|
||
{
|
||
bool at_sign, inside_bracket;
|
||
size_t len;
|
||
uint32_t port;
|
||
lxb_status_t status;
|
||
lexbor_str_t tmp_str;
|
||
lxb_url_state_t state;
|
||
const lxb_char_t *p, *begin, *end, *tmp, *pswd, *buf, *orig_data;
|
||
lxb_char_t c;
|
||
lxb_codepoint_t cp;
|
||
lxb_url_map_type_t map_type;
|
||
const lxb_url_scheme_data_t *schm;
|
||
const lxb_encoding_data_t *enc;
|
||
lxb_url_host_opt_t opt;
|
||
|
||
static const lexbor_str_t mp_str = lexbor_str("");
|
||
static const lexbor_str_t lh_str = lexbor_str("localhost");
|
||
|
||
if (url == NULL) {
|
||
url = lexbor_mraw_calloc(parser->mraw, sizeof(lxb_url_t));
|
||
if (url == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
url->mraw = parser->mraw;
|
||
|
||
status = lxb_url_leading_trailing(parser, &data, &length);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
parser->url = url;
|
||
orig_data = data;
|
||
|
||
buf = lxb_url_remove_tab_newline(parser, data, &length);
|
||
if (buf != data) {
|
||
if (buf == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
data = buf;
|
||
}
|
||
|
||
state = LXB_URL_STATE_SCHEME_START_STATE;
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
state = override_state;
|
||
}
|
||
|
||
if (encoding <= LXB_ENCODING_UNDEFINED
|
||
|| encoding == LXB_ENCODING_UTF_16BE
|
||
|| encoding == LXB_ENCODING_UTF_16LE)
|
||
{
|
||
encoding = LXB_ENCODING_UTF_8;
|
||
}
|
||
|
||
enc = lxb_encoding_data(encoding);
|
||
if (enc == NULL) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_WRONG_ARGS);
|
||
}
|
||
|
||
inside_bracket = false;
|
||
|
||
p = data;
|
||
end = data + length;
|
||
|
||
/* And go. */
|
||
|
||
schm = lxb_url_scheme_by_type(LXB_URL_SCHEMEL_TYPE__UNDEF);
|
||
|
||
again:
|
||
|
||
switch (state) {
|
||
case LXB_URL_STATE_SCHEME_START_STATE:
|
||
if (p >= end || lexbor_str_res_alpha_character[*p] == 0xff) {
|
||
if (override_state == LXB_URL_STATE__UNDEF) {
|
||
state = LXB_URL_STATE_NO_SCHEME_STATE;
|
||
goto again;
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
}
|
||
|
||
/* Fall through. */
|
||
|
||
case LXB_URL_STATE_SCHEME_STATE:
|
||
do {
|
||
p++;
|
||
}
|
||
while (p < end && (lexbor_str_res_alphanumeric_character[*p] != 0xff
|
||
|| *p == '+' || *p == '-' || *p == '.'));
|
||
|
||
if (p >= end || *p != ':') {
|
||
if (override_state == LXB_URL_STATE__UNDEF) {
|
||
p = data;
|
||
|
||
state = LXB_URL_STATE_NO_SCHEME_STATE;
|
||
goto again;
|
||
}
|
||
else if (p < end || override_state != LXB_URL_STATE_SCHEME_START_STATE) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
}
|
||
}
|
||
|
||
schm = lxb_url_scheme_find(data, p - data);
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
if (lxb_url_is_special(url) != lxb_url_scheme_is_special(schm)) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
if (url->has_port || lxb_url_includes_credentials(url)) {
|
||
if (schm->type == LXB_URL_SCHEMEL_TYPE_FILE) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
}
|
||
|
||
if (url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE
|
||
&& url->host.type == LXB_URL_HOST_TYPE_EMPTY)
|
||
{
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
}
|
||
|
||
url->scheme.type = schm->type;
|
||
url->scheme.name.length = 0;
|
||
|
||
status = lxb_url_str_init(&url->scheme.name, url->mraw, p - data);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
||
}
|
||
|
||
(void) lexbor_str_append_lowercase(&url->scheme.name, url->mraw,
|
||
data, p - data);
|
||
|
||
p += 1;
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
if (url->has_port && url->port == schm->port) {
|
||
url->port = 0;
|
||
url->has_port = false;
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
if (schm->type == LXB_URL_SCHEMEL_TYPE_FILE) {
|
||
if (end - p < 2 || p[0] != '/' || p[1] != '/') {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_SPECIAL_SCHEME_MISSING_FOLLOWING_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
|
||
state = LXB_URL_STATE_FILE_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (lxb_url_scheme_is_special(schm)) {
|
||
if (base_url != NULL
|
||
&& lxb_url_scheme_is_equal(&url->scheme, &base_url->scheme))
|
||
{
|
||
state = LXB_URL_STATE_SPECIAL_RELATIVE_OR_AUTHORITY_STATE;
|
||
}
|
||
else {
|
||
state = LXB_URL_STATE_SPECIAL_AUTHORITY_SLASHES_STATE;
|
||
}
|
||
|
||
goto again;
|
||
}
|
||
|
||
if (p < end && *p == '/') {
|
||
p += 1;
|
||
state = LXB_URL_STATE_PATH_OR_AUTHORITY_STATE;
|
||
goto again;
|
||
}
|
||
|
||
lxb_url_path_set_null(url);
|
||
|
||
state = LXB_URL_STATE_OPAQUE_PATH_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_NO_SCHEME_STATE:
|
||
if (base_url == NULL) {
|
||
goto failed_non_relative_url;
|
||
}
|
||
|
||
if (base_url->path.opaque) {
|
||
if (p >= end || *p != '#') {
|
||
goto failed_non_relative_url;
|
||
}
|
||
|
||
p += 1;
|
||
|
||
status = lxb_url_scheme_copy(&base_url->scheme,
|
||
&url->scheme, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_path_copy(base_url, url);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_query_copy(&base_url->query, &url->query,
|
||
url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_fragment_set_null(url);
|
||
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (base_url->scheme.type != LXB_URL_SCHEMEL_TYPE_FILE) {
|
||
state = LXB_URL_STATE_RELATIVE_STATE;
|
||
goto again;
|
||
}
|
||
|
||
state = LXB_URL_STATE_FILE_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_SPECIAL_RELATIVE_OR_AUTHORITY_STATE:
|
||
if (end - p > 1 && p[0] == '/' && p[1] == '/') {
|
||
p += 2;
|
||
state = LXB_URL_STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE;
|
||
goto again;
|
||
}
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_SPECIAL_SCHEME_MISSING_FOLLOWING_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
state = LXB_URL_STATE_RELATIVE_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_PATH_OR_AUTHORITY_STATE:
|
||
if (p < end && *p == '/') {
|
||
p += 1;
|
||
state = LXB_URL_STATE_AUTHORITY_STATE;
|
||
}
|
||
else {
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
}
|
||
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_RELATIVE_STATE:
|
||
status = lxb_url_scheme_copy(&base_url->scheme, &url->scheme, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (end - p >= 1) {
|
||
if (*p == '/') {
|
||
p += 1;
|
||
state = LXB_URL_STATE_RELATIVE_SLASH_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (lxb_url_is_special(url) && *p == '\\') {
|
||
p += 1;
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
state = LXB_URL_STATE_RELATIVE_SLASH_STATE;
|
||
goto again;
|
||
}
|
||
}
|
||
|
||
status = lxb_url_username_copy(&base_url->username, &url->username,
|
||
url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_password_copy(&base_url->password, &url->password,
|
||
url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_host_copy(&base_url->host, &url->host, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (base_url->has_port) {
|
||
lxb_url_port_set(url, base_url->port);
|
||
}
|
||
|
||
status = lxb_url_path_copy(base_url, url);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_query_copy(&base_url->query, &url->query, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (end - p == 0) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
if (*p == '?') {
|
||
p += 1;
|
||
|
||
(void) lexbor_str_destroy(&url->query, url->mraw, false);
|
||
|
||
state = LXB_URL_STATE_QUERY_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (*p == '#') {
|
||
p += 1;
|
||
|
||
(void) lexbor_str_destroy(&url->fragment, url->mraw, false);
|
||
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
goto again;
|
||
}
|
||
|
||
(void) lexbor_str_destroy(&url->query, url->mraw, false);
|
||
|
||
lxb_url_path_shorten(url);
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_RELATIVE_SLASH_STATE:
|
||
c = (end - p >= 1) ? *p : '\0';
|
||
|
||
if (lxb_url_is_special(url) && (c == '/' || c == '\\')) {
|
||
if (c == '\\') {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
|
||
p += 1;
|
||
state = LXB_URL_STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (c == '/') {
|
||
p += 1;
|
||
state = LXB_URL_STATE_AUTHORITY_STATE;
|
||
goto again;
|
||
}
|
||
|
||
status = lxb_url_username_copy(&base_url->username, &url->username,
|
||
url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_password_copy(&base_url->password, &url->password,
|
||
url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_host_copy(&base_url->host, &url->host, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (base_url->has_port) {
|
||
lxb_url_port_set(url, base_url->port);
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_SPECIAL_AUTHORITY_SLASHES_STATE:
|
||
state = LXB_URL_STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE;
|
||
|
||
if (end - p > 1 && p[0] == '/' && p[1] == '/') {
|
||
p += 2;
|
||
}
|
||
else {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_SPECIAL_SCHEME_MISSING_FOLLOWING_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_SPECIAL_AUTHORITY_IGNORE_SLASHES_STATE:
|
||
if (p >= end || (*p != '/' && *p != '\\')) {
|
||
state = LXB_URL_STATE_AUTHORITY_STATE;
|
||
goto again;
|
||
}
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_SPECIAL_SCHEME_MISSING_FOLLOWING_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
p += 1;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_AUTHORITY_STATE:
|
||
begin = p;
|
||
pswd = NULL;
|
||
at_sign = false;
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
switch (c) {
|
||
case '@':
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_CREDENTIALS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (p == begin) {
|
||
at_sign = true;
|
||
break;
|
||
}
|
||
|
||
if (pswd == NULL || !at_sign) {
|
||
tmp = (pswd != NULL) ? pswd - 1 : p;
|
||
|
||
if (tmp > begin) {
|
||
status = lxb_url_percent_encode_after_utf_8(begin, tmp,
|
||
&url->username, url->mraw,
|
||
LXB_URL_MAP_USERINFO, false);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
}
|
||
|
||
if (pswd != NULL && p > pswd) {
|
||
status = lxb_url_percent_encode_after_utf_8(pswd, p,
|
||
&url->password, url->mraw,
|
||
LXB_URL_MAP_USERINFO, false);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
pswd = p;
|
||
}
|
||
|
||
at_sign = true;
|
||
begin = p;
|
||
|
||
break;
|
||
|
||
case ':':
|
||
if (pswd == NULL) {
|
||
pswd = p + 1;
|
||
}
|
||
|
||
break;
|
||
|
||
case '/':
|
||
case '\\':
|
||
case '?':
|
||
case '#':
|
||
if (c == '\\') {
|
||
if (!lxb_url_is_special(url)) {
|
||
p += 1;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
goto authority_done;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
authority_done:
|
||
|
||
if (at_sign) {
|
||
if (begin == p || begin == p - 1) {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_CREDENTIALS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
}
|
||
|
||
/* Skip '@'. */
|
||
begin += 1;
|
||
}
|
||
|
||
p = begin;
|
||
|
||
state = LXB_URL_STATE_HOST_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_HOST_STATE:
|
||
case LXB_URL_STATE_HOSTNAME_STATE:
|
||
begin = p;
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF
|
||
&& url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE)
|
||
{
|
||
p -= 1;
|
||
state = LXB_URL_STATE_FILE_HOST_STATE;
|
||
goto again;
|
||
}
|
||
|
||
opt = !lxb_url_is_special(url);
|
||
|
||
for (; p < end; p++) {
|
||
c = *p;
|
||
|
||
switch (c) {
|
||
case '/':
|
||
case '?':
|
||
case '#':
|
||
goto host_done;
|
||
|
||
case '\\':
|
||
if (!lxb_url_is_special(url)) {
|
||
break;
|
||
}
|
||
|
||
goto host_done;
|
||
|
||
case ':':
|
||
if (inside_bracket) {
|
||
break;
|
||
}
|
||
|
||
if (p == begin) {
|
||
goto failed_host;
|
||
}
|
||
|
||
if (override_state == LXB_URL_STATE_HOSTNAME_STATE) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR);
|
||
}
|
||
|
||
status = lxb_url_host_parse(parser, begin, p, &url->host,
|
||
url->mraw, opt);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
p += 1;
|
||
|
||
state = LXB_URL_STATE_PORT_STATE;
|
||
goto again;
|
||
|
||
case '[':
|
||
inside_bracket = true;
|
||
break;
|
||
|
||
case ']':
|
||
inside_bracket = false;
|
||
break;
|
||
|
||
case '%':
|
||
opt |= LXB_URL_HOST_OPT_DECODE;
|
||
break;
|
||
|
||
case 'X':
|
||
case 'x':
|
||
if (p + 4 <= end && (p[1] == 'n' || p[1] == 'N')
|
||
&& p[2] == '-' && p[3] == '-')
|
||
{
|
||
opt |= LXB_URL_HOST_OPT_IDNA;
|
||
p += 3;
|
||
}
|
||
|
||
break;
|
||
|
||
default:
|
||
if (c >= 0x80) {
|
||
opt |= LXB_URL_HOST_OPT_IDNA;
|
||
}
|
||
|
||
break;
|
||
}
|
||
}
|
||
|
||
host_done:
|
||
|
||
if (begin == p && lxb_url_is_special(url)) {
|
||
goto failed_host;
|
||
}
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF && begin == p
|
||
&& (lxb_url_includes_credentials(url) || url->has_port))
|
||
{
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
status = lxb_url_host_parse(parser, begin, p, &url->host,
|
||
url->mraw, opt);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_START_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_PORT_STATE:
|
||
begin = p;
|
||
|
||
while (true) {
|
||
c = (p < end) ? *p : '\0';
|
||
|
||
if (c >= '0' && c <= '9') {
|
||
p += 1;
|
||
continue;
|
||
}
|
||
|
||
if (p >= end || c == '/' || c == '?' || c == '#'
|
||
|| (lxb_url_is_special(url) && c == '\\')
|
||
|| override_state != LXB_URL_STATE__UNDEF)
|
||
{
|
||
if (begin == p) {
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR);
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_START_STATE;
|
||
goto again;
|
||
}
|
||
|
||
port = 0;
|
||
|
||
while (begin < p) {
|
||
port = lexbor_str_res_map_num[*begin++] + port * 10;
|
||
|
||
if (port > 65535) {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_PORT_OUT_OF_RANGE);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf,
|
||
LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
}
|
||
}
|
||
|
||
if (!lxb_url_is_special(url)
|
||
|| !lxb_url_scheme_equal_port(url->scheme.type, port))
|
||
{
|
||
url->port = port;
|
||
url->has_port = true;
|
||
}
|
||
else {
|
||
url->port = 0;
|
||
url->has_port = false;
|
||
}
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_START_STATE;
|
||
goto again;
|
||
}
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_PORT_INVALID);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
}
|
||
|
||
break;
|
||
|
||
case LXB_URL_STATE_FILE_STATE:
|
||
schm = lxb_url_scheme_by_type(LXB_URL_SCHEMEL_TYPE_FILE);
|
||
|
||
status = lxb_url_scheme_copy_special(schm, &url->scheme, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_host_set_empty(&url->host, url->mraw);
|
||
|
||
c = (p < end) ? *p : '\0';
|
||
|
||
if (c == '/' || c == '\\') {
|
||
if (c == '\\') {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
|
||
p += 1;
|
||
|
||
state = LXB_URL_STATE_FILE_SLASH_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (base_url != NULL
|
||
&& base_url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE)
|
||
{
|
||
status = lxb_url_host_copy(&base_url->host, &url->host, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_path_copy(base_url, url);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_query_copy(&base_url->query, &url->query, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (c == '?') {
|
||
p += 1;
|
||
|
||
(void) lexbor_str_destroy(&url->query, url->mraw, false);
|
||
|
||
state = LXB_URL_STATE_QUERY_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (c == '#') {
|
||
p += 1;
|
||
|
||
(void) lexbor_str_destroy(&url->fragment, url->mraw, false);
|
||
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (p >= end) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
(void) lexbor_str_destroy(&url->query, url->mraw, false);
|
||
|
||
if (!lxb_url_start_windows_drive_letter(p, end)) {
|
||
lxb_url_path_shorten(url);
|
||
}
|
||
else {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_FILE_INVALID_WINDOWS_DRIVE_LETTER);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_path_set_null(url);
|
||
url->path.opaque = true;
|
||
}
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_FILE_SLASH_STATE:
|
||
c = (p < end) ? *p : '\0';
|
||
|
||
if (c == '/' || c == '\\') {
|
||
if (c == '\\') {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
|
||
p += 1;
|
||
|
||
state = LXB_URL_STATE_FILE_HOST_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (base_url != NULL
|
||
&& base_url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE)
|
||
{
|
||
status = lxb_url_host_copy(&base_url->host, &url->host, url->mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (!lxb_url_start_windows_drive_letter(p, end)
|
||
&& !base_url->path.opaque && base_url->path.length >= 1)
|
||
{
|
||
tmp = lxb_url_path_part_by_index(base_url,
|
||
base_url->path.str.data[0] == '/', &len);
|
||
|
||
if (tmp != NULL
|
||
&& lxb_url_normalized_windows_drive_letter(tmp, tmp + len))
|
||
{
|
||
len = (tmp + len) - base_url->path.str.data;
|
||
|
||
status = lxb_url_path_append_wo_slash(url,
|
||
base_url->path.str.data, len);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_FILE_HOST_STATE:
|
||
begin = p;
|
||
opt = !lxb_url_is_special(url);
|
||
|
||
while (true) {
|
||
if (p < end) {
|
||
switch (*p) {
|
||
case '/':
|
||
case '\\':
|
||
case '?':
|
||
case '#':
|
||
break;
|
||
|
||
case '%':
|
||
p += 1;
|
||
opt |= LXB_URL_HOST_OPT_DECODE;
|
||
continue;
|
||
|
||
case 'X':
|
||
case 'x':
|
||
if (p + 4 <= end && (p[1] == 'n' || p[1] == 'N')
|
||
&& p[2] == '-' && p[3] == '-')
|
||
{
|
||
opt |= LXB_URL_HOST_OPT_IDNA;
|
||
p += 3;
|
||
}
|
||
|
||
p += 1;
|
||
continue;
|
||
|
||
default:
|
||
if (*p >= 0x80) {
|
||
opt |= LXB_URL_HOST_OPT_IDNA;
|
||
}
|
||
|
||
p += 1;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
if (override_state == LXB_URL_STATE__UNDEF && p - begin == 2
|
||
&& lxb_url_windows_drive_letter(begin, p))
|
||
{
|
||
status = lxb_url_log_append(parser, begin,
|
||
LXB_URL_ERROR_TYPE_FILE_INVALID_WINDOWS_DRIVE_LETTER_HOST);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
p = begin;
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (p == begin) {
|
||
lxb_url_host_set_empty(&url->host, url->mraw);
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_START_STATE;
|
||
goto again;
|
||
}
|
||
|
||
status = lxb_url_host_parse(parser, begin, p, &url->host,
|
||
url->mraw, opt);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (lxb_url_host_eq(&url->host, lh_str.data, lh_str.length)) {
|
||
lxb_url_host_set_empty(&url->host, url->mraw);
|
||
}
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_START_STATE;
|
||
goto again;
|
||
}
|
||
|
||
break;
|
||
|
||
case LXB_URL_STATE_PATH_START_STATE:
|
||
c = (p < end) ? *p : '\0';
|
||
|
||
if (lxb_url_is_special(url)) {
|
||
if (c == '\\') {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
else if (c == '/') {
|
||
p += 1;
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (override_state == LXB_URL_STATE__UNDEF) {
|
||
if (c == '?') {
|
||
p += 1;
|
||
state = LXB_URL_STATE_QUERY_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (c == '#') {
|
||
p += 1;
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
goto again;
|
||
}
|
||
}
|
||
|
||
if (p < end) {
|
||
if (c == '/') {
|
||
p += 1;
|
||
}
|
||
|
||
state = LXB_URL_STATE_PATH_STATE;
|
||
goto again;
|
||
}
|
||
|
||
if (override_state != LXB_URL_STATE__UNDEF
|
||
&& url->host.type == LXB_URL_HOST_TYPE__UNDEF)
|
||
{
|
||
status = lxb_url_path_append(url, mp_str.data, mp_str.length);
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
|
||
case LXB_URL_STATE_PATH_STATE:
|
||
p = lxb_url_path_fast_path(parser, url, p, end,
|
||
override_state == LXB_URL_STATE__UNDEF);
|
||
if (p == NULL) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_MEMORY_ALLOCATION);
|
||
}
|
||
|
||
if (p >= end) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
switch (*p) {
|
||
case '?':
|
||
p += 1;
|
||
state = LXB_URL_STATE_QUERY_STATE;
|
||
break;
|
||
case '#':
|
||
p += 1;
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
break;
|
||
}
|
||
|
||
goto again;
|
||
|
||
case LXB_URL_STATE_OPAQUE_PATH_STATE:
|
||
begin = p;
|
||
url->path.opaque = true;
|
||
|
||
while (true) {
|
||
if (p >= end) {
|
||
tmp_str.data = NULL;
|
||
|
||
status = lxb_url_percent_encode_after_utf_8(begin, p,
|
||
&tmp_str, url->mraw,
|
||
LXB_URL_MAP_C0, false);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_path_list_push(url, &tmp_str);
|
||
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
c = *p;
|
||
|
||
if (c == '#' || c == '?') {
|
||
tmp_str.data = NULL;
|
||
|
||
status = lxb_url_percent_encode_after_utf_8(begin, p,
|
||
&tmp_str, url->mraw,
|
||
LXB_URL_MAP_C0, false);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
status = lxb_url_path_list_push(url, &tmp_str);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
p += 1;
|
||
|
||
if (c == '#') {
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
}
|
||
else {
|
||
state = LXB_URL_STATE_QUERY_STATE;
|
||
}
|
||
|
||
goto again;
|
||
}
|
||
|
||
tmp = p;
|
||
cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
|
||
|
||
if ((!lxb_url_is_url_codepoint(cp) && cp != '%')
|
||
|| (cp == '%' && (end - p < 2
|
||
|| lexbor_str_res_map_hex[p[0]] == 0xff
|
||
|| lexbor_str_res_map_hex[p[1]] == 0xff)))
|
||
{
|
||
status = lxb_url_log_append(parser, tmp,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
}
|
||
|
||
break;
|
||
|
||
case LXB_URL_STATE_QUERY_STATE:
|
||
if (encoding != LXB_ENCODING_UTF_8
|
||
&& (!lxb_url_is_special(url)
|
||
|| schm->type == LXB_URL_SCHEMEL_TYPE_WS
|
||
|| schm->type == LXB_URL_SCHEMEL_TYPE_WSS))
|
||
{
|
||
encoding = LXB_ENCODING_UTF_8;
|
||
|
||
enc = lxb_encoding_data(encoding);
|
||
if (enc == NULL) {
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_WRONG_ARGS);
|
||
}
|
||
}
|
||
|
||
begin = p;
|
||
|
||
while (true) {
|
||
c = (p < end) ? *p : '\0';
|
||
|
||
if (p >= end || (override_state == LXB_URL_STATE__UNDEF && *p == '#')) {
|
||
if (lxb_url_is_special(url)) {
|
||
map_type = LXB_URL_MAP_SPECIAL_QUERY;
|
||
}
|
||
else {
|
||
map_type = LXB_URL_MAP_QUERY;
|
||
}
|
||
|
||
status = lxb_url_percent_encode_after_encoding(begin, p,
|
||
&url->query,
|
||
url->mraw, enc,
|
||
map_type, false);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
if (p < end) {
|
||
p += 1;
|
||
state = LXB_URL_STATE_FRAGMENT_STATE;
|
||
goto again;
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
}
|
||
|
||
tmp = p;
|
||
cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
|
||
|
||
if ((!lxb_url_is_url_codepoint(cp) && cp != '%')
|
||
|| (cp == '%' && (end - p < 2
|
||
|| lexbor_str_res_map_hex[p[0]] == 0xff
|
||
|| lexbor_str_res_map_hex[p[1]] == 0xff)))
|
||
{
|
||
status = lxb_url_log_append(parser, tmp,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
}
|
||
|
||
break;
|
||
|
||
case LXB_URL_STATE_FRAGMENT_STATE:
|
||
begin = p;
|
||
|
||
while (p < end) {
|
||
tmp = p;
|
||
cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
|
||
|
||
if ((!lxb_url_is_url_codepoint(cp) && cp != '%')
|
||
|| (cp == '%' && (end - p < 2
|
||
|| lexbor_str_res_map_hex[p[0]] == 0xff
|
||
|| lexbor_str_res_map_hex[p[1]] == 0xff)))
|
||
{
|
||
status = lxb_url_log_append(parser, tmp,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
}
|
||
}
|
||
|
||
status = lxb_url_percent_encode_after_utf_8(begin, p, &url->fragment,
|
||
url->mraw,
|
||
LXB_URL_MAP_FRAGMENT, false);
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
|
||
default:
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_OK);
|
||
|
||
failed_non_relative_url:
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_MISSING_SCHEME_NON_RELATIVE_URL);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
|
||
failed_host:
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_HOST_MISSING);
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_parse_return(orig_data, buf, status);
|
||
}
|
||
|
||
lxb_url_parse_return(orig_data, buf, LXB_STATUS_ERROR_UNEXPECTED_DATA);
|
||
}
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_path_fast_path(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_char_t *p, const lxb_char_t *end, bool bqs)
|
||
{
|
||
size_t count;
|
||
lxb_char_t x, c;
|
||
lxb_status_t status;
|
||
const lxb_char_t *begin, *last;
|
||
|
||
begin = p;
|
||
last = p;
|
||
count = url->path.length;
|
||
|
||
for (; p < end; p++) {
|
||
c = *p;
|
||
x = lxb_url_path_map[c];
|
||
|
||
if (x != 0x00) {
|
||
if (c == '/') {
|
||
count += 1;
|
||
last = p + 1;
|
||
}
|
||
else if (c == '%') {
|
||
if (end - p < 3
|
||
|| lexbor_str_res_map_hex[p[1]] == 0xff
|
||
|| lexbor_str_res_map_hex[p[2]] == 0xff)
|
||
{
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
p = (end - p < 3) ? end - 1 : p + 2;
|
||
}
|
||
else if (p[1] == '2' && (p[2] == 'e' || p[2] == 'E')
|
||
&& (p == begin
|
||
|| p[-1] == '/'
|
||
|| (p[-1] == '\\' && lxb_url_is_special(url))))
|
||
{
|
||
url->path.length = count;
|
||
|
||
status = lxb_url_path_try_dot(url, &begin, &last,
|
||
&p, end, bqs);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
count = url->path.length;
|
||
}
|
||
else {
|
||
p += 2;
|
||
}
|
||
}
|
||
else if ((c == '?' || c == '#') && bqs) {
|
||
break;
|
||
}
|
||
else if (c == '\\' && lxb_url_is_special(url)) {
|
||
count += 1;
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
status = lxb_url_path_append(url, begin, p - begin);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
begin = p + 1;
|
||
last = p + 1;
|
||
}
|
||
else if (c == '.') {
|
||
if (p == begin
|
||
|| p[-1] == '/'
|
||
|| (p[-1] == '\\' && lxb_url_is_special(url)))
|
||
{
|
||
url->path.length = count;
|
||
|
||
status = lxb_url_path_try_dot(url, &begin, &last,
|
||
&p, end, bqs);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
count = url->path.length;
|
||
}
|
||
}
|
||
else {
|
||
url->path.length = count;
|
||
|
||
if (last - 1 > begin) {
|
||
status = lxb_url_path_append(url, begin,
|
||
(last - 1) - begin);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
}
|
||
|
||
return lxb_url_path_slow_path(parser, url, last, end, bqs);
|
||
}
|
||
}
|
||
}
|
||
|
||
status = lxb_url_path_append(url, begin, p - begin);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
if (count == 0 || p != begin) {
|
||
count += 1;
|
||
}
|
||
|
||
url->path.length = count;
|
||
|
||
return p;
|
||
}
|
||
|
||
/*
|
||
* The lxb_url_path_slow_path() function should not be static. Otherwise, the
|
||
* compiler will inline it, which will lead to cache problems and slower code
|
||
* execution.
|
||
*/
|
||
const lxb_char_t *
|
||
lxb_url_path_slow_path(lxb_url_parser_t *parser, lxb_url_t *url,
|
||
const lxb_char_t *data, const lxb_char_t *end, bool bqs)
|
||
{
|
||
size_t len, count;
|
||
lxb_char_t c, *last, *sbuf, *sbuf_begin;
|
||
lexbor_str_t *str;
|
||
lxb_status_t status;
|
||
lxb_codepoint_t cp;
|
||
const lxb_char_t *p, *tmp, *sbuf_end;
|
||
lxb_char_t sbuffer[1024];
|
||
|
||
p = data;
|
||
sbuf = sbuffer;
|
||
sbuf_begin = sbuffer;
|
||
sbuf_end = sbuffer + sizeof(sbuffer);
|
||
|
||
c = '\0';
|
||
|
||
if (url->path.str.length != 0) {
|
||
str = &url->path.str;
|
||
|
||
/* "+ 2" == "/\0" */
|
||
if (sbuf + (str->length + 2) > sbuf_end) {
|
||
len = str->length + sizeof(sbuffer);
|
||
|
||
sbuf_begin = lexbor_malloc(len);
|
||
if (sbuf_begin == NULL) {
|
||
return NULL;
|
||
}
|
||
|
||
sbuf = sbuf_begin;
|
||
sbuf_end = sbuf + len;
|
||
}
|
||
|
||
memcpy(sbuf, str->data, str->length);
|
||
sbuf += str->length;
|
||
|
||
str->length = 0;
|
||
}
|
||
|
||
*sbuf++ = '/';
|
||
|
||
last = sbuf;
|
||
count = url->path.length;
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (c >= 0x80) {
|
||
tmp = p;
|
||
cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
|
||
|
||
if (!lxb_url_is_url_codepoint(cp)) {
|
||
status = lxb_url_log_append(parser, tmp,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
}
|
||
|
||
len = (p - tmp) * 3;
|
||
|
||
if (sbuf + len + 1 >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin, sbuf_end, sbuffer, last);
|
||
}
|
||
|
||
while (tmp < p) {
|
||
c = *tmp++;
|
||
|
||
*sbuf++ = '%';
|
||
*sbuf++ = lexbor_str_res_char_to_two_hex_value[c][0];
|
||
*sbuf++ = lexbor_str_res_char_to_two_hex_value[c][1];
|
||
}
|
||
|
||
continue;
|
||
}
|
||
|
||
if (c == '/') {
|
||
*sbuf++ = '/';
|
||
if (sbuf >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin, sbuf_end, sbuffer, last);
|
||
}
|
||
|
||
lxb_url_path_fix_windows_drive(url, last, sbuf, count);
|
||
|
||
count += 1;
|
||
last = sbuf;
|
||
|
||
if (p + 1 >= end) {
|
||
count += 1;
|
||
}
|
||
}
|
||
else if (c == '\\' && lxb_url_is_special(url)) {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_REVERSE_SOLIDUS);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
*sbuf++ = '/';
|
||
if (sbuf >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin, sbuf_end, sbuffer, last);
|
||
}
|
||
|
||
lxb_url_path_fix_windows_drive(url, last, sbuf, count);
|
||
|
||
count += 1;
|
||
last = sbuf;
|
||
|
||
if (p + 1 >= end) {
|
||
count += 1;
|
||
}
|
||
}
|
||
else if ((c == '?' || c == '#') && bqs) {
|
||
lxb_url_path_fix_windows_drive(url, last, sbuf, count);
|
||
|
||
count += 1;
|
||
last = sbuf;
|
||
break;
|
||
}
|
||
else if (lxb_url_map[c] & LXB_URL_MAP_PATH) {
|
||
if (sbuf + 4 >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin, sbuf_end, sbuffer, last);
|
||
}
|
||
|
||
*sbuf++ = '%';
|
||
*sbuf++ = lexbor_str_res_char_to_two_hex_value[c][0];
|
||
*sbuf++ = lexbor_str_res_char_to_two_hex_value[c][1];
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
}
|
||
else if (c == '.') {
|
||
if (last == sbuf) {
|
||
tmp = lxb_url_path_dot_count(url, p, end, sbuf_begin,
|
||
&sbuf, &last, &count, bqs);
|
||
|
||
if (tmp != p) {
|
||
p = tmp + 1;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
*sbuf++ = '.';
|
||
if (sbuf >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin,
|
||
sbuf_end, sbuffer, last);
|
||
}
|
||
}
|
||
else if (c == '%') {
|
||
if (end - p < 3
|
||
|| lexbor_str_res_map_hex[p[1]] == 0xff
|
||
|| lexbor_str_res_map_hex[p[2]] == 0xff)
|
||
{
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
}
|
||
else if (p[1] == '2' && (p[2] == 'e' || p[2] == 'E')
|
||
&& last == sbuf)
|
||
{
|
||
tmp = lxb_url_path_dot_count(url, p, end, sbuf_begin,
|
||
&sbuf, &last, &count, bqs);
|
||
|
||
if (tmp != p) {
|
||
p = tmp + 1;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
*sbuf++ = '%';
|
||
if (sbuf >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin,
|
||
sbuf_end, sbuffer, last);
|
||
}
|
||
}
|
||
else {
|
||
if (lxb_url_codepoint_alphanumeric[c] == 0xFF) {
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
}
|
||
|
||
*sbuf++ = c;
|
||
if (sbuf >= sbuf_end) {
|
||
LXB_URL_SBUF_REALLOC(sbuf, sbuf_begin, sbuf_end, sbuffer, last);
|
||
}
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
if (count == 0 || last < sbuf) {
|
||
lxb_url_path_fix_windows_drive(url, last, sbuf, count);
|
||
count += 1;
|
||
}
|
||
|
||
url->path.length = count;
|
||
|
||
status = lxb_url_path_append_wo_slash(url, sbuf_begin, sbuf - sbuf_begin);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
if (sbuf_begin != sbuffer) {
|
||
lexbor_free(sbuf_begin);
|
||
}
|
||
|
||
return p;
|
||
|
||
failed:
|
||
|
||
if (sbuf_begin != sbuffer) {
|
||
lexbor_free(sbuf_begin);
|
||
}
|
||
|
||
return NULL;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_path_try_dot(lxb_url_t *url, const lxb_char_t **begin,
|
||
const lxb_char_t **last, const lxb_char_t **start,
|
||
const lxb_char_t *end, bool bqs)
|
||
{
|
||
unsigned count;
|
||
lxb_char_t c;
|
||
lexbor_str_t *str;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p;
|
||
|
||
p = *start;
|
||
count = 0;
|
||
|
||
for (; p < end; p++) {
|
||
c = *p;
|
||
|
||
if (c == '/'
|
||
|| (c == '\\' && lxb_url_is_special(url))
|
||
|| ((c == '?' || c == '#') && bqs))
|
||
{
|
||
break;
|
||
}
|
||
else if (c == '.') {
|
||
count += 1;
|
||
}
|
||
else if (c == '%') {
|
||
if (p + 3 <= end && p[1] == '2' && (p[2] == 'e' || p[2] == 'E')) {
|
||
count += 1;
|
||
}
|
||
else {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
p += 2;
|
||
}
|
||
else {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
}
|
||
|
||
if (count == 0 || count > 2) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (*start > *begin) {
|
||
status = lxb_url_path_append(url, *begin, (*start - *begin) - 1);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
if (p < end) {
|
||
*start = p;
|
||
*begin = p + 1;
|
||
*last = *begin;
|
||
}
|
||
else {
|
||
*start = end - 1;
|
||
*begin = end;
|
||
*last = end;
|
||
}
|
||
|
||
if (count == 2) {
|
||
lxb_url_path_shorten(url);
|
||
}
|
||
else if (count == 1) {
|
||
str = &url->path.str;
|
||
|
||
if (str->length > 0 && str->data[str->length - 1] == '/') {
|
||
str->length -= 1;
|
||
str->data[str->length] = '\0';
|
||
}
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_path_dot_count(lxb_url_t *url, const lxb_char_t *p,
|
||
const lxb_char_t *end, const lxb_char_t *sbuf_begin,
|
||
lxb_char_t **sbuf, lxb_char_t **last, size_t *path_count,
|
||
bool bqs)
|
||
{
|
||
unsigned count;
|
||
lxb_char_t c, *last_p;
|
||
const lxb_char_t *begin;
|
||
|
||
count = 0;
|
||
begin = p;
|
||
|
||
for (; p < end; p++) {
|
||
c = *p;
|
||
|
||
if (c == '/'
|
||
|| (c == '\\' && lxb_url_is_special(url))
|
||
|| ((c == '?' || c == '#') && bqs))
|
||
{
|
||
break;
|
||
}
|
||
else if (c == '.') {
|
||
count += 1;
|
||
}
|
||
else if (c == '%') {
|
||
if (p + 3 <= end && p[1] == '2' && (p[2] == 'e' || p[2] == 'E')) {
|
||
count += 1;
|
||
}
|
||
else {
|
||
return begin;
|
||
}
|
||
|
||
p += 2;
|
||
}
|
||
else {
|
||
return begin;
|
||
}
|
||
}
|
||
|
||
if (count == 0 || count > 2) {
|
||
return begin;
|
||
}
|
||
|
||
if (url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE
|
||
&& *path_count == 1
|
||
&& lxb_url_normalized_windows_drive_letter(sbuf_begin + 1, *last - 1))
|
||
{
|
||
return p;
|
||
}
|
||
|
||
if (count == 2) {
|
||
if (*path_count > 0) {
|
||
*path_count -= 1;
|
||
last_p = *last - 1;
|
||
|
||
while (last_p > sbuf_begin) {
|
||
last_p -= 1;
|
||
|
||
if (*last_p == '/') {
|
||
*sbuf = last_p + 1;
|
||
break;
|
||
}
|
||
}
|
||
|
||
*last = *sbuf;
|
||
}
|
||
}
|
||
|
||
return p;
|
||
}
|
||
|
||
static void
|
||
lxb_url_path_fix_windows_drive(lxb_url_t *url, lxb_char_t *sbuf,
|
||
const lxb_char_t *last, size_t count)
|
||
{
|
||
if (url->scheme.type == LXB_URL_SCHEMEL_TYPE_FILE
|
||
&& count == 0
|
||
&& ((last - sbuf == 3 && (last[-1] == '/')) || last - sbuf == 2)
|
||
&& lxb_url_windows_drive_letter(sbuf, last))
|
||
{
|
||
sbuf[1] = ':';
|
||
}
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_leading_trailing(lxb_url_parser_t *parser,
|
||
const lxb_char_t **data, size_t *length)
|
||
{
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
lxb_codepoint_t cp;
|
||
const lxb_char_t *p, *end, *tmp;
|
||
|
||
p = *data;
|
||
end = p + *length;
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (c > 0x1F && c < 0x80 && c != 0x20) {
|
||
break;
|
||
}
|
||
else if (c >= 0x80) {
|
||
tmp = p;
|
||
cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
|
||
if (cp > 0x1F && cp != LXB_ENCODING_DECODE_ERROR) {
|
||
p = tmp;
|
||
break;
|
||
}
|
||
|
||
continue;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
if (p != *data) {
|
||
status = lxb_url_log_append(parser, *data,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
while (end > p) {
|
||
tmp = end;
|
||
cp = lxb_encoding_decode_valid_utf_8_single_reverse(&end, p);
|
||
|
||
if (cp > 0x1F && cp != 0x20 && cp != LXB_ENCODING_DECODE_ERROR) {
|
||
end = tmp;
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (end != *data + *length) {
|
||
status = lxb_url_log_append(parser, end,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
*data = p;
|
||
*length = end - p;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_remove_tab_newline(lxb_url_parser_t *parser,
|
||
const lxb_char_t *data, size_t *length)
|
||
{
|
||
size_t len;
|
||
lxb_char_t c, *buf, *p_buf;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p, *end;
|
||
|
||
p = data;
|
||
end = data + *length;
|
||
|
||
/* Fast path. */
|
||
|
||
p = lexbor_swar_seek3(p, end, '\n', '\r', '\t');
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (c == '\n' || c == '\r' || c == '\t') {
|
||
/* Slow path. */
|
||
goto oh_my;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
return data;
|
||
|
||
oh_my:
|
||
|
||
status = lxb_url_log_append(parser, p,
|
||
LXB_URL_ERROR_TYPE_INVALID_URL_UNIT);
|
||
if (status != LXB_STATUS_OK) {
|
||
return NULL;
|
||
}
|
||
|
||
buf = lexbor_malloc(*length + 1);
|
||
if (buf == NULL) {
|
||
return NULL;
|
||
}
|
||
|
||
p_buf = buf;
|
||
len = p - data;
|
||
p_buf += len;
|
||
|
||
memcpy(buf, data, len);
|
||
|
||
p += 1;
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (c == '\n' || c == '\r' || c == '\t') {
|
||
p += 1;
|
||
continue;
|
||
}
|
||
|
||
*p_buf++ = c;
|
||
p += 1;
|
||
}
|
||
|
||
*length = p_buf - buf;
|
||
|
||
buf[*length] = '\0';
|
||
|
||
return buf;
|
||
}
|
||
|
||
|
||
static const lxb_url_scheme_data_t *
|
||
lxb_url_scheme_find(const lxb_char_t *data, size_t length)
|
||
{
|
||
const lxb_url_scheme_data_t *schm;
|
||
|
||
for (size_t i = LXB_URL_SCHEMEL_TYPE__UNKNOWN + 1;
|
||
i < lxb_url_scheme_length; i++)
|
||
{
|
||
schm = &lxb_url_scheme_res[i];
|
||
|
||
if (schm->name.length == length) {
|
||
if (lexbor_str_data_ncasecmp(schm->name.data, data, length)) {
|
||
return schm;
|
||
}
|
||
}
|
||
}
|
||
|
||
return &lxb_url_scheme_res[LXB_URL_SCHEMEL_TYPE__UNKNOWN];
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_encode_after_encoding(const lxb_char_t *data,
|
||
const lxb_char_t *end, lexbor_str_t *str,
|
||
lexbor_mraw_t *mraw,
|
||
const lxb_encoding_data_t *encoding,
|
||
lxb_url_map_type_t enmap,
|
||
bool space_as_plus)
|
||
{
|
||
int8_t len;
|
||
size_t length, size;
|
||
lxb_status_t status;
|
||
lxb_codepoint_t cp;
|
||
const lxb_char_t *p, *pb;
|
||
lxb_encoding_encode_t encode;
|
||
lxb_char_t c, buffer[128], percent[3];
|
||
lxb_char_t *buf = buffer;
|
||
const lxb_char_t *buf_end = buf + sizeof(buffer);
|
||
static const lexbor_str_t esc_str = lexbor_str("%26%23");
|
||
|
||
if (encoding->encoding == LXB_ENCODING_UTF_8) {
|
||
return lxb_url_percent_encode_after_utf_8(data, end, str, mraw,
|
||
enmap, space_as_plus);
|
||
}
|
||
|
||
lxb_url_encoding_init(encoding, &encode);
|
||
|
||
p = data;
|
||
length = end - p;
|
||
|
||
/* Only valid for UTF-8. */
|
||
|
||
while (p < end) {
|
||
if (lxb_url_map[*p++] & enmap) {
|
||
length += 2;
|
||
}
|
||
}
|
||
|
||
status = lxb_url_str_init(str, mraw, length + 1);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
p = data;
|
||
percent[0] = '%';
|
||
|
||
while (p < end) {
|
||
cp = lxb_encoding_decode_valid_utf_8_single(&p, end);
|
||
if (cp > LXB_ENCODING_DECODE_MAX_CODEPOINT) {
|
||
continue;
|
||
}
|
||
|
||
len = encoding->encode_single(&encode, &buf, buf_end, cp);
|
||
|
||
if (len < LXB_ENCODING_ENCODE_OK) {
|
||
size = lexbor_conv_int64_to_data((int64_t) cp, buf, buf_end - buf);
|
||
|
||
if (lexbor_str_append(str, mraw, esc_str.data, esc_str.length) == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
if (lexbor_str_append(str, mraw, buf, size) == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
percent[1] = '3';
|
||
percent[2] = 'B';
|
||
|
||
if (lexbor_str_append(str, mraw, percent, 3) == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
continue;
|
||
}
|
||
|
||
iso_2022_jp:
|
||
|
||
pb = buffer;
|
||
|
||
while (pb < buf) {
|
||
c = *pb;
|
||
|
||
if (space_as_plus && c == ' ') {
|
||
pb += 1;
|
||
|
||
if (lexbor_str_append_one(str, mraw, '+') == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
else if (lxb_url_map[c] & enmap) {
|
||
percent[1] = lexbor_str_res_char_to_two_hex_value[c][0];
|
||
percent[2] = lexbor_str_res_char_to_two_hex_value[c][1];
|
||
|
||
if (lexbor_str_append(str, mraw, percent, 3) == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
else {
|
||
if (lexbor_str_append_one(str, mraw, c) == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
|
||
pb++;
|
||
}
|
||
|
||
buf = buffer;
|
||
}
|
||
|
||
if (encoding->encoding == LXB_ENCODING_ISO_2022_JP) {
|
||
len = lxb_encoding_encode_iso_2022_jp_eof_single(&encode,
|
||
&buf, buf_end);
|
||
if (len != 0) {
|
||
goto iso_2022_jp;
|
||
}
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_encode_after_utf_8(const lxb_char_t *data,
|
||
const lxb_char_t *end, lexbor_str_t *str,
|
||
lexbor_mraw_t *mraw,
|
||
lxb_url_map_type_t enmap,
|
||
bool space_as_plus)
|
||
{
|
||
size_t length;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p;
|
||
lxb_char_t c, *pd;
|
||
|
||
p = data;
|
||
length = end - p;
|
||
|
||
/* Only valid for UTF-8. */
|
||
|
||
while (p < end) {
|
||
if (lxb_url_map[*p++] & enmap) {
|
||
length += 2;
|
||
}
|
||
}
|
||
|
||
status = lxb_url_str_init(str, mraw, length + 1);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
p = data;
|
||
pd = &str->data[str->length];
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (space_as_plus && c == ' ') {
|
||
*pd++ = '+';
|
||
}
|
||
else if (lxb_url_map[c] & enmap) {
|
||
*pd++ = '%';
|
||
*pd++ = lexbor_str_res_char_to_two_hex_value[c][0];
|
||
*pd++ = lexbor_str_res_char_to_two_hex_value[c][1];
|
||
}
|
||
else {
|
||
*pd++ = c;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
*pd = '\0';
|
||
str->length += pd - &str->data[str->length];
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, lxb_url_host_t *host,
|
||
lexbor_mraw_t *mraw, lxb_url_host_opt_t opt)
|
||
{
|
||
uint32_t ipv4;
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
lexbor_str_t *domain;
|
||
const lxb_char_t *p;
|
||
lxb_url_idna_ctx_t context;
|
||
|
||
if (data < end && *data == '[') {
|
||
if (end[-1] != ']') {
|
||
status = lxb_url_log_append(parser, &end[-1],
|
||
LXB_URL_ERROR_TYPE_IPV6_UNCLOSED);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
|
||
data += 1;
|
||
end -= 1;
|
||
|
||
host->type = LXB_URL_HOST_TYPE_IPV6;
|
||
|
||
return lxb_url_ipv6_parse(parser, data, end, host->u.ipv6);
|
||
}
|
||
|
||
if (opt & LXB_URL_HOST_OPT_NOT_SPECIAL) {
|
||
return lxb_url_opaque_host_parse(parser, data, end, host, mraw);
|
||
}
|
||
|
||
domain = &host->u.domain;
|
||
|
||
if (opt & LXB_URL_HOST_OPT_DECODE) {
|
||
status = lxb_url_percent_decode(data, end, domain, mraw, &opt);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
else {
|
||
status = lxb_url_str_init(domain, mraw, (end - data) + 1);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
if (opt & LXB_URL_HOST_OPT_IDNA) {
|
||
domain->length = end - data;
|
||
|
||
memcpy(domain->data, data, domain->length);
|
||
domain->data[domain->length] = '\0';
|
||
}
|
||
else {
|
||
(void) lexbor_str_append_lowercase(domain, mraw, data, end - data);
|
||
}
|
||
}
|
||
|
||
if (opt & LXB_URL_HOST_OPT_IDNA) {
|
||
if (parser->idna == NULL) {
|
||
parser->idna = lxb_unicode_idna_create();
|
||
status = lxb_unicode_idna_init(parser->idna);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
context.str = domain;
|
||
context.mraw = mraw;
|
||
|
||
status = lxb_unicode_idna_to_ascii(parser->idna,
|
||
domain->data, domain->length,
|
||
lxb_url_host_idna_cb, &context,
|
||
LXB_UNICODE_IDNA_FLAG_CHECK_BIDI
|
||
| LXB_UNICODE_IDNA_FLAG_CHECK_JOINERS);
|
||
|
||
lxb_unicode_idna_clean(parser->idna);
|
||
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
if (domain->length == 0) {
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
|
||
p = domain->data;
|
||
end = p + domain->length;
|
||
|
||
while (p < end) {
|
||
c = *p++;
|
||
|
||
if (c < 128 && lxb_url_map_forbidden_domain_cp[c] != 0xff) {
|
||
status = lxb_url_log_append(parser, p - 1,
|
||
LXB_URL_ERROR_TYPE_DOMAIN_INVALID_CODE_POINT);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
}
|
||
|
||
if (lxb_url_is_ipv4(parser, domain->data, end)) {
|
||
status = lxb_url_ipv4_parse(parser, domain->data, end, &ipv4);
|
||
|
||
(void) lexbor_str_destroy(domain, mraw, false);
|
||
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
host->u.ipv4 = ipv4;
|
||
host->type = LXB_URL_HOST_TYPE_IPV4;
|
||
|
||
return status;
|
||
}
|
||
|
||
host->type = LXB_URL_HOST_TYPE_DOMAIN;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_idna_cb(const lxb_char_t *data, size_t len, void *ctx)
|
||
{
|
||
lxb_char_t *p;
|
||
lxb_url_idna_ctx_t *idna = ctx;
|
||
|
||
if (lexbor_str_size(idna->str) < len + 1) {
|
||
p = lexbor_str_realloc(idna->str, idna->mraw, len + 1);
|
||
if (p == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
|
||
memcpy(idna->str->data, data, len);
|
||
|
||
idna->str->length = len;
|
||
idna->str->data[len] = 0x00;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_inline lxb_status_t
|
||
lxb_url_ipv4_append(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, uint64_t *ipv,
|
||
int *out_of, unsigned i)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_error_type_t type;
|
||
|
||
status = lxb_url_ipv4_number_parse(data, end, &ipv[i]);
|
||
|
||
if (status != LXB_STATUS_OK) {
|
||
if (status == LXB_STATUS_ERROR) {
|
||
type = LXB_URL_ERROR_TYPE_IPV4_NON_NUMERIC_PART;
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_log_append(parser, data,
|
||
LXB_URL_ERROR_TYPE_IPV4_NON_DECIMAL_PART);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
if (ipv[i] > 255) {
|
||
status = lxb_url_log_append(parser, data,
|
||
LXB_URL_ERROR_TYPE_IPV4_OUT_OF_RANGE_PART);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
if (*out_of != -1) {
|
||
*out_of = (int) i;
|
||
}
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
|
||
failed:
|
||
|
||
status = lxb_url_log_append(parser, data, type);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv4_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, uint32_t *ipv4)
|
||
{
|
||
int out_of;
|
||
uint32_t ip;
|
||
uint64_t parts[5];
|
||
unsigned i;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p, *begin;
|
||
lxb_url_error_type_t type;
|
||
|
||
static const uint64_t st[] = {0, 256, 65536, 16777216, 4294967296, 0};
|
||
|
||
if (data >= end) {
|
||
return LXB_STATUS_ERROR;
|
||
}
|
||
|
||
i = 0;
|
||
p = data;
|
||
begin = data;
|
||
out_of = -1;
|
||
|
||
/* Let's make the compiler happy. */
|
||
|
||
parts[0] = 0;
|
||
|
||
while (p < end) {
|
||
if (*p == '.') {
|
||
if (i == 4) {
|
||
type = LXB_URL_ERROR_TYPE_IPV4_TOO_MANY_PARTS;
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_ipv4_append(parser, begin, p, parts, &out_of, ++i);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
begin = p + 1;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
if (begin < p) {
|
||
if (i == 4) {
|
||
type = LXB_URL_ERROR_TYPE_IPV4_TOO_MANY_PARTS;
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_ipv4_append(parser, begin, p, parts, &out_of, ++i);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
else if (p[-1] == '.') {
|
||
status = lxb_url_log_append(parser, begin,
|
||
LXB_URL_ERROR_TYPE_IPV4_EMPTY_PART);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
}
|
||
|
||
if (out_of != -1 && out_of != (int) i) {
|
||
return LXB_STATUS_ERROR_OVERFLOW;
|
||
}
|
||
|
||
if (parts[i] >= st[5 - i]) {
|
||
return LXB_STATUS_ERROR_OVERFLOW;
|
||
}
|
||
|
||
ip = (uint32_t) parts[i--];
|
||
|
||
for (unsigned j = 1; j <= i; j++) {
|
||
if (parts[j] > 255) {
|
||
return LXB_STATUS_ERROR_OVERFLOW;
|
||
}
|
||
|
||
ip += parts[j] * st[3 - (j - 1)];
|
||
}
|
||
|
||
*ipv4 = ip;
|
||
|
||
return LXB_STATUS_OK;
|
||
|
||
failed:
|
||
|
||
status = lxb_url_log_append(parser, begin, type);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv4_number_parse(const lxb_char_t *data,
|
||
const lxb_char_t *end, uint64_t *num)
|
||
{
|
||
uint64_t n;
|
||
unsigned r;
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
const lxb_char_t *str_map;
|
||
|
||
if (data >= end) {
|
||
goto failed;
|
||
}
|
||
|
||
r = 10;
|
||
str_map = lexbor_str_res_map_num;
|
||
status = LXB_STATUS_OK;
|
||
|
||
if (data + 1 < end) {
|
||
if (data[0] == '0') {
|
||
if ((data[1] == 'x' || data[1] == 'X')) {
|
||
data += 2;
|
||
r = 16;
|
||
str_map = lexbor_str_res_map_hex;
|
||
}
|
||
else {
|
||
data += 1;
|
||
r = 8;
|
||
str_map = lxb_url_map_num_8;
|
||
}
|
||
|
||
if (data >= end) {
|
||
*num = 0;
|
||
return LXB_STATUS_WARNING;
|
||
}
|
||
|
||
status = LXB_STATUS_WARNING;
|
||
}
|
||
}
|
||
|
||
n = 0;
|
||
|
||
while (data < end) {
|
||
c = *data++;
|
||
|
||
if (str_map[c] == 0xff) {
|
||
goto failed;
|
||
}
|
||
|
||
n = str_map[c] + n * r;
|
||
|
||
if (n > UINT32_MAX) {
|
||
break;
|
||
}
|
||
}
|
||
|
||
*num = n;
|
||
|
||
return status;
|
||
|
||
failed:
|
||
|
||
*num = 0;
|
||
|
||
return LXB_STATUS_ERROR;
|
||
}
|
||
|
||
static bool
|
||
lxb_url_is_ipv4(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end)
|
||
{
|
||
bool isit, first;
|
||
uint64_t num;
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p = end;
|
||
|
||
if (data >= end) {
|
||
return false;
|
||
}
|
||
|
||
isit = true;
|
||
first = true;
|
||
|
||
do {
|
||
p -= 1;
|
||
c = *p;
|
||
|
||
if (lexbor_str_res_map_hex[c] == 0xff) {
|
||
if (c == '.') {
|
||
if (p == end - 1) {
|
||
if (first) {
|
||
end = p;
|
||
first = false;
|
||
continue;
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
p += 1;
|
||
break;
|
||
}
|
||
else if (c != 'x' && c != 'X') {
|
||
return false;
|
||
}
|
||
}
|
||
|
||
if (c < '0' || c > '9') {
|
||
isit = false;
|
||
}
|
||
}
|
||
while (p > data);
|
||
|
||
if (p == end) {
|
||
return false;
|
||
}
|
||
|
||
if (isit) {
|
||
return true;
|
||
}
|
||
|
||
status = lxb_url_ipv4_number_parse(p, end, &num);
|
||
|
||
return status != LXB_STATUS_ERROR;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv6_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, uint16_t *ipv6)
|
||
{
|
||
size_t i, idx;
|
||
uint16_t *piece, *compress, num, swap;
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p;
|
||
lxb_url_error_type_t err_type;
|
||
|
||
piece = ipv6;
|
||
compress = NULL;
|
||
p = data;
|
||
|
||
if (p >= end) {
|
||
goto done;
|
||
}
|
||
|
||
if (*p == ':') {
|
||
if (p + 1 >= end || p[1] != ':') {
|
||
p = (p + 1 >= end) ? p : &p[1];
|
||
|
||
err_type = LXB_URL_ERROR_TYPE_IPV6_INVALID_COMPRESSION;
|
||
goto failed;
|
||
}
|
||
|
||
p += 2;
|
||
|
||
piece += 1;
|
||
compress = piece;
|
||
}
|
||
|
||
while (p < end) {
|
||
if (piece == &ipv6[8]) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV6_TOO_MANY_PIECES;
|
||
goto failed;
|
||
}
|
||
|
||
if (*p == ':') {
|
||
if (compress != NULL) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV6_MULTIPLE_COMPRESSION;
|
||
goto failed;
|
||
}
|
||
|
||
p += 1;
|
||
|
||
piece += 1;
|
||
compress = piece;
|
||
|
||
continue;
|
||
}
|
||
|
||
num = 0;
|
||
i = 0;
|
||
|
||
while (i < 4 && p < end) {
|
||
c = lexbor_str_res_map_hex[*p];
|
||
if (c == 0xff) {
|
||
break;
|
||
}
|
||
|
||
num = num << 4 | c;
|
||
|
||
p += 1;
|
||
i += 1;
|
||
}
|
||
|
||
if (p >= end) {
|
||
*piece++ = num;
|
||
break;
|
||
}
|
||
|
||
if (*p == '.') {
|
||
if (i == 0) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_INVALID_CODE_POINT;
|
||
goto failed;
|
||
}
|
||
|
||
p -= i;
|
||
|
||
if (piece > &ipv6[6]) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_TOO_MANY_PIECES;
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_ipv4_in_ipv6_parse(parser, &p, end, &piece);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
break;
|
||
}
|
||
|
||
if (*p == ':') {
|
||
p += 1;
|
||
|
||
if (p >= end) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV6_INVALID_CODE_POINT;
|
||
goto failed;
|
||
}
|
||
}
|
||
else if (p < end) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV6_INVALID_CODE_POINT;
|
||
goto failed;
|
||
}
|
||
|
||
*piece++ = num;
|
||
}
|
||
|
||
done:
|
||
|
||
if (compress != NULL) {
|
||
num = piece - compress;
|
||
i = 7;
|
||
|
||
while (i != 0 && num > 0) {
|
||
idx = (compress - ipv6) + num - 1;
|
||
swap = ipv6[idx];
|
||
|
||
ipv6[idx] = ipv6[i];
|
||
ipv6[i] = swap;
|
||
|
||
i -= 1;
|
||
num -= 1;
|
||
}
|
||
}
|
||
else if (piece - ipv6 != 8) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV6_TOO_FEW_PIECES;
|
||
goto failed;
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
|
||
failed:
|
||
|
||
status = lxb_url_log_append(parser, p, err_type);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_ipv4_in_ipv6_parse(lxb_url_parser_t *parser, const lxb_char_t **data,
|
||
const lxb_char_t *end, uint16_t **pieces)
|
||
{
|
||
int16_t ipv4;
|
||
uint16_t *piece;
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
unsigned int seen;
|
||
const lxb_char_t *p;
|
||
lxb_url_error_type_t err_type;
|
||
|
||
piece = *pieces;
|
||
seen = 0;
|
||
p = *data;
|
||
|
||
while (p < end) {
|
||
ipv4 = -1;
|
||
|
||
if (seen > 0) {
|
||
if (*p == '.' && seen < 4) {
|
||
p += 1;
|
||
|
||
if (p >= end) {
|
||
break;
|
||
}
|
||
}
|
||
else {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_INVALID_CODE_POINT;
|
||
goto failed;
|
||
}
|
||
}
|
||
|
||
do {
|
||
c = *p;
|
||
|
||
if (c < '0' || c > '9') {
|
||
if (ipv4 == -1) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_INVALID_CODE_POINT;
|
||
goto failed;
|
||
}
|
||
|
||
break;
|
||
}
|
||
|
||
if (ipv4 == -1) {
|
||
ipv4 = lexbor_str_res_map_num[c];
|
||
}
|
||
else if (ipv4 == 0) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_INVALID_CODE_POINT;
|
||
goto failed;
|
||
}
|
||
else {
|
||
ipv4 = ipv4 * 10 + lexbor_str_res_map_num[c];
|
||
}
|
||
|
||
if (ipv4 > 255) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_OUT_OF_RANGE_PART;
|
||
goto failed;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
while (p < end);
|
||
|
||
*piece = *piece * 0x100 + ipv4;
|
||
|
||
seen += 1;
|
||
|
||
if (seen == 2 || seen == 4) {
|
||
piece += 1;
|
||
}
|
||
}
|
||
|
||
if (seen != 4) {
|
||
err_type = LXB_URL_ERROR_TYPE_IPV4_IN_IPV6_TOO_FEW_PARTS;
|
||
goto failed;
|
||
}
|
||
|
||
*pieces = piece;
|
||
*data = p;
|
||
|
||
return LXB_STATUS_OK;
|
||
|
||
failed:
|
||
|
||
status = lxb_url_log_append(parser, p, err_type);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_opaque_host_parse(lxb_url_parser_t *parser, const lxb_char_t *data,
|
||
const lxb_char_t *end, lxb_url_host_t *host,
|
||
lexbor_mraw_t *mraw)
|
||
{
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p;
|
||
|
||
p = data;
|
||
|
||
while (p < end) {
|
||
c = *p++;
|
||
|
||
if (c < 128 && lxb_url_map_forbidden_host_cp[c] != 0xff) {
|
||
status = lxb_url_log_append(parser, p - 1,
|
||
LXB_URL_ERROR_TYPE_HOST_INVALID_CODE_POINT);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return LXB_STATUS_ERROR_UNEXPECTED_DATA;
|
||
}
|
||
}
|
||
|
||
host->type = LXB_URL_HOST_TYPE_OPAQUE;
|
||
|
||
return lxb_url_percent_encode_after_utf_8(data, end, &host->u.opaque, mraw,
|
||
LXB_URL_MAP_C0, false);
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_decode(const lxb_char_t *data, const lxb_char_t *end,
|
||
lexbor_str_t *str, lexbor_mraw_t *mraw,
|
||
lxb_url_host_opt_t *opt)
|
||
{
|
||
lxb_char_t c, *dp;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p;
|
||
|
||
status = lxb_url_str_init(str, mraw, (end - data) + 1);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
p = data;
|
||
dp = str->data;
|
||
|
||
while (p < end) {
|
||
c = *p++;
|
||
|
||
if (c != '%') {
|
||
*dp++ = c;
|
||
continue;
|
||
}
|
||
|
||
if (p + 2 <= end && lexbor_str_res_map_hex[p[0]] != 0xff
|
||
&& lexbor_str_res_map_hex[p[1]] != 0xff)
|
||
{
|
||
c = lexbor_str_res_map_hex[p[0]] << 4 | lexbor_str_res_map_hex[p[1]];
|
||
p += 2;
|
||
|
||
if (c >= 0x80) {
|
||
*opt |= LXB_URL_HOST_OPT_IDNA;
|
||
}
|
||
}
|
||
|
||
*dp++ = c;
|
||
}
|
||
|
||
*dp = '\0';
|
||
str->length = dp - str->data;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_percent_decode_plus(const lxb_char_t *data, const lxb_char_t *end,
|
||
lexbor_str_t *str, lexbor_mraw_t *mraw)
|
||
{
|
||
lxb_char_t c, *dp;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p;
|
||
|
||
status = lxb_url_str_init(str, mraw, (end - data) + 1);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
p = data;
|
||
dp = str->data;
|
||
|
||
while (p < end) {
|
||
c = *p++;
|
||
|
||
if (c != '%') {
|
||
if (c == '+') {
|
||
c = ' ';
|
||
}
|
||
|
||
*dp++ = c;
|
||
continue;
|
||
}
|
||
|
||
if (p + 2 <= end && lexbor_str_res_map_hex[p[0]] != 0xff
|
||
&& lexbor_str_res_map_hex[p[1]] != 0xff)
|
||
{
|
||
c = lexbor_str_res_map_hex[p[0]] << 4 | lexbor_str_res_map_hex[p[1]];
|
||
p += 2;
|
||
}
|
||
|
||
*dp++ = c;
|
||
}
|
||
|
||
*dp = '\0';
|
||
str->length = dp - str->data;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
void
|
||
lxb_url_erase(lxb_url_t *url)
|
||
{
|
||
if (url == NULL) {
|
||
return;
|
||
}
|
||
|
||
if (url->scheme.name.data != NULL) {
|
||
lexbor_str_destroy(&url->scheme.name, url->mraw, false);
|
||
}
|
||
|
||
switch (url->host.type) {
|
||
case LXB_URL_HOST_TYPE_DOMAIN:
|
||
case LXB_URL_HOST_TYPE_OPAQUE:
|
||
lexbor_str_destroy(&url->host.u.domain, url->mraw, false);
|
||
break;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
|
||
if (url->username.data != NULL) {
|
||
lexbor_str_destroy(&url->username, url->mraw, false);
|
||
}
|
||
|
||
if (url->password.data != NULL) {
|
||
lexbor_str_destroy(&url->password, url->mraw, false);
|
||
}
|
||
|
||
if (url->path.str.data != NULL) {
|
||
lexbor_str_destroy(&url->path.str, url->mraw, false);
|
||
}
|
||
|
||
if (url->query.data != NULL) {
|
||
lexbor_str_destroy(&url->query, url->mraw, false);
|
||
}
|
||
|
||
if (url->fragment.data != NULL) {
|
||
lexbor_str_destroy(&url->fragment, url->mraw, false);
|
||
}
|
||
}
|
||
|
||
/*
 * Destroy a URL object: release its strings with lxb_url_erase() and then
 * free the object itself from its own memory pool.
 *
 * @return NULL for a NULL argument, otherwise whatever lexbor_mraw_free()
 *         returns.
 */
lxb_url_t *
lxb_url_destroy(lxb_url_t *url)
{
    lexbor_mraw_t *mraw;

    if (url == NULL) {
        return NULL;
    }

    mraw = url->mraw;

    lxb_url_erase(url);

    return lexbor_mraw_free(mraw, url);
}
|
||
|
||
void
|
||
lxb_url_memory_destroy(lxb_url_t *url)
|
||
{
|
||
(void) lexbor_mraw_destroy(url->mraw, true);
|
||
}
|
||
|
||
static const lxb_char_t *
|
||
lxb_url_path_part_by_index(const lxb_url_t *url, size_t index,
|
||
size_t *out_length)
|
||
{
|
||
size_t i, length, begin;
|
||
const lxb_char_t *data;
|
||
|
||
data = url->path.str.data;
|
||
length = url->path.str.length;
|
||
|
||
i = 0;
|
||
begin = 0;
|
||
|
||
while (i < length) {
|
||
if (data[i] == '/') {
|
||
if (index == 0) {
|
||
*out_length = i - begin;
|
||
return &data[begin];
|
||
}
|
||
|
||
index -= 1;
|
||
begin = i + 1;
|
||
}
|
||
|
||
i += 1;
|
||
}
|
||
|
||
*out_length = 0;
|
||
|
||
return NULL;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_href_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *href, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lexbor_mraw_t *origin_mraw;
|
||
lxb_url_parser_t self_parser;
|
||
const lxb_char_t tmp[1] = "";
|
||
|
||
if (href == NULL) {
|
||
href = tmp;
|
||
length = 0;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
origin_mraw = parser->mraw;
|
||
parser->mraw = url->mraw;
|
||
|
||
status = lxb_url_parse_basic_h(parser, NULL, NULL, href, length,
|
||
LXB_URL_STATE__UNDEF, LXB_ENCODING_AUTO);
|
||
|
||
parser->mraw = origin_mraw;
|
||
|
||
if (status != LXB_STATUS_OK) {
|
||
parser->url = lxb_url_destroy(parser->url);
|
||
}
|
||
else {
|
||
lxb_url_erase(url);
|
||
|
||
*url = *parser->url;
|
||
}
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_protocol_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *protocol, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_parser_t self_parser;
|
||
const lxb_char_t tmp[1] = "";
|
||
|
||
if (protocol == NULL) {
|
||
protocol = tmp;
|
||
length = 0;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, NULL, protocol, length,
|
||
LXB_URL_STATE_SCHEME_START_STATE,
|
||
LXB_ENCODING_AUTO);
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_username_set(lxb_url_t *url,
|
||
const lxb_char_t *username, size_t length)
|
||
{
|
||
if (lxb_url_cannot_have_user_pass_port(url)) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
url->username.length = 0;
|
||
|
||
if (username == NULL || length == 0) {
|
||
lexbor_str_destroy(&url->username, url->mraw, false);
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
return lxb_url_percent_encode_after_utf_8(username, username + length,
|
||
&url->username, url->mraw,
|
||
LXB_URL_MAP_USERINFO, false);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_password_set(lxb_url_t *url,
|
||
const lxb_char_t *password, size_t length)
|
||
{
|
||
if (lxb_url_cannot_have_user_pass_port(url)) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
url->password.length = 0;
|
||
|
||
if (password == NULL || length == 0) {
|
||
lexbor_str_destroy(&url->password, url->mraw, false);
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
return lxb_url_percent_encode_after_utf_8(password, password + length,
|
||
&url->password, url->mraw,
|
||
LXB_URL_MAP_USERINFO, false);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_host_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *host, size_t length)
|
||
{
|
||
return lxb_url_host_set_h(url, parser, host, length,
|
||
LXB_URL_STATE_HOST_STATE);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_hostname_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *hostname, size_t length)
|
||
{
|
||
return lxb_url_host_set_h(url, parser, hostname, length,
|
||
LXB_URL_STATE_HOSTNAME_STATE);
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_host_set_h(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *host, size_t length,
|
||
lxb_url_state_t override_state)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_host_t old;
|
||
lxb_url_parser_t self_parser;
|
||
const lxb_char_t tmp[1] = "";
|
||
|
||
/* If this’s URL has an opaque path, then return. */
|
||
if (url->path.opaque) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (host == NULL) {
|
||
host = tmp;
|
||
length = 0;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
old = url->host;
|
||
|
||
memset(&url->host, 0x00, sizeof(lxb_url_host_t));
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, NULL, host, length,
|
||
override_state, LXB_ENCODING_AUTO);
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
if (status != LXB_STATUS_OK) {
|
||
lxb_url_host_destroy(&url->host, url->mraw);
|
||
url->host = old;
|
||
}
|
||
else {
|
||
if (override_state == LXB_URL_STATE_HOSTNAME_STATE
|
||
&& url->host.type == LXB_URL_HOST_TYPE__UNDEF)
|
||
{
|
||
url->host = old;
|
||
}
|
||
else {
|
||
lxb_url_host_destroy(&old, url->mraw);
|
||
}
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_port_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *port, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_parser_t self_parser;
|
||
|
||
if (lxb_url_cannot_have_user_pass_port(url)) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (port == NULL || length == 0) {
|
||
url->port = 0;
|
||
url->has_port = false;
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, NULL, port, length,
|
||
LXB_URL_STATE_PORT_STATE, LXB_ENCODING_AUTO);
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_pathname_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *pathname, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_parser_t self_parser;
|
||
const lxb_char_t tmp[1] = "";
|
||
|
||
if (url->path.opaque) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (pathname == NULL) {
|
||
pathname = tmp;
|
||
length = 0;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
url->path.length = 0;
|
||
url->path.str.length = 0;
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, NULL, pathname, length,
|
||
LXB_URL_STATE_PATH_START_STATE,
|
||
LXB_ENCODING_AUTO);
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_search_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *search, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_parser_t self_parser;
|
||
|
||
lexbor_str_destroy(&url->query, url->mraw, false);
|
||
|
||
url->query.length = 0;
|
||
|
||
if (search == NULL || length == 0) {
|
||
lexbor_str_destroy(&url->query, url->mraw, false);
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (*search == '?') {
|
||
search += 1;
|
||
length -= 1;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, NULL, search, length,
|
||
LXB_URL_STATE_QUERY_STATE,
|
||
LXB_ENCODING_AUTO);
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_api_hash_set(lxb_url_t *url, lxb_url_parser_t *parser,
|
||
const lxb_char_t *hash, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_parser_t self_parser;
|
||
|
||
lexbor_str_destroy(&url->fragment, url->mraw, false);
|
||
|
||
url->fragment.length = 0;
|
||
|
||
if (hash == NULL || length == 0) {
|
||
lexbor_str_destroy(&url->fragment, url->mraw, false);
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (*hash == '#') {
|
||
hash += 1;
|
||
length -= 1;
|
||
}
|
||
|
||
if (parser == NULL) {
|
||
parser = &self_parser;
|
||
|
||
parser->log = NULL;
|
||
parser->idna = NULL;
|
||
parser->buffer = NULL;
|
||
}
|
||
|
||
status = lxb_url_parse_basic_h(parser, url, NULL, hash, length,
|
||
LXB_URL_STATE_FRAGMENT_STATE,
|
||
LXB_ENCODING_AUTO);
|
||
|
||
if (parser == &self_parser) {
|
||
lxb_url_parser_destroy(parser, false);
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_serialize_body(lxb_unicode_idna_t *idna, const lxb_url_t *url, lexbor_serialize_cb_f cb,
|
||
void *ctx, bool exclude_fragment)
|
||
{
|
||
lxb_status_t status;
|
||
const lexbor_str_t *str;
|
||
lxb_char_t *p;
|
||
lxb_char_t buf[LXB_URL_BUFFER_NUM_SIZE];
|
||
|
||
static const lexbor_str_t colon_str = lexbor_str(":");
|
||
static const lexbor_str_t dsol_str = lexbor_str("//");
|
||
static const lexbor_str_t at_str = lexbor_str("@");
|
||
static const lexbor_str_t dt_str = lexbor_str("/.");
|
||
static const lexbor_str_t qm_str = lexbor_str("?");
|
||
static const lexbor_str_t hs_str = lexbor_str("#");
|
||
|
||
/* Scheme. */
|
||
|
||
str = &url->scheme.name;
|
||
|
||
lexbor_serialize_write(cb, str->data, str->length, ctx, status);
|
||
lexbor_serialize_write(cb, colon_str.data, colon_str.length, ctx, status);
|
||
|
||
/* Host. */
|
||
|
||
if (url->host.type != LXB_URL_HOST_TYPE__UNDEF) {
|
||
lexbor_serialize_write(cb, dsol_str.data, dsol_str.length, ctx, status);
|
||
|
||
if (lxb_url_includes_credentials(url)) {
|
||
lexbor_serialize_write(cb, url->username.data, url->username.length,
|
||
ctx, status);
|
||
|
||
if (url->password.length != 0) {
|
||
lexbor_serialize_write(cb, colon_str.data, colon_str.length,
|
||
ctx, status);
|
||
lexbor_serialize_write(cb, url->password.data,
|
||
url->password.length, ctx, status);
|
||
}
|
||
|
||
lexbor_serialize_write(cb, at_str.data, at_str.length, ctx, status);
|
||
}
|
||
|
||
if (idna != NULL) {
|
||
status = lxb_url_serialize_host_unicode(idna, &url->host, cb, ctx);
|
||
} else {
|
||
status = lxb_url_serialize_host(&url->host, cb, ctx);
|
||
}
|
||
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
if (url->has_port) {
|
||
lexbor_serialize_write(cb, colon_str.data, colon_str.length,
|
||
ctx, status);
|
||
|
||
p = buf + lexbor_conv_int64_to_data((int64_t) url->port,
|
||
buf, LXB_URL_BUFFER_NUM_SIZE);
|
||
|
||
lexbor_serialize_write(cb, buf, p - buf, ctx, status);
|
||
}
|
||
}
|
||
else if (!url->path.opaque && url->path.str.length > 1) {
|
||
str = &url->path.str;
|
||
|
||
if (str->data[0] == '/' && str->data[1] == '/') {
|
||
lexbor_serialize_write(cb, dt_str.data, dt_str.length, ctx, status);
|
||
}
|
||
}
|
||
|
||
status = lxb_url_serialize_path(&url->path, cb, ctx);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
if (url->query.data != NULL) {
|
||
lexbor_serialize_write(cb, qm_str.data, qm_str.length,
|
||
ctx, status);
|
||
lexbor_serialize_write(cb, url->query.data, url->query.length,
|
||
ctx, status);
|
||
}
|
||
|
||
if (!exclude_fragment && url->fragment.data != NULL) {
|
||
lexbor_serialize_write(cb, hs_str.data, hs_str.length,
|
||
ctx, status);
|
||
lexbor_serialize_write(cb, url->fragment.data, url->fragment.length,
|
||
ctx, status);
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize(const lxb_url_t *url, lexbor_serialize_cb_f cb, void *ctx,
|
||
bool exclude_fragment)
|
||
{
|
||
return lxb_url_serialize_body(NULL, url, cb, ctx, exclude_fragment);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_idna(lxb_unicode_idna_t *idna, const lxb_url_t *url, lexbor_serialize_cb_f cb,
|
||
void *ctx, bool exclude_fragment)
|
||
{
|
||
return lxb_url_serialize_body(idna, url, cb, ctx, exclude_fragment);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_scheme(const lxb_url_t *url,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
const lexbor_str_t *str = &url->scheme.name;
|
||
|
||
return cb(str->data, str->length, ctx);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_username(const lxb_url_t *url,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
if (lxb_url_includes_credentials(url)) {
|
||
return cb(url->username.data, url->username.length, ctx);
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_password(const lxb_url_t *url,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
if (lxb_url_includes_credentials(url)) {
|
||
return cb(url->password.data, url->password.length, ctx);
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_host(const lxb_url_host_t *host,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
lxb_status_t status;
|
||
|
||
static const lexbor_str_t ob_str = lexbor_str("[");
|
||
static const lexbor_str_t cb_str = lexbor_str("]");
|
||
|
||
switch (host->type) {
|
||
case LXB_URL_HOST_TYPE_DOMAIN:
|
||
case LXB_URL_HOST_TYPE_OPAQUE:
|
||
return cb(host->u.domain.data, host->u.domain.length, ctx);
|
||
|
||
case LXB_URL_HOST_TYPE_IPV4:
|
||
return lxb_url_serialize_host_ipv4(host->u.ipv4, cb, ctx);
|
||
|
||
case LXB_URL_HOST_TYPE_IPV6:
|
||
lexbor_serialize_write(cb, ob_str.data, ob_str.length,
|
||
ctx, status);
|
||
|
||
status = lxb_url_serialize_host_ipv6(host->u.ipv6, cb, ctx);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
return cb(cb_str.data, cb_str.length, ctx);
|
||
|
||
default:
|
||
break;
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_host_unicode(lxb_unicode_idna_t *idna,
|
||
const lxb_url_host_t *host,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
const lexbor_str_t *str;
|
||
|
||
if (host->type != LXB_URL_HOST_TYPE_DOMAIN
|
||
&& host->type != LXB_URL_HOST_TYPE_OPAQUE)
|
||
{
|
||
return lxb_url_serialize_host(host, cb, ctx);
|
||
}
|
||
|
||
str = &host->u.domain;
|
||
|
||
return lxb_unicode_idna_to_unicode(idna, str->data, str->length, cb, ctx,
|
||
LXB_UNICODE_IDNA_FLAG_CHECK_BIDI
|
||
| LXB_UNICODE_IDNA_FLAG_CHECK_JOINERS);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_host_ipv4(uint32_t ipv4,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
lxb_char_t *p, *end;
|
||
lxb_char_t buf[LXB_URL_BUFFER_NUM_SIZE];
|
||
|
||
p = buf;
|
||
end = p + LXB_URL_BUFFER_NUM_SIZE;
|
||
|
||
p += lexbor_conv_int64_to_data((uint8_t) (ipv4 >> 24), p, end - p);
|
||
*p++ = '.';
|
||
p += lexbor_conv_int64_to_data((uint8_t) (ipv4 >> 16), p, end - p);
|
||
*p++ = '.';
|
||
p += lexbor_conv_int64_to_data((uint8_t) (ipv4 >> 8), p, end - p);
|
||
*p++ = '.';
|
||
p += lexbor_conv_int64_to_data((uint8_t) (ipv4), p, end - p);
|
||
|
||
|
||
/* By specification. */
|
||
/*
|
||
uint32_t n;
|
||
|
||
for (size_t i = 0; i < 4; i++) {
|
||
n = ipv4 % 256;
|
||
|
||
p += lexbor_conv_int64_to_data((int64_t) n, p, end - p);
|
||
|
||
if (i != 3) {
|
||
*p++ = '.';
|
||
}
|
||
|
||
ipv4 = (uint32_t) floor((double) ipv4 / 256.0f);
|
||
}
|
||
*/
|
||
|
||
*p = '\0';
|
||
|
||
return cb(buf, p - buf, ctx);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_host_ipv6(const uint16_t *ipv6,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
bool ignore;
|
||
size_t i, count, tmp_count;
|
||
const uint16_t *compress, *tmp_compress;
|
||
lxb_char_t *p, *end;
|
||
lxb_char_t buf[LXB_URL_BUFFER_NUM_SIZE];
|
||
|
||
p = buf;
|
||
end = p + LXB_URL_BUFFER_NUM_SIZE;
|
||
|
||
count = 0;
|
||
tmp_count = (size_t) (ipv6[0] == 0);
|
||
compress = NULL;
|
||
tmp_compress = ipv6;
|
||
|
||
for (i = 1; i < 8; i++) {
|
||
if (ipv6[i] == 0) {
|
||
if (ipv6[i - 1] == 0) {
|
||
tmp_count += 1;
|
||
}
|
||
else {
|
||
tmp_count = 1;
|
||
tmp_compress = &ipv6[i];
|
||
}
|
||
}
|
||
else if (tmp_count > count) {
|
||
compress = tmp_compress;
|
||
count = tmp_count;
|
||
}
|
||
}
|
||
|
||
if (tmp_count > count) {
|
||
compress = tmp_compress;
|
||
count = tmp_count;
|
||
}
|
||
|
||
if (compress == &ipv6[1] && ipv6[0] == 0) {
|
||
compress = ipv6;
|
||
}
|
||
|
||
if (count < 2) {
|
||
compress = NULL;
|
||
}
|
||
|
||
ignore = false;
|
||
|
||
for (i = 0; i < 8; i++) {
|
||
if (ignore) {
|
||
if (ipv6[i] == 0) {
|
||
continue;
|
||
}
|
||
|
||
ignore = false;
|
||
}
|
||
|
||
if (compress == &ipv6[i]) {
|
||
*p++ = ':';
|
||
|
||
if (i == 0) {
|
||
*p++ = ':';
|
||
}
|
||
|
||
i += count - 1;
|
||
ignore = true;
|
||
|
||
continue;
|
||
}
|
||
|
||
p += lexbor_conv_dec_to_hex(ipv6[i], p, end - p, false);
|
||
|
||
if (i != 7) {
|
||
*p++ = ':';
|
||
}
|
||
}
|
||
|
||
*p = '\0';
|
||
|
||
return cb(buf, p - buf, ctx);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_port(const lxb_url_t *url,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
lxb_char_t *p;
|
||
lxb_char_t buf[LXB_URL_BUFFER_NUM_SIZE];
|
||
|
||
if (url->has_port) {
|
||
p = buf + lexbor_conv_int64_to_data((int64_t) url->port,
|
||
buf, LXB_URL_BUFFER_NUM_SIZE);
|
||
return cb(buf, p - buf, ctx);
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_path(const lxb_url_path_t *path,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
return cb(path->str.data, path->str.length, ctx);
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_query(const lxb_url_t *url,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
if (url->query.data != NULL) {
|
||
return cb(url->query.data, url->query.length, ctx);
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_serialize_fragment(const lxb_url_t *url,
|
||
lexbor_serialize_cb_f cb, void *ctx)
|
||
{
|
||
if (url->query.data != NULL) {
|
||
return cb(url->fragment.data, url->fragment.length, ctx);
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
lxb_url_t *
|
||
lxb_url_clone(lexbor_mraw_t *mraw, const lxb_url_t *url)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_t *new_url;
|
||
|
||
new_url = lexbor_mraw_calloc(mraw, sizeof(lxb_url_t));
|
||
if (new_url == NULL) {
|
||
return NULL;
|
||
}
|
||
|
||
new_url->mraw = mraw;
|
||
|
||
status = lxb_url_scheme_copy(&url->scheme, &new_url->scheme, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_username_copy(&url->username, &new_url->username, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_password_copy(&url->password, &new_url->password, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_host_copy(&url->host, &new_url->host, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
new_url->port = url->port;
|
||
new_url->has_port = url->has_port;
|
||
|
||
status = lxb_url_path_copy(url, new_url);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_query_copy(&url->query, &new_url->query, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_str_copy(&url->fragment, &new_url->fragment, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
return new_url;
|
||
|
||
failed:
|
||
|
||
lxb_url_destroy(new_url);
|
||
|
||
return NULL;
|
||
}
|
||
|
||
lxb_url_search_params_t *
|
||
lxb_url_search_params_init(lexbor_mraw_t *mraw,
|
||
const lxb_char_t *query, size_t length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_search_params_t *sp;
|
||
|
||
sp = lexbor_mraw_calloc(mraw, sizeof(lxb_url_search_params_t));
|
||
if (sp == NULL) {
|
||
return NULL;
|
||
}
|
||
|
||
sp->mraw = mraw;
|
||
sp->length = 0;
|
||
|
||
status = lxb_url_search_params_parse(sp, query, length);
|
||
if (status != LXB_STATUS_OK) {
|
||
return lxb_url_search_params_destroy(sp);
|
||
}
|
||
|
||
return sp;
|
||
}
|
||
|
||
/*
 * Allocate a new search entry, link it after `last`, bump the entry count
 * and decode [query, p) into its name ('+' becomes a space, percent
 * escapes are decoded).
 *
 * @param status  Receives LXB_STATUS_ERROR_MEMORY_ALLOCATION when the entry
 *                cannot be allocated, otherwise the status of the name
 *                decoding.
 *
 * @return The new entry (even when decoding the name failed), or NULL when
 *         the allocation failed.
 */
lxb_inline lxb_url_search_entry_t *
lxb_url_search_params_entry(lxb_url_search_params_t *search_params,
                            lxb_url_search_entry_t *last, lexbor_mraw_t *mraw,
                            const lxb_char_t *query, const lxb_char_t *p,
                            lxb_status_t *status)
{
    lxb_url_search_entry_t *node;

    node = lexbor_mraw_calloc(mraw, sizeof(lxb_url_search_entry_t));
    if (node == NULL) {
        *status = LXB_STATUS_ERROR_MEMORY_ALLOCATION;
        return NULL;
    }

    node->prev = last;
    node->next = NULL;
    last->next = node;

    search_params->length += 1;

    *status = lxb_url_percent_decode_plus(query, p, &node->name, mraw);

    return node;
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_search_params_parse(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *query, size_t length)
|
||
{
|
||
lxb_char_t c;
|
||
lxb_status_t status;
|
||
const lxb_char_t *p, *end;
|
||
lexbor_mraw_t *mraw;
|
||
lxb_url_search_entry_t first, *last, *entry;
|
||
|
||
if (query == NULL || length == 0) {
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
if (*query == '?') {
|
||
query += 1;
|
||
length -= 1;
|
||
}
|
||
|
||
p = query;
|
||
end = p + length;
|
||
mraw = search_params->mraw;
|
||
last = &first;
|
||
last->next = NULL;
|
||
last->prev = NULL;
|
||
|
||
while (p < end) {
|
||
c = *p++;
|
||
|
||
if (c == '=') {
|
||
entry = lxb_url_search_params_entry(search_params, last, mraw,
|
||
query, p - 1, &status);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
last = entry;
|
||
query = p;
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (c == '&') {
|
||
break;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
status = lxb_url_percent_decode_plus(query, p,
|
||
&entry->value, mraw);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
p += 1;
|
||
query = p;
|
||
}
|
||
else if (c == '&') {
|
||
entry = lxb_url_search_params_entry(search_params, last, mraw,
|
||
query, p - 1, &status);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
last = entry;
|
||
|
||
lexbor_str_init(&entry->value, mraw, 0);
|
||
if (entry->value.data == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
query = p;
|
||
}
|
||
}
|
||
|
||
if (query < p) {
|
||
entry = lxb_url_search_params_entry(search_params, last, mraw,
|
||
query, p, &status);
|
||
if (status != LXB_STATUS_OK) {
|
||
return status;
|
||
}
|
||
|
||
lexbor_str_init(&entry->value, mraw, 0);
|
||
if (entry->value.data == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
|
||
search_params->first = first.next;
|
||
|
||
if (first.next != NULL) {
|
||
search_params->last = last;
|
||
search_params->first->prev = NULL;
|
||
}
|
||
else {
|
||
search_params->last = NULL;
|
||
}
|
||
|
||
return LXB_STATUS_OK;
|
||
}
|
||
|
||
/*
 * Release every entry (name, value and the entry itself) and then the
 * search-params object, all through the object's own mraw.
 *
 * Accepts NULL (returns NULL).  Otherwise returns whatever
 * lexbor_mraw_free() returns for the object.
 */
lxb_url_search_params_t *
lxb_url_search_params_destroy(lxb_url_search_params_t *search_params)
{
    lexbor_mraw_t *mraw;
    lxb_url_search_entry_t *cur, *following;

    if (search_params == NULL) {
        return NULL;
    }

    mraw = search_params->mraw;

    for (cur = search_params->first; cur != NULL; cur = following) {
        /* Read the link before the node is freed. */
        following = cur->next;

        lexbor_str_destroy(&cur->name, mraw, false);
        lexbor_str_destroy(&cur->value, mraw, false);
        lexbor_mraw_free(mraw, cur);
    }

    return lexbor_mraw_free(mraw, search_params);
}
|
||
|
||
static void
|
||
lxb_url_search_params_entry_destroy(lxb_url_search_params_t *search_params,
|
||
lxb_url_search_entry_t *entry)
|
||
{
|
||
lexbor_mraw_t *mraw = search_params->mraw;
|
||
|
||
lexbor_str_destroy(&entry->name, mraw, false);
|
||
lexbor_str_destroy(&entry->value, mraw, false);
|
||
lexbor_mraw_free(mraw, entry);
|
||
}
|
||
|
||
static lxb_status_t
|
||
lxb_url_search_params_to_str(lexbor_str_t *str, lexbor_mraw_t *mraw,
|
||
const lxb_char_t *data, size_t length)
|
||
{
|
||
if (data == NULL || length == 0) {
|
||
lexbor_str_init(str, mraw, 0);
|
||
}
|
||
else {
|
||
lexbor_str_init_append(str, mraw, data, length);
|
||
}
|
||
|
||
return str->data != NULL ? LXB_STATUS_OK
|
||
: LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
|
||
lxb_url_search_entry_t *
|
||
lxb_url_search_params_append(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t name_length,
|
||
const lxb_char_t *value, size_t value_length)
|
||
{
|
||
lxb_status_t status;
|
||
lxb_url_search_entry_t *entry;
|
||
|
||
entry = lexbor_mraw_calloc(search_params->mraw,
|
||
sizeof(lxb_url_search_entry_t));
|
||
if (entry == NULL) {
|
||
return NULL;
|
||
}
|
||
|
||
status = lxb_url_search_params_to_str(&entry->name, search_params->mraw,
|
||
name, name_length);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_search_params_to_str(&entry->value, search_params->mraw,
|
||
value, value_length);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
if (search_params->first == NULL) {
|
||
search_params->first = entry;
|
||
}
|
||
|
||
if (search_params->last != NULL) {
|
||
search_params->last->next = entry;
|
||
}
|
||
|
||
entry->prev = search_params->last;
|
||
entry->next = NULL;
|
||
search_params->last = entry;
|
||
|
||
search_params->length += 1;
|
||
|
||
return entry;
|
||
|
||
failed:
|
||
|
||
if (entry->name.data != NULL) {
|
||
lexbor_mraw_free(search_params->mraw, entry->name.data);
|
||
}
|
||
|
||
if (entry->value.data != NULL) {
|
||
lexbor_mraw_free(search_params->mraw, entry->value.data);
|
||
}
|
||
|
||
lexbor_mraw_free(search_params->mraw, entry);
|
||
|
||
return NULL;
|
||
}
|
||
|
||
static lexbor_action_t
|
||
lxb_url_search_params_delete_cb(lxb_url_search_params_t *sp,
|
||
lxb_url_search_entry_t *entry, void *ctx)
|
||
{
|
||
if (sp->first == entry) {
|
||
sp->first = entry->next;
|
||
}
|
||
|
||
if (sp->last == entry) {
|
||
sp->last = entry->prev;
|
||
}
|
||
|
||
if (entry->next != NULL) {
|
||
entry->next->prev = entry->prev;
|
||
}
|
||
|
||
if (entry->prev != NULL) {
|
||
entry->prev->next = entry->next;
|
||
}
|
||
|
||
lxb_url_search_params_entry_destroy(sp, entry);
|
||
|
||
sp->length -= 1;
|
||
|
||
return LEXBOR_ACTION_OK;
|
||
}
|
||
|
||
void
|
||
lxb_url_search_params_delete(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t name_length,
|
||
const lxb_char_t *value, size_t value_length)
|
||
{
|
||
lxb_url_search_params_match(search_params, name, name_length,
|
||
value, value_length,
|
||
lxb_url_search_params_delete_cb, NULL);
|
||
}
|
||
|
||
lxb_url_search_entry_t *
|
||
lxb_url_search_params_get_entry(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t length)
|
||
{
|
||
lexbor_str_t *str;
|
||
lxb_url_search_entry_t *entry;
|
||
|
||
entry = search_params->first;
|
||
|
||
while (entry != NULL) {
|
||
str = &entry->name;
|
||
|
||
if (str->length == length
|
||
&& memcmp(str->data, name, length) == 0)
|
||
{
|
||
return entry;
|
||
}
|
||
|
||
entry = entry->next;
|
||
}
|
||
|
||
return NULL;
|
||
}
|
||
|
||
lexbor_str_t *
|
||
lxb_url_search_params_get(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t length)
|
||
{
|
||
lxb_url_search_entry_t *entry;
|
||
|
||
entry = lxb_url_search_params_get_entry(search_params, name, length);
|
||
if (entry == NULL) {
|
||
return NULL;
|
||
}
|
||
|
||
return &entry->value;
|
||
}
|
||
|
||
static lexbor_action_t
|
||
lxb_url_search_params_get_all_cb(lxb_url_search_params_t *sp,
|
||
lxb_url_search_entry_t *entry, void *context)
|
||
{
|
||
lxb_url_search_params_ctx_t *ctx = context;
|
||
|
||
if (ctx->out_length >= ctx->out_size) {
|
||
return LEXBOR_ACTION_STOP;
|
||
}
|
||
|
||
ctx->out_buf[ctx->out_length] = &entry->value;
|
||
ctx->out_length += 1;
|
||
|
||
return LEXBOR_ACTION_OK;
|
||
}
|
||
|
||
size_t
|
||
lxb_url_search_params_get_all(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t length,
|
||
lexbor_str_t **out_buf, size_t out_size)
|
||
{
|
||
lxb_url_search_params_ctx_t ctx;
|
||
|
||
ctx.out_buf = out_buf;
|
||
ctx.out_length = 0;
|
||
ctx.out_size = out_size;
|
||
|
||
lxb_url_search_params_match(search_params, name, length, NULL, 0,
|
||
lxb_url_search_params_get_all_cb, &ctx);
|
||
return ctx.out_length;
|
||
}
|
||
|
||
static lexbor_action_t
|
||
lxb_url_search_params_get_count_cb(lxb_url_search_params_t *sp,
|
||
lxb_url_search_entry_t *entry, void *context)
|
||
{
|
||
size_t *count = context;
|
||
|
||
*count += 1;
|
||
|
||
return LEXBOR_ACTION_OK;
|
||
}
|
||
|
||
size_t
|
||
lxb_url_search_params_get_count(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t length)
|
||
{
|
||
size_t count = 0;
|
||
|
||
lxb_url_search_params_match(search_params, name, length, NULL, 0,
|
||
lxb_url_search_params_get_count_cb, &count);
|
||
return count;
|
||
}
|
||
|
||
lxb_url_search_entry_t *
|
||
lxb_url_search_params_match_entry(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t name_length,
|
||
const lxb_char_t *value, size_t value_length,
|
||
lxb_url_search_entry_t *entry)
|
||
{
|
||
lexbor_str_t *str;
|
||
|
||
if (entry == NULL) {
|
||
entry = search_params->first;
|
||
}
|
||
|
||
while (entry != NULL) {
|
||
str = &entry->name;
|
||
|
||
if (str->length == name_length
|
||
&& memcmp(str->data, name, name_length) == 0)
|
||
{
|
||
if (value != NULL) {
|
||
str = &entry->value;
|
||
|
||
if (str->length == value_length
|
||
&& memcmp(str->data, value, value_length) == 0)
|
||
{
|
||
return entry;
|
||
}
|
||
}
|
||
else {
|
||
return entry;
|
||
}
|
||
}
|
||
|
||
entry = entry->next;
|
||
}
|
||
|
||
return NULL;
|
||
}
|
||
|
||
void
|
||
lxb_url_search_params_match(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t name_length,
|
||
const lxb_char_t *value, size_t value_length,
|
||
lxb_url_search_params_match_f cb, void *ctx)
|
||
{
|
||
lexbor_str_t *str;
|
||
lexbor_action_t action;
|
||
lxb_url_search_entry_t *entry, *next;
|
||
|
||
entry = search_params->first;
|
||
|
||
while (entry != NULL) {
|
||
str = &entry->name;
|
||
next = entry->next;
|
||
|
||
if (str->length == name_length
|
||
&& memcmp(str->data, name, name_length) == 0)
|
||
{
|
||
if (value != NULL) {
|
||
str = &entry->value;
|
||
|
||
if (str->length == value_length
|
||
&& memcmp(str->data, value, value_length) == 0)
|
||
{
|
||
action = cb(search_params, entry, ctx);
|
||
if (action == LEXBOR_ACTION_STOP) {
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
else {
|
||
action = cb(search_params, entry, ctx);
|
||
if (action == LEXBOR_ACTION_STOP) {
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
|
||
entry = next;
|
||
}
|
||
}
|
||
|
||
static lexbor_action_t
|
||
lxb_url_search_params_has_cb(lxb_url_search_params_t *sp,
|
||
lxb_url_search_entry_t *entry, void *ctx)
|
||
{
|
||
bool *is = ctx;
|
||
|
||
*is = true;
|
||
|
||
return LEXBOR_ACTION_STOP;
|
||
}
|
||
|
||
bool
|
||
lxb_url_search_params_has(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t name_length,
|
||
const lxb_char_t *value, size_t value_length)
|
||
{
|
||
bool is = false;
|
||
|
||
lxb_url_search_params_match(search_params, name, name_length,
|
||
value, value_length,
|
||
lxb_url_search_params_has_cb, &is);
|
||
return is;
|
||
}
|
||
|
||
lxb_url_search_entry_t *
|
||
lxb_url_search_params_set(lxb_url_search_params_t *search_params,
|
||
const lxb_char_t *name, size_t name_length,
|
||
const lxb_char_t *value, size_t value_length)
|
||
{
|
||
bool changed;
|
||
lxb_status_t status;
|
||
lexbor_str_t *str, str_name, str_value;
|
||
lexbor_mraw_t *mraw;
|
||
lxb_url_search_entry_t *entry, *next, *root;
|
||
|
||
mraw = search_params->mraw;
|
||
entry = search_params->first;
|
||
changed = false;
|
||
root = NULL;
|
||
|
||
while (entry != NULL) {
|
||
str = &entry->name;
|
||
next = entry->next;
|
||
|
||
if (str->length == name_length
|
||
&& memcmp(str->data, name, name_length) == 0)
|
||
{
|
||
if (changed) {
|
||
lxb_url_search_params_delete_cb(search_params, entry, NULL);
|
||
entry = next;
|
||
continue;
|
||
}
|
||
|
||
str_name = entry->name;
|
||
str_value = entry->value;
|
||
|
||
status = lxb_url_search_params_to_str(&entry->name, mraw,
|
||
name, name_length);
|
||
if (status != LXB_STATUS_OK) {
|
||
goto failed;
|
||
}
|
||
|
||
status = lxb_url_search_params_to_str(&entry->value, mraw,
|
||
value, value_length);
|
||
if (status != LXB_STATUS_OK) {
|
||
lexbor_str_destroy(&entry->name, mraw, false);
|
||
goto failed;
|
||
}
|
||
|
||
lexbor_str_destroy(&str_name, mraw, false);
|
||
lexbor_str_destroy(&str_value, mraw, false);
|
||
|
||
changed = true;
|
||
root = entry;
|
||
}
|
||
|
||
entry = next;
|
||
}
|
||
|
||
if (!changed) {
|
||
return lxb_url_search_params_append(search_params, name, name_length,
|
||
value, value_length);
|
||
}
|
||
|
||
return root;
|
||
|
||
failed:
|
||
|
||
entry->name = str_name;
|
||
entry->value = str_value;
|
||
|
||
return NULL;
|
||
}
|
||
|
||
lxb_inline int
|
||
lxb_url_search_params_compare(lxb_url_search_entry_t *first,
|
||
lxb_url_search_entry_t *second)
|
||
{
|
||
return strcmp((const char *) first->name.data,
|
||
(const char *) second->name.data);
|
||
}
|
||
|
||
void
|
||
lxb_url_search_params_sort(lxb_url_search_params_t *search_params)
|
||
{
|
||
lxb_url_search_entry_t *root, *new_root, *node, *current;
|
||
|
||
root = search_params->first;
|
||
|
||
if (root == NULL) {
|
||
return;
|
||
}
|
||
|
||
node = root;
|
||
root = root->next;
|
||
|
||
node->next = NULL;
|
||
new_root = node;
|
||
|
||
while (root != NULL) {
|
||
node = root;
|
||
root = root->next;
|
||
|
||
if (lxb_url_search_params_compare(node, new_root) < 0) {
|
||
node->next = new_root;
|
||
new_root->prev = node;
|
||
new_root = node;
|
||
}
|
||
else {
|
||
current = new_root;
|
||
|
||
while (current->next != NULL
|
||
&& lxb_url_search_params_compare(node, current->next) >= 0)
|
||
{
|
||
current = current->next;
|
||
}
|
||
|
||
node->next = current->next;
|
||
|
||
if (current->next != NULL) {
|
||
current->next->prev = node;
|
||
}
|
||
|
||
current->next = node;
|
||
node->prev = current;
|
||
}
|
||
}
|
||
|
||
new_root->prev = NULL;
|
||
|
||
search_params->first = new_root;
|
||
search_params->last = node->next != NULL ? node->next : node;
|
||
}
|
||
|
||
static size_t
|
||
lxb_url_search_params_length(const lexbor_str_t *str)
|
||
{
|
||
size_t length;
|
||
const lxb_char_t *p, *end;
|
||
|
||
p = str->data;
|
||
end = p + str->length;
|
||
length = str->length;
|
||
|
||
while (p < end) {
|
||
if (lxb_url_map[*p++] & LXB_URL_MAP_X_WWW_FORM) {
|
||
length += 2;
|
||
}
|
||
}
|
||
|
||
return length;
|
||
}
|
||
|
||
static lxb_char_t *
|
||
lxb_url_search_params_to_buf(const lexbor_str_t *str, lxb_char_t *buf)
|
||
{
|
||
lxb_char_t c;
|
||
const lxb_char_t *p, *end;
|
||
|
||
p = str->data;
|
||
end = p + str->length;
|
||
|
||
while (p < end) {
|
||
c = *p;
|
||
|
||
if (c == ' ') {
|
||
*buf++ = '+';
|
||
}
|
||
else if (lxb_url_map[c] & LXB_URL_MAP_X_WWW_FORM) {
|
||
*buf++ = '%';
|
||
*buf++ = lexbor_str_res_char_to_two_hex_value[c][0];
|
||
*buf++ = lexbor_str_res_char_to_two_hex_value[c][1];
|
||
}
|
||
else {
|
||
*buf++ = c;
|
||
}
|
||
|
||
p += 1;
|
||
}
|
||
|
||
return buf;
|
||
}
|
||
|
||
lxb_status_t
|
||
lxb_url_search_params_serialize(lxb_url_search_params_t *search_params,
|
||
lexbor_callback_f cb, void *ctx)
|
||
{
|
||
size_t length;
|
||
lxb_status_t status;
|
||
lxb_char_t *p, *begin;
|
||
lxb_url_search_entry_t *entry;
|
||
lxb_char_t buf[1024];
|
||
|
||
entry = search_params->first;
|
||
|
||
if (entry == NULL) {
|
||
buf[0] = '\0';
|
||
return cb(buf, 0, ctx);
|
||
}
|
||
|
||
/* Calc length. */
|
||
|
||
length = 0;
|
||
|
||
while (entry != NULL) {
|
||
length += lxb_url_search_params_length(&entry->name);
|
||
length += lxb_url_search_params_length(&entry->value);
|
||
length += 2; /* = and & */
|
||
|
||
entry = entry->next;
|
||
}
|
||
|
||
if (length < sizeof(buf)) {
|
||
p = buf;
|
||
}
|
||
else {
|
||
p = lexbor_malloc(length);
|
||
if (p == NULL) {
|
||
return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
|
||
}
|
||
}
|
||
|
||
/* To buffer. */
|
||
|
||
begin = p;
|
||
entry = search_params->first;
|
||
|
||
p = lxb_url_search_params_to_buf(&entry->name, p);
|
||
*p++ = '=';
|
||
p = lxb_url_search_params_to_buf(&entry->value, p);
|
||
|
||
entry = entry->next;
|
||
|
||
while (entry != NULL) {
|
||
*p++ = '&';
|
||
|
||
p = lxb_url_search_params_to_buf(&entry->name, p);
|
||
*p++ = '=';
|
||
p = lxb_url_search_params_to_buf(&entry->value, p);
|
||
|
||
entry = entry->next;
|
||
}
|
||
|
||
*p = '\0';
|
||
|
||
status = cb(begin, p - begin, ctx);
|
||
|
||
if (buf != begin) {
|
||
lexbor_free(begin);
|
||
}
|
||
|
||
return status;
|
||
}
|