From 9befad6fc2abe124c1eab80e3dc33e044d3d5db2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 17 Nov 2016 23:18:05 +0100 Subject: [PATCH 1/3] Cleanup parse_url() gotos Simplify some unnecessarily complicated code. In particular the length updates are unnecessary (length is only used at the very start) and we're goto'ing around a bit too much. --- ext/standard/url.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/ext/standard/url.c b/ext/standard/url.c index 962718459ae..94b9f20a16d 100644 --- a/ext/standard/url.c +++ b/ext/standard/url.c @@ -122,7 +122,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) if (*(e + 1) == '\0') { /* only scheme is available */ ret->scheme = estrndup(s, (e - s)); php_replace_controlchars_ex(ret->scheme, (e - s)); - goto end; + return ret; } /* @@ -145,8 +145,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) ret->scheme = estrndup(s, (e-s)); php_replace_controlchars_ex(ret->scheme, (e - s)); - length -= ++e - s; - s = e; + s = e + 1; goto just_path; } else { ret->scheme = estrndup(s, (e-s)); @@ -162,18 +161,12 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) if (*(e + 5) == ':') { s = e + 4; } - goto nohost; + goto just_path; } } } else { - if (!strncasecmp("file", ret->scheme, sizeof("file"))) { - s = e + 1; - goto nohost; - } else { - length -= ++e - s; - s = e; - goto just_path; - } + s = e + 1; + goto just_path; } } } else if (e) { /* no scheme; starts with colon: look for port */ @@ -212,9 +205,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) } else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ s += 2; } else { - just_path: - ue = s + length; - goto nohost; + goto just_path; } e = s + strcspn(s, "/?#"); @@ -296,7 +287,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) s = e; - nohost: + just_path: if ((p = memchr(s, '?', (ue - s)))) { pp = memchr(s, '#', (ue - s)); @@ -343,7 +334,6 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) ret->path = estrndup(s, (ue-s)); php_replace_controlchars_ex(ret->path, (ue - s)); } -end: return ret; } /* }}} */ From f0f68c72744a42a1c376a832dd01c012c0929e88 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 18 Nov 2016 17:00:56 +0100 Subject: [PATCH 2/3] Cleanup parse_url() query/fragment handling The query/fragment handling was pretty convoluted, with many parts being duplicated. Simplify by checking for fragment, then for query, then for path. --- ext/standard/url.c | 65 ++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 42 deletions(-) diff --git a/ext/standard/url.c b/ext/standard/url.c index 94b9f20a16d..aa1b0312fec 100644 --- a/ext/standard/url.c +++ b/ext/standard/url.c @@ -289,51 +289,32 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) just_path: - if ((p = memchr(s, '?', (ue - s)))) { - pp = memchr(s, '#', (ue - s)); - - if (pp && pp < p) { - if (pp - s) { - ret->path = estrndup(s, (pp-s)); - php_replace_controlchars_ex(ret->path, (pp - s)); - } - p = pp; - goto label_parse; - } - - if (p - s) { - ret->path = estrndup(s, (p-s)); - php_replace_controlchars_ex(ret->path, (p - s)); - } - - if (pp) { - if (pp - ++p) { - ret->query = estrndup(p, (pp-p)); - php_replace_controlchars_ex(ret->query, (pp - p)); - } - p = pp; - goto label_parse; - } else if (++p - ue) { - ret->query = estrndup(p, (ue-p)); - php_replace_controlchars_ex(ret->query, (ue - p)); - } - } else if ((p = memchr(s, '#', (ue - s)))) { - if (p - s) { - ret->path = estrndup(s, (p-s)); - php_replace_controlchars_ex(ret->path, (p - s)); - } - - label_parse: + e = ue; + p = memchr(s, '#', (e - s)); + if (p) { p++; - - if (ue - p) { - ret->fragment = estrndup(p, (ue-p)); - php_replace_controlchars_ex(ret->fragment, (ue - p)); + if (p < e) { + ret->fragment = estrndup(p, (e - p)); + php_replace_controlchars_ex(ret->fragment, (e - p)); } - } else { - ret->path = estrndup(s, (ue-s)); - php_replace_controlchars_ex(ret->path, (ue - s)); + e = p-1; } + + p = memchr(s, '?', (e - s)); + if (p) { + p++; + if (p < e) { + ret->query = estrndup(p, (e - p)); + php_replace_controlchars_ex(ret->query, (e - p)); + } + e = p-1; + } + + if (s < e || s == ue) { + ret->path = estrndup(s, (e - s)); + php_replace_controlchars_ex(ret->path, (e - s)); + } + return ret; } /* }}} */ From 2d19c92fc2f14aa97db9094eaa0b67d1c3b12409 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 18 Nov 2016 16:41:13 +0100 Subject: [PATCH 3/3] Make php_url_parse_ex() respect length argument This should fix all out-of-bounds reads that could previously occur if the string passed to php_url_parse_ex() is not NUL terminated. --- ext/standard/url.c | 48 +++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/ext/standard/url.c b/ext/standard/url.c index aa1b0312fec..6ecace53e55 100644 --- a/ext/standard/url.c +++ b/ext/standard/url.c @@ -104,7 +104,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) ue = s + length; /* parse scheme */ - if ((e = memchr(s, ':', length)) && (e - s)) { + if ((e = memchr(s, ':', length)) && e != s) { /* validate scheme */ p = s; while (p < e) { @@ -119,7 +119,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) p++; } - if (*(e + 1) == '\0') { /* only scheme is available */ + if (e + 1 == ue) { /* only scheme is available */ ret->scheme = estrndup(s, (e - s)); php_replace_controlchars_ex(ret->scheme, (e - s)); return ret; @@ -134,11 +134,11 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) * correctly parse things like a.com:80 */ p = e + 1; - while (isdigit(*p)) { + while (p < ue && isdigit(*p)) { p++; } - if ((*p == '\0' || *p == '/') && (p - e) < 7) { + if ((p == ue || *p == '/') && (p - e) < 7) { goto parse_port; } @@ -151,14 +151,14 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) ret->scheme = estrndup(s, (e-s)); php_replace_controlchars_ex(ret->scheme, (e - s)); - if (*(e+2) == '/') { + if (e + 2 < ue && *(e + 2) == '/') { s = e + 3; if (!strncasecmp("file", ret->scheme, sizeof("file"))) { - if (*(e + 3) == '/') { + if (e + 3 < ue && *(e + 3) == '/') { /* support windows drive letters as in: file:///c:/somedir/file.txt */ - if (*(e + 5) == ':') { + if (e + 5 < ue && *(e + 5) == ':') { s = e + 4; } goto just_path; @@ -174,18 +174,18 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) p = e + 1; pp = p; - while (pp-p < 6 && isdigit(*pp)) { + while (pp < ue && pp - p < 6 && isdigit(*pp)) { pp++; } - if (pp - p > 0 && pp - p < 6 && (*pp == '/' || *pp == '\0')) { + if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) { long port; memcpy(port_buf, p, (pp - p)); port_buf[pp - p] = '\0'; port = strtol(port_buf, NULL, 10); if (port > 0 && port <= 65535) { ret->port = (unsigned short) port; - if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ + if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ s += 2; } } else { @@ -193,22 +193,32 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) efree(ret); return NULL; } - } else if (p == pp && *pp == '\0') { + } else if (p == pp && pp == ue) { STR_FREE(ret->scheme); efree(ret); return NULL; - } else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ + } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ s += 2; } else { goto just_path; } - } else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ + } else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */ s += 2; } else { goto just_path; } - e = s + strcspn(s, "/?#"); + /* Binary-safe strcspn(s, "/?#") */ + e = ue; + if ((p = memchr(s, '/', e - s))) { + e = p; + } + if ((p = memchr(s, '?', e - s))) { + e = p; + } + if ((p = memchr(s, '#', e - s))) { + e = p; + } /* check for login and password */ if ((p = zend_memrchr(s, '@', (e-s)))) { @@ -228,18 +238,16 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length) } /* check for port */ - if (*s == '[' && *(e-1) == ']') { + if (s < ue && *s == '[' && *(e-1) == ']') { /* Short circuit portscan, we're dealing with an IPv6 embedded address */ - p = s; + p = NULL; } else { - /* memrchr is a GNU specific extension - Emulate for wide compatibility */ - for(p = e; p >= s && *p != ':'; p--); + p = zend_memrchr(s, ':', (e-s)); } - if (p >= s && *p == ':') { + if (p) { if (!ret->port) { p++; if (e-p > 5) { /* port cannot be longer then 5 characters */