Mercurial > dovecot > original-hg > dovecot-2.2
changeset 16747:29ceb7126b91
lib-http: Added support for parsing request target URLs.
author | Stephan Bosch <stephan@rename-it.nl> |
---|---|
date | Sun, 15 Sep 2013 03:47:29 +0300 |
parents | bbe4a469e276 |
children | eeaa68773f73 |
files | src/lib-http/http-request.h src/lib-http/http-url.c src/lib-http/http-url.h |
diffstat | 3 files changed, 245 insertions(+), 61 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-http/http-request.h Sun Sep 15 03:46:25 2013 +0300 +++ b/src/lib-http/http-request.h Sun Sep 15 03:47:29 2013 +0300 @@ -3,6 +3,20 @@ #include "http-header.h" +struct http_url; + +enum http_request_target_format { + HTTP_REQUEST_TARGET_FORMAT_ORIGIN = 0, + HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE, + HTTP_REQUEST_TARGET_FORMAT_AUTHORITY, + HTTP_REQUEST_TARGET_FORMAT_ASTERISK +}; + +struct http_request_target { + enum http_request_target_format format; + struct http_url *url; +}; + struct http_request { const char *method;
--- a/src/lib-http/http-url.c Sun Sep 15 03:46:25 2013 +0300 +++ b/src/lib-http/http-url.c Sun Sep 15 03:47:29 2013 +0300 @@ -5,7 +5,9 @@ #include "strfuncs.h" #include "net.h" #include "uri-util.h" + #include "http-url.h" +#include "http-request.h" /* * HTTP URL parser @@ -19,57 +21,20 @@ struct http_url *url; struct http_url *base; - unsigned int relative:1; + enum http_request_target_format req_format; + + unsigned int relative:1; + unsigned int request_target:1; }; -static bool http_url_do_parse(struct http_url_parser *url_parser) +static bool http_url_parse_authority(struct http_url_parser *url_parser) { struct uri_parser *parser = &url_parser->parser; - struct http_url *url = url_parser->url, *base = url_parser->base; + struct http_url *url = url_parser->url; struct uri_authority auth; - const char *const *path; - bool relative = TRUE, have_path = FALSE; - int path_relative; - const char *part; int ret; - /* RFC 2616 - Hypertext Transfer Protocol, Section 3.2: - * - * http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] - * - * Translated to RFC 3986: - * - * absolute-http-URL = "http:" "//" host [ ":" port ] path-absolute - * ["?" query] [ "#" fragment ] - * relative-http-ref = relative-http-part [ "?" query ] [ "#" fragment ] - * relative-http-part = "//" host [ ":" port ] path-abempty - * / path-absolute - * / path-noscheme - * / path-empty - */ - - /* "http:" / "https:" */ - if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) { - const char *scheme; - - if ((ret = uri_parse_scheme(parser, &scheme)) < 0) - return FALSE; - else if (ret > 0) { - if (strcasecmp(scheme, "https") == 0) { - if (url != NULL) - url->have_ssl = TRUE; - } else if (strcasecmp(scheme, "http") != 0) { - parser->error = "Not an HTTP URL"; - return FALSE; - } - relative = FALSE; - } - } else { - relative = FALSE; - } - - /* "//" host [ ":" port ] */ - if ((ret = uri_parse_slashslash_authority(parser, &auth)) < 0) + if ((ret = uri_parse_authority(parser, &auth)) < 0) return FALSE; if (ret > 0) { if (auth.enc_userinfo != NULL) { @@ -87,19 +52,153 @@ parser->error = "HTTP URL does not allow `userinfo@' part"; return FALSE; } - relative = FALSE; - } else if (!relative) { - parser->error = "Absolute HTTP URL requires `//' after `http:'"; - return FALSE; } - - if (ret > 0 && url != NULL) { + if (url != NULL) { url->host_name = p_strdup(parser->pool, auth.host_literal); url->host_ip = auth.host_ip; url->have_host_ip = auth.have_host_ip; url->port = auth.port; url->have_port = auth.have_port; } + return TRUE; +} + +static bool http_url_parse_authority_form(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + + if (!http_url_parse_authority(url_parser)) + return FALSE; + if (parser->cur != parser->end) + return FALSE; + url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY; + return TRUE; +} + +static bool http_url_do_parse(struct http_url_parser *url_parser) +{ + struct uri_parser *parser = &url_parser->parser; + struct http_url *url = url_parser->url, *base = url_parser->base; + const char *const *path; + bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE, + have_path = FALSE; + int path_relative; + const char *part; + int ret; + + /* + http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-23 + Appendix C: + + http-URI = "http://" authority path-abempty [ "?" query ] + [ "#" fragment ] + https-URI = "https://" authority path-abempty [ "?" query ] + [ "#" fragment ] + partial-URI = relative-part [ "?" query ] + + request-target = origin-form / absolute-form / authority-form / + asterisk-form + + origin-form = absolute-path [ "?" query ] + absolute-form = absolute-URI + authority-form = authority + asterisk-form = "*" + ; Not parsed here + + absolute-path = 1*( "/" segment ) + + http://tools.ietf.org/html/rfc3986 + Appendix A: (implemented in uri-util.h) + + absolute-URI = scheme ":" hier-part [ "?" query ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + authority = [ userinfo "@" ] host [ ":" port ] + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + query = *( pchar / "/" / "?" ) + fragment = *( pchar / "/" / "?" ) + */ + + /* "http:" / "https:" */ + if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0) { + const char *scheme; + + if ((ret = uri_parse_scheme(parser, &scheme)) < 0) + return FALSE; + else if (ret > 0) { + if (strcasecmp(scheme, "https") == 0) { + if (url != NULL) + url->have_ssl = TRUE; + } else if (strcasecmp(scheme, "http") != 0) { + if (url_parser->request_target) { + /* valid as non-HTTP scheme, but also try to parse as authority */ + parser->cur = parser->begin; + if (!http_url_parse_authority_form(url_parser)) { + url_parser->url = NULL; /* indicate non-http-url */ + url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE; + } + return TRUE; + } + parser->error = "Not an HTTP URL"; + return FALSE; + } + relative = FALSE; + have_scheme = TRUE; + } + } else { + relative = FALSE; + have_scheme = TRUE; + } + + /* "//" authority ; or + * ["//"] authority ; when parsing a request target + */ + if (parser->cur < parser->end && parser->cur[0] == '/') { + if (parser->cur+1 < parser->end && parser->cur[1] == '/') { + parser->cur += 2; + relative = FALSE; + have_authority = TRUE; + } else { + /* start of absolute-path */ + } + } else if (url_parser->request_target && !have_scheme) { + if (!http_url_parse_authority_form(url_parser)) { + /* not non-HTTP scheme and invalid as authority-form */ + parser->error = "Request target is invalid"; + return FALSE; + } + return TRUE; + } + + if (have_scheme && !have_authority) { + parser->error = "Absolute HTTP URL requires `//' after `http:'"; + return FALSE; + } + + if (have_authority) { + if (!http_url_parse_authority(url_parser)) + return FALSE; + } /* path-abempty / path-absolute / path-noscheme / path-empty */ if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0) @@ -108,14 +207,15 @@ /* Relative URLs are only valid when we have a base URL */ if (relative) { if (base == NULL) { - parser->error = "Relative URL not allowed"; + parser->error = "Relative HTTP URL not allowed"; return FALSE; - } else if (url != NULL) { - url->host_name = p_strdup_empty(parser->pool, base->host_name); + } else if (!have_authority && url != NULL) { + url->host_name = p_strdup(parser->pool, base->host_name); url->host_ip = base->host_ip; url->have_host_ip = base->have_host_ip; url->port = base->port; url->have_port = base->have_port; + url->have_ssl = base->have_ssl; } url_parser->relative = TRUE; @@ -152,7 +252,7 @@ if (url != NULL && pend > pbegin) str_append_n(fullpath, pbegin, pend-pbegin); } - + /* append relative path */ while (*path != NULL) { if (!uri_data_decode(parser, *path, NULL, &part)) @@ -161,7 +261,7 @@ if (url != NULL) { str_append_c(fullpath, '/'); str_append(fullpath, part); - } + } path++; } @@ -170,7 +270,7 @@ } else if (relative && url != NULL) { url->path = p_strdup(parser->pool, base->path); } - + /* [ "?" query ] */ if ((ret = uri_parse_query(parser, &part)) < 0) return FALSE; @@ -180,13 +280,13 @@ if (url != NULL) url->enc_query = p_strdup(parser->pool, part); } else if (relative && !have_path && url != NULL) { - url->enc_query = p_strdup(parser->pool, base->enc_query); + url->enc_query = p_strdup(parser->pool, base->enc_query); } /* [ "#" fragment ] */ - if ((ret = uri_parse_fragment(parser, &part)) < 0) + if ((ret = uri_parse_fragment(parser, &part)) < 0) return FALSE; - if (ret > 0) { + if (ret > 0) { if ((url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) { parser->error = "URL fragment not allowed for HTTP URL in this context"; return FALSE; @@ -196,13 +296,16 @@ if (url != NULL) url->enc_fragment = p_strdup(parser->pool, part); } else if (relative && !have_path && url != NULL) { - url->enc_fragment = p_strdup(parser->pool, base->enc_fragment); + url->enc_fragment = p_strdup(parser->pool, base->enc_fragment); } if (parser->cur != parser->end) { - parser->error = "HTTP URL contains invalid character."; + parser->error = "HTTP URL contains invalid character"; return FALSE; } + + if (have_scheme) + url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE; return TRUE; } @@ -233,6 +336,67 @@ return 0; } +int http_url_request_target_parse(const char *request_target, + const char *host_header, pool_t pool, struct http_request_target *target, + const char **error_r) +{ + struct http_url_parser url_parser; + struct uri_parser *parser; + struct uri_authority host; + struct http_url base; + + memset(&url_parser, '\0', sizeof(url_parser)); + parser = &url_parser.parser; + uri_parser_init(parser, pool, host_header); + + if (uri_parse_authority(parser, &host) <= 0) { + parser->error = t_strdup_printf("Invalid Host header: %s", parser->error); + return -1; + } + + if (parser->cur != parser->end || host.enc_userinfo != NULL) { + parser->error = "Invalid Host header: Contains invalid character"; + return -1; + } + + if (request_target[0] == '*' && request_target[1] == '\0') { + struct http_url *url = p_new(pool, struct http_url, 1); + url->host_name = p_strdup(pool, host.host_literal); + url->host_ip = host.host_ip; + url->port = host.port; + url->have_host_ip = host.have_host_ip; + url->have_port = host.have_port; + target->url = url; + target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK; + return 0; + } + + memset(&base, 0, sizeof(base)); + base.host_name = host.host_literal; + base.host_ip = host.host_ip; + base.port = host.port; + base.have_host_ip = host.have_host_ip; + base.have_port = host.have_port; + + memset(parser, '\0', sizeof(*parser)); + uri_parser_init(parser, pool, request_target); + + url_parser.url = p_new(pool, struct http_url, 1); + url_parser.request_target = TRUE; + url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN; + url_parser.base = &base; + url_parser.flags = 0; + + if (!http_url_do_parse(&url_parser)) { + *error_r = url_parser.parser.error; + return -1; + } + + target->url = url_parser.url; + target->format = url_parser.req_format; + return 0; +} + /* * HTTP URL construction */
--- a/src/lib-http/http-url.h Sun Sep 15 03:46:25 2013 +0300 +++ b/src/lib-http/http-url.h Sun Sep 15 03:47:29 2013 +0300 @@ -3,6 +3,8 @@ #include "net.h" +struct http_request_target; + struct http_url { /* server */ const char *host_name; @@ -39,6 +41,10 @@ enum http_url_parse_flags flags, pool_t pool, struct http_url **url_r, const char **error_r); +int http_url_request_target_parse(const char *request_target, + const char *host_header, pool_t pool, + struct http_request_target *target, const char **error_r); + /* * HTTP URL construction */