view src/lib-http/http-request-parser.c @ 19822:554ef83e133c

lib-http: Always set *_obj=NULL first in deinit functions (mainly for consistency) This doesn't usually matter, but in case deinit calls some callback they may be trying to access the partially destroyed object.
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Mon, 22 Feb 2016 21:17:26 +0200
parents 2ca3b2969ad8
children 59437f8764c6
line wrap: on
line source

/* Copyright (c) 2013-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "istream.h"
#include "http-url.h"
#include "http-parser.h"
#include "http-message-parser.h"
#include "http-request-parser.h"

#define HTTP_REQUEST_PARSER_MAX_METHOD_LENGTH 32

enum http_request_parser_state {
	HTTP_REQUEST_PARSE_STATE_INIT = 0,
	HTTP_REQUEST_PARSE_STATE_SKIP_LINE,
	HTTP_REQUEST_PARSE_STATE_METHOD,
	HTTP_REQUEST_PARSE_STATE_SP1,
	HTTP_REQUEST_PARSE_STATE_TARGET,
	HTTP_REQUEST_PARSE_STATE_SP2,
	HTTP_REQUEST_PARSE_STATE_VERSION,
	HTTP_REQUEST_PARSE_STATE_CR,
	HTTP_REQUEST_PARSE_STATE_LF,
	HTTP_REQUEST_PARSE_STATE_HEADER
};

struct http_request_parser {
	struct http_message_parser parser;
	enum http_request_parser_state state;

	uoff_t max_target_length;

	enum http_request_parse_error error_code;

	const char *request_method;
	const char *request_target;

	unsigned int skipping_line:1;
};

struct http_request_parser *
http_request_parser_init(struct istream *input,
	const struct http_request_limits *limits)
{
	struct http_request_parser *parser;
	struct http_header_limits hdr_limits;
	uoff_t max_payload_size;

	parser = i_new(struct http_request_parser, 1);
	if (limits != NULL) {
		hdr_limits = limits->header;
		max_payload_size = limits->max_payload_size;
	} else {
		memset(&hdr_limits, 0, sizeof(hdr_limits));
		max_payload_size = 0;
	}

	/* substitute default limits */
	if (parser->max_target_length == 0)
		parser->max_target_length =	HTTP_REQUEST_DEFAULT_MAX_TARGET_LENGTH;
	if (hdr_limits.max_size == 0)
		hdr_limits.max_size =	HTTP_REQUEST_DEFAULT_MAX_HEADER_SIZE;
	if (hdr_limits.max_field_size == 0)
		hdr_limits.max_field_size =	HTTP_REQUEST_DEFAULT_MAX_HEADER_FIELD_SIZE;
	if (hdr_limits.max_fields == 0)
		hdr_limits.max_fields =	HTTP_REQUEST_DEFAULT_MAX_HEADER_FIELDS;
	if (max_payload_size == 0)
		max_payload_size = HTTP_REQUEST_DEFAULT_MAX_PAYLOAD_SIZE;

	http_message_parser_init
		(&parser->parser, input, &hdr_limits, max_payload_size, FALSE);
	return parser;
}

void http_request_parser_deinit(struct http_request_parser **_parser)
{
	struct http_request_parser *parser = *_parser;

	*_parser = NULL;

	http_message_parser_deinit(&parser->parser);
	i_free(parser);
}

static void
http_request_parser_restart(struct http_request_parser *parser,
	pool_t pool)
{
	http_message_parser_restart(&parser->parser, pool);
	parser->request_method = NULL;
	parser->request_target = NULL;
}

static int http_request_parse_method(struct http_request_parser *parser)
{
	const unsigned char *p = parser->parser.cur;

	/* method         = token
	 */
	while (p < parser->parser.end && http_char_is_token(*p))
		p++;

	if ((p - parser->parser.cur) > HTTP_REQUEST_PARSER_MAX_METHOD_LENGTH) {
		parser->error_code = HTTP_REQUEST_PARSE_ERROR_METHOD_TOO_LONG;
		parser->parser.error = "HTTP request method is too long";
		return -1;
	}
	if (p == parser->parser.end)
		return 0;
	parser->request_method =
		p_strdup_until(parser->parser.msg.pool, parser->parser.cur, p);
	parser->parser.cur = p;
	return 1;
}

static int http_request_parse_target(struct http_request_parser *parser)
{
	struct http_message_parser *_parser = &parser->parser;
	const unsigned char *p = parser->parser.cur;

	/* We'll just parse anything up to the first SP or a control char.
	   We could also implement workarounds for buggy HTTP clients and
	   parse anything up to the HTTP-version and return 301 with the
	   target properly encoded (FIXME). */
	while (p < _parser->end && *p > ' ')
		p++;

	/* target is too long when explicit limit is exceeded or when input buffer
	   runs out of space */
	/* FIXME: put limit on full request line rather than target and method
	   separately */
	/* FIXME: is it wise to keep target in stream buffer? It can become very
	   large for some applications, increasing the stream buffer size */
	if ((uoff_t)(p - _parser->cur) > parser->max_target_length ||
		(p == _parser->end && ((uoff_t)(p - _parser->cur) >=
			i_stream_get_max_buffer_size(_parser->input)))) {
		parser->error_code = HTTP_REQUEST_PARSE_ERROR_TARGET_TOO_LONG;
		parser->parser.error = "HTTP request target is too long";
		return -1;
	}
	if (p == _parser->end)
		return 0;
	parser->request_target = p_strdup_until(_parser->msg.pool, _parser->cur, p);
	parser->parser.cur = p;
	return 1;
}

static inline const char *_chr_sanitize(unsigned char c)
{
	if (c >= 0x20 && c < 0x7F)
		return t_strdup_printf("`%c'", c);
	if (c == 0x0a)
		return "<LF>";
	if (c == 0x0d)
		return "<CR>";
	return t_strdup_printf("<0x%02x>", c);
}

static int http_request_parse(struct http_request_parser *parser,
			      pool_t pool)
{
	struct http_message_parser *_parser = &parser->parser;
	int ret;

	/* RFC 7230, Section 3.1.1: Request Line

	   request-line  = method SP request-target SP HTTP-version CRLF
	   method        = token
	 */
	for (;;) {
		switch (parser->state) {
		case HTTP_REQUEST_PARSE_STATE_INIT:
			http_request_parser_restart(parser, pool);
			parser->state = HTTP_REQUEST_PARSE_STATE_SKIP_LINE;
			if (_parser->cur == _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_SKIP_LINE:
			if (*_parser->cur == '\r' || *_parser->cur == '\n') {
				if (parser->skipping_line) {
					/* second extra CRLF; not allowed */
					parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
					_parser->error = "Empty request line";
					return -1;
				}
				/* HTTP/1.0 client sent one extra CRLF after body.
				   ignore it. */
				parser->skipping_line = TRUE;
				parser->state = HTTP_REQUEST_PARSE_STATE_CR;
				break;
			}
			parser->state = HTTP_REQUEST_PARSE_STATE_METHOD;
			parser->skipping_line = FALSE;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_METHOD:
			if ((ret=http_request_parse_method(parser)) <= 0)
				return ret;
			parser->state = HTTP_REQUEST_PARSE_STATE_SP1;
			if (_parser->cur == _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_SP1:
			if (*_parser->cur != ' ') {
				parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
				_parser->error = t_strdup_printf
					("Unexpected character %s in request method",
						_chr_sanitize(*_parser->cur));
				return -1;
			}
			_parser->cur++;
			parser->state = HTTP_REQUEST_PARSE_STATE_TARGET;
			if (_parser->cur >= _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_TARGET:
			if ((ret=http_request_parse_target(parser)) <= 0)
				return ret;
			parser->state = HTTP_REQUEST_PARSE_STATE_SP2;
			if (_parser->cur == _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_SP2:
			if (*_parser->cur != ' ') {
				parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
				_parser->error = t_strdup_printf
					("Unexpected character %s in request target",
						_chr_sanitize(*_parser->cur));
				return -1;
			}
			_parser->cur++;
			parser->state = HTTP_REQUEST_PARSE_STATE_VERSION;
			if (_parser->cur >= _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_VERSION:
			if ((ret=http_message_parse_version(&parser->parser)) <= 0) {
				if (ret < 0) {
					parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
					_parser->error = "Invalid HTTP version in request";
				}
				return ret;
			}
			parser->state = HTTP_REQUEST_PARSE_STATE_CR;
			if (_parser->cur == _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_CR:
			if (*_parser->cur == '\r')
				_parser->cur++;
			parser->state = HTTP_REQUEST_PARSE_STATE_LF;
			if (_parser->cur == _parser->end)
				return 0;
			/* fall through */
		case HTTP_REQUEST_PARSE_STATE_LF:
			if (*_parser->cur != '\n') {
				parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
				_parser->error = t_strdup_printf
					("Unexpected character %s at end of request line",
						_chr_sanitize(*_parser->cur));
				return -1;
			}
			_parser->cur++;
			if (!parser->skipping_line) {
				parser->state = HTTP_REQUEST_PARSE_STATE_HEADER;
				return 1;
			}
			parser->state = HTTP_REQUEST_PARSE_STATE_INIT;
			break;
 		case HTTP_REQUEST_PARSE_STATE_HEADER:
 		default:
 			i_unreached();
		}
	}

	i_unreached();
	return -1;
}

static int http_request_parse_request_line(struct http_request_parser *parser,
					   pool_t pool)
{
	struct http_message_parser *_parser = &parser->parser;
	const unsigned char *begin;
	size_t size, old_bytes = 0;
	int ret;

	while ((ret = i_stream_read_data(_parser->input, &begin, &size,
					 old_bytes)) > 0) {
		_parser->cur = begin;
		_parser->end = _parser->cur + size;

		if ((ret = http_request_parse(parser, pool)) < 0)
			return -1;

		i_stream_skip(_parser->input, _parser->cur - begin);
		if (ret > 0)
			return 1;
		old_bytes = i_stream_get_data_size(_parser->input);
	}

	if (ret == -2) {
		parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
		_parser->error = "HTTP request line is too long";
		return -1;
	}
	if (ret < 0) {
		if (_parser->input->eof &&
	    parser->state == HTTP_REQUEST_PARSE_STATE_INIT)
			return 0;
		parser->error_code = HTTP_REQUEST_PARSE_ERROR_BROKEN_STREAM;
		_parser->error = "Broken stream";
		return -1;
	}
	return 0;
}

static inline enum http_request_parse_error
http_request_parser_message_error(struct http_request_parser *parser)
{
	switch (parser->parser.error_code) {
	case HTTP_MESSAGE_PARSE_ERROR_BROKEN_STREAM:
		return HTTP_REQUEST_PARSE_ERROR_BROKEN_STREAM;
	case HTTP_MESSAGE_PARSE_ERROR_BAD_MESSAGE:
		return HTTP_REQUEST_PARSE_ERROR_BAD_REQUEST;
	case HTTP_MESSAGE_PARSE_ERROR_NOT_IMPLEMENTED:
		return HTTP_REQUEST_PARSE_ERROR_NOT_IMPLEMENTED;
	case HTTP_MESSAGE_PARSE_ERROR_PAYLOAD_TOO_LARGE:
		return HTTP_REQUEST_PARSE_ERROR_PAYLOAD_TOO_LARGE;
	case HTTP_MESSAGE_PARSE_ERROR_BROKEN_MESSAGE:
		return HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
	default:
		break;
	}
	i_unreached();
	return HTTP_REQUEST_PARSE_ERROR_BROKEN_REQUEST;
}

bool http_request_parser_pending_payload(
	struct http_request_parser *parser)
{
	if (parser->parser.payload == NULL)
		return FALSE;
	return i_stream_have_bytes_left(parser->parser.payload);
}

static int
http_request_parse_expect_header(struct http_request_parser *parser,
	struct http_request *request, const struct http_header_field *hdr)
{
	struct http_message_parser *_parser = &parser->parser;
	struct http_parser hparser;
	bool parse_error = FALSE;
	unsigned int num_expectations = 0;

	/* RFC 7231, Section 5.1.1:

	   Expect  = "100-continue"
	 */
	// FIXME: simplify; RFC 7231 discarded Expect extension mechanism
	http_parser_init(&hparser, (const unsigned char *)hdr->value, hdr->size);
	while (!parse_error) {
		const char *expect_name, *expect_value;

		/* expect-name */
		if (http_parse_token(&hparser, &expect_name) > 0) {
			num_expectations++;
			if (strcasecmp(expect_name, "100-continue") == 0) {
				request->expect_100_continue = TRUE;
			} else {
				if (parser->error_code == HTTP_REQUEST_PARSE_ERROR_NONE) {
					parser->error_code = HTTP_REQUEST_PARSE_ERROR_EXPECTATION_FAILED;
					_parser->error = t_strdup_printf
						("Unknown Expectation `%s'", expect_name);
				}
			}

			/* BWS "=" BWS */
			http_parse_ows(&hparser);
			if (hparser.cur >= hparser.end)
				break;
			
			if (*hparser.cur == '=') {
				hparser.cur++;
				http_parse_ows(&hparser);

				/* value */
				if (http_parse_token_or_qstring(&hparser, &expect_value) <= 0) {
					parse_error = TRUE;
					break;
				}
		
				if (parser->error_code == HTTP_REQUEST_PARSE_ERROR_NONE) {
					parser->error_code = HTTP_REQUEST_PARSE_ERROR_EXPECTATION_FAILED;
					_parser->error = t_strdup_printf
						("Expectation `%s' has unexpected value", expect_name);
				}
			}

			/* *( OWS ";" [ OWS expect-param ] ) */
			while (!parse_error) {
				const char *attribute, *value;

				/* OWS ";" */
				http_parse_ows(&hparser);
				if (hparser.cur >= hparser.end || *hparser.cur != ';')
					break;
				hparser.cur++;
				http_parse_ows(&hparser);

				/* expect-param */
				if (http_parse_token(&hparser, &attribute) <= 0) {
					parse_error = TRUE;
					break;
				}

				/* BWS "=" BWS */
				http_parse_ows(&hparser);
				if (hparser.cur >= hparser.end || *hparser.cur != '=') {
					parse_error = TRUE;
					break;
				}
				hparser.cur++;
				http_parse_ows(&hparser);

				/* value */
				if (http_parse_token_or_qstring(&hparser, &value) <= 0) {
					parse_error = TRUE;
					break;
				}

				if (parser->error_code == HTTP_REQUEST_PARSE_ERROR_NONE) {
					parser->error_code = HTTP_REQUEST_PARSE_ERROR_EXPECTATION_FAILED;
					_parser->error = t_strdup_printf
						("Expectation `%s' has unknown parameter `'%s'",
							expect_name, attribute);
				}
			}
			if (parse_error)
				break;		
		}
		http_parse_ows(&hparser);
		if (hparser.cur >= hparser.end || *hparser.cur != ',')
			break;
		hparser.cur++;
		http_parse_ows(&hparser);
	}

	if (parse_error || hparser.cur < hparser.end) {
		parser->error_code = HTTP_REQUEST_PARSE_ERROR_BAD_REQUEST;
		_parser->error = "Invalid Expect header";
		return -1;
	}

	if (parser->error_code != HTTP_REQUEST_PARSE_ERROR_NONE)
		return -1;

	if (num_expectations == 0) {
		parser->error_code = HTTP_REQUEST_PARSE_ERROR_BAD_REQUEST;
		_parser->error = "Empty Expect header";
		return -1;
	}
	return 0;
}

static int
http_request_parse_headers(struct http_request_parser *parser,
	struct http_request *request)
{
	const ARRAY_TYPE(http_header_field) *hdrs;
	const struct http_header_field *hdr;
	
	hdrs = http_header_get_fields(parser->parser.msg.header);
	array_foreach(hdrs, hdr) {
		int ret = 0;

		/* Expect: */
		if (http_header_field_is(hdr, "Expect"))
			ret = http_request_parse_expect_header(parser, request, hdr);

		if (ret < 0)
			return -1;
	}
	return 0;
}

int http_request_parse_finish_payload(
	struct http_request_parser *parser,
	enum http_request_parse_error *error_code_r,
	const char **error_r)
{
	int ret;

	*error_code_r = parser->error_code = HTTP_REQUEST_PARSE_ERROR_NONE;
	*error_r = parser->parser.error = NULL;

	/* make sure we finished streaming payload from previous request
	   before we continue. */
	if ((ret = http_message_parse_finish_payload(&parser->parser)) <= 0) {
		if (ret < 0) {
			*error_code_r = http_request_parser_message_error(parser);
			*error_r = parser->parser.error;
		}
	}
	return ret;
}

int http_request_parse_next(struct http_request_parser *parser,
			    pool_t pool, struct http_request *request,
			    enum http_request_parse_error *error_code_r, const char **error_r)
{
	const struct http_header_field *hdr;
	const char *error;
	int ret;

	/* initialize and get rid of any payload of previous request */
	if ((ret=http_request_parse_finish_payload
		(parser, error_code_r, error_r)) <= 0)
		return ret;

	/* RFC 7230, Section 3:
		
	   HTTP-message   = start-line
	                   *( header-field CRLF )
	                    CRLF
	                    [ message-body ]
	 */
	if (parser->state != HTTP_REQUEST_PARSE_STATE_HEADER) {
		ret = http_request_parse_request_line(parser, pool);

		/* assign early for error reporting */
		request->method = parser->request_method;
		request->target_raw = parser->request_target; 
		request->version_major = parser->parser.msg.version_major;
		request->version_minor = parser->parser.msg.version_minor;

		if (ret <= 0) {
			if (ret < 0) {
				*error_code_r = parser->error_code;
				*error_r = parser->parser.error;
			}
			return ret;
		}
	}

	if ((ret = http_message_parse_headers(&parser->parser)) <= 0) {
		if (ret < 0) {
			*error_code_r = http_request_parser_message_error(parser);
			*error_r = parser->parser.error;
		}
		return ret;
	}

	if (http_message_parse_body(&parser->parser, TRUE) < 0) {
		*error_code_r = http_request_parser_message_error(parser);
		*error_r = parser->parser.error;
		return -1;
	}
	parser->state = HTTP_REQUEST_PARSE_STATE_INIT;

	/* RFC 7230, Section 5.4: Host

	   A server MUST respond with a 400 (Bad Request) status code to any
	   HTTP/1.1 request message that lacks a Host header field and to any
	   request message that contains more than one Host header field or a
	   Host header field with an invalid field-value.
	 */
	if ((ret=http_header_field_find_unique
		(parser->parser.msg.header, "Host", &hdr)) <= 0) {
		*error_code_r = HTTP_REQUEST_PARSE_ERROR_BAD_REQUEST;
		if (ret == 0)
			*error_r = "Missing Host header";
		else
			*error_r = "Duplicate Host header";
		return -1;
	}

	memset(request, 0, sizeof(*request));

	if (http_url_request_target_parse(parser->request_target, hdr->value,
		parser->parser.msg.pool, &request->target, &error) < 0) {
		*error_code_r = HTTP_REQUEST_PARSE_ERROR_BAD_REQUEST;
		*error_r = t_strdup_printf("Bad request target `%s': %s",
			parser->request_target, error);
		return -1;
	}

	/* parse request-specific headers */
	if (http_request_parse_headers(parser, request) < 0) {
		*error_code_r = parser->error_code;
		*error_r = parser->parser.error;
		return -1;
	}

	request->method = parser->request_method;
	request->target_raw = parser->request_target;
	request->version_major = parser->parser.msg.version_major;
	request->version_minor = parser->parser.msg.version_minor;
	request->date = parser->parser.msg.date;
	request->payload = parser->parser.payload;
	request->header = parser->parser.msg.header;
	request->connection_options = parser->parser.msg.connection_options;
	request->connection_close = parser->parser.msg.connection_close;

	/* reset this state early */
	parser->request_method = NULL;
	parser->request_target = NULL;
	return 1;
}