view src/lib-imap/imap-parser.c @ 896:21ffcce83c70 HEAD

Rewrote rfc822-tokenize.c to work one token at a time so it won't uselessly take memory, maybe also a bit faster. This caused pretty large changes all around. Also moved all string (un)escaping code to lib/strescape.c.
author Timo Sirainen <tss@iki.fi>
date Fri, 03 Jan 2003 17:57:12 +0200
parents 7935347f54f1
children 0d5be52d7131
line wrap: on
line source

/* Copyright (C) 2002 Timo Sirainen */

#include "lib.h"
#include "istream.h"
#include "ostream.h"
#include "strescape.h"
#include "imap-parser.h"

/* Evaluates to non-zero when c is a CR or an LF character.
   NOTE: c may be evaluated more than once. */
#define is_linebreak(c) (((c) == '\r') || ((c) == '\n'))

/* Number of argument slots requested for every newly created list (root
   list and '(' sub-lists). A full list is doubled in imap_arg_create(). */
#define LIST_ALLOC_SIZE 7

/* What kind of argument imap_parser_read_arg() is currently in the middle
   of reading. */
typedef enum {
	/* not inside any argument; the next byte decides the type */
	ARG_PARSE_NONE = 0,
	/* reading an atom (anything not starting with '"', '{', '(' or ')') */
	ARG_PARSE_ATOM,
	/* reading a quoted string, the opening '"' has been seen */
	ARG_PARSE_STRING,
	/* reading the digits of a literal size, the opening '{' has been seen */
	ARG_PARSE_LITERAL,
	/* literal size has been read; reading the linebreak and the data */
	ARG_PARSE_LITERAL_DATA
} ArgParseType;

/* State of one IMAP argument parser. The argument tree (lists and strings)
   is allocated from `pool`, which imap_parser_reset() clears. */
struct _ImapParser {
	/* permanent */
	Pool pool; /* backing storage for argument lists and strings */
	IStream *input; /* stream the arguments are parsed from */
	OStream *output; /* where "+ OK" is sent for literals; may be NULL */
	/* max_literal_size: largest literal accepted when the literal data
	   itself is read. max_elements: limit for element_count. */
	size_t max_literal_size, max_elements;
        ImapParserFlags flags; /* set by each imap_parser_read_args() call */

	/* reset by imap_parser_reset(): */
        ImapArgList *root_list; /* top-level argument list */
        ImapArgList *cur_list; /* list that new arguments are appended to */
	ImapArg *list_arg; /* argument owning cur_list, NULL at root level */
	size_t element_count; /* arguments created so far (incl. list EOLs) */

	ArgParseType cur_type; /* argument type currently being parsed */
	size_t cur_pos; /* parser position in input buffer */

	int str_first_escape; /* ARG_PARSE_STRING: index to first '\' */
	uoff_t literal_size; /* ARG_PARSE_LITERAL: string size */

	const char *error; /* static error message, NULL if no error */

	/* linebreak after "{size}" hasn't been skipped yet */
	unsigned int literal_skip_crlf:1;
	/* ARG_PARSE_ATOM: currently between '[' and ']' */
	unsigned int inside_bracket:1;
	/* a linebreak was found where an argument was expected */
	unsigned int eol:1;
};

/* (Re)allocates old_list from the parser's pool so that it is sized for
   `size` ImapArg entries. The "- 1" is presumably there because ImapArgList
   itself already contains one ImapArg slot -- confirm in imap-parser.h. */
#define LIST_REALLOC(parser, old_list, size) \
	p_realloc((parser)->pool, (old_list), \
		  sizeof(ImapArgList) + ((size) - 1) * sizeof(ImapArg))

/* Resizes the current list to `size` argument slots and re-links the
   (possibly moved) list to its owner: the parser's root_list when we're at
   root level, otherwise the list argument that contains it.
   parser->cur_list may be NULL, in which case a fresh list is requested
   from p_realloc(). */
static void imap_args_realloc(ImapParser *parser, size_t size)
{
	ImapArgList *list;

	list = LIST_REALLOC(parser, parser->cur_list, size);
	list->alloc = size;
	parser->cur_list = list;

	if (parser->list_arg != NULL) {
		/* inside a '(' list: the owning argument points to it */
		parser->list_arg->_data.list = list;
		return;
	}

	parser->root_list = list;
}

/* Creates a new parser reading from `input`.

   output:           stream where "+ OK" is written when a literal is
                     accepted; may be NULL to disable that.
   max_literal_size: largest literal accepted when its data is read.
   max_elements:     maximum number of argument elements per command.

   The returned parser must be freed with imap_parser_destroy(). */
ImapParser *imap_parser_create(IStream *input, OStream *output,
			       size_t max_literal_size, size_t max_elements)
{
	ImapParser *new_parser = i_new(ImapParser, 1);

	new_parser->input = input;
	new_parser->output = output;
	new_parser->max_elements = max_elements;
	new_parser->max_literal_size = max_literal_size;

	/* the pool must exist before the root list can be allocated */
	new_parser->pool = pool_alloconly_create("IMAP parser", 8192);
	imap_args_realloc(new_parser, LIST_ALLOC_SIZE);

	return new_parser;
}

/* Releases the parser's pool reference and frees the parser itself. */
void imap_parser_destroy(ImapParser *parser)
{
	Pool args_pool = parser->pool;

	pool_unref(args_pool);
	i_free(parser);
}

/* Drops everything parsed so far and prepares the parser for a new set of
   arguments. All memory handed out from the parser's pool is cleared, so
   previously returned arguments must not be used afterwards. */
void imap_parser_reset(ImapParser *parser)
{
	p_clear(parser->pool);

	/* parsing state */
	parser->cur_pos = 0;
	parser->cur_type = ARG_PARSE_NONE;
	parser->literal_size = 0;
	parser->str_first_escape = 0;
	parser->error = NULL;

	/* flags */
	parser->eol = FALSE;
	parser->inside_bracket = FALSE;
	parser->literal_skip_crlf = FALSE;

	/* argument tree: forget the old lists and start a new root list */
	parser->element_count = 0;
	parser->list_arg = NULL;
	parser->cur_list = NULL;
	parser->root_list = NULL;
	imap_args_realloc(parser, LIST_ALLOC_SIZE);
}

/* Returns the message of the last parsing error, or NULL if no error has
   been recorded since the parser was created/reset. */
const char *imap_parser_get_error(ImapParser *parser)
{
	const char *message = parser->error;

	return message;
}

/* Consumes everything parsed so far (parser->cur_pos bytes) plus any space
   characters following it from the input stream. *data and *data_size are
   advanced to match and parser->cur_pos is reset to 0.
   Returns TRUE if there is still data left after skipping. */
static int imap_parser_skip_to_next(ImapParser *parser, const char **data,
				    size_t *data_size)
{
	const char *buf = *data;
	size_t skip = parser->cur_pos;

	while (skip < *data_size && buf[skip] == ' ')
		skip++;

	i_stream_skip(parser->input, skip);
	parser->cur_pos = 0;

	*data = buf + skip;
	*data_size -= skip;
	return *data_size > 0;
}

/* Appends a new, uninitialized argument to the current list and returns
   it. The list's allocation is doubled first if it is full. The caller
   must set the argument's type and data. */
static ImapArg *imap_arg_create(ImapParser *parser)
{
	ImapArgList *list;
	ImapArg *slot;

	i_assert(parser->cur_list != NULL);

	/* @UNSAFE */
	if (parser->cur_list->alloc == parser->cur_list->size)
		imap_args_realloc(parser, parser->cur_list->alloc * 2);

	/* read cur_list only now, the realloc may have replaced it */
	list = parser->cur_list;
	slot = &list->args[list->size];
	list->size++;

	slot->parent = parser->list_arg;
	parser->element_count++;
	return slot;
}

/* Handles '(': adds a list argument to the current list and makes a new,
   empty list owned by that argument the current list. */
static void imap_parser_open_list(ImapParser *parser)
{
	ImapArg *owner = imap_arg_create(parser);

	owner->type = IMAP_ARG_LIST;
	parser->list_arg = owner;

	/* NULL cur_list makes imap_args_realloc() create a fresh list
	   instead of resizing the parent one */
	parser->cur_list = NULL;
	imap_args_realloc(parser, LIST_ALLOC_SIZE);
	owner->_data.list = parser->cur_list;

	parser->cur_type = ARG_PARSE_NONE;
}

/* Handles ')': terminates the current list with an IMAP_ARG_EOL entry and
   makes the parent list current again.
   Returns FALSE and sets parser->error if we aren't inside a list. */
static int imap_parser_close_list(ImapParser *parser)
{
	ImapArg *terminator, *parent;

	if (parser->list_arg == NULL) {
		/* we're not inside list */
		parser->error = "Unexpected ')'";
		return FALSE;
	}

	/* the EOL entry occupies a slot, but isn't counted in list size */
	terminator = imap_arg_create(parser);
	terminator->type = IMAP_ARG_EOL;
	parser->cur_list->size--;

	/* step back to the enclosing list (or to the root list) */
	parent = parser->list_arg->parent;
	parser->list_arg = parent;
	parser->cur_list = parent == NULL ?
		parser->root_list : parent->_data.list;

	parser->cur_type = ARG_PARSE_NONE;
	return TRUE;
}

/* Stores the argument that was just parsed into a new entry of the current
   list, according to parser->cur_type, and resets cur_type to
   ARG_PARSE_NONE.

   data:    start of the argument in the input buffer (for strings this
            points to the opening '"').
   lastpos: number of bytes belonging to the argument, i.e. index of the
            first byte after it. */
static void imap_parser_save_arg(ImapParser *parser, const char *data,
				 size_t lastpos)
{
	ImapArg *new_arg = imap_arg_create(parser);
	int is_nil, want_unescape, only_size;

	switch (parser->cur_type) {
	case ARG_PARSE_ATOM:
		is_nil = lastpos == 3 && strncmp(data, "NIL", 3) == 0;
		if (!is_nil) {
			/* ordinary atom: keep a copy of the text */
			new_arg->_data.str =
				p_strndup(parser->pool, data, lastpos);
			new_arg->type = IMAP_ARG_ATOM;
		} else {
			new_arg->type = IMAP_ARG_NIL;
		}
		break;
	case ARG_PARSE_STRING:
		/* quoted string, possibly with escapes */
		i_assert(lastpos > 0);

		/* copy without the opening '"' */
		new_arg->_data.str =
			p_strndup(parser->pool, data+1, lastpos-1);
		new_arg->type = IMAP_ARG_STRING;

		want_unescape = parser->str_first_escape >= 0 &&
			(parser->flags & IMAP_PARSE_FLAG_NO_UNESCAPE) == 0;
		if (want_unescape) {
			/* start from the first '\'; -1 because the copy
			   doesn't contain the '"' prefix */
			str_unescape(new_arg->_data.str +
				     parser->str_first_escape-1);
		}
		break;
	case ARG_PARSE_LITERAL_DATA:
		only_size = (parser->flags & IMAP_PARSE_FLAG_LITERAL_SIZE) != 0;
		if (only_size) {
			/* caller reads the data itself, give only the size */
			new_arg->type = IMAP_ARG_LITERAL_SIZE;
			new_arg->_data.literal_size = parser->literal_size;
		} else {
			/* literal contents are returned as a string */
			new_arg->type = IMAP_ARG_STRING;
			new_arg->_data.str =
				p_strndup(parser->pool, data, lastpos);
		}
		break;
	default:
		i_unreached();
	}

	parser->cur_type = ARG_PARSE_NONE;
}

/* Continues reading an atom from data, starting at parser->cur_pos.

   The atom ends at a space, ')' or linebreak. Text between '[' and ']' is
   an exception: spaces and ')' are allowed inside it, but a nested '[' or
   a linebreak is an error.

   Returns TRUE when the end of the atom was found and the atom was saved;
   parser->cur_pos is then left pointing to the terminating character.
   Returns FALSE if more data is needed (parser->error stays unset) or on
   error (parser->error is set). */
static int imap_parser_read_atom(ImapParser *parser, const char *data,
				 size_t data_size)
{
	size_t i;

	/* read until we've found space, CR or LF. Data inside '[' and ']'
	   characters are an exception though, allow spaces inside them. */
	for (i = parser->cur_pos; i < data_size; i++) {
		if (parser->inside_bracket) {
			if (data[i] == '[') {
				/* nested '[' characters not allowed
				   (too much trouble and imap doesn't need).
				   Abort here, otherwise parsing would go on
				   with the error already set. */
				parser->error = "Unexpected '['";
				return FALSE;
			}
			if (is_linebreak(data[i])) {
				/* missing ']' character */
				parser->error = "Missing ']'";
				return FALSE;
			}

			if (data[i] == ']')
				parser->inside_bracket = FALSE;
		} else {
			if (data[i] == '[')
				parser->inside_bracket = TRUE;
			else if (data[i] == ' ' || data[i] == ')' ||
				 is_linebreak(data[i])) {
				/* saving resets cur_type to ARG_PARSE_NONE */
				imap_parser_save_arg(parser, data, i);
				break;
			}
		}
	}

	parser->cur_pos = i;
	return parser->cur_type == ARG_PARSE_NONE;
}

/* Continues reading a quoted string from data, starting at
   parser->cur_pos (data[0] is the opening '"').

   Returns TRUE when the closing '"' was found and the string was saved;
   parser->cur_pos then points past the closing quote.
   Returns FALSE if more data is needed, or if a linebreak was found inside
   the string (parser->error is set). */
static int imap_parser_read_string(ImapParser *parser, const char *data,
				   size_t data_size)
{
	size_t pos = parser->cur_pos;

	/* scan until an unescaped '"', or a CR/LF which is an error */
	while (pos < data_size) {
		if (data[pos] == '"') {
			imap_parser_save_arg(parser, data, pos);

			/* the closing quote is consumed as well */
			pos++;
			break;
		}

		if (data[pos] == '\\') {
			if (pos+1 == data_size) {
				/* the escaped character hasn't arrived yet.
				   stop at the '\' so that it's handled
				   together with the next character later */
				break;
			}

			/* remember where unescaping has to begin */
			if (parser->str_first_escape < 0)
				parser->str_first_escape = pos;

			/* move on to the escaped character */
			pos++;
		}

		/* this is checked also for the escaped character, so CR/LF
		   can't be escaped. a string must end with '"', so a
		   linebreak inside it is always an error. */
		if (is_linebreak(data[pos])) {
			parser->error = "Missing '\"'";
			return FALSE;
		}

		pos++;
	}

	parser->cur_pos = pos;
	return parser->cur_type == ARG_PARSE_NONE;
}

/* Called when the closing '}' of a literal size has been consumed.

   Unless only the literal size is wanted (IMAP_PARSE_FLAG_LITERAL_SIZE),
   the size is checked against max_literal_size and "+ OK" is written to
   the output stream, if there is one. The parser is then switched to
   reading the linebreak and the literal data.

   Returns FALSE and sets parser->error if the literal is too large. */
static int imap_parser_literal_end(ImapParser *parser)
{
	int read_data = (parser->flags & IMAP_PARSE_FLAG_LITERAL_SIZE) == 0;

	if (read_data && parser->literal_size > parser->max_literal_size) {
		/* too long string, abort. */
		parser->error = "Literal size too large";
		return FALSE;
	}

	if (read_data && parser->output != NULL) {
		o_stream_send(parser->output, "+ OK\r\n", 6);
		o_stream_flush(parser->output);
	}

	parser->literal_skip_crlf = TRUE;
	parser->cur_type = ARG_PARSE_LITERAL_DATA;
	parser->cur_pos = 0;
	return TRUE;
}

/* Continues reading the "<digits>}" part of a literal from data, starting
   at parser->cur_pos, accumulating the value into parser->literal_size.

   Returns TRUE when '}' was found and imap_parser_literal_end() accepted
   the literal; the input stream has then been skipped past the '}'.
   Returns FALSE if more data is needed, or on error (parser->error is
   set): non-digit character, size not representable in uoff_t, or literal
   rejected by imap_parser_literal_end(). */
static int imap_parser_read_literal(ImapParser *parser, const char *data,
				    size_t data_size)
{
	/* uoff_t is assumed to be an unsigned type (the previous
	   wrap-around check relied on that as well) */
	const uoff_t max_size = (uoff_t)-1;
	unsigned int digit;
	size_t i;

	/* expecting digits + "}" */
	for (i = parser->cur_pos; i < data_size; i++) {
		if (data[i] == '}') {
			i_stream_skip(parser->input, i+1);
			return imap_parser_literal_end(parser);
		}

		if (data[i] < '0' || data[i] > '9') {
			parser->error = "Invalid literal size";
			return FALSE;
		}
		digit = data[i] - '0';

		/* check before multiplying: literal_size*10 + digit can wrap
		   around to a value that is still larger than the old one,
		   so comparing afterwards doesn't catch every overflow */
		if (parser->literal_size > (max_size - digit) / 10) {
			parser->error = "Literal size too large";
			return FALSE;
		}

		parser->literal_size = parser->literal_size*10 + digit;
	}

	parser->cur_pos = i;
	return FALSE;
}

/* Reads what follows "{size}": first the linebreak (CR LF or plain LF),
   which is skipped from the input stream, and then the literal itself.

   With IMAP_PARSE_FLAG_LITERAL_SIZE only the size is saved as the
   argument and the literal data is left in the stream. Otherwise the
   argument is saved once the whole literal is available in data, and
   parser->cur_pos is set past it.

   Returns TRUE when the argument was saved, FALSE if more data is needed
   or if the linebreak is missing (parser->error is set). */
static int imap_parser_read_literal_data(ImapParser *parser, const char *data,
					 size_t data_size)
{
	size_t literal_len;

	if (parser->literal_skip_crlf) {
		/* only "\r\n" or "\n" is accepted here */
		if (data_size == 0)
			return FALSE;

		if (data[0] == '\r') {
			i_stream_skip(parser->input, 1);
			data++;
			data_size--;

			if (data_size == 0)
				return FALSE;
		}

		if (data[0] != '\n') {
			parser->error = "Missing LF after literal size";
			return FALSE;
		}

		i_stream_skip(parser->input, 1);
		data++;
		data_size--;
		parser->literal_skip_crlf = FALSE;

		i_assert(parser->cur_pos == 0);
	}

	if ((parser->flags & IMAP_PARSE_FLAG_LITERAL_SIZE) != 0) {
		/* only the size is wanted, the data itself isn't touched */
		imap_parser_save_arg(parser, NULL, 0);
		return TRUE;
	}

	/* wait until the entire literal is in the buffer */
	if (data_size < parser->literal_size)
		return FALSE;

	literal_len = (size_t)parser->literal_size;
	imap_parser_save_arg(parser, data, literal_len);
	parser->cur_pos = literal_len;
	return TRUE;
}

/* Parses, or continues parsing, one argument from the data currently
   buffered in the input stream.

   Returns TRUE if argument was fully processed. Also returns TRUE if
   an argument inside a list was processed.

   Returns FALSE if more data is needed, if a linebreak was found where an
   argument was expected (parser->eol is set), or if an error occurred
   (parser->error is set). */
static int imap_parser_read_arg(ImapParser *parser)
{
	const char *data;
	size_t data_size;

	data = (const char *) i_stream_get_data(parser->input, &data_size);
	if (data_size == 0)
		return FALSE;

	/* '(' and ')' leave cur_type as ARG_PARSE_NONE, so this loops until
	   an actual argument begins or the outermost list is closed */
	while (parser->cur_type == ARG_PARSE_NONE) {
		/* we haven't started parsing yet */
		if (!imap_parser_skip_to_next(parser, &data, &data_size))
			return FALSE;
		i_assert(parser->cur_pos == 0);

		/* the first character decides the argument type */
		switch (data[0]) {
		case '\r':
		case '\n':
			/* unexpected end of line */
			parser->eol = TRUE;
			return FALSE;
		case '"':
			parser->cur_type = ARG_PARSE_STRING;
			/* -1 = no escape character seen yet */
			parser->str_first_escape = -1;
			break;
		case '{':
			parser->cur_type = ARG_PARSE_LITERAL;
			parser->literal_size = 0;
			break;
		case '(':
			imap_parser_open_list(parser);
			break;
		case ')':
			if (!imap_parser_close_list(parser))
				return FALSE;

			if (parser->list_arg == NULL) {
				/* end of argument */
				parser->cur_pos++;
				return TRUE;
			}
			break;
		default:
			parser->cur_type = ARG_PARSE_ATOM;
                        parser->inside_bracket = FALSE;
			break;
		}

		/* step over the character that was just handled. for atoms
		   it's the first character of the atom itself. */
		parser->cur_pos++;
	}

	i_assert(data_size > 0);

	/* continue reading the argument whose type is already known. the
	   readers return FALSE if they need more data or if they failed. */
	switch (parser->cur_type) {
	case ARG_PARSE_ATOM:
		if (!imap_parser_read_atom(parser, data, data_size))
			return FALSE;
		break;
	case ARG_PARSE_STRING:
		if (!imap_parser_read_string(parser, data, data_size))
			return FALSE;
		break;
	case ARG_PARSE_LITERAL:
		if (!imap_parser_read_literal(parser, data, data_size))
			return FALSE;

		/* pass through to parsing data. since input->skip was
		   modified, we need to get the data start position again. */
		data = (const char *) i_stream_get_data(parser->input,
							&data_size);

		/* fall through */
	case ARG_PARSE_LITERAL_DATA:
		if (!imap_parser_read_literal_data(parser, data, data_size))
			return FALSE;
		break;
	default:
                i_unreached();
	}

	/* saving the argument has reset the type */
	i_assert(parser->cur_type == ARG_PARSE_NONE);
	return TRUE;
}

/* Evaluates to non-zero while an argument is only partially parsed or
   while we're still inside an unclosed '(' list.
   Every use of the macro parameter is parenthesized, so the macro works
   with any pointer expression and not only with a variable that happens
   to be named "parser". */
#define IS_UNFINISHED(parser) \
	((parser)->cur_type != ARG_PARSE_NONE || \
	 (parser)->cur_list != (parser)->root_list)

/* Reads arguments from the input stream into the parser's root list.

   count: number of root-level arguments wanted. 0 means that arguments
          are read until the end of the line.
   flags: parsing flags, stored into the parser for the reader functions.
   args:  set to the root-level argument array on success, NULL otherwise.

   Returns the number of root-level arguments read; the array is then
   terminated with an IMAP_ARG_EOL entry which isn't included in the
   returned count. Returns -1 if parsing failed (see
   imap_parser_get_error()) and -2 if more input data is needed. */
int imap_parser_read_args(ImapParser *parser, unsigned int count,
			  ImapParserFlags flags, ImapArg **args)
{
	ImapArgList *root;
	size_t needed;
	int root_is_current;

	parser->flags = flags;

	while (count == 0 || parser->root_list->size < count ||
	       IS_UNFINISHED(parser)) {
		if (!imap_parser_read_arg(parser))
			break;

		if (parser->element_count > parser->max_elements) {
			parser->error = "Too many argument elements";
			break;
		}
	}

	if (parser->error != NULL) {
		/* error, abort */
		*args = NULL;
		return -1;
	} else if ((!IS_UNFINISHED(parser) && count > 0 &&
		    parser->root_list->size >= count) || parser->eol) {
		/* all arguments read / end of line. IS_UNFINISHED checks
		   that last argument isn't only partially parsed. */

		/* The list needs room for `count` arguments plus the EOL
		   terminator, and the terminator is written at index `size`.
		   With count == 0 the list may be exactly full
		   (size == alloc), so the size has to be taken into account
		   too or the terminator is written past the allocation. */
		needed = parser->root_list->size;
		if (needed < count)
			needed = count;
		needed++;

		if (needed > parser->root_list->alloc) {
			/* NOTE(review): if the line ended inside an unclosed
			   list, list_arg may still point into the old root
			   list after this -- confirm that callers reset the
			   parser in that case. */
			root_is_current =
				parser->cur_list == parser->root_list;

			root = LIST_REALLOC(parser, parser->root_list, needed);
			root->alloc = needed;
			parser->root_list = root;

			/* keep cur_list in sync, so arguments read by a
			   later call are appended to the reallocated list */
			if (root_is_current)
				parser->cur_list = root;
		}

		parser->root_list->args[parser->root_list->size].type =
			IMAP_ARG_EOL;

		*args = parser->root_list->args;
		return parser->root_list->size;
	} else {
		/* need more data */
		*args = NULL;
		return -2;
	}
}

/* Reads a word, i.e. everything up to the next space, CR or LF, from the
   beginning of the buffered input.

   Returns a copy of the word allocated from the parser's pool, or NULL if
   the buffered data doesn't contain the terminating character yet. The
   word is skipped from the input stream, together with the terminator if
   it's a space; a CR or LF terminator is left in the stream. */
const char *imap_parser_read_word(ImapParser *parser)
{
	const char *data;
	size_t len, data_size;

	data = (const char *) i_stream_get_data(parser->input, &data_size);

	len = 0;
	while (len < data_size && data[len] != ' ' &&
	       data[len] != '\r' && data[len] != '\n')
		len++;

	if (len >= data_size) {
		/* terminator not seen yet */
		return NULL;
	}

	i_stream_skip(parser->input, data[len] == ' ' ? len + 1 : len);
	return p_strndup(parser->pool, data, len);
}

/* Reads the rest of the line, i.e. everything up to the next CR or LF,
   from the beginning of the buffered input.

   Returns a copy of the text allocated from the parser's pool, or NULL if
   the buffered data doesn't contain a linebreak yet. The text is skipped
   from the input stream, the linebreak itself is left there. */
const char *imap_parser_read_line(ImapParser *parser)
{
	const char *data;
	size_t len, data_size;

	data = (const char *) i_stream_get_data(parser->input, &data_size);

	len = 0;
	while (len < data_size && data[len] != '\r' && data[len] != '\n')
		len++;

	if (len >= data_size) {
		/* linebreak not seen yet */
		return NULL;
	}

	i_stream_skip(parser->input, len);
	return p_strndup(parser->pool, data, len);
}

/* Returns the argument's value as a string: "" for NIL, the stored text
   for atoms and strings, and NULL for every other argument type. */
const char *imap_arg_string(ImapArg *arg)
{
	if (arg->type == IMAP_ARG_NIL)
		return "";

	if (arg->type == IMAP_ARG_ATOM || arg->type == IMAP_ARG_STRING)
		return arg->_data.str;

	return NULL;
}

/* Panics with a message telling that an argument of the wrong type was
   accessed as a string. Presumably called by the string accessor in
   imap-parser.h -- confirm there. */
char *_imap_arg_str_error(const ImapArg *arg)
{
	i_panic("Tried to access ImapArg type %d as string", arg->type);
	/* gives the function a return value for its declared type */
	return NULL;
}

/* Panics with a message telling that an argument of the wrong type was
   accessed as a literal size. Presumably called by the literal size
   accessor in imap-parser.h -- confirm there. */
uoff_t _imap_arg_literal_size_error(const ImapArg *arg)
{
	i_panic("Tried to access ImapArg type %d as literal size", arg->type);
	/* gives the function a return value for its declared type */
	return 0;
}

/* Panics with a message telling that an argument of the wrong type was
   accessed as a list. Presumably called by the list accessor in
   imap-parser.h -- confirm there. */
ImapArgList *_imap_arg_list_error(const ImapArg *arg)
{
	i_panic("Tried to access ImapArg type %d as list", arg->type);
	/* gives the function a return value for its declared type */
	return NULL;
}