changeset 3409:4748506a4095 HEAD

Removed message-tokenizer code. All code using it was finally replaced with rfc822-parser API.
author Timo Sirainen <tss@iki.fi>
date Sun, 05 Jun 2005 23:39:53 +0300
parents 9bdf82fd33e1
children a9b623236dd3
files src/lib-mail/Makefile.am src/lib-mail/message-tokenize.c src/lib-mail/message-tokenize.h
diffstat 3 files changed, 0 insertions(+), 423 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-mail/Makefile.am	Sun Jun 05 23:37:49 2005 +0300
+++ b/src/lib-mail/Makefile.am	Sun Jun 05 23:39:53 2005 +0300
@@ -16,7 +16,6 @@
 	message-part-serialize.c \
 	message-send.c \
 	message-size.c \
-	message-tokenize.c \
 	quoted-printable.c \
 	rfc822-parser.c
 
@@ -33,6 +32,5 @@
 	message-part-serialize.h \
 	message-send.h \
 	message-size.h \
-	message-tokenize.h \
 	quoted-printable.h \
 	rfc822-parser.h
--- a/src/lib-mail/message-tokenize.c	Sun Jun 05 23:37:49 2005 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,350 +0,0 @@
-/* Copyright (C) 2002 Timo Sirainen */
-
-#include "lib.h"
-#include "str.h"
-#include "strescape.h"
-#include "message-tokenize.h"
-
-struct message_tokenizer {
-	const unsigned char *data;
-	size_t size;
-
-	message_tokenize_error_callback_t *error_cb;
-	void *error_context;
-
-	int token;
-	size_t token_pos, token_len;
-	size_t parse_pos;
-
-	unsigned int skip_comments:1;
-	unsigned int dot_token:1;
-
-	unsigned int in_bracket:1;
-};
-
-#define PARSE_ERROR() \
-	STMT_START { \
-	if (tok->error_cb != NULL && \
-	    !tok->error_cb(data, i, '\0', tok->error_context)) { \
-		tok->token = TOKEN_LAST; \
-		return TOKEN_LAST; \
-	} \
-	} STMT_END
-
-#define PARSE_ERROR_MISSING(c) \
-	STMT_START { \
-	if (tok->error_cb != NULL && \
-	    !tok->error_cb(data, i, c, tok->error_context)) { \
-		tok->token = TOKEN_LAST; \
-		return TOKEN_LAST; \
-	} \
-	} STMT_END
-
-
-struct message_tokenizer *
-message_tokenize_init(const unsigned char *data, size_t size,
-		      message_tokenize_error_callback_t *error_cb,
-		      void *error_context)
-{
-	struct message_tokenizer *tok;
-
-	tok = i_new(struct message_tokenizer, 1);
-	tok->data = data;
-	tok->size = size;
-
-	tok->error_cb = error_cb;
-	tok->error_context = error_context;
-
-	tok->skip_comments = TRUE;
-	tok->dot_token = TRUE;
-
-	tok->token = -1;
-	return tok;
-}
-
-void message_tokenize_deinit(struct message_tokenizer *tok)
-{
-	i_free(tok);
-}
-
-void message_tokenize_skip_comments(struct message_tokenizer *tok, int set)
-{
-	tok->skip_comments = set;
-}
-
-void message_tokenize_dot_token(struct message_tokenizer *tok, int set)
-{
-	tok->dot_token = set;
-}
-
-enum message_token message_tokenize_next(struct message_tokenizer *tok)
-{
-	int token, level, last_atom;
-	const unsigned char *data;
-	size_t i, size;
-
-	if (tok->token == TOKEN_LAST)
-		return TOKEN_LAST;
-
-	data = tok->data;
-	size = tok->size;
-
-	tok->token = TOKEN_LAST;
-
-	last_atom = FALSE;
-	for (i = tok->parse_pos; i < size && data[i] != '\0'; i++) {
-		token = -1;
-		switch (data[i]) {
-		case ' ':
-		case '\t':
-		case '\r':
-		case '\n':
-			/* skip whitespace */
-			break;
-
-		case '(':
-			/* (comment) - nesting is allowed */
-			if (last_atom)
-				break;
-
-			token = '(';
-			tok->token_pos = ++i;
-
-			level = 1;
-			for (; i < size && data[i] != '\0'; i++) {
-				if (data[i] == '\\' &&
-				    i+1 < size && data[i+1] != '\0')
-					i++;
-				else if (data[i] == '(')
-					level++;
-				else if (data[i] == ')') {
-					if (--level == 0)
-						break;
-				}
-			}
-
-			if (level > 0)
-				PARSE_ERROR_MISSING(')');
-
-			tok->token_len = (size_t) (i - tok->token_pos);
-			break;
-
-		case '[':
-			/* domain literal - nesting isn't allowed */
-			if (last_atom)
-				break;
-
-			token = '[';
-			tok->token_pos = ++i;
-
-			while (i < size && data[i] != '\0' && data[i] != ']') {
-				if (data[i] == '\\' &&
-				    i+1 < size && data[i+1] != '\0')
-					i++;
-				else if (data[i] == '[') {
-					/* nesting not allowed, but
-					   continue anyway */
-					PARSE_ERROR();
-				}
-
-				i++;
-			}
-
-			if (i == size || data[i] == '\0')
-				PARSE_ERROR_MISSING(']');
-
-			tok->token_len = (size_t) (i - tok->token_pos);
-			break;
-
-		case '"':
-			/* quoted string */
-			if (last_atom)
-				break;
-
-			token = '"';
-			tok->token_pos = ++i;
-
-			while (i < size && data[i] != '\0' && data[i] != '"') {
-				if (data[i] == '\\' &&
-				    i+1 < size && data[i+1] != '\0')
-					i++;
-				i++;
-			}
-
-			if (i == size || data[i] == '\0')
-				PARSE_ERROR_MISSING('"');
-
-			tok->token_len = (size_t) (i - tok->token_pos);
-			break;
-
-		case '<':
-			if (last_atom)
-				break;
-
-			if (tok->in_bracket) {
-				/* '<' cannot be nested */
-				PARSE_ERROR();
-			}
-
-			token = '<';
-			tok->in_bracket = TRUE;
-			break;
-		case '>':
-			if (last_atom)
-				break;
-
-			if (!tok->in_bracket) {
-				/* missing '<' */
-                                PARSE_ERROR();
-			}
-
-			token = '>';
-			tok->in_bracket = FALSE;
-			break;
-
-		case ')':
-		case ']':
-		case '\\':
-			PARSE_ERROR();
-			/* fall through */
-
-		/* RFC822 specials: */
-		case '@':
-		case ',':
-		case ';':
-		case ':':
-		case '.':
-		/* RFC 2045 specials: */
-		case '/':
-		case '?':
-		case '=':
-			token = tok->data[i];
-			if (token != '.' || tok->dot_token)
-				break;
-			/* fall through */
-		default:
-			/* atom */
-			token = 'A';
-			if (!last_atom) {
-				tok->token = token;
-				tok->token_pos = i;
-				last_atom = TRUE;
-			}
-			break;
-		}
-
-		if (last_atom) {
-			if (token != 'A') {
-				/* end of atom */
-				tok->token_len = (size_t) (i - tok->token_pos);
-				last_atom = FALSE;
-				break;
-			}
-		} else {
-			if (token != -1) {
-				tok->token = token;
-				if (i < tok->size && data[i] != '\0')
-					i++;
-				break;
-			}
-		}
-
-		if (i == tok->size || data[i] == '\0') {
-			/* unexpected eol */
-			break;
-		}
-	}
-
-	if (last_atom) {
-		/* end of atom */
-		tok->token_len = (size_t) (i - tok->token_pos);
-	}
-
-	tok->parse_pos = i;
-
-	if (tok->token == TOKEN_LAST && tok->in_bracket &&
-	    tok->error_cb != NULL) {
-		if (tok->error_cb(data, i, '>', tok->error_context))
-			tok->token = TOKEN_LAST;
-	}
-
-	return tok->token;
-}
-
-enum message_token message_tokenize_get(const struct message_tokenizer *tok)
-{
-	return tok->token;
-}
-
-size_t message_tokenize_get_parse_position(const struct message_tokenizer *tok)
-{
-	return tok->parse_pos;
-}
-
-const unsigned char *
-message_tokenize_get_value(const struct message_tokenizer *tok, size_t *len)
-{
-	i_assert(IS_TOKEN_STRING(tok->token));
-
-	*len = tok->token_len;
-	return tok->data + tok->token_pos;
-}
-
-void message_tokenize_get_string(struct message_tokenizer *tok,
-				 string_t *str, string_t *comments,
-				 const enum message_token *stop_tokens)
-{
-	enum message_token token;
-	const unsigned char *value;
-	size_t len;
-	int i, token_str, last_str;
-
-	last_str = FALSE;
-	while ((token = message_tokenize_next(tok)) != TOKEN_LAST) {
-		for (i = 0; stop_tokens[i] != TOKEN_LAST; i++)
-			if (token == stop_tokens[i])
-				return;
-
-		if (token == TOKEN_COMMENT) {
-			/* handle comment specially */
-			if (comments != NULL) {
-				if (str_len(comments) > 0)
-					str_append_c(comments, ' ');
-
-				value = message_tokenize_get_value(tok, &len);
-				str_append_unescaped(comments, value, len);
-			}
-			continue;
-		}
-
-		token_str = token == TOKEN_ATOM || token == TOKEN_QSTRING ||
-			token == TOKEN_DLITERAL || token == TOKEN_COMMENT;
-
-		if (!token_str)
-			str_append_c(str, token);
-		else if (token == TOKEN_QSTRING) {
-			/* unescape only quoted strings, since we're removing
-			   the quotes. for domain literals I don't see much
-			   point in unescaping if [] is still kept.. */
-			if (last_str)
-				str_append_c(str, ' ');
-
-			value = message_tokenize_get_value(tok, &len);
-			str_append_unescaped(str, value, len);
-		} else {
-			if (last_str)
-				str_append_c(str, ' ');
-
-			if (token == TOKEN_DLITERAL)
-				str_append_c(str, '[');
-
-			value = message_tokenize_get_value(tok, &len);
-			str_append_n(str, value, len);
-
-			if (token == TOKEN_DLITERAL)
-				str_append_c(str, ']');
-		}
-
-		last_str = token_str;
-	}
-}
--- a/src/lib-mail/message-tokenize.h	Sun Jun 05 23:37:49 2005 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#ifndef __MESSAGE_TOKENIZE_H
-#define __MESSAGE_TOKENIZE_H
-
-#define IS_TOKEN_STRING(token) \
-	((token) == TOKEN_ATOM || (token) == TOKEN_QSTRING || \
-	 (token) == TOKEN_COMMENT || (token) == TOKEN_DLITERAL)
-
-enum message_token {
-	TOKEN_ATOM	= 'A',
-	TOKEN_QSTRING	= '"',
-	TOKEN_COMMENT	= '(',
-	TOKEN_DLITERAL	= '[',
-
-	/* RFC822 specials:
-
-	   '<', '>', '@', ',', ';', ':', '\'
-	   '.' (not included in RFC2045 -> optional)
-
-	   RFC2045 tspecials:
-
-	   '/', '?', '=' */
-
-	TOKEN_LAST	= 0
-};
-
-struct message_tokenizer;
-
-/* Parsing is aborted if returns FALSE. There's two kinds of errors:
-
-   missing_char == '\0': unexpected character at str[pos]
-   missing_char != '\0': missing character */
-typedef int message_tokenize_error_callback_t(const unsigned char *str,
-					      size_t pos, char missing_char,
-					      void *context);
-
-/* Initialize message tokenizer. data is parsed until \0 is found, or size
-   bytes has been parsed, so it's possible to give (size_t)-1 as size
-   if the string is \0 terminated. */
-struct message_tokenizer *
-message_tokenize_init(const unsigned char *data, size_t size,
-		      message_tokenize_error_callback_t *error_cb,
-		      void *error_context);
-void message_tokenize_deinit(struct message_tokenizer *tok);
-
-/* Specify whether comments should be silently skipped (default yes). */
-void message_tokenize_skip_comments(struct message_tokenizer *tok, int set);
-/* Specify whether '.' should be treated as a separate token (default yes). */
-void message_tokenize_dot_token(struct message_tokenizer *tok, int set);
-
-/* Parse the next token and return it. */
-enum message_token message_tokenize_next(struct message_tokenizer *tok);
-
-/* Return the current token. */
-enum message_token message_tokenize_get(const struct message_tokenizer *tok);
-
-/* Return position in string where we're currently parsing. */
-size_t message_tokenize_get_parse_position(const struct message_tokenizer *tok);
-
-/* - not including enclosing "", () or []
-   - '\' isn't expanded
-   - [CR+]LF+LWSP (continued header) isn't removed */
-const unsigned char *
-message_tokenize_get_value(const struct message_tokenizer *tok, size_t *len);
-
-/* Read tokens as a string, all quoted strings will be unquoted.
-   Reads until stop_token is found. */
-void message_tokenize_get_string(struct message_tokenizer *tok,
-				 string_t *str, string_t *comments,
-				 const enum message_token *stop_tokens);
-
-#endif