Mercurial > dovecot > original-hg > dovecot-1.2
changeset 896:21ffcce83c70 HEAD
Rewrote rfc822-tokenize.c to work one token at a time so that it no longer
needlessly consumes memory; it may also be a bit faster. This required fairly
large changes throughout the code.
Also moved all string (un)escaping code to lib/strescape.c.
line wrap: on
line diff
--- a/src/imap/cmd-list.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/imap/cmd-list.c Fri Jan 03 17:57:12 2003 +0200 @@ -1,6 +1,7 @@ /* Copyright (C) 2002 Timo Sirainen */ #include "common.h" +#include "strescape.h" #include "commands.h" #include "imap-match.h" @@ -48,7 +49,7 @@ /* escaping is done here to make sure we don't try to escape the separator char */ - name = imap_escape(t_strdup_until(name, path)); + name = str_escape(t_strdup_until(name, path)); /* find the node */ while (*node != NULL) {
--- a/src/lib-imap/imap-bodystructure.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-imap/imap-bodystructure.c Fri Jan 03 17:57:12 2003 +0200 @@ -33,92 +33,95 @@ static void part_write_bodystructure(MessagePart *part, String *str, int extended); -static void parse_content_type(const Rfc822Token *tokens, - int count, void *context) +static void parse_content_type(const char *value, size_t value_len, + void *context) { MessagePartBodyData *data = context; - const char *value; - int i; + size_t i; - /* find the content type separator */ - for (i = 0; i < count; i++) { - if (tokens[i].token == '/') + for (i = 0; i < value_len; i++) { + if (value[i] == '/') break; } - value = rfc822_tokens_get_value_quoted(tokens, i); - data->content_type = p_strdup(data->pool, value); + if (i == value_len) { + data->content_type = + imap_quote_value(data->pool, value, value_len); + } else { + data->content_type = + imap_quote_value(data->pool, value, i); - value = rfc822_tokens_get_value_quoted(tokens+i+1, count-i-1); - data->content_subtype = p_strdup(data->pool, value); + i++; + data->content_subtype = + imap_quote_value(data->pool, value+i, value_len-i); + } } -static void parse_save_params_list(const Rfc822Token *name, - const Rfc822Token *value, int value_count, +static void parse_save_params_list(const char *name, size_t name_len, + const char *value, size_t value_len, + int value_quoted __attr_unused__, void *context) { MessagePartBodyData *data = context; - const char *str; if (str_len(data->str) != 0) str_append_c(data->str, ' '); str_append_c(data->str, '"'); - str_append_n(data->str, name->ptr, name->len); + str_append_n(data->str, name, name_len); str_append(data->str, "\" "); - str = rfc822_tokens_get_value_quoted(value, value_count); - str_append(data->str, str); + str_append_c(data->str, '"'); + str_append_n(data->str, value, value_len); + str_append_c(data->str, '"'); } -static void parse_content_transfer_encoding(const Rfc822Token *tokens, - int count, void *context) 
+static void parse_content_transfer_encoding(const char *value, size_t value_len, + void *context) { MessagePartBodyData *data = context; - const char *value; - value = rfc822_tokens_get_value_quoted(tokens, count); - data->content_transfer_encoding = p_strdup(data->pool, value); + data->content_transfer_encoding = + imap_quote_value(data->pool, value, value_len); } -static void parse_content_disposition(const Rfc822Token *tokens, - int count, void *context) +static void parse_content_disposition(const char *value, size_t value_len, + void *context) { MessagePartBodyData *data = context; - const char *value; - value = rfc822_tokens_get_value_quoted(tokens, count); - data->content_disposition = p_strdup(data->pool, value); + data->content_disposition = + imap_quote_value(data->pool, value, value_len); } -static void parse_content_language(const Rfc822Token *tokens, - int count, void *context) +static void parse_content_language(const char *value, size_t value_len, + MessagePartBodyData *data) { - MessagePartBodyData *data = context; + Rfc822TokenizeContext *ctx; + Rfc822Token token; String *str; int quoted; /* Content-Language: en-US, az-arabic (comments allowed) */ - if (count <= 0) - return; + ctx = rfc822_tokenize_init(value, value_len, NULL, NULL); + t_push(); str = t_str_new(256); quoted = FALSE; - for (; count > 0; count--, tokens++) { - switch (tokens->token) { - case '(': - /* ignore comment */ + while (rfc822_tokenize_next(ctx)) { + token = rfc822_tokenize_get(ctx); + if (token == TOKEN_LAST) break; - case ',': + + if (token == ',') { /* list separator */ if (quoted) { str_append_c(str, '"'); quoted = FALSE; } - break; - default: + } else { /* anything else goes as-is. only alphabetic characters and '-' is allowed, so anything else is error which we can deal with however we want. 
*/ @@ -129,11 +132,13 @@ quoted = TRUE; } - if (IS_TOKEN_STRING(tokens->token)) - str_append_n(str, tokens->ptr, tokens->len); - else - str_append_c(str, tokens->token); - break; + if (!IS_TOKEN_STRING(token)) + str_append_c(str, token); + else { + value = rfc822_tokenize_get_value(ctx, + &value_len); + str_append_n(str, value, value_len); + } } } @@ -141,6 +146,10 @@ str_append_c(str, '"'); data->content_language = p_strdup(data->pool, str_c(str)); + + t_pop(); + + rfc822_tokenize_deinit(ctx); } static void parse_header(MessagePart *part, @@ -174,17 +183,16 @@ if (strcasecmp(name, "Content-Type") == 0 && part_data->content_type == NULL) { part_data->str = t_str_new(256); - (void)message_content_parse_header(t_strndup(value, value_len), - parse_content_type, - parse_save_params_list, - part_data); + message_content_parse_header(value, value_len, + parse_content_type, + parse_save_params_list, part_data); part_data->content_type_params = p_strdup_empty(pool, str_c(part_data->str)); } else if (strcasecmp(name, "Content-Transfer-Encoding") == 0 && part_data->content_transfer_encoding == NULL) { - (void)message_content_parse_header(t_strndup(value, value_len), - parse_content_transfer_encoding, - NULL, part_data); + message_content_parse_header(value, value_len, + parse_content_transfer_encoding, + NULL, part_data); } else if (strcasecmp(name, "Content-ID") == 0 && part_data->content_id == NULL) { part_data->content_id = @@ -196,16 +204,13 @@ } else if (strcasecmp(name, "Content-Disposition") == 0 && part_data->content_disposition_params == NULL) { part_data->str = t_str_new(256); - (void)message_content_parse_header(t_strndup(value, value_len), - parse_content_disposition, - parse_save_params_list, - part_data); + message_content_parse_header(value, value_len, + parse_content_disposition, + parse_save_params_list, part_data); part_data->content_disposition_params = p_strdup_empty(pool, str_c(part_data->str)); } else if (strcasecmp(name, "Content-Language") == 0) { - 
(void)message_content_parse_header(t_strndup(value, value_len), - parse_content_language, NULL, - part_data); + parse_content_language(value, value_len, part_data); } else if (strcasecmp(name, "Content-MD5") == 0 && part_data->content_md5 == NULL) { part_data->content_md5 = @@ -262,7 +267,7 @@ if (data->content_subtype != NULL) str_append(str, data->content_subtype); else - str_append(str, "x-unknown"); + str_append(str, "\"x-unknown\""); if (!extended) return;
--- a/src/lib-imap/imap-parser.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-imap/imap-parser.c Fri Jan 03 17:57:12 2003 +0200 @@ -3,6 +3,7 @@ #include "lib.h" #include "istream.h" #include "ostream.h" +#include "strescape.h" #include "imap-parser.h" #define is_linebreak(c) \ @@ -215,8 +216,8 @@ if (parser->str_first_escape >= 0 && (parser->flags & IMAP_PARSE_FLAG_NO_UNESCAPE) == 0) { /* -1 because we skipped the '"' prefix */ - str_remove_escapes(arg->_data.str + - parser->str_first_escape-1); + str_unescape(arg->_data.str + + parser->str_first_escape-1); } break; case ARG_PARSE_LITERAL_DATA:
--- a/src/lib-imap/imap-util.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-imap/imap-util.c Fri Jan 03 17:57:12 2003 +0200 @@ -47,29 +47,3 @@ return str_c(str); } - -const char *imap_escape(const char *str) -{ - char *ret, *p; - size_t i, esc; - - /* get length of string and number of chars to escape */ - esc = 0; - for (i = 0; str[i] != '\0'; i++) { - if (IS_ESCAPED_CHAR(str[i])) - esc++; - } - - if (esc == 0) - return str; - - /* @UNSAFE: escape them */ - p = ret = t_malloc(i + esc + 1); - for (; *str != '\0'; str++) { - if (IS_ESCAPED_CHAR(*str)) - *p++ = '\\'; - *p++ = *str; - } - *p = '\0'; - return ret; -}
--- a/src/lib-imap/imap-util.h Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-imap/imap-util.h Fri Jan 03 17:57:12 2003 +0200 @@ -28,14 +28,9 @@ MAIL_FLAGS_COUNT = 32 }; -#define IS_ESCAPED_CHAR(c) ((c) == '"' || (c) == '\\') - /* Return flags as a space separated string. custom_flags[] is a list of names for custom flags, flags having NULL or "" entry are ignored. */ const char *imap_write_flags(MailFlags flags, const char *custom_flags[], unsigned int custom_flags_count); -/* Escape the string */ -const char *imap_escape(const char *str); - #endif
--- a/src/lib-index/mail-index-update.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-index/mail-index-update.c Fri Jan 03 17:57:12 2003 +0200 @@ -5,7 +5,6 @@ #include "istream.h" #include "ioloop.h" #include "rfc822-date.h" -#include "rfc822-tokenize.h" #include "message-parser.h" #include "message-part-serialize.h" #include "message-size.h"
--- a/src/lib-index/mbox/mbox-index.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-index/mbox/mbox-index.c Fri Jan 03 17:57:12 2003 +0200 @@ -3,7 +3,6 @@ #include "lib.h" #include "buffer.h" #include "istream.h" -#include "rfc822-tokenize.h" #include "mbox-index.h" #include "mbox-lock.h" #include "mail-index-util.h"
--- a/src/lib-mail/message-body-search.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/message-body-search.c Fri Jan 03 17:57:12 2003 +0200 @@ -4,8 +4,8 @@ #include "base64.h" #include "buffer.h" #include "istream.h" +#include "strescape.h" #include "charset-utf8.h" -#include "rfc822-tokenize.h" #include "quoted-printable.h" #include "message-parser.h" #include "message-content-parser.h" @@ -45,57 +45,51 @@ unsigned int found:1; } PartSearchContext; -static void parse_content_type(const Rfc822Token *tokens, int count, +static void parse_content_type(const char *value, size_t value_len, void *context) { PartSearchContext *ctx = context; - if (ctx->content_type != NULL && tokens[0].token == 'A') { - ctx->content_type = - i_strdup(rfc822_tokens_get_value(tokens, count)); + if (ctx->content_type != NULL) { + ctx->content_type = i_strndup(value, value_len); ctx->content_type_text = strncasecmp(ctx->content_type, "text/", 5) == 0 || strncasecmp(ctx->content_type, "message/", 8) == 0; } } -static void parse_content_type_param(const Rfc822Token *name, - const Rfc822Token *value, - int value_count, void *context) +static void parse_content_type_param(const char *name, size_t name_len, + const char *value, size_t value_len, + int value_quoted, void *context) { PartSearchContext *ctx = context; - if (name->len != 7 || strncasecmp(name->ptr, "charset", 7) != 0) - return; - - if (ctx->content_charset == NULL) { - ctx->content_charset = - i_strdup(rfc822_tokens_get_value(value, value_count)); + if (name_len == 7 && strncasecmp(name, "charset", 7) == 0 && + ctx->content_charset == NULL) { + ctx->content_charset = i_strndup(value, value_len); + if (value_quoted) str_unescape(ctx->content_charset); } } -static void parse_content_encoding(const Rfc822Token *tokens, - int count __attr_unused__, void *context) +static void parse_content_encoding(const char *value, size_t value_len, + void *context) { PartSearchContext *ctx = context; - if (tokens[0].token != 'A') - return; - - 
switch (tokens[0].len) { + switch (value_len) { case 4: - if (strncasecmp(tokens[0].ptr, "7bit", 4) != 0 && - strncasecmp(tokens[0].ptr, "8bit", 4) != 0) + if (strncasecmp(value, "7bit", 4) != 0 && + strncasecmp(value, "8bit", 4) != 0) ctx->content_unknown = TRUE; break; case 6: - if (strncasecmp(tokens[0].ptr, "base64", 6) == 0) + if (strncasecmp(value, "base64", 6) == 0) ctx->content_base64 = TRUE; - else if (strncasecmp(tokens[0].ptr, "binary", 6) != 0) + else if (strncasecmp(value, "binary", 6) != 0) ctx->content_unknown = TRUE; break; case 16: - if (strncasecmp(tokens[0].ptr, "quoted-printable", 16) == 0) + if (strncasecmp(value, "quoted-printable", 16) == 0) ctx->content_qp = TRUE; else ctx->content_unknown = TRUE; @@ -120,21 +114,17 @@ ctx->hdr_search_ctx); } - t_push(); - if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) { - (void)message_content_parse_header(t_strndup(value, value_len), - parse_content_type, - parse_content_type_param, - ctx); + message_content_parse_header(value, value_len, + parse_content_type, + parse_content_type_param, + ctx); } else if (name_len == 25 && strncasecmp(name, "Content-Transfer-Encoding", 25) == 0) { - (void)message_content_parse_header(t_strndup(value, value_len), - parse_content_encoding, - NULL, ctx); + message_content_parse_header(value, value_len, + parse_content_encoding, + NULL, ctx); } - - t_pop(); } static int message_search_header(PartSearchContext *ctx, IStream *input)
--- a/src/lib-mail/message-content-parser.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/message-content-parser.c Fri Jan 03 17:57:12 2003 +0200 @@ -1,52 +1,61 @@ /* Copyright (C) 2002 Timo Sirainen */ #include "lib.h" +#include "str.h" #include "rfc822-tokenize.h" #include "message-content-parser.h" -int message_content_parse_header(const char *value, ParseContentFunc func, - ParseContentParamFunc param_func, - void *context) +void message_content_parse_header(const char *data, size_t size, + ParseContentFunc func, + ParseContentParamFunc param_func, + void *context) { - const Rfc822Token *tokens; - int i, next, ntokens; + static const Rfc822Token stop_tokens[] = { ';', TOKEN_LAST }; + Rfc822TokenizeContext *ctx; + Rfc822Token token; + String *str; + const char *key, *value; + size_t key_len, value_len; - tokens = rfc822_tokenize(value, &ntokens, NULL, NULL); - if (tokens == NULL) { - /* error */ - return FALSE; - } + ctx = rfc822_tokenize_init(data, size, NULL, NULL); + rfc822_tokenize_dot_token(ctx, FALSE); - /* first ';' separates the parameters */ - for (i = 0; i < ntokens; i++) { - if (tokens[i].token == ';') - break; - } + t_push(); + str = t_str_new(256); + + /* first ';' separates the parameters */ + (void)rfc822_tokenize_get_string(ctx, str, NULL, stop_tokens); if (func != NULL) - func(tokens, i, context); + func(str_c(str), str_len(str), context); - if (param_func != NULL) { + t_pop(); + + if (param_func != NULL && rfc822_tokenize_get(ctx) == ';') { /* parse the parameters */ - i++; - while (i < ntokens) { - /* find the next ';' */ - for (next = i; next < ntokens; next++) { - if (tokens[next].token == ';') - break; - } + while (rfc822_tokenize_next(ctx)) { + token = rfc822_tokenize_get(ctx); + + /* <token> "=" <token> | <quoted-string> */ + if (token != TOKEN_ATOM) + continue; + + key = rfc822_tokenize_get_value(ctx, &key_len); - if (i+2 < next && - tokens[i].token == 'A' && - tokens[i+1].token == '=') { - /* <atom> = <value> */ - param_func(tokens 
+ i, tokens + i + 2, - next - (i+2), context); - } + (void)rfc822_tokenize_next(ctx); + if (rfc822_tokenize_get(ctx) != '=') + continue; - i = next+1; + (void)rfc822_tokenize_next(ctx); + token = rfc822_tokenize_get(ctx); + if (token != TOKEN_ATOM && token != TOKEN_QSTRING) + continue; + + value = rfc822_tokenize_get_value(ctx, &value_len); + param_func(key, key_len, value, value_len, + token == TOKEN_QSTRING, context); } } - return TRUE; + rfc822_tokenize_deinit(ctx); }
--- a/src/lib-mail/message-content-parser.h Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/message-content-parser.h Fri Jan 03 17:57:12 2003 +0200 @@ -1,20 +1,16 @@ #ifndef __MESSAGE_CONTENT_PARSER_H #define __MESSAGE_CONTENT_PARSER_H -/* functions can safely store data into data stack, - ie. message_content_parse_header() is guaranteed not to call - t_push()/t_pop() */ - -/* Note that count can be 0 */ -typedef void (*ParseContentFunc)(const Rfc822Token *tokens, int count, +/* NOTE: name and value aren't \0-terminated. */ +typedef void (*ParseContentFunc)(const char *value, size_t value_len, void *context); -/* name is always atom, value_count is always > 0 */ -typedef void (*ParseContentParamFunc)(const Rfc822Token *name, - const Rfc822Token *value, - int value_count, void *context); +typedef void (*ParseContentParamFunc)(const char *name, size_t name_len, + const char *value, size_t value_len, + int value_quoted, void *context); -int message_content_parse_header(const char *value, ParseContentFunc func, - ParseContentParamFunc param_func, - void *context); +void message_content_parse_header(const char *data, size_t size, + ParseContentFunc func, + ParseContentParamFunc param_func, + void *context); #endif
--- a/src/lib-mail/message-header-search.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/message-header-search.c Fri Jan 03 17:57:12 2003 +0200 @@ -4,8 +4,8 @@ #include "base64.h" #include "buffer.h" #include "charset-utf8.h" -#include "rfc822-tokenize.h" #include "quoted-printable.h" +#include "message-parser.h" #include "message-header-decode.h" #include "message-header-search.h"
--- a/src/lib-mail/message-parser.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/message-parser.c Fri Jan 03 17:57:12 2003 +0200 @@ -2,7 +2,7 @@ #include "lib.h" #include "istream.h" -#include "rfc822-tokenize.h" +#include "strescape.h" #include "message-content-parser.h" #include "message-parser.h" #include "message-size.h" @@ -68,20 +68,17 @@ return part; } -static void parse_content_type(const Rfc822Token *tokens, int count, +static void parse_content_type(const char *value, size_t value_len, void *context) { MessageParseContext *parse_ctx = context; const char *str; - if (tokens[0].token != 'A') + if (parse_ctx->last_content_type != NULL || value_len == 0) return; - if (parse_ctx->last_content_type != NULL) - return; - - str = rfc822_tokens_get_value(tokens, count); - parse_ctx->last_content_type = p_strdup(parse_ctx->pool, str); + str = parse_ctx->last_content_type = + p_strndup(parse_ctx->pool, value, value_len); if (strcasecmp(str, "message/rfc822") == 0) parse_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; @@ -97,20 +94,21 @@ } } -static void parse_content_type_param(const Rfc822Token *name, - const Rfc822Token *value, - int value_count, void *context) +static void parse_content_type_param(const char *name, size_t name_len, + const char *value, size_t value_len, + int value_quoted, void *context) { MessageParseContext *parse_ctx = context; - const char *str; if ((parse_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || - name->len != 8 || strncasecmp(name->ptr, "boundary", 8) != 0) + name_len != 8 || strncasecmp(name, "boundary", 8) != 0) return; if (parse_ctx->last_boundary == NULL) { - str = rfc822_tokens_get_value(value, value_count); - parse_ctx->last_boundary = p_strdup(parse_ctx->pool, str); + parse_ctx->last_boundary = + p_strndup(parse_ctx->pool, value, value_len); + if (value_quoted) + str_unescape(parse_ctx->last_boundary); } } @@ -129,10 +127,10 @@ if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) { /* we need to 
know the boundary */ - (void)message_content_parse_header(t_strndup(value, value_len), - parse_content_type, - parse_content_type_param, - parse_ctx); + message_content_parse_header(value, value_len, + parse_content_type, + parse_content_type_param, + parse_ctx); } }
--- a/src/lib-mail/message-parser.h Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/message-parser.h Fri Jan 03 17:57:12 2003 +0200 @@ -1,6 +1,9 @@ #ifndef __MESSAGE_PARSER_H #define __MESSAGE_PARSER_H +#define IS_LWSP(c) \ + ((c) == ' ' || (c) == '\t') + typedef struct _MessagePart MessagePart; typedef struct _MessagePosition MessagePosition; typedef struct _MessageSize MessageSize;
--- a/src/lib-mail/rfc822-address.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/rfc822-address.c Fri Jan 03 17:57:12 2003 +0200 @@ -17,56 +17,34 @@ return addr; } -static int read_until(const Rfc822Token *tokens, const char *stop_tokens, - String *comment) -{ - char *c_str; - int i, pos; - - /* find the stop token */ - for (i = 0; tokens[i].token != 0; i++) { - if (strchr(stop_tokens, tokens[i].token) != NULL) - break; - - if (tokens[i].token == '(' && comment != NULL) { - /* save comment */ - if (str_len(comment) > 0) - str_append_c(comment, ' '); - pos = str_len(comment); - - str_append_n(comment, tokens[i].ptr, tokens[i].len); - c_str = str_c_modifyable(comment); - - str_remove_escapes(c_str + pos); - str_truncate(comment, strlen(c_str)); - } - } - - return i; -} - -static void read_until_get(const Rfc822Token **tokens, const char *stop_tokens, - String *phrase, String *comment) -{ - const char *value; - int count; - - count = read_until(*tokens, stop_tokens, comment); - if (count > 0) { - value = rfc822_tokens_get_value(*tokens, count); - str_append(phrase, value); - - *tokens += count; - } -} - Rfc822Address *rfc822_address_parse(Pool pool, const char *str) { + static const Rfc822Token stop_tokens_init[] = + { ',', '@', '<', ':', TOKEN_LAST }; + static const Rfc822Token stop_tokens_group[] = + { ',', '@', '<', ';', TOKEN_LAST }; + static const Rfc822Token stop_tokens_domain[] = + { ',', '<', TOKEN_LAST }; + static const Rfc822Token stop_tokens_domain_group[] = + { ',', '<', ';', TOKEN_LAST }; + static const Rfc822Token stop_tokens_post_addr[] = + { ',', TOKEN_LAST }; + static const Rfc822Token stop_tokens_post_addr_group[] = + { ',', ';', TOKEN_LAST }; + static const Rfc822Token stop_tokens_addr_route[] = + { ':', '>', TOKEN_LAST }; + static const Rfc822Token stop_tokens_addr_mailbox[] = + { '@', '>', TOKEN_LAST }; + static const Rfc822Token stop_tokens_addr_domain[] = + { '>', TOKEN_LAST }; + Rfc822Address *first_addr, **next_addr, *addr; + 
Rfc822TokenizeContext *ctx; + const Rfc822Token *stop_tokens; + Rfc822Token token; String *mailbox, *domain, *route, *name, *comment, *next_phrase; - const Rfc822Token *tokens; - const char *list, *value; - int ingroup, stop, count; + size_t len; + int ingroup, stop; if (str == NULL || *str == '\0') return NULL; @@ -81,36 +59,40 @@ ENVELOPE wants groups to be stored like (NIL, NIL, group, NIL), ..., (NIL, NIL, NIL, NIL) */ - tokens = rfc822_tokenize(str, NULL, NULL, NULL); + ctx = rfc822_tokenize_init(str, (size_t)-1, NULL, NULL); + rfc822_tokenize_skip_comments(ctx, FALSE); t_push(); mailbox = t_str_new(128); - domain = t_str_new(128); + domain = t_str_new(256); route = t_str_new(128); - name = t_str_new(128); - comment = t_str_new(128); + name = t_str_new(256); + comment = t_str_new(256); - ingroup = FALSE; - list = ",@<:"; + ingroup = FALSE; len = 0; + stop_tokens = stop_tokens_init; next_phrase = mailbox; stop = FALSE; while (!stop) { - count = read_until(tokens, list, comment); - if (count > 0) { - if ((tokens[count].token == '<' || - next_phrase == name) && str_len(next_phrase) > 0) { - /* continuing previously started name, - separate it from us with space */ - str_append_c(next_phrase, ' '); - } + if (next_phrase == name && str_len(name) > 0) { + /* continuing previously started name, + separate it from us with space */ + str_append_c(name, ' '); + len = str_len(name); + } else { + len = 0; + } + (void)rfc822_tokenize_get_string(ctx, next_phrase, comment, + stop_tokens); - value = rfc822_tokens_get_value(tokens, count); - str_append(next_phrase, value); - tokens += count; + if (next_phrase == name && len > 0 && len == str_len(name)) { + /* nothing appeneded, remove the space */ + str_truncate(name, len-1); } - switch (tokens->token) { - case 0: + token = rfc822_tokenize_get(ctx); + switch (token) { + case TOKEN_LAST: case ',': case ';': /* end of address */ @@ -127,18 +109,19 @@ p_strdup(pool, str_c(comment)); } - if (ingroup && tokens->token == ';') { + if 
(ingroup && token == ';') { /* end of group - add end of group marker */ ingroup = FALSE; (void)new_address(pool, &next_addr); } - if (tokens->token == 0) { + if (token == TOKEN_LAST) { stop = TRUE; break; } - list = ingroup ? ",@<;" : ",@<:"; + stop_tokens = ingroup ? stop_tokens_group : + stop_tokens_init; str_truncate(mailbox, 0); str_truncate(domain, 0); @@ -146,53 +129,58 @@ str_truncate(name, 0); str_truncate(comment, 0); - tokens++; next_phrase = mailbox; break; case '@': /* domain part comes next */ - tokens++; next_phrase = domain; - list = ingroup ? ",<;" : ",<"; + stop_tokens = ingroup ? stop_tokens_domain_group : + stop_tokens_domain; break; case '<': /* route-addr */ - tokens++; /* mailbox/domain name so far has actually been the real name */ str_append_str(name, mailbox); + str_truncate(mailbox, 0); + if (str_len(domain) > 0) { str_append_c(name, '@'); str_append_str(name, domain); + str_truncate(domain, 0); } - str_truncate(mailbox, 0); - str_truncate(domain, 0); + /* mailbox */ + (void)rfc822_tokenize_get_string(ctx, + mailbox, NULL, stop_tokens_addr_mailbox); - read_until_get(&tokens, "@>", mailbox, NULL); - if (tokens->token == '@' && str_len(mailbox) == 0) { + if (rfc822_tokenize_get(ctx) == '@' && + str_len(mailbox) == 0) { /* route is given */ - tokens++; - read_until_get(&tokens, ":>", route, NULL); - if (tokens->token == ':') { + (void)rfc822_tokenize_get_string(ctx, + route, NULL, stop_tokens_addr_route); + + if (rfc822_tokenize_get(ctx) == ':') { /* mailbox comes next */ - tokens++; - read_until_get(&tokens, "@>", - mailbox, NULL); + (void)rfc822_tokenize_get_string(ctx, + mailbox, NULL, + stop_tokens_addr_mailbox); } } - if (tokens->token == '@') { - tokens++; - read_until_get(&tokens, ">", domain, NULL); + if (rfc822_tokenize_get(ctx) == '@') { + /* domain */ + (void)rfc822_tokenize_get_string(ctx, + domain, NULL, stop_tokens_addr_domain); } - if (tokens->token == '>') - tokens++; + token = rfc822_tokenize_get(ctx); + i_assert(token == 
'>' || token == TOKEN_LAST); next_phrase = name; - list = ingroup ? ",;" : ","; + stop_tokens = ingroup ? stop_tokens_post_addr_group : + stop_tokens_post_addr; break; case ':': /* beginning of group */ @@ -200,10 +188,13 @@ addr->name = p_strdup(pool, str_c(mailbox)); str_truncate(mailbox, 0); - tokens++; + str_truncate(comment, 0); ingroup = TRUE; - list = ",@<;"; + stop_tokens = stop_tokens_group; + break; + default: + i_unreached(); break; } } @@ -212,6 +203,8 @@ (void)new_address(pool, &next_addr); t_pop(); + rfc822_tokenize_deinit(ctx); + return first_addr; }
--- a/src/lib-mail/rfc822-date.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/rfc822-date.c Fri Jan 03 17:57:12 2003 +0200 @@ -86,64 +86,58 @@ return 0; } -static const Rfc822Token *next_token(const Rfc822Token **tokens) +static Rfc822Token next_token(Rfc822TokenizeContext *ctx, + const char **value, size_t *value_len) { - const Rfc822Token *ret; + Rfc822Token token; - if ((*tokens)->token == 0) - return NULL; + (void)rfc822_tokenize_next(ctx); - ret = *tokens; - (*tokens)++; - return ret; + token = rfc822_tokenize_get(ctx); + if (token == 'A') + *value = rfc822_tokenize_get_value(ctx, value_len); + return token; } -int rfc822_parse_date(const char *str, time_t *time, int *timezone_offset) +static int rfc822_parse_date_tokens(Rfc822TokenizeContext *ctx, time_t *time, + int *timezone_offset) { struct tm tm; - const Rfc822Token *tokens, *tok; - size_t i; - - if (str == NULL || *str == '\0') - return FALSE; + Rfc822Token token; + const char *value; + size_t i, len; - /* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone - - we support comments here even while no-one ever uses them */ - - tokens = rfc822_tokenize(str, NULL, NULL, NULL); - + /* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone */ memset(&tm, 0, sizeof(tm)); /* skip the optional weekday */ - tok = next_token(&tokens); - if (tok != NULL && tok->token == 'A' && tok->len == 3) { - tok = next_token(&tokens); - if (tok == NULL || tok->token != ',') + token = next_token(ctx, &value, &len); + if (token == 'A' && len == 3) { + token = next_token(ctx, &value, &len); + if (token != ',') return FALSE; - tok = next_token(&tokens); + token = next_token(ctx, &value, &len); } /* dd */ - if (tok == NULL || tok->token != 'A' || tok->len > 2 || - !i_isdigit(tok->ptr[0])) + if (token != 'A' || len > 2 || !i_isdigit(value[0])) return FALSE; - tm.tm_mday = tok->ptr[0]-'0'; - if (tok->len == 2) { - if (!i_isdigit(tok->ptr[1])) + tm.tm_mday = value[0]-'0'; + if (len == 2) { + if (!i_isdigit(value[1])) 
return FALSE; - tm.tm_mday = (tm.tm_mday * 10) + (tok->ptr[1]-'0'); + tm.tm_mday = (tm.tm_mday * 10) + (value[1]-'0'); } /* month name */ - tok = next_token(&tokens); - if (tok == NULL || tok->token != 'A' || tok->len != 3) + token = next_token(ctx, &value, &len); + if (token != 'A' || len != 3) return FALSE; for (i = 0; i < 12; i++) { - if (strncasecmp(month_names[i], tok->ptr, 3) == 0) { + if (strncasecmp(month_names[i], value, 3) == 0) { tm.tm_mon = i; break; } @@ -152,18 +146,17 @@ return FALSE; /* [yy]yy */ - tok = next_token(&tokens); - if (tok == NULL || tok->token != 'A' || - (tok->len != 2 && tok->len != 4)) + token = next_token(ctx, &value, &len); + if (token != 'A' || (len != 2 && len != 4)) return FALSE; - for (i = 0; i < tok->len; i++) { - if (!i_isdigit(tok->ptr[i])) + for (i = 0; i < len; i++) { + if (!i_isdigit(value[i])) return FALSE; - tm.tm_year = tm.tm_year * 10 + (tok->ptr[i]-'0'); + tm.tm_year = tm.tm_year * 10 + (value[i]-'0'); } - if (tok->len == 2) { + if (len == 2) { /* two digit year, assume 1970+ */ if (tm.tm_year < 70) tm.tm_year += 100; @@ -174,36 +167,36 @@ } /* hh */ - tok = next_token(&tokens); - if (tok == NULL || tok->token != 'A' || tok->len != 2 || - !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1])) + token = next_token(ctx, &value, &len); + if (token != 'A' || len != 2 || + !i_isdigit(value[0]) || !i_isdigit(value[1])) return FALSE; - tm.tm_hour = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0'); + tm.tm_hour = (value[0]-'0') * 10 + (value[1]-'0'); /* :mm */ - tok = next_token(&tokens); - if (tok == NULL || tok->token != ':') + token = next_token(ctx, &value, &len); + if (token != ':') return FALSE; - tok = next_token(&tokens); - if (tok == NULL || tok->token != 'A' || tok->len != 2 || - !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1])) + token = next_token(ctx, &value, &len); + if (token != 'A' || len != 2 || + !i_isdigit(value[0]) || !i_isdigit(value[1])) return FALSE; - tm.tm_min = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0'); + 
tm.tm_min = (value[0]-'0') * 10 + (value[1]-'0'); /* [:ss] */ - tok = next_token(&tokens); - if (tok != NULL && tok->token == ':') { - tok = next_token(&tokens); - if (tok == NULL || tok->token != 'A' || tok->len != 2 || - !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1])) + token = next_token(ctx, &value, &len); + if (token == ':') { + token = next_token(ctx, &value, &len); + if (token != 'A' || len != 2 || + !i_isdigit(value[0]) || !i_isdigit(value[1])) return FALSE; - tm.tm_sec = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0'); + tm.tm_sec = (value[0]-'0') * 10 + (value[1]-'0'); } /* timezone */ - if (tok == NULL || tok->token != 'A') + if (token != 'A') return FALSE; - *timezone_offset = parse_timezone(tok->ptr, tok->len); + *timezone_offset = parse_timezone(value, len); tm.tm_isdst = -1; *time = utc_mktime(&tm); @@ -215,6 +208,21 @@ return TRUE; } +int rfc822_parse_date(const char *data, time_t *time, int *timezone_offset) +{ + Rfc822TokenizeContext *ctx; + int ret; + + if (data == NULL || *data == '\0') + return FALSE; + + ctx = rfc822_tokenize_init(data, (size_t)-1, NULL, NULL); + ret = rfc822_parse_date_tokens(ctx, time, timezone_offset); + rfc822_tokenize_deinit(ctx); + + return ret; +} + const char *rfc822_to_date(time_t time) { struct tm *tm;
--- a/src/lib-mail/rfc822-date.h Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/rfc822-date.h Fri Jan 03 17:57:12 2003 +0200 @@ -3,7 +3,7 @@ /* Parses RFC822 date/time string. timezone_offset is filled with the timezone's difference to UTC in minutes. */ -int rfc822_parse_date(const char *str, time_t *time, int *timezone_offset); +int rfc822_parse_date(const char *data, time_t *time, int *timezone_offset); /* Create RFC822 date/time string from given time in local timezone. */ const char *rfc822_to_date(time_t time);
--- a/src/lib-mail/rfc822-tokenize.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/rfc822-tokenize.c Fri Jan 03 17:57:12 2003 +0200 @@ -1,58 +1,95 @@ /* Copyright (C) 2002 Timo Sirainen */ #include "lib.h" +#include "str.h" +#include "strescape.h" #include "rfc822-tokenize.h" -#define INITIAL_COUNT 4 +struct _Rfc822TokenizeContext { + const char *data; + size_t size; + + Rfc822TokenizeErrorFunc error_func; + void *error_context; + + int token; + size_t token_pos, token_len; + size_t parse_pos; + + unsigned int skip_comments:1; + unsigned int dot_token:1; + + unsigned int in_bracket:1; +}; #define PARSE_ERROR() \ STMT_START { \ - if (error_func != NULL && \ - !error_func(str, (size_t) (p-str), '\0', context)) \ - return NULL; \ + if (ctx->error_func != NULL && \ + !ctx->error_func(data, i, '\0', ctx->error_context)) \ + return FALSE; \ } STMT_END #define PARSE_ERROR_MISSING(c) \ STMT_START { \ - if (error_func != NULL && \ - !error_func(str, (size_t) (p-str), c, context)) \ - return NULL; \ + if (ctx->error_func != NULL && \ + !ctx->error_func(data, i, c, ctx->error_context)) \ + return FALSE; \ } STMT_END -static Rfc822Token *alloc_token(Rfc822Token **tokens, int *pos, int type) -{ - Rfc822Token *token; - /* @UNSAFE */ - if (*pos+1 >= INITIAL_COUNT) - *tokens = t_buffer_reget_type(*tokens, Rfc822Token, *pos + 2); +Rfc822TokenizeContext * +rfc822_tokenize_init(const char *data, size_t size, + Rfc822TokenizeErrorFunc error_func, void *error_context) +{ + Rfc822TokenizeContext *ctx; + + ctx = i_new(Rfc822TokenizeContext, 1); + ctx->data = data; + ctx->size = size; - token = (*tokens) + *pos; - (*pos)++; + ctx->error_func = error_func; + ctx->error_context = error_context; + + ctx->skip_comments = TRUE; + ctx->dot_token = TRUE; - token->token = type; - token->ptr = NULL; - token->len = 0; - return token; + ctx->token = -1; + return ctx; +} + +void rfc822_tokenize_deinit(Rfc822TokenizeContext *ctx) +{ + i_free(ctx); } -const Rfc822Token *rfc822_tokenize(const char 
*str, int *tokens_count, - Rfc822TokenizeErrorFunc error_func, - void *context) +void rfc822_tokenize_skip_comments(Rfc822TokenizeContext *ctx, int set) { - Rfc822Token *first_token, *token; - const char *p, *last_atom; - int level, in_bracket, pos; + ctx->skip_comments = set; +} + +void rfc822_tokenize_dot_token(Rfc822TokenizeContext *ctx, int set) +{ + ctx->dot_token = set; +} - first_token = t_buffer_get_type(Rfc822Token, INITIAL_COUNT); - pos = 0; +int rfc822_tokenize_next(Rfc822TokenizeContext *ctx) +{ + int token, level, last_atom; + const char *data; + size_t i, size; + + if (ctx->token == TOKEN_LAST) + return FALSE; - token = NULL; - last_atom = NULL; + data = ctx->data; + size = ctx->size; + + ctx->token = TOKEN_LAST; - in_bracket = FALSE; - for (p = str; *p != '\0'; p++) { - switch (*p) { + last_atom = FALSE; + for (i = ctx->parse_pos; i < size && data[i] != '\0'; i++) { + token = -1; + switch (data[i]) { case ' ': case '\t': case '\r': @@ -60,6 +97,112 @@ /* skip whitespace */ break; + case '(': + /* (comment) - nesting is allowed */ + if (last_atom) + break; + + token = '('; + ctx->token_pos = ++i; + + level = 1; + for (; i < size && data[i] != '\0'; i++) { + if (data[i] == '\\' && + i+1 < size && data[i+1] != '\0') + i++; + else if (data[i] == '(') + level++; + else if (data[i] == ')') { + if (--level == 0) + break; + } + } + + if (level > 0) + PARSE_ERROR_MISSING(')'); + + ctx->token_len = (size_t) (i - ctx->token_pos); + break; + + case '[': + /* domain literal - nesting isn't allowed */ + if (last_atom) + break; + + token = '['; + ctx->token_pos = ++i; + + while (i < size && data[i] != '\0' && data[i] != ']') { + if (data[i] == '\\' && + i+1 < size && data[i+1] != '\0') + i++; + else if (data[i] == '[') { + /* nesting not allowed, but + continue anyway */ + PARSE_ERROR(); + } + + i++; + } + + if (i == size || data[i] == '\0') + PARSE_ERROR_MISSING(']'); + + ctx->token_len = (size_t) (i - ctx->token_pos); + break; + + case '"': + /* quoted string */ 
+ if (last_atom) + break; + + token = '"'; + ctx->token_pos = ++i; + + while (i < size && data[i] != '\0' && data[i] != '"') { + if (data[i] == '\\' && + i+1 < size && data[i+1] != '\0') + i++; + i++; + } + + if (i == size || data[i] == '\0') + PARSE_ERROR_MISSING('"'); + + ctx->token_len = (size_t) (i - ctx->token_pos); + break; + + case '<': + if (last_atom) + break; + + if (ctx->in_bracket) { + /* '<' cannot be nested */ + PARSE_ERROR(); + } + + token = '<'; + ctx->in_bracket = TRUE; + break; + case '>': + if (last_atom) + break; + + if (!ctx->in_bracket) { + /* missing '<' */ + PARSE_ERROR(); + } + + token = '>'; + ctx->in_bracket = FALSE; + break; + + case ')': + case ']': + case '\\': + PARSE_ERROR(); + /* fall through */ + /* RFC822 specials: */ case '@': case ',': @@ -70,240 +213,134 @@ case '/': case '?': case '=': - token = alloc_token(&first_token, &pos, *p); - break; - - case '(': - /* (comment) - nesting is allowed */ - token = alloc_token(&first_token, &pos, '('); - token->ptr = ++p; - - level = 1; - for (; *p != '\0'; p++) { - if (*p == '\\' && p[1] != '\0') - p++; - else if (*p == '(') - level++; - else if (*p == ')') { - if (--level == 0) - break; - } - } - - if (level > 0) - PARSE_ERROR_MISSING(')'); - - token->len = (size_t) (p - token->ptr); - break; - - case '[': - /* domain literal - nesting isn't allowed */ - token = alloc_token(&first_token, &pos, '['); - token->ptr = ++p; - - for (; *p != '\0' && *p != ']'; p++) { - if (*p == '\\' && p[1] != '\0') - p++; - else if (*p == '[') { - /* nesting not allowed, but - continue anyway */ - PARSE_ERROR(); - } - } - token->len = (size_t) (p - token->ptr); - - if (*p == '\0') - PARSE_ERROR_MISSING(']'); - break; - - case '"': - /* quoted string */ - token = alloc_token(&first_token, &pos, '"'); - token->ptr = ++p; - - for (; *p != '\0' && *p != '"'; p++) { - if (*p == '\\' && p[1] != '\0') - p++; - } - token->len = (size_t) (p - token->ptr); - - if (*p == '\0') - PARSE_ERROR_MISSING('"'); - break; - - 
case '<': - if (in_bracket) { - /* '<' cannot be nested */ - PARSE_ERROR(); + token = ctx->data[i]; + if (token != '.' || ctx->dot_token) break; - } - - token = alloc_token(&first_token, &pos, '<'); - in_bracket = TRUE; - break; - case '>': - if (!in_bracket) { - /* missing '<' */ - PARSE_ERROR(); - break; - } - - token = alloc_token(&first_token, &pos, '>'); - in_bracket = FALSE; - break; - - case ')': - case ']': - case '\\': - PARSE_ERROR(); - break; + /* fall through */ default: /* atom */ - if (last_atom != p-1) { - token = alloc_token(&first_token, &pos, 'A'); - token->ptr = p; + token = 'A'; + if (!last_atom) { + ctx->token = token; + ctx->token_pos = i; + last_atom = TRUE; } - - token->len++; - last_atom = p; break; } - if (*p == '\0') - break; - } - - if (in_bracket && error_func != NULL) { - if (!error_func(str, (size_t) (p-str), '>', context)) - return NULL; - } - - if (tokens_count != NULL) - *tokens_count = pos; - - /* @UNSAFE */ - first_token[pos++].token = 0; - t_buffer_alloc(sizeof(Rfc822Token) * pos); - return first_token; -} - -const char *rfc822_tokens_get_value(const Rfc822Token *tokens, int count) -{ - /* @UNSAFE */ - char *buf; - size_t i, len, buf_size; - int last_atom; - - if (count <= 0) - return ""; - - buf_size = 256; - buf = t_buffer_get(buf_size); - - len = 0; last_atom = FALSE; - for (; count > 0; count--, tokens++) { - if (tokens->token == '(') - continue; /* skip comments */ - - /* +4 == ' ' '[' ']' '\0' */ - if (len + tokens->len+4 >= buf_size) { - buf_size = nearest_power(buf_size + tokens->len + 3); - buf = t_buffer_reget(buf, buf_size); + if (last_atom) { + if (token != 'A') { + /* end of atom */ + ctx->token_len = (size_t) (i - ctx->token_pos); + last_atom = FALSE; + break; + } + } else { + if (token != -1) { + ctx->token = token; + if (i < ctx->size && data[i] != '\0') + i++; + break; + } } - switch (tokens->token) { - case '"': - case '[': - if (tokens->token == '[') - buf[len++] = '['; - - /* copy the string removing '\' 
chars */ - for (i = 0; i < tokens->len; i++) { - if (tokens->ptr[i] == '\\' && i+1 < tokens->len) - i++; - - buf[len++] = tokens->ptr[i]; - } - - if (tokens->token == '[') - buf[len++] = ']'; - break; - case 'A': - if (last_atom) - buf[len++] = ' '; - - memcpy(buf+len, tokens->ptr, tokens->len); - len += tokens->len; - break; - default: - i_assert(tokens->token != 0); - buf[len++] = (char) tokens->token; + if (i == ctx->size || data[i] == '\0') { + /* unexpected eol */ break; } + } - last_atom = tokens->token == 'A'; + if (last_atom) { + /* end of atom */ + ctx->token_len = (size_t) (i - ctx->token_pos); + } + + ctx->parse_pos = i; + + if (ctx->token == TOKEN_LAST && ctx->in_bracket && + ctx->error_func != NULL) { + if (!ctx->error_func(data, i, '>', ctx->error_context)) + return FALSE; } - buf[len++] = '\0'; - t_buffer_alloc(len); - return buf; + return TRUE; +} + +Rfc822Token rfc822_tokenize_get(const Rfc822TokenizeContext *ctx) +{ + return ctx->token; +} + +const char *rfc822_tokenize_get_value(const Rfc822TokenizeContext *ctx, + size_t *len) +{ + i_assert(IS_TOKEN_STRING(ctx->token)); + + *len = ctx->token_len; + return ctx->data + ctx->token_pos; } -const char *rfc822_tokens_get_value_quoted(const Rfc822Token *tokens, - int count) +int rfc822_tokenize_get_string(Rfc822TokenizeContext *ctx, + String *str, String *comments, + const Rfc822Token *stop_tokens) { - /* @UNSAFE */ - char *buf; - size_t len, buf_size; - int last_atom; + Rfc822Token token; + const char *value; + size_t len; + int i, token_str, last_str; - if (count <= 0) - return "\"\""; + last_str = FALSE; + while (rfc822_tokenize_next(ctx)) { + token = rfc822_tokenize_get(ctx); + if (token == TOKEN_LAST) + return TRUE; - buf_size = 256; - buf = t_buffer_get(buf_size); - buf[0] = '"'; len = 1; last_atom = FALSE; + for (i = 0; stop_tokens[i] != TOKEN_LAST; i++) + if (token == stop_tokens[i]) + return TRUE; - for (; count > 0; count--, tokens++) { - if (tokens->token == '(') - continue; /* skip comments 
*/ + if (token == TOKEN_COMMENT) { + /* handle comment specially */ + if (comments != NULL) { + if (str_len(comments) > 0) + str_append_c(comments, ' '); - /* +5 == ' ' '[' ']' '"' '\0' */ - if (len + tokens->len+5 >= buf_size) { - buf_size = nearest_power(buf_size + tokens->len + 3); - buf = t_buffer_reget(buf, buf_size); + value = rfc822_tokenize_get_value(ctx, &len); + str_append_unescaped(comments, value, len); + } + continue; } - switch (tokens->token) { - case '"': - case '[': - if (tokens->token == '[') - buf[len++] = '['; + token_str = token == TOKEN_ATOM || token == TOKEN_QSTRING || + token == TOKEN_DLITERAL || token == TOKEN_COMMENT; - memcpy(buf+len, tokens->ptr, tokens->len); - len += tokens->len; + if (!token_str) + str_append_c(str, token); + else if (token == TOKEN_QSTRING) { + /* unescape only quoted strings, since we're removing + the quotes. for domain literals I don't see much + point in unescaping if [] is still kept.. */ + if (last_str) + str_append_c(str, ' '); - if (tokens->token == '[') - buf[len++] = ']'; - break; - case 'A': - if (last_atom) - buf[len++] = ' '; + value = rfc822_tokenize_get_value(ctx, &len); + str_append_unescaped(str, value, len); + } else { + if (last_str) + str_append_c(str, ' '); - memcpy(buf+len, tokens->ptr, tokens->len); - len += tokens->len; - break; - default: - i_assert(tokens->token != 0); - buf[len++] = (char) tokens->token; - break; + if (token == TOKEN_DLITERAL) + str_append_c(str, '['); + + value = rfc822_tokenize_get_value(ctx, &len); + str_append_n(str, value, len); + + if (token == TOKEN_DLITERAL) + str_append_c(str, ']'); } - last_atom = tokens->token == 'A'; + last_str = token_str; } - buf[len++] = '"'; - buf[len++] = '\0'; - t_buffer_alloc(len); - return buf; + return FALSE; }
--- a/src/lib-mail/rfc822-tokenize.h Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-mail/rfc822-tokenize.h Fri Jan 03 17:57:12 2003 +0200 @@ -1,38 +1,29 @@ #ifndef __RFC822_TOKENIZE_H #define __RFC822_TOKENIZE_H -typedef struct _Rfc822Token Rfc822Token; - #define IS_TOKEN_STRING(token) \ - ((token) == 'A' || (token) == '"' || (token) == '(' || (token) == '[') - -#define IS_LWSP(c) \ - ((c) == ' ' || (c) == '\t') + ((token) == TOKEN_ATOM || (token) == TOKEN_QSTRING || \ + (token) == TOKEN_COMMENT || (token) == TOKEN_DLITERAL) -struct _Rfc822Token { - /* - 0 = last token - 'A' = atom - '"' = quoted string - '(' = comment - '[' = domain literal +typedef enum { + TOKEN_ATOM = 'A', + TOKEN_QSTRING = '"', + TOKEN_COMMENT = '(', + TOKEN_DLITERAL = '[', - RFC822 specials: + /* RFC822 specials: - '<', '>', '@', ',', ';', ':', '\', '.' + '<', '>', '@', ',', ';', ':', '\' + '.' (optional) RFC2045 tspecials: - '/', '?', '=' - */ - int token; + '/', '?', '=' */ - /* - not including enclosing "", () or [] - - '\' isn't expanded - - [CR+]LF+LWSP (continued header) isn't removed */ - const char *ptr; - size_t len; -}; + TOKEN_LAST = 0 +} Rfc822Token; + +typedef struct _Rfc822TokenizeContext Rfc822TokenizeContext; /* Parsing is aborted if returns FALSE. There's two kinds of errors: @@ -44,15 +35,36 @@ /* Tokenize the string. Returns NULL if string is empty. Memory for returned array is allocated from data stack. You don't have to use the tokens_count, since last token is always 0. */ -const Rfc822Token *rfc822_tokenize(const char *str, int *tokens_count, - Rfc822TokenizeErrorFunc error_func, - void *context); +Rfc822TokenizeContext * +rfc822_tokenize_init(const char *data, size_t size, + Rfc822TokenizeErrorFunc error_func, void *error_context); +void rfc822_tokenize_deinit(Rfc822TokenizeContext *ctx); + +/* Specify whether comments should be silently skipped (default yes). */ +void rfc822_tokenize_skip_comments(Rfc822TokenizeContext *ctx, int set); +/* Specify whether '.' 
should be treated as a separate token (default yes). */ +void rfc822_tokenize_dot_token(Rfc822TokenizeContext *ctx, int set); + +/* Parse the next token. Returns FALSE if parsing error occurred and error + function wanted to abort. It's not required to check the return value, + rfc822_tokenize_get() will return TOKEN_LAST after errors. Returns FALSE + also when last token was already read. */ +int rfc822_tokenize_next(Rfc822TokenizeContext *ctx); -/* Returns the tokens as a string. Tokens are merged together, except - spaces are added between atoms. */ -const char *rfc822_tokens_get_value(const Rfc822Token *tokens, int count); -/* Returns the tokens as a "string". */ -const char *rfc822_tokens_get_value_quoted(const Rfc822Token *tokens, - int count); +/* Return the next token. */ +Rfc822Token rfc822_tokenize_get(const Rfc822TokenizeContext *ctx); + +/* - not including enclosing "", () or [] + - '\' isn't expanded + - [CR+]LF+LWSP (continued header) isn't removed */ +const char *rfc822_tokenize_get_value(const Rfc822TokenizeContext *ctx, + size_t *len); + +/* Return tokens as a string, all quoted strings will be unquoted. + Reads until stop_token is found. Returns FALSE if rfc822_tokenize_next() + failed. */ +int rfc822_tokenize_get_string(Rfc822TokenizeContext *ctx, + String *str, String *comments, + const Rfc822Token *stop_tokens); #endif
--- a/src/lib-storage/index/index-fetch-section.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-storage/index/index-fetch-section.c Fri Jan 03 17:57:12 2003 +0200 @@ -4,7 +4,6 @@ #include "str.h" #include "istream.h" #include "ostream.h" -#include "rfc822-tokenize.h" #include "message-send.h" #include "index-storage.h" #include "index-fetch.h"
--- a/src/lib-storage/index/index-search.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib-storage/index/index-search.c Fri Jan 03 17:57:12 2003 +0200 @@ -4,7 +4,6 @@ #include "istream.h" #include "ostream.h" #include "mmap-util.h" -#include "rfc822-tokenize.h" #include "rfc822-date.h" #include "message-size.h" #include "message-body-search.h"
--- a/src/lib/Makefile.am Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib/Makefile.am Fri Jan 03 17:57:12 2003 +0200 @@ -44,8 +44,9 @@ safe-memset.c \ safe-mkdir.c \ sendfile-util.c \ + str.c \ + strescape.c \ strfuncs.c \ - str.c \ unlink-directory.c \ unlink-lockfiles.c \ utc-offset.c \ @@ -91,8 +92,9 @@ safe-memset.h \ safe-mkdir.h \ sendfile-util.h \ + str.h \ + strescape.h \ strfuncs.h \ - str.h \ unlink-directory.h \ unlink-lockfiles.h \ utc-offset.h \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/strescape.c Fri Jan 03 17:57:12 2003 +0200
@@ -0,0 +1,106 @@
+/*
+    Copyright (c) 2003 Timo Sirainen
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include "lib.h"
+#include "str.h"
+#include "strescape.h"
+
+/* Returns str with a '\' inserted before every character for which
+   IS_ESCAPED_CHAR() is true. If there are no such characters, str itself
+   is returned without copying; otherwise the result is a new string
+   allocated with t_malloc(). */
+const char *str_escape(const char *str)
+{
+	char *ret, *p;
+	size_t i, esc;
+
+	/* get length of string and number of chars to escape */
+	esc = 0;
+	for (i = 0; str[i] != '\0'; i++) {
+		if (IS_ESCAPED_CHAR(str[i]))
+			esc++;
+	}
+
+	if (esc == 0)
+		return str;
+
+	/* @UNSAFE: escape them. i is the length of str, each escaped
+	   character takes one extra byte and +1 is for the trailing '\0'. */
+	p = ret = t_malloc(i + esc + 1);
+	for (; *str != '\0'; str++) {
+		if (IS_ESCAPED_CHAR(*str))
+			*p++ = '\\';
+		*p++ = *str;
+	}
+	*p = '\0';
+	return ret;
+}
+
+/* Appends the first src_size bytes of src to dest, leaving out every '\'.
+   This function itself never reads src[src_size] or anything after it.
+
+   NOTE(review): because all '\' characters are dropped, an escaped
+   backslash ("\\") adds nothing to dest, so this isn't an exact inverse
+   of str_escape(). Confirm that's intended. */
+void str_append_unescaped(String *dest, const char *src, size_t src_size)
+{
+	size_t start, i = 0;
+
+	while (i < src_size) {
+		/* find the end of the current run of non-'\' characters */
+		start = i;
+		while (i < src_size && src[i] != '\\')
+			i++;
+
+		str_append_n(dest, src + start, i - start);
+
+		/* the run ended either at a '\', which is skipped, or at the
+		   end of the data, where src[i] must not be accessed */
+		if (i < src_size)
+			i++;
+	}
+}
+
+/* Removes '\' characters from str in place. A '\' that is the last
+   character of the string is kept. */
+void str_unescape(char *str)
+{
+	/* @UNSAFE */
+	char *dest;
+
+	/* nothing has to be moved before the first '\'. If there isn't
+	   one at all, the string is left as it is. */
+	while (*str != '\\') {
+		if (*str == '\0')
+			return;
+		str++;
+	}
+
+	/* copy the rest over itself, leaving out the '\' characters */
+	for (dest = str; *str != '\0'; str++) {
+		if (*str != '\\' || str[1] == '\0')
+			*dest++ = *str;
+	}
+
+	*dest = '\0';
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/strescape.h Fri Jan 03 17:57:12 2003 +0200
@@ -0,0 +1,15 @@
+#ifndef __STRESCAPE_H
+#define __STRESCAPE_H
+
+#define IS_ESCAPED_CHAR(c) ((c) == '"' || (c) == '\\')
+
+/* Prefix every '"' and '\' with '\'; returns str itself if there are none. */
+const char *str_escape(const char *str);
+
+/* Append the first src_size bytes of src to dest, leaving out all '\'. */
+void str_append_unescaped(String *dest, const char *src, size_t src_size);
+
+/* Remove '\' characters from str in place; a final '\' is kept. */
+void str_unescape(char *str);
+
+#endif
--- a/src/lib/strfuncs.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib/strfuncs.c Fri Jan 03 17:57:12 2003 +0200 @@ -462,18 +462,6 @@ return str; } -void str_remove_escapes(char *str) -{ - char *dest; - - for (dest = str; *str != '\0'; str++) { - if (*str != '\\' || str[1] == '\0') - *dest++ = *str; - } - - *dest = '\0'; -} - const char **t_strsplit(const char *data, const char *separators) { const char **array;
--- a/src/lib/strfuncs.h Fri Jan 03 07:36:20 2003 +0200 +++ b/src/lib/strfuncs.h Fri Jan 03 17:57:12 2003 +0200 @@ -49,7 +49,6 @@ char *str_ucase(char *str); char *str_lcase(char *str); -void str_remove_escapes(char *str); /* seprators is an array of separator characters, not a separator string. */ const char **t_strsplit(const char *data, const char *separators);
--- a/src/login/client.c Fri Jan 03 07:36:20 2003 +0200 +++ b/src/login/client.c Fri Jan 03 17:57:12 2003 +0200 @@ -8,6 +8,7 @@ #include "ostream.h" #include "process-title.h" #include "safe-memset.h" +#include "strescape.h" #include "client.h" #include "client-authenticate.h" #include "ssl-proxy.h" @@ -157,7 +158,7 @@ if (*line == '"') *line++ = '\0'; - str_remove_escapes(start); + str_unescape(start); } else { start = line; while (*line != '\0' && *line != ' ')