Mercurial > dovecot > original-hg > dovecot-1.2
view src/lib-mail/message-parser.c @ 956:26cafa3dc09c HEAD
minor optimization
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 12 Jan 2003 01:49:45 +0200 |
parents | 411006be3c66 |
children | 60646878858e |
line wrap: on
line source
/* Copyright (C) 2002 Timo Sirainen */ #include "lib.h" #include "istream.h" #include "strescape.h" #include "message-content-parser.h" #include "message-parser.h" #include "message-size.h" struct message_boundary { struct message_boundary *next; struct message_part *part; const char *boundary; size_t len; }; struct parser_context { pool_t pool; struct message_part *part; char *last_boundary; char *last_content_type; struct message_boundary *boundaries; message_header_callback_t callback; void *context; }; static struct message_part * message_parse_part(struct istream *input, struct parser_context *parser_ctx); static struct message_part * message_parse_body(struct istream *input, struct message_boundary *boundaries, struct message_size *body_size); static struct message_part * message_skip_boundary(struct istream *input, struct message_boundary *boundaries, struct message_size *boundary_size); static void message_size_add_part(struct message_size *dest, struct message_part *part) { dest->physical_size += part->header_size.physical_size + part->body_size.physical_size; dest->virtual_size += part->header_size.virtual_size + part->body_size.virtual_size; dest->lines += part->header_size.lines + part->body_size.lines; } static struct message_part * message_part_append(pool_t pool, struct message_part *parent) { struct message_part *part, **list; part = p_new(pool, struct message_part, 1); part->parent = parent; /* set child position */ part->physical_pos = parent->physical_pos + parent->body_size.physical_size + parent->header_size.physical_size; list = &part->parent->children; while (*list != NULL) list = &(*list)->next; *list = part; return part; } static void parse_content_type(const unsigned char *value, size_t value_len, void *context) { struct parser_context *parser_ctx = context; const char *str; if (parser_ctx->last_content_type != NULL || value_len == 0) return; str = parser_ctx->last_content_type = p_strndup(parser_ctx->pool, value, value_len); if (strcasecmp(str, "message/rfc822") == 0) parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; else if (strncasecmp(str, "text/", 5) == 0) parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT; else if (strncasecmp(str, "multipart/", 10) == 0) { parser_ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART; if (strcasecmp(str+10, "digest") == 0) { parser_ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST; } } } static void parse_content_type_param(const unsigned char *name, size_t name_len, const unsigned char *value, size_t value_len, int value_quoted, void *context) { struct parser_context *parser_ctx = context; if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || name_len != 8 || memcasecmp(name, "boundary", 8) != 0) return; if (parser_ctx->last_boundary == NULL) { parser_ctx->last_boundary = p_strndup(parser_ctx->pool, value, value_len); if (value_quoted) str_unescape(parser_ctx->last_boundary); } } static void parse_header_field(struct message_part *part, const unsigned char *name, size_t name_len, const unsigned char *value, size_t value_len, void *context) { struct parser_context *parser_ctx = context; /* call the user-defined header parser */ if (parser_ctx->callback != NULL) { parser_ctx->callback(part, name, name_len, value, value_len, parser_ctx->context); } if (name_len == 12 && memcasecmp(name, "Content-Type", 12) == 0) { /* we need to know the boundary */ message_content_parse_header(value, value_len, parse_content_type, parse_content_type_param, parser_ctx); } } static struct message_part * message_parse_multipart(struct istream *input, struct parser_context *parser_ctx) { struct message_part *parent_part, *next_part, *part; struct message_boundary *b; /* multipart message. add new boundary */ b = t_new(struct message_boundary, 1); b->part = parser_ctx->part; b->boundary = parser_ctx->last_boundary; b->len = strlen(b->boundary); b->next = parser_ctx->boundaries; parser_ctx->boundaries = b; /* reset fields */ parser_ctx->last_boundary = NULL; parser_ctx->last_content_type = NULL; /* skip the data before the first boundary */ parent_part = parser_ctx->part; next_part = message_skip_boundary(input, parser_ctx->boundaries, &parent_part->body_size); /* now, parse the parts */ while (next_part == parent_part) { /* new child */ part = message_part_append(parser_ctx->pool, parent_part); parser_ctx->part = part; next_part = message_parse_part(input, parser_ctx); /* update our size */ message_size_add_part(&parent_part->body_size, part); if (next_part != parent_part) break; /* skip the boundary */ next_part = message_skip_boundary(input, parser_ctx->boundaries, &parent_part->body_size); } /* remove boundary */ i_assert(parser_ctx->boundaries == b); parser_ctx->boundaries = b->next; return next_part; } #define MUTEX_FLAGS \ (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART) static struct message_part * message_parse_part(struct istream *input, struct parser_context *parser_ctx) { struct message_part *next_part, *part; uoff_t hdr_size; message_parse_header(parser_ctx->part, input, &parser_ctx->part->header_size, parse_header_field, parser_ctx); i_assert((parser_ctx->part->flags & MUTEX_FLAGS) != MUTEX_FLAGS); /* update message position/size */ hdr_size = parser_ctx->part->header_size.physical_size; if (parser_ctx->last_boundary != NULL) return message_parse_multipart(input, parser_ctx); if (parser_ctx->last_content_type == NULL) { if (parser_ctx->part->parent != NULL && (parser_ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART_DIGEST)) { /* when there's no content-type specified and we're below multipart/digest, the assume message/rfc822 content-type */ parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; } else { /* otherwise we default to text/plain */ parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT; } } parser_ctx->last_boundary = NULL; parser_ctx->last_content_type = NULL; if (parser_ctx->part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) { /* message/rfc822 part - the message body begins with headers again, this works pretty much the same as a single multipart/mixed item */ part = message_part_append(parser_ctx->pool, parser_ctx->part); parser_ctx->part = part; next_part = message_parse_part(input, parser_ctx); parser_ctx->part = part->parent; /* our body size is the size of header+body in message/rfc822 */ message_size_add_part(&part->parent->body_size, part); } else { /* normal message, read until the next boundary */ part = parser_ctx->part; next_part = message_parse_body(input, parser_ctx->boundaries, &part->body_size); } return next_part; } struct message_part *message_parse(pool_t pool, struct istream *input, message_header_callback_t callback, void *context) { struct message_part *part; struct parser_context parser_ctx; memset(&parser_ctx, 0, sizeof(parser_ctx)); parser_ctx.pool = pool; parser_ctx.callback = callback; parser_ctx.context = context; parser_ctx.part = part = p_new(pool, struct message_part, 1); message_parse_part(input, &parser_ctx); return part; } /* skip over to next line increasing message size */ static void message_skip_line(struct istream *input, struct message_size *msg_size) { const unsigned char *msg; size_t i, size, startpos; startpos = 0; while (i_stream_read_data(input, &msg, &size, startpos) > 0) { for (i = startpos; i < size; i++) { if (msg[i] == '\n') { if (msg_size != NULL) { if (i == 0 || msg[i-1] != '\r') msg_size->virtual_size++; msg_size->lines++; } break; } } if (i < size) { startpos = i+1; break; } /* leave the last character, it may be \r */ i_stream_skip(input, i - 1); startpos = 1; if (msg_size != NULL) { msg_size->physical_size += i - 1; msg_size->virtual_size += i - 1; } } i_stream_skip(input, startpos); if (msg_size != NULL) { msg_size->physical_size += startpos; msg_size->virtual_size += startpos; } } void message_parse_header(struct message_part *part, struct istream *input, struct message_size *hdr_size, message_header_callback_t callback, void *context) { const unsigned char *msg; size_t i, size, parse_size, startpos, missing_cr_count; size_t line_start, colon_pos, end_pos, name_len, value_len; int ret; if (hdr_size != NULL) memset(hdr_size, 0, sizeof(struct message_size)); missing_cr_count = startpos = line_start = 0; colon_pos = UINT_MAX; for (;;) { ret = i_stream_read_data(input, &msg, &size, startpos+1); if (ret == -2) { /* overflow, line is too long. just skip it. */ i_assert(size > 2); message_skip_line(input, hdr_size); startpos = line_start = 0; colon_pos = UINT_MAX; continue; } if (ret < 0 || (ret <= 0 && size == startpos)) { /* EOF and nothing in buffer. the later check is needed only when there's no message body */ break; } parse_size = size <= startpos+1 ? size : size-1; for (i = startpos; i < parse_size; i++) { if (msg[i] == ':' && colon_pos == UINT_MAX) { colon_pos = i; continue; } if (msg[i] != '\n') continue; if (hdr_size != NULL) hdr_size->lines++; if (i == 0 || msg[i-1] != '\r') { /* missing CR */ missing_cr_count++; } if (i == 0 || (i == 1 && msg[i-1] == '\r')) { /* no headers at all */ break; } if ((i > 0 && msg[i-1] == '\n') || (i > 1 && msg[i-2] == '\n' && msg[i-1] == '\r')) { /* \n\n or \n\r\n - end of headers */ break; } /* make sure the header doesn't continue to next line */ if (i+1 == size || !IS_LWSP(msg[i+1])) { if (colon_pos != UINT_MAX && colon_pos != line_start && callback != NULL && !IS_LWSP(msg[line_start])) { /* we have a valid header line */ /* get length of name-field */ end_pos = colon_pos-1; while (end_pos > line_start && IS_LWSP(msg[end_pos])) end_pos--; name_len = end_pos - line_start + 1; /* get length of value field. skip only the initial LWSP after ':'. some fields may want to keep the extra spaces.. */ colon_pos++; if (colon_pos < i && IS_LWSP(msg[colon_pos])) colon_pos++; value_len = i - colon_pos; if (msg[i-1] == '\r') value_len--; /* and finally call the function */ callback(part, msg + line_start, name_len, msg + colon_pos, value_len, context); } colon_pos = UINT_MAX; line_start = i+1; } } if (i < parse_size) { /* end of header */ startpos = i+1; break; } /* leave the last line to buffer */ if (colon_pos != UINT_MAX) colon_pos -= line_start; if (hdr_size != NULL) hdr_size->physical_size += line_start; i_stream_skip(input, line_start); startpos = i-line_start; line_start = 0; } i_stream_skip(input, startpos); if (hdr_size != NULL) { hdr_size->physical_size += startpos; hdr_size->virtual_size += hdr_size->physical_size + missing_cr_count; i_assert(hdr_size->virtual_size >= hdr_size->physical_size); } if (callback != NULL) { /* "end of headers" notify */ callback(part, NULL, 0, NULL, 0, context); } } static struct message_boundary * boundary_find(struct message_boundary *boundaries, const unsigned char *msg, size_t len) { while (boundaries != NULL) { if (boundaries->len <= len && memcmp(boundaries->boundary, msg, boundaries->len) == 0) return boundaries; boundaries = boundaries->next; } return NULL; } /* read until next boundary is found. if skip_over = FALSE, stop at the [\r]\n before the boundary, otherwise leave it right after the known boundary so the ending "--" can be checked. */ static struct message_boundary * message_find_boundary(struct istream *input, struct message_boundary *boundaries, struct message_size *msg_size, int skip_over) { struct message_boundary *boundary; const unsigned char *msg; size_t i, size, startpos, line_start, missing_cr_count; boundary = NULL; missing_cr_count = startpos = line_start = 0; while (i_stream_read_data(input, &msg, &size, startpos) > 0) { for (i = startpos; i < size; i++) { if (msg[i] != '\n') continue; if (i > line_start+2 && msg[line_start] == '-' && msg[line_start+1] == '-') { /* possible boundary */ boundary = boundary_find(boundaries, msg + line_start + 2, i - line_start - 2); if (boundary != NULL) break; } if (i == 0 || msg[i-1] != '\r') { /* missing CR */ missing_cr_count++; } msg_size->lines++; line_start = i+1; } if (boundary != NULL) break; if (i - line_start > 128 && msg[line_start] == '-' && msg[line_start+1] == '-') { /* long partial line, see if it's a boundary. RFC-2046 says that the boundaries must be 70 chars without "--" or less. We allow a bit larger.. */ boundary = boundary_find(boundaries, msg + line_start + 2, i - line_start - 2); if (boundary != NULL) break; /* nope, we can skip over the line, just leave the last char since it may be \r */ i--; } else { /* leave the last line to buffer, it may be boundary */ i = line_start; if (i > 2) i -= 2; /* leave the \r\n too */ line_start -= i; } i_stream_skip(input, i); msg_size->physical_size += i; msg_size->virtual_size += i; startpos = size - i; } if (boundary != NULL) { if (skip_over) { /* leave the pointer right after the boundary */ line_start += 2 + boundary->len; } else if (line_start > 0 && msg[line_start-1] == '\n') { /* leave the \r\n before the boundary */ line_start--; msg_size->lines--; if (line_start > 0 && msg[line_start-1] == '\r') line_start--; else missing_cr_count--; } startpos = line_start; } i_stream_skip(input, startpos); msg_size->physical_size += startpos; msg_size->virtual_size += startpos + missing_cr_count; i_assert(msg_size->virtual_size >= msg_size->physical_size); return boundary; } static struct message_part * message_parse_body(struct istream *input, struct message_boundary *boundaries, struct message_size *body_size) { struct message_boundary *boundary; if (boundaries == NULL) { message_get_body_size(input, body_size, (uoff_t)-1, NULL); return NULL; } else { boundary = message_find_boundary(input, boundaries, body_size, FALSE); return boundary == NULL ? NULL : boundary->part; } } /* skip data until next boundary is found. if it's end boundary, skip the footer as well. */ static struct message_part * message_skip_boundary(struct istream *input, struct message_boundary *boundaries, struct message_size *boundary_size) { struct message_boundary *boundary; const unsigned char *msg; size_t size; int end_boundary; boundary = message_find_boundary(input, boundaries, boundary_size, TRUE); if (boundary == NULL) return NULL; /* now, see if it's end boundary */ end_boundary = FALSE; if (i_stream_read_data(input, &msg, &size, 1) > 0) end_boundary = msg[0] == '-' && msg[1] == '-'; /* skip the rest of the line */ message_skip_line(input, boundary_size); if (end_boundary) { /* skip the footer */ return message_parse_body(input, boundaries, boundary_size); } return boundary == NULL ? NULL : boundary->part; }