Mercurial > dovecot > core-2.2
changeset 13129:4ed44f06c54c
fts: Don't pass NUL bytes to FTS backends. NULs can confuse them.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 03 Aug 2011 18:59:07 +0300 |
parents | 62c8eadd09d2 |
children | 3b4612e2a25a |
files | src/plugins/fts/fts-build.c src/plugins/fts/fts-parser.c src/plugins/fts/fts-parser.h |
diffstat | 3 files changed, 68 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts/fts-build.c	Wed Aug 03 18:58:45 2011 +0300
+++ b/src/plugins/fts/fts-build.c	Wed Aug 03 18:59:07 2011 +0300
@@ -56,6 +56,35 @@
 		fts_build_parse_content_disposition(ctx, hdr);
 }
 
+/* Feed an unstructured header's full value to the FTS backend, with any NUL
+   bytes replaced by spaces. The header itself is never modified: a private
+   copy is made only once the first NUL is found. */
+static void
+fts_build_unstructured_header(struct fts_storage_build_context *ctx,
+			      const struct message_header_line *hdr)
+{
+	const unsigned char *data = hdr->full_value;
+	unsigned char *buf = NULL;
+	unsigned int i;
+
+	/* @UNSAFE: if there are any NULs, replace them with spaces */
+	for (i = 0; i < hdr->full_value_len; i++) {
+		if (data[i] == '\0') {
+			if (buf == NULL) {
+				buf = i_malloc(hdr->full_value_len);
+				memcpy(buf, data, i);
+			}
+			buf[i] = ' ';
+		} else if (buf != NULL) {
+			buf[i] = data[i];
+		}
+	}
+	/* index the sanitized copy if one was needed, not the raw header */
+	(void)fts_backend_update_build_more(ctx->update_ctx,
+		buf != NULL ? buf : data, hdr->full_value_len);
+	i_free(buf);
+}
+
 static void fts_build_mail_header(struct fts_storage_build_context *ctx,
 				  const struct message_block *block)
 {
@@ -78,9 +107,8 @@
 
 	if (!message_header_is_address(hdr->name)) {
 		/* regular unstructured header */
-		(void)fts_backend_update_build_more(ctx->update_ctx,
-						    hdr->full_value,
-						    hdr->full_value_len);
+		/* NULs are replaced with spaces before indexing */
+		fts_build_unstructured_header(ctx, hdr);
 	} else T_BEGIN {
 		/* message address. normalize it to give better
 		   search results. */
@@ -136,6 +164,8 @@
 		*binary_body_r = TRUE;
 		key.type = FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY;
 	}
+	if (ctx->body_parser == NULL)
+		ctx->body_parser = fts_parser_text_init();
 	key.body_content_type = content_type;
 	key.body_content_disposition = ctx->content_disposition;
 	return fts_backend_update_set_build_key(ctx->update_ctx, &key);
--- a/src/plugins/fts/fts-parser.c	Wed Aug 03 18:58:45 2011 +0300
+++ b/src/plugins/fts/fts-parser.c	Wed Aug 03 18:59:07 2011 +0300
@@ -26,11 +26,45 @@
 	return FALSE;
 }
 
+/* Create a parser with no callbacks set (this assumes i_new() zero-fills):
+   fts_parser_more() then only makes the block valid UTF-8 without NULs. */
+struct fts_parser *fts_parser_text_init(void)
+{
+	return i_new(struct fts_parser, 1);
+}
+
+/* Returns TRUE if any of the first size bytes of data is NUL. */
+static bool data_has_nuls(const unsigned char *data, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		if (data[i] == '\0')
+			return TRUE;
+	}
+	return FALSE;
+}
+
+/* Overwrite every NUL byte in buf with a space, in place. */
+static void replace_nul_bytes(buffer_t *buf)
+{
+	unsigned char *data;
+	size_t i, size;
+
+	data = buffer_get_modifiable_data(buf, &size);
+	for (i = 0; i < size; i++) {
+		if (data[i] == '\0')
+			data[i] = ' ';
+	}
+}
+
 void fts_parser_more(struct fts_parser *parser, struct message_block *block)
 {
-	parser->v.more(parser, block);
+	if (parser->v.more != NULL)
+		parser->v.more(parser, block);
 
-	if (!uni_utf8_data_is_valid(block->data, block->size)) {
+	if (!uni_utf8_data_is_valid(block->data, block->size) ||
+	    data_has_nuls(block->data, block->size)) {
 		/* output isn't valid UTF-8. make it. */
 		if (parser->utf8_output == NULL) {
 			parser->utf8_output =
@@ -40,6 +74,12 @@
 		}
-		(void)uni_utf8_get_valid_data(block->data, block->size,
-					      parser->utf8_output);
+		if (uni_utf8_get_valid_data(block->data, block->size,
+					    parser->utf8_output)) {
+			/* valid UTF-8 with NULs: nothing was copied to
+			   utf8_output, so copy the data ourselves */
+			buffer_append(parser->utf8_output, block->data,
+				      block->size);
+		}
+		replace_nul_bytes(parser->utf8_output);
 		block->data = parser->utf8_output->data;
 		block->size = parser->utf8_output->used;
 	}
@@ -53,5 +93,8 @@
 
 	if (parser->utf8_output != NULL)
 		buffer_free(&parser->utf8_output);
-	parser->v.deinit(parser);
+	if (parser->v.deinit != NULL)
+		parser->v.deinit(parser);
+	else
+		i_free(parser);
 }
--- a/src/plugins/fts/fts-parser.h	Wed Aug 03 18:58:45 2011 +0300
+++ b/src/plugins/fts/fts-parser.h	Wed Aug 03 18:59:07 2011 +0300
@@ -23,6 +23,10 @@
 bool fts_parser_init(struct mail_user *user,
 		     const char *content_type, const char *content_disposition,
 		     struct fts_parser **parser_r);
+/* Create a parser that has no callbacks of its own. Blocks given to it are
+   only made valid UTF-8, with NUL bytes replaced by spaces. */
+struct fts_parser *fts_parser_text_init(void);
+
 /* The parser is initially called with message body blocks. Once message is
    finished, it's still called with incoming size=0 while the parser increases
    it to non-zero. */