Mercurial > dovecot > original-hg > dovecot-1.2
view src/lib-mail/message-header-search.c @ 568:f2aa58c2afd0 HEAD
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
characters are compared case-insensitively.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 03 Nov 2002 10:39:43 +0200 |
parents | |
children | debb8468514e |
line wrap: on
line source
/* Copyright (C) 2002 Timo Sirainen */

#include "lib.h"
#include "base64.h"
#include "hex-binary.h"
#include "charset-utf8.h"
#include "rfc822-tokenize.h"
#include "message-header-search.h"

#include <ctype.h>

/* Incremental search state. The header may be fed to
   message_header_search() in several blocks, so everything needed to
   continue a partially matched key lives here. */
struct _HeaderSearchContext {
	/* search key as returned by charset_to_ucase_utf8(), and its length */
	const unsigned char *key;
	size_t key_len;

	/* matches[] has key_len entries. Each active entry holds how many
	   leading key bytes one candidate match has matched so far;
	   match_count is the number of active entries. */
	size_t *matches;
	ssize_t match_count;

	/* last byte of the previously searched block was '\n' */
	unsigned int last_newline:1;
	/* set while searching the decoded contents of an encoded word */
	unsigned int submatch:1;
	/* end of headers was seen, further searches return FALSE */
	unsigned int eoh:1;
	/* key was given without a charset (charset == NULL at init) */
	unsigned int unknown_charset:1;
};

/* Allocate a search context from pool for the given key.

   key is passed through charset_to_ucase_utf8() together with charset and
   unknown_charset; if that returns NULL, NULL is returned. Otherwise the
   converted key is duplicated into pool and the match table is allocated
   with one slot per key byte. */
HeaderSearchContext *
message_header_search_init(Pool pool, const char *key, const char *charset,
			   int *unknown_charset)
{
	HeaderSearchContext *ctx;
	size_t size;

	ctx = p_new(pool, HeaderSearchContext, 1);

	/* get the key uppercased */
	size = strlen(key);
	ctx->key = charset_to_ucase_utf8((const unsigned char *) key, &size,
					 charset, unknown_charset);
	if (ctx->key == NULL)
		return NULL;

	ctx->key = p_strdup(pool, ctx->key);
	ctx->key_len = size;
	ctx->unknown_charset = charset == NULL;

	ctx->matches = p_malloc(pool, sizeof(size_t) * ctx->key_len);
	/* match_count is a ssize_t and may grow up to key_len */
	i_assert(ctx->key_len <= SSIZE_T_MAX);
	return ctx;
}

/* Decode "Q" encoded text from src (size bytes) into dest.

   '_' becomes a space, and "=XY" becomes a single byte when
   hex_to_binary() accepts the two characters (returns 1). Anything else is
   copied through unchanged. Every input byte produces at most one output
   byte, so dest needs room for size bytes.

   Returns the number of bytes written to dest. */
static size_t
quoted_printable_decode(const unsigned char *src, size_t size,
			unsigned char *dest)
{
	const unsigned char *end;
	unsigned char *dest_start;
	char hexbuf[3];

	hexbuf[2] = '\0';

	dest_start = dest;
	end = src + size;

	for (; src != end; src++) {
		if (*src == '_') {
			*dest++ = ' ';
			continue;
		}

		/* both hex digits must be inside the buffer */
		if (*src == '=' && src+2 < end) {
			hexbuf[0] = src[1];
			hexbuf[1] = src[2];

			if (hex_to_binary(hexbuf, dest) == 1) {
				dest++;
				src += 2;
				continue;
			}
		}

		*dest++ = *src;
	}

	return (size_t) (dest - dest_start);
}

/* Convert already-decoded data with charset_to_ucase_utf8() and search the
   result for the key. The search runs with ctx->submatch set, which makes
   message_header_search() skip its encoded-word and newline handling and
   its ASCII uppercasing.

   Returns FALSE if the conversion fails, otherwise the search result. */
static int match_data(const unsigned char *data, size_t size,
		      const char *charset, HeaderSearchContext *ctx)
{
	int ret;

	if (ctx->unknown_charset) {
		/* we don't know the source charset, so assume we want to
		   match using same charsets */
		charset = NULL;
	}

	data = (const unsigned char *) charset_to_ucase_utf8(data, &size,
							     charset, NULL);
	if (data == NULL) {
		/* unknown character set, or invalid data */
		return FALSE;
	}

	ctx->submatch = TRUE;
	ret = message_header_search(data, &size, ctx);
	ctx->submatch = FALSE;

	return ret;
}

/* Parse and search one encoded word of the form =?charset?encoding?text?=

   On entry *start points just past the leading "=?" and end is the end of
   the current block. When the closing '?' is found, *start is moved to the
   last byte of the encoded word so that the caller's loop increment steps
   past it; otherwise *start is left untouched.

   Only single-letter encodings Q/q and B/b are decoded. For anything else,
   for an unterminated word, or when base64_decode() returns a negative
   size, the partial match state is dropped (match_count = 0) and FALSE is
   returned. Otherwise returns the result of match_data(). */
static int match_encoded(const unsigned char **start, const unsigned char *end,
			 HeaderSearchContext *ctx)
{
	const unsigned char *p, *encoding, *text, *new_end;
	const char *charset;
	unsigned char *buf;
	ssize_t size;
	int ok, ret;

	/* first split the string =?charset?encoding?text?= */
	ok = FALSE;
	charset = (const char *) *start;
	encoding = NULL;
	text = NULL;
	for (p = *start; p != end; p++) {
		if (*p == '?') {
			if (encoding == NULL) {
				/* first '?': end of charset */
				charset = t_strdup_until(charset, p);
				encoding = p+1;
			} else if (text == NULL) {
				/* second '?': end of encoding, which must
				   be exactly one character long */
				if (p != encoding+1)
					encoding = "?";
				else if (*encoding == 'Q' || *encoding == 'q')
					encoding = "Q";
				else if (*encoding == 'B' || *encoding == 'b')
					encoding = "B";
				else
					encoding = "?";

				text = p+1;
			} else {
				/* third '?': end of text, optionally
				   followed by the closing '=' */
				new_end = p;

				p++;
				if (p != end && *p == '=')
					p++;

				end = new_end;
				*start = p-1;
				ok = TRUE;
				break;
			}
		}
	}

	if (ok && *encoding != '?') {
		t_push();

		/* decoded output is never longer than the encoded text */
		size = (ssize_t) (end - text);
		buf = t_malloc(size);

		if (*encoding == 'Q')
			size = quoted_printable_decode(text, size, buf);
		else
			size = base64_decode(text, size, buf);

		if (size >= 0) {
			/* non-corrupted encoding */
			ret = match_data(buf, size, charset, ctx);
			t_pop();
			return ret;
		}

		t_pop();
	}

	/* non-supported encoding, we can't match it */
	ctx->match_count = 0;
	return FALSE;
}

/* Search one block of header data for the key stored in ctx.

   header_block / *header_size describe the block. On return *header_size
   is set to the number of bytes consumed (up to and including the byte
   that completed a match, or up to the end of headers).

   Partial matches, the "previous block ended in newline" flag and the
   end-of-headers flag are kept in ctx, so consecutive blocks of the same
   header can be passed in successive calls. Once the end of headers has
   been seen, further calls return FALSE until
   message_header_search_reset() is called.

   Returns TRUE if the key was found, FALSE otherwise. */
int message_header_search(const unsigned char *header_block,
			  size_t *header_size, HeaderSearchContext *ctx)
{
	const unsigned char *p, *end;
	unsigned char chr;
	ssize_t i;
	int found;

	if (ctx->eoh || *header_size == 0)
		return FALSE;

	end = header_block + *header_size;

	found = FALSE;
	for (p = header_block; p != end; p++) {
		if (p[0] == '=' && p+1 != end && p[1] == '?' &&
		    !ctx->submatch) {
			/* encoded string. read it. */
			p += 2;
			if (match_encoded(&p, end, ctx)) {
				found = TRUE;
				break;
			}

			if (p == end) {
				/* the block ended right after "=?". there's
				   nothing left to scan, and continuing would
				   step p past the end of the block. */
				break;
			}
			continue;
		}

		/* only ASCII is compared case-insensitively; submatch data
		   has already been converted by match_data() */
		chr = ctx->submatch || (*p & 0x80) != 0 ? *p : i_toupper(*p);

		if (((p == header_block && ctx->last_newline) ||
		     (p != header_block && p[-1] == '\n')) && !ctx->submatch) {
			/* newline */
			if (!IS_LWSP(*p)) {
				/* not a long header, reset matches */
				ctx->match_count = 0;

				/* and see if we're at end of header */
				if (*p == '\n') {
					p++;
					ctx->eoh = TRUE;
					break;
				}

				/* don't read p[1] past the end of block */
				if (*p == '\r' && p+1 != end && p[1] == '\n') {
					p += 2;
					ctx->eoh = TRUE;
					break;
				}
			}
			/* first byte after a line break is compared as a
			   single space */
			chr = ' ';
		}

		if (*p == '\r' || *p == '\n')
			continue;

		/* advance or drop every candidate match. walk backwards so
		   that removing entry i doesn't disturb unvisited ones. */
		for (i = ctx->match_count-1; i >= 0; i--) {
			if (ctx->key[ctx->matches[i]] == chr) {
				if (++ctx->matches[i] == ctx->key_len) {
					/* full match */
					p++;
					found = TRUE;
					break;
				}
			} else {
				/* non-match */
				ctx->match_count--;
				if (i != ctx->match_count) {
					/* memmove() takes a byte count, so
					   scale by the element size */
					memmove(ctx->matches + i,
						ctx->matches + i + 1,
						(size_t)(ctx->match_count - i) *
						sizeof(*ctx->matches));
				}
			}
		}

		if (found)
			break;

		if (chr == ctx->key[0]) {
			if (ctx->key_len == 1) {
				/* only one character in search key */
				p++;
				found = TRUE;
				break;
			}
			/* start a new candidate with one byte matched */
			i_assert((size_t)ctx->match_count < ctx->key_len);
			ctx->matches[ctx->match_count++] = 1;
		}
	}

	*header_size = (size_t) (p - header_block);

	/* *header_size was non-zero on entry, so end[-1] is inside the block */
	ctx->last_newline = end[-1] == '\n';
	return found;
}

/* Make ctx usable for searching a new header: clear the end-of-headers
   flag and drop all partial matches. */
void message_header_search_reset(HeaderSearchContext *ctx)
{
	ctx->eoh = FALSE;
	ctx->match_count = 0;
}