Mercurial > dovecot > core-2.2
changeset 793:ab093fefe04b HEAD
Forgot from last SORT commit.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 17 Dec 2002 06:33:38 +0200 |
parents | d573c53946ac |
children | 555e620ee8f1 |
files | src/lib-imap/imap-base-subject.c src/lib-imap/imap-base-subject.h |
diffstat | 2 files changed, 269 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-imap/imap-base-subject.c Tue Dec 17 06:33:38 2002 +0200 @@ -0,0 +1,260 @@ +/* Copyright (C) 2002 Timo Sirainen */ + +#include "lib.h" +#include "buffer.h" +#include "charset-utf8.h" +#include "message-header-decode.h" +#include "imap-base-subject.h" + +static int header_decode(const unsigned char *data, size_t size, + const char *charset, void *context) +{ + Buffer *buf = context; + const char *utf8_str; + unsigned char *buf_data; + size_t i, used_size; + + if (charset == NULL) { + /* It's ASCII. */ + buffer_append(buf, data, size); + } else { + t_push(); + utf8_str = charset_to_utf8_string(charset, NULL, data, size, &size); + if (utf8_str == NULL) + size = 0; + else + buffer_append(buf, utf8_str, size); + t_pop(); + } + + if (size > 0) { + /* @UNSAFE: uppercase it. Current draft specifies that we + should touch only ASCII. */ + buf_data = buffer_get_modifyable_data(buf, &used_size); + for (i = used_size - size; i < used_size; i++) { + if (buf_data[i] >= 'a' && buf_data[i] <= 'z') + buf_data[i] = buf_data[i] - 'a' + 'A'; + } + } + + return TRUE; +} + +static void pack_whitespace(Buffer *buf) +{ + char *data, *dest; + int last_lwsp; + + data = buffer_get_modifyable_data(buf, NULL); + + /* check if we need to do anything */ + while (*data != '\0') { + if (*data == '\t' || + (*data == ' ' && (data[1] == ' ' || data[1] == '\t'))) + break; + data++; + } + + if (*data == '\0') + return; + + /* @UNSAFE: convert/pack the whitespace */ + dest = data; last_lwsp = FALSE; + while (*data != '\0') { + if (*data == '\t' || *data == ' ') { + if (!last_lwsp) { + *dest++ = ' '; + last_lwsp = TRUE; + } + } else { + *dest++ = *data; + last_lwsp = FALSE; + } + data++; + } + *dest = '\0'; + + data = buffer_get_modifyable_data(buf, NULL); + buffer_set_used_size(buf, (size_t) (dest - data)+1); +} + +static void remove_subj_trailers(Buffer *buf) +{ + const char *data; + size_t orig_size, size; + + /* subj-trailer = "(fwd)" / WSP */ + data = buffer_get_data(buf, &orig_size); + + if (orig_size < 2) /* size includes trailing \0 */ + return; + + for (size = orig_size-2; size > 0; ) { + if (data[size] == ' ') + size--; + else if (size >= 5 && memcmp(data + size - 5, "(fwd)", 5) == 0) + size -= 5; + else + break; + } + + if (size != orig_size-2) { + buffer_set_used_size(buf, size); + buffer_append_c(buf, '\0'); + } +} + +static int remove_blob(const char **datap) +{ + const char *data = *datap; + + if (*data != '[') + return FALSE; + + while (*data != '\0' && *data != '[' && *data != ']') + data++; + + if (*data != ']') + return FALSE; + + data++; + if (*data == ' ') + data++; + + *datap = data; + return TRUE; +} + +static int remove_subj_leader(Buffer *buf) +{ + const char *data, *orig_data; + int ret = FALSE; + + /* subj-leader = (*subj-blob subj-refwd) / WSP + + subj-blob = "[" *BLOBCHAR "]" *WSP + subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" + + BLOBCHAR = %x01-5a / %x5c / %x5e-7f + ; any CHAR except '[' and ']' */ + orig_data = data = buffer_get_data(buf, NULL); + + if (*data == ' ') { + /* independent from checks below - always removed */ + data++; + buffer_set_start_pos(buf, buffer_get_start_pos(buf)+1); + ret = TRUE; + } + + while (*data == '[') { + if (!remove_blob(&data)) + return ret; + } + + if (strncasecmp(data, "re", 2) == 0) + data += 2; + else if (strncasecmp(data, "fwd", 3) == 0) + data += 3; + else if (strncasecmp(data, "fw", 2) == 0) + data += 2; + else + return ret; + + if (*data == ' ') + data++; + + if (*data == '[' && !remove_blob(&data)) + return ret; + + if (*data != ':') + return ret; + + data++; + buffer_set_start_pos(buf, buffer_get_start_pos(buf) + + (size_t) (data - orig_data)); + return TRUE; +} + +static int remove_blob_when_nonempty(Buffer *buf) +{ + const char *data, *orig_data; + + orig_data = data = buffer_get_data(buf, NULL); + if (*data == '[' && remove_blob(&data) && *data != '\0') { + buffer_set_start_pos(buf, buffer_get_start_pos(buf) + + (size_t) (data - orig_data)); + return TRUE; + } + + return FALSE; +} + +static int remove_subj_fwd_hdr(Buffer *buf) +{ + const char *data; + size_t size; + + /* subj-fwd = subj-fwd-hdr subject subj-fwd-trl + subj-fwd-hdr = "[fwd:" + subj-fwd-trl = "]" */ + data = buffer_get_data(buf, &size); + + if (strncasecmp(data, "[fwd:", 5) != 0) + return FALSE; + + if (data[size-2] != ']') + return FALSE; + + buffer_set_used_size(buf, size-2); + buffer_append_c(buf, '\0'); + + buffer_set_start_pos(buf, buffer_get_start_pos(buf) + 5); + return TRUE; +} + +const char *imap_get_base_subject_cased(Pool pool, const char *subject) +{ + Buffer *buf; + size_t subject_len; + int found; + + subject_len = strlen(subject); + buf = buffer_create_dynamic(pool, subject_len, (size_t)-1); + + /* (1) Convert any RFC 2047 encoded-words in the subject to + UTF-8. Convert all tabs and continuations to space. + Convert all multiple spaces to a single space. */ + message_header_decode(subject, subject_len, header_decode, buf); + buffer_append_c(buf, '\0'); + + pack_whitespace(buf); + + do { + /* (2) Remove all trailing text of the subject that matches + the subj-trailer ABNF, repeat until no more matches are + possible. */ + remove_subj_trailers(buf); + + do { + /* (3) Remove all prefix text of the subject that + matches the subj-leader ABNF. */ + found = remove_subj_leader(buf); + + /* (4) If there is prefix text of the subject that + matches the subj-blob ABNF, and removing that prefix + leaves a non-empty subj-base, then remove the prefix + text. */ + found = remove_blob_when_nonempty(buf) || found; + + /* (5) Repeat (3) and (4) until no matches remain. */ + } while (found); + + /* (6) If the resulting text begins with the subj-fwd-hdr ABNF + and ends with the subj-fwd-trl ABNF, remove the + subj-fwd-hdr and subj-fwd-trl and repeat from step (2). */ + } while (remove_subj_fwd_hdr(buf)); + + /* (7) The resulting text is the "base subject" used in the + SORT. */ + return buffer_get_data(buf, NULL); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-imap/imap-base-subject.h Tue Dec 17 06:33:38 2002 +0200 @@ -0,0 +1,9 @@ +#ifndef __IMAP_BASE_SUBJECT_H +#define __IMAP_BASE_SUBJECT_H + +/* Returns the base subject of the given string, according to + draft-ietf-imapext-sort-10. String is returned so that it's suitable for + strcmp() comparing with another base subject. */ +const char *imap_get_base_subject_cased(Pool pool, const char *subject); + +#endif