changeset 792:d573c53946ac HEAD

Full not-too-well-tested support for SORT extension. Required a few library interface changes.
author Timo Sirainen <tss@iki.fi>
date Tue, 17 Dec 2002 06:28:40 +0200
parents 6efced4f80de
children ab093fefe04b
files configure.in src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h src/lib-imap/Makefile.am src/lib-imap/imap-envelope.c src/lib-imap/imap-envelope.h src/lib-storage/index/index-search.c src/lib-storage/index/index-sort.c src/lib-storage/mail-sort.c src/lib-storage/mail-sort.h
diffstat 11 files changed, 177 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/configure.in	Tue Dec 17 05:00:44 2002 +0200
+++ b/configure.in	Tue Dec 17 06:28:40 2002 +0200
@@ -679,7 +679,7 @@
 dnl ** capabilities
 dnl **
 
-capability="IMAP4rev1"
+capability="IMAP4rev1 SORT"
 if test "$have_ssl" != "no"; then
 	capability="$capability STARTTLS"
 fi
--- a/src/lib-charset/charset-iconv.c	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-charset/charset-iconv.c	Tue Dec 17 06:28:40 2002 +0200
@@ -114,10 +114,10 @@
 	return ret;
 }
 
-const char *
-charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
-			     const unsigned char *data, size_t size,
-			     size_t *utf8_size_r)
+static const char *
+charset_to_utf8_string_int(const char *charset, int *unknown_charset,
+			   const unsigned char *data, size_t size,
+			   size_t *utf8_size_r, int ucase)
 {
 	iconv_t cd;
 	ICONV_CONST char *inbuf;
@@ -130,6 +130,13 @@
 	    strcasecmp(charset, "UTF8") == 0) {
 		if (unknown_charset != NULL)
 			*unknown_charset = FALSE;
+
+		if (!ucase) {
+			if (utf8_size_r != NULL)
+				*utf8_size_r = size;
+			return t_strndup((const char *) data, size);
+		}
+
 		return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
 	}
 
@@ -170,10 +177,29 @@
 	*outpos++ = '\0';
 	t_buffer_alloc((size_t) (outpos - outbuf));
 
-	str_ucase(outbuf); /* FIXME: utf8 */
+	if (ucase)
+		str_ucase(outbuf); /* FIXME: utf8 */
 
 	iconv_close(cd);
 	return outbuf;
 }
 
+const char *
+charset_to_utf8_string(const char *charset, int *unknown_charset,
+		       const unsigned char *data, size_t size,
+		       size_t *utf8_size_r)
+{
+	return charset_to_utf8_string_int(charset, unknown_charset,
+					  data, size, utf8_size_r, FALSE);
+}
+
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+			     const unsigned char *data, size_t size,
+			     size_t *utf8_size_r)
+{
+	return charset_to_utf8_string_int(charset, unknown_charset,
+					  data, size, utf8_size_r, TRUE);
+}
+
 #endif
--- a/src/lib-charset/charset-utf8.c	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-charset/charset-utf8.c	Tue Dec 17 06:28:40 2002 +0200
@@ -86,6 +86,27 @@
 }
 
 const char *
+charset_to_utf8_string(const char *charset, int *unknown_charset,
+		       const unsigned char *data, size_t size,
+		       size_t *utf8_size_r)
+{
+	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
+	    strcasecmp(charset, "ascii") == 0 ||
+	    strcasecmp(charset, "UTF-8") == 0 ||
+	    strcasecmp(charset, "UTF8") == 0) {
+		if (unknown_charset != NULL)
+			*unknown_charset = FALSE;
+		if (utf8_size_r != NULL)
+			*utf8_size_r = size;
+		return t_strndup((const char *) data, size);
+	} else {
+		if (unknown_charset != NULL)
+			*unknown_charset = TRUE;
+		return NULL;
+	}
+}
+
+const char *
 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
 			     const unsigned char *data, size_t size,
 			     size_t *utf8_size_r)
--- a/src/lib-charset/charset-utf8.h	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-charset/charset-utf8.h	Tue Dec 17 06:28:40 2002 +0200
@@ -24,9 +24,13 @@
 charset_to_ucase_utf8(CharsetTranslation *t,
 		      const unsigned char *src, size_t *src_size, Buffer *dest);
 
-/* Simple wrapper for above functions. If utf8_size is non-NULL, it's set
+/* Simple wrappers for above functions. If utf8_size_r is non-NULL, it's set
    to same as strlen(returned data). */
 const char *
+charset_to_utf8_string(const char *charset, int *unknown_charset,
+		       const unsigned char *data, size_t size,
+		       size_t *utf8_size_r);
+const char *
 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
 			     const unsigned char *data, size_t size,
 			     size_t *utf8_size_r);
--- a/src/lib-imap/Makefile.am	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-imap/Makefile.am	Tue Dec 17 06:28:40 2002 +0200
@@ -2,9 +2,11 @@
 
 INCLUDES = \
 	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-charset \
 	-I$(top_srcdir)/src/lib-mail
 
 libimap_a_SOURCES = \
+	imap-base-subject.c \
 	imap-bodystructure.c \
 	imap-date.c \
 	imap-envelope.c \
@@ -15,6 +17,7 @@
 	imap-util.c
 
 noinst_HEADERS = \
+	imap-base-subject.h \
 	imap-bodystructure.h \
 	imap-date.h \
 	imap-envelope.h \
--- a/src/lib-imap/imap-envelope.c	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-imap/imap-envelope.c	Tue Dec 17 06:28:40 2002 +0200
@@ -210,21 +210,44 @@
 	return str->str;
 }
 
+static const char *imap_envelope_parse_first_mailbox(ImapArg *arg)
+{
+	/* ((name route mailbox domain) ...) */
+	if (arg->type != IMAP_ARG_LIST)
+		return NULL;
+
+	if (arg->data.list->size == 0)
+		return "";
+
+	arg = arg->data.list->args;
+	if (arg->type != IMAP_ARG_LIST || arg->data.list->size != 4)
+		return NULL;
+
+	return t_strdup(arg->data.list->args[2].data.str);
+}
+
 static const char *
 imap_envelope_parse_arg(ImapArg *arg, ImapEnvelopeField field,
-			const char *envelope)
+			const char *envelope, ImapEnvelopeResult result)
 {
-	const char *value;
+	const char *value = NULL;
 
 	if (arg->type == IMAP_ARG_NIL)
 		return "";
 
-	if (field >= IMAP_ENVELOPE_FROM && field <= IMAP_ENVELOPE_BCC)
-		value = imap_envelope_parse_address(arg);
-	else if (arg->type == IMAP_ARG_STRING || arg->type == IMAP_ARG_ATOM)
-		value = t_strdup(arg->data.str);
-	else
-		value = NULL;
+	switch (result) {
+	case IMAP_ENVELOPE_RESULT_STRING:
+		if (field >= IMAP_ENVELOPE_FROM && field <= IMAP_ENVELOPE_BCC)
+			value = imap_envelope_parse_address(arg);
+		else if (arg->type == IMAP_ARG_STRING || arg->type == IMAP_ARG_ATOM)
+			value = t_strdup(arg->data.str);
+		break;
+	case IMAP_ENVELOPE_RESULT_FIRST_MAILBOX:
+		i_assert(field >= IMAP_ENVELOPE_FROM &&
+			 field <= IMAP_ENVELOPE_BCC);
+		value = imap_envelope_parse_first_mailbox(arg);
+		break;
+	}
 
 	if (value == NULL) {
 		i_error("Invalid field %u in IMAP envelope: %s",
@@ -234,7 +257,8 @@
 	return value;
 }
 
-const char *imap_envelope_parse(const char *envelope, ImapEnvelopeField field)
+const char *imap_envelope_parse(const char *envelope, ImapEnvelopeField field,
+				ImapEnvelopeResult result)
 {
 	IStream *input;
 	ImapParser *parser;
@@ -251,7 +275,8 @@
 	(void)i_stream_read(input);
 	ret = imap_parser_read_args(parser, field+1, 0, &args);
 	if (ret > (int)field) {
-		value = imap_envelope_parse_arg(&args[field], field, envelope);
+		value = imap_envelope_parse_arg(&args[field], field,
+						envelope, result);
 	} else {
 		i_error("Error parsing IMAP envelope: %s", envelope);
 		value = NULL;
--- a/src/lib-imap/imap-envelope.h	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-imap/imap-envelope.h	Tue Dec 17 06:28:40 2002 +0200
@@ -19,6 +19,11 @@
 	IMAP_ENVELOPE_FIELDS
 } ImapEnvelopeField;
 
+typedef enum {
+	IMAP_ENVELOPE_RESULT_STRING,
+	IMAP_ENVELOPE_RESULT_FIRST_MAILBOX
+} ImapEnvelopeResult;
+
 /* Update envelope data based from given header field */
 void imap_envelope_parse_header(Pool pool, MessagePartEnvelopeData **data,
 				const char *name,
@@ -32,6 +37,7 @@
 
 /* Parse envelope and return specified field unquoted, or NULL if error
    occured. NILs are returned as "". */
-const char *imap_envelope_parse(const char *envelope, ImapEnvelopeField field);
+const char *imap_envelope_parse(const char *envelope, ImapEnvelopeField field,
+				ImapEnvelopeResult result);
 
 #endif
--- a/src/lib-storage/index/index-search.c	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-storage/index/index-search.c	Tue Dec 17 06:28:40 2002 +0200
@@ -379,9 +379,10 @@
 
 	/* get field from hopefully cached envelope */
 	envelope = index->lookup_field(index, ctx->rec, DATA_FIELD_ENVELOPE);
-	if (envelope != NULL)
-		field = imap_envelope_parse(envelope, env_field);
-	else {
+	if (envelope != NULL) {
+		field = imap_envelope_parse(envelope, env_field,
+					    IMAP_ENVELOPE_RESULT_STRING);
+	} else {
 		index->cache_fields_later(index, DATA_FIELD_ENVELOPE);
 		field = NULL;
 	}
--- a/src/lib-storage/index/index-sort.c	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-storage/index/index-sort.c	Tue Dec 17 06:28:40 2002 +0200
@@ -47,7 +47,8 @@
 	return cache == NULL ? 0 : imap_msgcache_get_virtual_size(cache);
 }
 
-static const char *_input_str(MailSortType type, unsigned int id, void *context)
+static const char *_input_mailbox(MailSortType type, unsigned int id,
+				  void *context)
 {
 	IndexSortContext *ctx = context;
 	ImapEnvelopeField env_field;
@@ -57,15 +58,9 @@
 	case MAIL_SORT_CC:
 		env_field = IMAP_ENVELOPE_CC;
 		break;
-	case MAIL_SORT_DATE:
-                env_field = IMAP_ENVELOPE_DATE;
-		break;
 	case MAIL_SORT_FROM:
                 env_field = IMAP_ENVELOPE_FROM;
 		break;
-	case MAIL_SORT_SUBJECT:
-                env_field = IMAP_ENVELOPE_SUBJECT;
-		break;
 	case MAIL_SORT_TO:
                 env_field = IMAP_ENVELOPE_TO;
 		break;
@@ -78,7 +73,34 @@
 	envelope = imap_msgcache_get(search_open_cache(ctx, id),
 				     IMAP_CACHE_ENVELOPE);
 	return envelope == NULL ? NULL :
-		imap_envelope_parse(envelope, env_field);
+		imap_envelope_parse(envelope, env_field,
+				    IMAP_ENVELOPE_RESULT_FIRST_MAILBOX);
+}
+
+static const char *_input_str(MailSortType type, unsigned int id, void *context)
+{
+	IndexSortContext *ctx = context;
+	ImapEnvelopeField env_field;
+	const char *envelope;
+
+	switch (type) {
+	case MAIL_SORT_DATE:
+                env_field = IMAP_ENVELOPE_DATE;
+		break;
+	case MAIL_SORT_SUBJECT:
+                env_field = IMAP_ENVELOPE_SUBJECT;
+		break;
+	default:
+		i_unreached();
+		return NULL;
+	}
+
+	/* get field from hopefully cached envelope */
+	envelope = imap_msgcache_get(search_open_cache(ctx, id),
+				     IMAP_CACHE_ENVELOPE);
+	return envelope == NULL ? NULL :
+		imap_envelope_parse(envelope, env_field,
+				    IMAP_ENVELOPE_RESULT_STRING);
 }
 
 static time_t _input_time(MailSortType type, unsigned int id, void *context)
@@ -131,6 +153,7 @@
 MailSortFuncs index_sort_funcs = {
 	_input_time,
 	_input_uofft,
+	_input_mailbox,
 	_input_str,
 	_input_reset,
 	_output
--- a/src/lib-storage/mail-sort.c	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-storage/mail-sort.c	Tue Dec 17 06:28:40 2002 +0200
@@ -1,8 +1,11 @@
 /* Copyright (C) 2002 Timo Sirainen */
 
+/* Implementation of draft-ietf-imapext-sort-10 sorting algorithm */
+
 #include "lib.h"
 #include "buffer.h"
 #include "ostream.h"
+#include "imap-base-subject.h"
 #include "mail-sort.h"
 
 #include <stdlib.h>
@@ -15,6 +18,7 @@
 	void *func_context;
 
 	Buffer *sort_buffer;
+	Pool temp_pool;
 
 	time_t last_arrival, last_date;
 	uoff_t last_size;
@@ -104,6 +108,7 @@
 						 128 * sizeof(unsigned int),
 						 (size_t)-1);
 
+	ctx->temp_pool = pool_create("Sort", 8192, FALSE);
 	ctx->funcs = funcs;
 	ctx->func_context = context;
 	return ctx;
@@ -113,6 +118,7 @@
 {
 	mail_sort_flush(ctx);
 	buffer_free(ctx->sort_buffer);
+	pool_unref(ctx->temp_pool);
 
 	i_free(ctx->last_cc);
 	i_free(ctx->last_from);
@@ -122,24 +128,31 @@
 	i_free(ctx);
 }
 
-static int sort_strcmp(const char *s1, const char *s2)
+static int addr_strcmp(const char *s1, const char *s2)
 {
 	if (s1 == NULL)
 		return s2 == NULL ? 0 : -1;
 	if (s2 == NULL)
 		return 1;
 
-	return strcasecmp(s1, s2); /* FIXME */
+	/* FIXME: maybe create ascii_strcasecmp()? strcasecmp() may compare
+	   non-ASCII too if locale is set. We don't do that now though. */
+	return strcasecmp(s1, s2);
 }
 
-static int subject_cmp(const char *s1, const char *s2)
+static int subject_cmp(Pool pool, const char *s1, const char *s2)
 {
+	int ret;
+
 	if (s1 == NULL)
 		return s2 == NULL ? 0 : -1;
 	if (s2 == NULL)
 		return 1;
 
-	return strcasecmp(s1, s2); /* FIXME */
+	p_clear(pool);
+	ret = strcmp(imap_get_base_subject_cased(pool, s1),
+		     imap_get_base_subject_cased(pool, s2));
+	return ret;
 }
 
 static void mail_sort_check_flush(MailSortContext *ctx, unsigned int id)
@@ -161,7 +174,7 @@
 	if (ctx->common_mask & MAIL_SORT_CC) {
 		str = ctx->funcs.input_str(MAIL_SORT_CC, id,
 					   ctx->func_context);
-		if (sort_strcmp(str, ctx->last_cc) != 0) {
+		if (addr_strcmp(str, ctx->last_cc) != 0) {
 			i_free(ctx->last_cc);
 			ctx->last_cc = i_strdup(str);
 			changed = TRUE;
@@ -180,7 +193,7 @@
 	if (ctx->common_mask & MAIL_SORT_FROM) {
 		str = ctx->funcs.input_str(MAIL_SORT_FROM, id,
 					   ctx->func_context);
-		if (sort_strcmp(str, ctx->last_from) != 0) {
+		if (addr_strcmp(str, ctx->last_from) != 0) {
 			i_free(ctx->last_from);
 			ctx->last_from = i_strdup(str);
 			changed = TRUE;
@@ -199,7 +212,7 @@
 	if (ctx->common_mask & MAIL_SORT_SUBJECT) {
 		str = ctx->funcs.input_str(MAIL_SORT_SUBJECT, id,
 					   ctx->func_context);
-		if (subject_cmp(str, ctx->last_subject) != 0) {
+		if (subject_cmp(ctx->temp_pool, str, ctx->last_subject) != 0) {
 			i_free(ctx->last_subject);
 			ctx->last_subject = i_strdup(str);
 			changed = TRUE;
@@ -209,7 +222,7 @@
 	if (ctx->common_mask & MAIL_SORT_TO) {
 		str = ctx->funcs.input_str(MAIL_SORT_TO, id,
 					   ctx->func_context);
-		if (sort_strcmp(str, ctx->last_to) != 0) {
+		if (addr_strcmp(str, ctx->last_to) != 0) {
 			i_free(ctx->last_to);
 			ctx->last_to = i_strdup(str);
 			changed = TRUE;
@@ -268,13 +281,17 @@
 		}
 		case MAIL_SORT_CC:
 		case MAIL_SORT_FROM:
-		case MAIL_SORT_TO:
-			ret = sort_strcmp(funcs->input_str(*output, *i1, ctx),
-					  funcs->input_str(*output, *i2, ctx));
+		case MAIL_SORT_TO: {
+			const char *a1, *a2;
+
+			a1 = funcs->input_mailbox(*output, *i1, ctx);
+			a2 = funcs->input_mailbox(*output, *i2, ctx);
+			ret = addr_strcmp(a1, a2);
 			break;
-
+		}
 		case MAIL_SORT_SUBJECT:
-			ret = subject_cmp(funcs->input_str(*output, *i1, ctx),
+			ret = subject_cmp(mail_sort_qsort_context->temp_pool,
+					  funcs->input_str(*output, *i1, ctx),
 					  funcs->input_str(*output, *i2, ctx));
 			break;
 		default:
--- a/src/lib-storage/mail-sort.h	Tue Dec 17 05:00:44 2002 +0200
+++ b/src/lib-storage/mail-sort.h	Tue Dec 17 06:28:40 2002 +0200
@@ -23,14 +23,23 @@
 typedef struct _MailSortContext MailSortContext;
 
 typedef struct {
+	/* arrival, date */
 	time_t (*input_time)(MailSortType type, unsigned int id,
 			     void *context);
+	/* size */
 	uoff_t (*input_uofft)(MailSortType type, unsigned int id,
 			      void *context);
+	/* cc, from, to. Return the mailbox of the first address. */
+	const char *(*input_mailbox)(MailSortType type, unsigned int id,
+				     void *context);
+	/* subject */
 	const char *(*input_str)(MailSortType type, unsigned int id,
 				 void *context);
+
+	/* done parsing this message, free all resources */
 	void (*input_reset)(void *context);
 
+	/* result callback */
 	void (*output)(unsigned int *data, size_t count, void *context);
 } MailSortFuncs;