diff src/lib-storage/index/index-search.c @ 0:3b1985cbc908 HEAD

Initial revision
author Timo Sirainen <tss@iki.fi>
date Fri, 09 Aug 2002 12:15:38 +0300
parents
children 1b34ec11fff8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-storage/index/index-search.c	Fri Aug 09 12:15:38 2002 +0300
@@ -0,0 +1,635 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "lib.h"
+#include "iobuffer.h"
+#include "mmap-util.h"
+#include "rfc822-date.h"
+#include "rfc822-tokenize.h"
+#include "index-storage.h"
+#include "mail-search.h"
+
+#include <stdlib.h>
+#include <ctype.h>
+
+#define ARG_SET_RESULT(arg, res) \
+	STMT_START { \
+		(arg)->result = !(arg)->not ? (res) : -(res); \
+	} STMT_END
+
+typedef struct {
+	IndexMailbox *ibox;
+	MailIndexRecord *rec;
+	unsigned int seq;
+} SearchIndexData;
+
+typedef struct {
+	MailSearchArg *args;
+	int custom_header;
+
+	const char *name, *value;
+	unsigned int name_len, value_len;
+} SearchHeaderData;
+
+typedef struct {
+	MailSearchArg *args;
+	const char *msg;
+	size_t size;
+	int last_block;
+} SearchTextData;
+
+/* truncate timestamp to day */
+static time_t timestamp_trunc(time_t t)
+{
+	struct tm *tm;
+
+	tm = localtime(&t);
+	tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
+
+	return mktime(tm);
+}
+
+static int msgset_contains(const char *set, unsigned int match_num,
+			   unsigned int max_num)
+{
+	unsigned int num, num2;
+
+	while (*set != '\0') {
+		if (*set == '*') {
+			set++;
+			num = max_num;
+		} else {
+			num = 0;
+			while (*set >= '0' && *set <= '9') {
+				num = num*10 + (*set-'0');
+				set++;
+			}
+		}
+
+		if (*set == ',' || *set == '\0') {
+			if (num == match_num)
+				return TRUE;
+			if (*set == '\0')
+				return FALSE;
+		} else if (*set == ':') {
+			if (*set == '*') {
+				set++;
+
+				if (match_num >= num && num <= max_num)
+					return TRUE;
+			} else {
+				num2 = 0;
+				while (*set >= '0' && *set <= '9') {
+					num2 = num2*10 + (*set-'0');
+					set++;
+				}
+
+				if (match_num >= num && match_num <= num2)
+					return TRUE;
+			}
+
+			if (*set != ',')
+				return FALSE;
+		}
+
+		set++;
+	}
+
+	return FALSE;
+}
+
+/* Returns >0 = matched, 0 = not matched, -1 = unknown */
+static int search_arg_match_index(IndexMailbox *ibox, MailIndexRecord *rec,
+				  unsigned int seq, MailSearchArgType type,
+				  const char *value)
+{
+	time_t t;
+
+	switch (type) {
+	case SEARCH_ALL:
+		return TRUE;
+	case SEARCH_SET:
+		return msgset_contains(value, seq, ibox->synced_messages_count);
+	case SEARCH_UID:
+		return msgset_contains(value, rec->uid,
+				       ibox->synced_messages_count);
+
+	/* flags */
+	case SEARCH_ANSWERED:
+		return rec->msg_flags & MAIL_ANSWERED;
+	case SEARCH_DELETED:
+		return rec->msg_flags & MAIL_DELETED;
+	case SEARCH_DRAFT:
+		return rec->msg_flags & MAIL_DRAFT;
+	case SEARCH_FLAGGED:
+		return rec->msg_flags & MAIL_FLAGGED;
+	case SEARCH_SEEN:
+		return rec->msg_flags & MAIL_SEEN;
+	case SEARCH_RECENT:
+		return rec->uid >= ibox->index->first_recent_uid;
+	case SEARCH_KEYWORD:
+		return FALSE;
+
+	/* dates */
+	case SEARCH_BEFORE:
+		if (!rfc822_parse_date(value, &t))
+			return FALSE;
+		return rec->internal_date < timestamp_trunc(t);
+	case SEARCH_ON:
+		if (!rfc822_parse_date(value, &t))
+			return FALSE;
+		t = timestamp_trunc(t);
+		return rec->internal_date >= t &&
+			rec->internal_date < t + 3600*24;
+	case SEARCH_SINCE:
+		if (!rfc822_parse_date(value, &t))
+			return FALSE;
+		return rec->internal_date >= timestamp_trunc(t);
+
+	case SEARCH_SENTBEFORE:
+		if (!rfc822_parse_date(value, &t))
+			return FALSE;
+		return rec->sent_date < timestamp_trunc(t);
+	case SEARCH_SENTON:
+		if (!rfc822_parse_date(value, &t))
+			return FALSE;
+		t = timestamp_trunc(t);
+		return rec->sent_date >= t &&
+			rec->sent_date < t + 3600*24;
+	case SEARCH_SENTSINCE:
+		if (!rfc822_parse_date(value, &t))
+			return FALSE;
+		return rec->sent_date >= timestamp_trunc(t);
+
+	/* sizes */
+	case SEARCH_SMALLER:
+		return rec->full_virtual_size < strtoul(value, NULL, 10);
+	case SEARCH_LARGER:
+		return rec->full_virtual_size > strtoul(value, NULL, 10);
+
+	default:
+		return -1;
+	}
+}
+
+static void search_index_arg(MailSearchArg *arg, void *user_data)
+{
+	SearchIndexData *data = user_data;
+
+	switch (search_arg_match_index(data->ibox, data->rec, data->seq,
+				       arg->type, arg->value.str)) {
+	case -1:
+		/* unknown */
+		break;
+	case 0:
+		ARG_SET_RESULT(arg, -1);
+		break;
+	default:
+		ARG_SET_RESULT(arg, 1);
+		break;
+	}
+}
+
+static int match_field(MailIndex *index, MailIndexRecord *rec,
+		       MailField field, const char *value)
+{
+	const char *field_value;
+	unsigned int i, value_len;
+
+	field_value = index->lookup_field(index, rec, field);
+	if (field_value == NULL)
+		return -1;
+
+	/* note: value is already uppercased */
+	value_len = strlen(value);
+	for (i = 0; field_value[i] != '\0'; i++) {
+		if (value[0] == i_toupper(field_value[i]) &&
+		    strncasecmp(value, field_value+i, value_len) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Returns >0 = matched, 0 = not matched, -1 = unknown */
+static int search_arg_match_cached(MailIndex *index, MailIndexRecord *rec,
+				   MailSearchArgType type, const char *value)
+{
+	switch (type) {
+	case SEARCH_FROM:
+		return match_field(index, rec, FIELD_TYPE_FROM, value);
+	case SEARCH_TO:
+		return match_field(index, rec, FIELD_TYPE_TO, value);
+	case SEARCH_CC:
+		return match_field(index, rec, FIELD_TYPE_CC, value);
+	case SEARCH_BCC:
+		return match_field(index, rec, FIELD_TYPE_BCC, value);
+	case SEARCH_SUBJECT:
+		return match_field(index, rec, FIELD_TYPE_SUBJECT, value);
+	default:
+		return -1;
+	}
+}
+
+static void search_cached_arg(MailSearchArg *arg, void *user_data)
+{
+	SearchIndexData *data = user_data;
+
+	switch (search_arg_match_cached(data->ibox->index, data->rec,
+					arg->type, arg->value.str)) {
+	case -1:
+		/* unknown */
+		break;
+	case 0:
+		ARG_SET_RESULT(arg, -1);
+		break;
+	default:
+		ARG_SET_RESULT(arg, 1);
+		break;
+	}
+}
+
+/* needle must be uppercased */
+static int header_value_match(const char *haystack, unsigned int haystack_len,
+			      const char *needle)
+{
+	const char *n;
+	unsigned int i, j, needle_len, max;
+
+	if (*needle == '\0')
+		return TRUE;
+
+	needle_len = strlen(needle);
+	if (haystack_len < needle_len)
+		return FALSE;
+
+	max = haystack_len - needle_len;
+	for (i = 0; i <= max; i++) {
+		if (needle[0] != i_toupper(haystack[i]))
+			continue;
+
+		for (j = i, n = needle; j < haystack_len; j++) {
+			if (haystack[j] == '\r') {
+				if (j+1 != haystack_len)
+					j++;
+			}
+
+			if (haystack[j] == '\n' && j+1 < haystack_len &&
+			    IS_LWSP(haystack[j+1])) {
+				/* long header continuation */
+				j++;
+			}
+
+			if (*n++ != i_toupper(haystack[j]))
+				break;
+
+			if (*n == '\0')
+				return 1;
+		}
+	}
+
+	return -1;
+}
+
+static void search_header_arg(MailSearchArg *arg, void *user_data)
+{
+	SearchHeaderData *data = user_data;
+	const char *value;
+	unsigned int len;
+	int ret;
+
+	/* first check that the field name matches to argument. */
+	switch (arg->type) {
+	case SEARCH_FROM:
+		if (data->name_len != 4 ||
+		    strncasecmp(data->name, "From", 4) != 0)
+			return;
+		value = arg->value.str;
+		break;
+	case SEARCH_TO:
+		if (data->name_len != 2 ||
+		    strncasecmp(data->name, "To", 2) != 0)
+			return;
+		value = arg->value.str;
+		break;
+	case SEARCH_CC:
+		if (data->name_len != 2 ||
+		    strncasecmp(data->name, "Cc", 2) != 0)
+			return;
+		value = arg->value.str;
+		break;
+	case SEARCH_BCC:
+		if (data->name_len != 3 ||
+		    strncasecmp(data->name, "Bcc", 3) != 0)
+			return;
+		value = arg->value.str;
+		break;
+	case SEARCH_SUBJECT:
+		if (data->name_len != 7 ||
+		    strncasecmp(data->name, "Subject", 7) != 0)
+			return;
+		value = arg->value.str;
+		break;
+	case SEARCH_HEADER:
+		data->custom_header = TRUE;
+
+		len = strlen(arg->value.str);
+		if (data->name_len != len ||
+		    strncasecmp(data->name, arg->value.str, len) != 0)
+			return;
+
+		value = arg->hdr_value;
+	default:
+		return;
+	}
+
+	/* then check if the value matches */
+	ret = header_value_match(data->value, data->value_len, value);
+        ARG_SET_RESULT(arg, ret);
+}
+
+static void search_header(MessagePart *part __attr_unused__,
+			  const char *name, unsigned int name_len,
+			  const char *value, unsigned int value_len,
+			  void *user_data)
+{
+	SearchHeaderData *data = user_data;
+
+	if (data->custom_header ||
+	    (name_len == 4 && strncasecmp(name, "From", 4) == 0) ||
+	    (name_len == 2 && strncasecmp(name, "To", 2) == 0) ||
+	    (name_len == 2 && strncasecmp(name, "Cc", 2) == 0) ||
+	    (name_len == 3 && strncasecmp(name, "Bcc", 3) == 0) ||
+	    (name_len == 7 && strncasecmp(name, "Subject", 7) == 0)) {
+		data->name = name;
+		data->value = value;
+		data->name_len = name_len;
+		data->value_len = value_len;
+
+		data->custom_header = FALSE;
+		mail_search_args_foreach(data->args, search_header_arg, data);
+	}
+}
+
+static void search_text(MailSearchArg *arg, SearchTextData *data)
+{
+	const char *p;
+	unsigned int i, len, max;
+
+	if (arg->result != 0)
+		return;
+
+	len = strlen(arg->value.str);
+	max = data->size-len;
+	for (i = 0, p = data->msg; i <= max; i++, p++) {
+		if (i_toupper(*p) == arg->value.str[0] &&
+		    strncasecmp(p, arg->value.str, len) == 0) {
+			/* match */
+			ARG_SET_RESULT(arg, 1);
+			return;
+		}
+	}
+
+	if (data->last_block)
+		ARG_SET_RESULT(arg, -1);
+}
+
+static void search_text_header(MailSearchArg *arg, void *user_data)
+{
+	SearchTextData *data = user_data;
+
+	if (arg->type == SEARCH_TEXT)
+		search_text(arg, data);
+}
+
+static void search_text_body(MailSearchArg *arg, void *user_data)
+{
+	SearchTextData *data = user_data;
+
+	if (arg->type == SEARCH_TEXT || arg->type == SEARCH_BODY)
+		search_text(arg, data);
+}
+
+static int search_arg_match_text(IndexMailbox *ibox, MailIndexRecord *rec,
+				 MailSearchArg *args)
+{
+	const char *msg;
+	void *mmap_base;
+	off_t offset;
+	size_t size, mmap_length;
+	int fd, failed;
+	int have_headers, have_body, have_text;
+
+	/* first check what we need to use */
+	mail_search_args_analyze(args, &have_headers, &have_body, &have_text);
+	if (!have_headers && !have_body && !have_text)
+		return TRUE;
+
+	fd = ibox->index->open_mail(ibox->index, rec, &offset, &size);
+	if (fd == -1)
+		return FALSE;
+
+	mmap_base = mmap_aligned(fd, PROT_READ, offset, size,
+				 (void **) &msg, &mmap_length);
+	if (mmap_base == MAP_FAILED) {
+		failed = TRUE;
+		mail_storage_set_critical(ibox->box.storage, "mmap() failed "
+					  "for msg %u: %m", rec->uid);
+	} else {
+		failed = FALSE;
+		(void)madvise(mmap_base, mmap_length, MADV_SEQUENTIAL);
+
+		if (have_headers) {
+			SearchHeaderData data;
+
+			memset(&data, 0, sizeof(data));
+
+			/* header checks */
+			data.custom_header = TRUE;
+			data.args = args;
+			message_parse_header(NULL, msg, size, NULL,
+					     search_header, &data);
+		}
+
+		if (have_text) {
+			/* first search text from header*/
+			SearchTextData data;
+
+			data.args = args;
+			data.msg = msg;
+			data.size = rec->header_size;
+			data.last_block = FALSE;
+
+			mail_search_args_foreach(args, search_text_header,
+						 &data);
+		}
+
+		if (have_text || have_body) {
+			/* search text from body */
+			SearchTextData data;
+
+			/* FIXME: we should check this in blocks, so the whole
+			   message doesn't need to be in memory */
+			data.args = args;
+			data.msg = msg + rec->header_size;
+			data.size = size - rec->header_size;
+			data.last_block = TRUE;
+
+			mail_search_args_foreach(args, search_text_body, &data);
+		}
+	}
+
+	(void)munmap(mmap_base, mmap_length);
+	(void)close(fd);
+	return !failed;
+}
+
+static void seq_update(const char *set, unsigned int *first_seq,
+		       unsigned int *last_seq, unsigned int max_value)
+{
+	unsigned int seq;
+
+	while (*set != '\0') {
+		if (*set == '*') {
+			seq = max_value;
+			set++;
+		} else {
+			seq = 0;
+			while (*set >= '0' && *set <= '9') {
+				seq = seq*10 + (*set-'0');
+				set++;
+			}
+		}
+
+		if (seq != 0) {
+			if (*first_seq == 0 || seq < *first_seq)
+				*first_seq = seq;
+			if (*last_seq == 0 || seq > *last_seq)
+				*last_seq = seq;
+		}
+
+		seq++;
+	}
+}
+
+static void search_get_sequid(IndexMailbox *ibox, MailSearchArg *args,
+			      unsigned int *first_seq, unsigned int *last_seq,
+			      unsigned int *first_uid, unsigned int *last_uid)
+{
+	for (; args != NULL; args = args->next) {
+		if (args->type == SEARCH_OR || args->type == SEARCH_SUB) {
+			search_get_sequid(ibox, args->value.subargs,
+					  first_seq, last_seq,
+					  first_uid, last_uid);
+		} if (args->type == SEARCH_SET) {
+			seq_update(args->value.str, first_seq, last_seq,
+				   ibox->synced_messages_count);
+		} else if (args->type == SEARCH_UID) {
+			seq_update(args->value.str, first_uid, last_uid,
+				   ibox->index->header->next_uid-1);
+		} else if (args->type == SEARCH_ALL) {
+			/* go through everything */
+			*first_seq = 1;
+			*last_seq = ibox->synced_messages_count;
+			return;
+		}
+	}
+}
+
+static void search_get_sequences(IndexMailbox *ibox, MailSearchArg *args,
+				 unsigned int *first_seq,
+				 unsigned int *last_seq)
+{
+	MailIndexRecord *rec;
+	unsigned int seq, first_uid, last_uid;
+
+	*first_seq = *last_seq = 0;
+	first_uid = last_uid = 0;
+
+	search_get_sequid(ibox, args, first_seq, last_seq,
+			  &first_uid, &last_uid);
+
+	if (first_uid != 0 && (*first_seq != 1 ||
+			       *last_seq != ibox->synced_messages_count)) {
+		/* UIDs were used - see if they affect the sequences */
+		rec = ibox->index->lookup_uid_range(ibox->index,
+						    first_uid, last_uid);
+		if (rec != NULL) {
+			/* update lower UID */
+			seq = ibox->index->get_sequence(ibox->index, rec);
+			if (seq < *first_seq)
+				*first_seq = seq;
+
+			/* update higher UID .. except we don't really
+			   know it and it'd be uselessly slow to find it.
+			   use a kludgy method which might limit the
+			   sequences. */
+			seq += last_uid-first_uid;
+			if (seq >= ibox->synced_messages_count)
+				seq = ibox->synced_messages_count;
+
+			if (seq > *last_seq)
+				*last_seq = seq;
+		}
+	}
+
+	if (*first_seq == 0)
+		*first_seq = 1;
+	if (*last_seq == 0)
+		*last_seq = ibox->synced_messages_count;
+}
+
+static void search_messages(IndexMailbox *ibox, MailSearchArg *args,
+			    IOBuffer *outbuf, int uid_result)
+{
+	SearchIndexData data;
+	MailIndexRecord *rec;
+	unsigned int first_seq, last_seq, seq;
+	char num[MAX_INT_STRLEN+10];
+
+	/* see if we can limit the records we look at */
+	search_get_sequences(ibox, args, &first_seq, &last_seq);
+
+	data.ibox = ibox;
+
+	rec = ibox->index->lookup(ibox->index, first_seq);
+	for (seq = first_seq; rec != NULL && seq <= last_seq; seq++) {
+		data.rec = rec;
+		data.seq = seq;
+
+		mail_search_args_reset(args);
+
+		mail_search_args_foreach(args, search_index_arg, &data);
+		mail_search_args_foreach(args, search_cached_arg, &data);
+
+		if (search_arg_match_text(ibox, rec, args) &&
+		    args->result == 1) {
+			i_snprintf(num, sizeof(num), " %u",
+				   uid_result ? rec->uid : seq);
+			io_buffer_send(outbuf, num, strlen(num));
+		}
+		rec = ibox->index->next(ibox->index, rec);
+	}
+}
+
+int index_storage_search(Mailbox *box, MailSearchArg *args,
+			 IOBuffer *outbuf, int uid_result)
+{
+	IndexMailbox *ibox = (IndexMailbox *) box;
+	int failed;
+
+	if (!ibox->index->set_lock(ibox->index, MAIL_LOCK_SHARED))
+		failed = TRUE;
+	else {
+		io_buffer_send(outbuf, "* SEARCH", 8);
+
+		search_messages(ibox, args, outbuf, uid_result);
+		failed = !ibox->index->set_lock(ibox->index,
+						MAIL_LOCK_UNLOCK);
+		io_buffer_send(outbuf, "\r\n", 2);
+	}
+
+	if (failed)
+		(void)mail_storage_set_index_error(ibox);
+
+	return !failed;
+}