diff src/lib-index/mbox/mbox-append.c @ 22:a946ce1f09b7 HEAD

mbox fixes, not fully working yet but almost :)
author Timo Sirainen <tss@iki.fi>
date Sat, 24 Aug 2002 05:04:45 +0300
parents 82b7de533f98
children 55e09f36d23d
line wrap: on
line diff
--- a/src/lib-index/mbox/mbox-append.c	Fri Aug 23 19:21:36 2002 +0300
+++ b/src/lib-index/mbox/mbox-append.c	Sat Aug 24 05:04:45 2002 +0300
@@ -1,21 +1,13 @@
 /* Copyright (C) 2002 Timo Sirainen */
 
 #include "lib.h"
-#include "mmap-util.h"
 #include "ioloop.h"
+#include "iobuffer.h"
+#include "hex-binary.h"
+#include "md5.h"
 #include "mbox-index.h"
 #include "mail-index-util.h"
 
-#include <time.h>
-#include <ctype.h>
-#include <unistd.h>
-#include <sys/mman.h>
-
-static const char *months[] = {
-	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
-	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
-};
-
 static MailIndexRecord *
 mail_index_record_append(MailIndex *index, time_t internal_date,
 			 size_t full_virtual_size)
@@ -33,123 +25,102 @@
 	return rec;
 }
 
-static time_t from_line_parse_date(const char *msg, size_t size)
+static void mbox_read_message(IOBuffer *inbuf, unsigned int *virtual_size)
 {
-	const char *msg_end;
-	struct tm tm;
-	int i;
-
-	/* From <sender> <date> <moreinfo> */
-	if (strncmp(msg, "From ", 5) != 0)
-		return 0;
-
-	msg_end = msg + size;
+	unsigned char *msg;
+	unsigned int i, size, startpos, vsize;
 
-	/* skip sender */
-	msg += 5;
-	while (*msg != ' ' && msg < msg_end) msg++;
-	while (*msg == ' ' && msg < msg_end) msg++;
-
-	/* next 24 chars are the date in asctime() format,
-	   eg. "Thu Nov 29 22:33:52 2001" */
-	if (msg+24 > msg_end)
-		return 0;
+	/* read until "[\r]\nFrom " is found */
+	startpos = 0; vsize = 0;
+	while (io_buffer_read_data(inbuf, &msg, &size, startpos) >= 0) {
+		for (i = startpos; i < size; i++) {
+			if (msg[i] == '\n') {
+				if (i == 0 || msg[i-1] != '\r') {
+					/* missing CR */
+					vsize++;
+				}
+			} else if (msg[i] == ' ' && i >= 5) {
+				/* See if it's space after "From" */
+				if (msg[i-5] == '\n' && msg[i-4] == 'F' &&
+				    msg[i-3] == 'r' && msg[i-2] == 'o' &&
+				    msg[i-1] == 'm') {
+					/* yes, see if we had \r too */
+					i -= 5;
+					if (i > 0 && msg[i-1] == '\r')
+						i--;
+					else
+						vsize--;
+					break;
+				}
+			}
+		}
 
-	memset(&tm, 0, sizeof(tm));
-
-	/* skip weekday */
-	msg += 4;
+		if (i < size) {
+			startpos = i;
+			break;
+		}
 
-	/* month */
-	for (i = 0; i < 12; i++) {
-		if (strncasecmp(months[i], msg, 3) == 0) {
-			tm.tm_mon = i;
-			break;
+		if (i > 0) {
+			startpos = i < 7 ? i : 7;
+			i -= startpos;
+
+			io_buffer_skip(inbuf, i);
+			vsize += i;
 		}
 	}
 
-	if (i == 12 || msg[3] != ' ')
-		return 0;
-	msg += 4;
-
-	/* day */
-	if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ' ')
-		return 0;
-	tm.tm_mday = (msg[0]-'0') * 10 + (msg[1]-'0');
-	msg += 3;
-
-	/* hour */
-	if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ':')
-		return 0;
-	tm.tm_hour = (msg[0]-'0') * 10 + (msg[1]-'0');
-	msg += 3;
+	io_buffer_skip(inbuf, startpos);
+	vsize += startpos;
 
-	/* minute */
-	if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ':')
-		return 0;
-	tm.tm_min = (msg[0]-'0') * 10 + (msg[1]-'0');
-	msg += 3;
-
-	/* second */
-	if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ' ')
-		return 0;
-	tm.tm_sec = (msg[0]-'0') * 10 + (msg[1]-'0');
-	msg += 3;
-
-	/* year */
-	if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) ||
-	    !i_isdigit(msg[2]) || !i_isdigit(msg[3]))
-		return 0;
-	tm.tm_year = (msg[0]-'0') * 1000 + (msg[1]-'0') * 100 +
-		(msg[2]-'0') * 10 + (msg[3]-'0') - 1900;
-
-	tm.tm_isdst = -1;
-	return mktime(&tm);
+	*virtual_size = vsize;
 }
 
-static void header_func(MessagePart *part __attr_unused__,
-			const char *name, unsigned int name_len,
-			const char *value, unsigned int value_len,
-			void *context)
-{
-	MailIndexRecord *rec = context;
-
-	rec->msg_flags |= mbox_header_get_flags(name, name_len,
-						value, value_len);
-}
-
-static int mbox_index_append_data(MailIndex *index, const char *msg,
-				  off_t offset, size_t physical_size,
-				  size_t virtual_size)
+static int mbox_index_append_next(MailIndex *index, IOBuffer *inbuf)
 {
 	MailIndexRecord *rec;
 	MailIndexUpdate *update;
+        MboxHeaderContext ctx;
 	time_t internal_date;
-	char location[MAX_INT_STRLEN];
-	unsigned int i;
+	off_t start_offset, stop_offset, old_size;
+	unsigned char *data, md5_digest[16];
+	unsigned int size, pos, virtual_size;
+	const char *location;
+
+	/* get the From-line */
+	pos = 0;
+	while (io_buffer_read_data(inbuf, &data, &size, pos) >= 0) {
+		for (; pos < size; pos++) {
+			if (data[pos] == '\n')
+				break;
+		}
 
-	internal_date = from_line_parse_date(msg, physical_size);
+		if (pos < size)
+			break;
+	}
+
+	if (pos == size || size <= 5 || strncmp(data, "From ", 5) != 0) {
+		/* a) no \n found, or line too long
+		   b) not a From-line */
+		index_set_error(index, "Error indexing mbox file %s: "
+				"From-line not found where expected",
+				index->mbox_path);
+		index->set_flags |= MAIL_INDEX_FLAG_FSCK;
+		return FALSE;
+	}
+
+	/* parse the From-line */
+	internal_date = mbox_from_parse_date(data, size);
 	if (internal_date <= 0)
 		internal_date = ioloop_time;
 
-	/* skip the From-line */
-	for (i = 0; i < physical_size; i++) {
-		if (msg[i] == '\n') {
-			i++;
-			break;
-		}
-	}
+	io_buffer_skip(inbuf, pos+1);
+	start_offset = inbuf->offset;
 
-	if (i == physical_size)
-		return FALSE;
+	/* now, find the ending "[\r]\nFrom " */
+	mbox_read_message(inbuf, &virtual_size);
+	stop_offset = inbuf->offset;
 
-	msg += i;
-	offset += i;
-	physical_size -= i;
-	virtual_size -= i;
-	if (i > 0 && msg[i-1] != '\r')
-		virtual_size--;
-
+	/* add message to index */
 	rec = mail_index_record_append(index, internal_date, virtual_size);
 	if (rec == NULL)
 		return FALSE;
@@ -157,11 +128,32 @@
 	update = index->update_begin(index, rec);
 
 	/* location = offset to beginning of message */
-	i_snprintf(location, sizeof(location), "%lu", (unsigned long) offset);
+	location = binary_to_hex((unsigned char *) &start_offset,
+				 sizeof(start_offset));
 	index->update_field(update, FIELD_TYPE_LOCATION, location, 0);
 
-	/* parse the header and cache wanted fields */
-	mail_index_update_headers(update, msg, physical_size, header_func, rec);
+	/* parse the header and cache wanted fields. get the message flags
+	   from Status and X-Status fields. temporarily limit the buffer size
+	   so the message body is parsed properly (FIXME: does this have
+	   side effects?) */
+	mbox_header_init_context(&ctx);
+
+        old_size = inbuf->size;
+	inbuf->size = stop_offset;
+	io_buffer_seek(inbuf, start_offset);
+
+	mail_index_update_headers(update, inbuf, mbox_header_func, &ctx);
+
+	inbuf->size = old_size;
+	io_buffer_seek(inbuf, stop_offset);
+
+	/* save message flags */
+	rec->msg_flags |= ctx.flags;
+
+	/* save MD5 */
+	md5_final(&ctx.md5, md5_digest);
+	index->update_field(update, FIELD_TYPE_MD5,
+                            binary_to_hex(md5_digest, sizeof(md5_digest)), 0);
 
 	if (!index->update_end(update)) {
 		/* failed - delete the record */
@@ -172,79 +164,9 @@
 	return TRUE;
 }
 
-int mbox_index_append_mmaped(MailIndex *index, const char *data,
-			     size_t data_size, off_t start_offset)
+int mbox_index_append(MailIndex *index, IOBuffer *inbuf)
 {
-	const char *data_start, *data_end, *start, *cr;
-	size_t size, vsize;
-	off_t pos;
-	int missing_cr_count;
-
-	/* we should start with "From ". if we don't, something's messed up
-	   and we should check the whole file instead. */
-	if (strncmp(data, "From ", 5) != 0) {
-		index->set_flags |= MAIL_INDEX_FLAG_FSCK;
-		return FALSE;
-	}
-
-	/* each message ends at "\nFrom ". first get the size of the message,
-	   then parse it. calculate the missing CR count as well. */
-	start = data; cr = NULL; missing_cr_count = 0;
-
-	data_start = data;
-	data_end = data + data_size;
-	for (; data != data_end; data++) {
-		if (*data == '\r')
-			cr = data;
-		else if (*data == '\n') {
-			if (cr != data-1)
-				missing_cr_count++;
-
-			if (data+6 < data_end && data[1] == 'F' &&
-			    data[2] == 'r' && data[3] == 'o' &&
-			    data[4] == 'm' && data[5] == ' ') {
-				/* end of message */
-				pos = (off_t) (start - data_start) +
-					start_offset;
-				size = (size_t) (data - start) + 1;
-				vsize = size + missing_cr_count;
-				if (!mbox_index_append_data(index, start, pos,
-							    size, vsize))
-					return FALSE;
-
-				missing_cr_count = 0;
-				start = data+1;
-			}
-		}
-	}
-
-	/* last message */
-	pos = (off_t) (start - data_start);
-	size = (size_t) (data - start);
-	vsize = size + missing_cr_count;
-	return mbox_index_append_data(index, start, pos, size, vsize);
-}
-
-int mbox_index_append(MailIndex *index, int fd, const char *path)
-{
-	void *mmap_base;
-	size_t mmap_length;
-	off_t pos, end_pos;
-	int ret;
-
-	/* get our current position */
-	pos = lseek(fd, 0, SEEK_CUR);
-
-	/* get the size of the file */
-	end_pos = lseek(fd, 0, SEEK_END);
-
-	if (pos == -1 || end_pos == -1) {
-		index_set_error(index, "lseek() failed with mbox file %s: %m",
-				path);
-		return FALSE;
-	}
-
-	if (pos == end_pos) {
+	if (inbuf->offset == inbuf->size) {
 		/* no new data */
 		return TRUE;
 	}
@@ -252,18 +174,27 @@
 	if (!index->set_lock(index, MAIL_LOCK_EXCLUSIVE))
 		return FALSE;
 
-	/* mmap() the file */
-	mmap_length = end_pos-pos;
-	mmap_base = mmap(NULL, mmap_length, PROT_READ, MAP_SHARED, fd, pos);
-	if (mmap_base == MAP_FAILED) {
-		index_set_error(index, "mmap() failed with mbox file %s: %m",
-				path);
-		return FALSE;
+	for (;;) {
+		if (inbuf->offset != 0) {
+			/* we're at the [\r]\n before the From-line,
+			   skip it */
+			if (!mbox_skip_crlf(inbuf)) {
+				index_set_error(index,
+						"Error indexing mbox file %s: "
+						"LF not found where expected",
+						index->mbox_path);
+
+				index->set_flags |= MAIL_INDEX_FLAG_FSCK;
+				return FALSE;
+			}
+		}
+
+		if (inbuf->offset == inbuf->size)
+			break;
+
+		if (!mbox_index_append_next(index, inbuf))
+			return FALSE;
 	}
 
-	(void)madvise(mmap_base, mmap_length, MADV_SEQUENTIAL);
-
-	ret = mbox_index_append_mmaped(index, mmap_base, mmap_length, pos);
-	(void)munmap(mmap_base, mmap_length);
-	return ret;
+	return TRUE;
 }