# HG changeset patch
# User Timo Sirainen <tss@iki.fi>
# Date 1225566721 -7200
# Node ID 81c0fe5bd635995615a822f4a5bb481a15b2671a
# Parent  59fc12b2b08ab6e6e28b4571bbf3a155590b3afe
Added code for encoding and decoding IMAP's modified-UTF7 strings.

diff -r 59fc12b2b08a -r 81c0fe5bd635 src/lib-imap/Makefile.am
--- a/src/lib-imap/Makefile.am	Sat Nov 01 21:11:36 2008 +0200
+++ b/src/lib-imap/Makefile.am	Sat Nov 01 21:12:01 2008 +0200
@@ -15,6 +15,7 @@
 	imap-parser.c \
 	imap-quote.c \
 	imap-seqset.c \
+	imap-utf7.c \
 	imap-util.c
 
 headers = \
@@ -27,6 +28,7 @@
 	imap-parser.h \
 	imap-quote.h \
 	imap-seqset.h \
+	imap-utf7.h \
 	imap-util.h
 
 if INSTALL_HEADERS
diff -r 59fc12b2b08a -r 81c0fe5bd635 src/lib-imap/imap-utf7.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-imap/imap-utf7.c	Sat Nov 01 21:12:01 2008 +0200
@@ -0,0 +1,249 @@
+/* Copyright (c) 2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "unichar.h"
+#include "imap-utf7.h"
+
+static const char imap_b64enc[] = /* 6-bit value -> modified base64 char */
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+/* char -> 6-bit value, the reverse of imap_b64enc; XX = not in the alphabet */
+#define XX 0xff
+static const unsigned char imap_b64dec[256] = {
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, 63,XX,XX,XX,
+	52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
+	XX, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
+	15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
+	XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+	41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
+	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX
+};
+
+/* Append "&" + in[0..len-1] in modified base64 + "-" to dest. No '=' padding
+   is written; the unused low bits of the last character are left as zero. */
+static void
+mbase64_encode(string_t *dest, const unsigned char *in, unsigned int len)
+{
+	const unsigned char *const end = in + len;
+	unsigned int bits;
+
+	str_append_c(dest, '&');
+	for (; end - in >= 3; in += 3) {
+		bits = (in[0] << 16) | (in[1] << 8) | in[2];
+		str_append_c(dest, imap_b64enc[bits >> 18]);
+		str_append_c(dest, imap_b64enc[(bits >> 12) & 0x3f]);
+		str_append_c(dest, imap_b64enc[(bits >> 6) & 0x3f]);
+		str_append_c(dest, imap_b64enc[bits & 0x3f]);
+	}
+	if (in != end) {
+		/* 1 or 2 bytes left over -> 2 or 3 characters */
+		bits = (in[0] << 16) | (end - in == 2 ? in[1] << 8 : 0);
+		str_append_c(dest, imap_b64enc[bits >> 18]);
+		str_append_c(dest, imap_b64enc[(bits >> 12) & 0x3f]);
+		if (end - in == 2)
+			str_append_c(dest, imap_b64enc[(bits >> 6) & 0x3f]);
+	}
+	str_append_c(dest, '-');
+}
+
+/* Append src to dest converted from UTF-8 to IMAP's modified UTF-7: '&' is
+   written as "&-", each run of non-ASCII characters as "&<base64 of its
+   UTF-16BE form>-" and all other bytes as they are. Returns 0 if ok, -1 if
+   src isn't valid UTF-8, in which case dest may contain partial output. */
+int imap_utf8_to_utf7(const char *src, string_t *dest)
+{
+	const char *p = src;
+	unichar_t chr;
+	uint8_t *utf16, *u;
+	uint16_t u16;
+
+	while (*p != '\0' && *p != '&' && (unsigned char)*p < 0x80)
+		p++;
+	if (*p == '\0') {
+		/* nothing in src needs to be encoded */
+		str_append(dest, src);
+		return 0;
+	}
+
+	/* copy the plain prefix, then handle the rest piece by piece */
+	str_append_n(dest, src, p-src);
+	utf16 = t_malloc(strlen(p)*2);
+	while (*p != '\0') {
+		if ((unsigned char)*p < 0x80) {
+			if (*p == '&')
+				str_append(dest, "&-");
+			else
+				str_append_c(dest, *p);
+			p++;
+			continue;
+		}
+
+		/* collect the whole 8bit run as UTF-16BE before encoding it */
+		u = utf16;
+		do {
+			if (uni_utf8_get_char(p, &chr) <= 0)
+				return -1;
+			/* @UNSAFE */
+			if (chr >= UTF16_SURROGATE_BASE) {
+				u16 = UTF16_SURROGATE_HIGH(chr);
+				*u++ = u16 >> 8;
+				*u++ = u16 & 0xff;
+				u16 = UTF16_SURROGATE_LOW(chr);
+			} else {
+				u16 = chr;
+			}
+			*u++ = u16 >> 8;
+			*u++ = u16 & 0xff;
+			p += uni_utf8_char_bytes(*p);
+		} while ((unsigned char)*p >= 0x80);
+		mbase64_encode(dest, utf16, u-utf16);
+	}
+	return 0;
+}
+
+/* Append the oldest UTF-16BE character of the 4-byte ring buffer `output' to
+   dest as UTF-8. *_pos is the ring's start and is moved past a consumed
+   BMP unit; len is the number of buffered bytes. Returns 0 if ok, -1 if
+   the buffered data isn't valid UTF-16. */
+static int utf16buf_to_utf8(string_t *dest, const unsigned char output[4],
+			    unsigned int *_pos, unsigned int len)
+{
+	const unsigned int start = *_pos;
+	uint16_t unit[2];
+	unichar_t ucs4;
+
+	if ((len & 1) != 0)
+		return -1;
+
+	unit[0] = (output[start % 4] << 8) | output[(start+1) % 4];
+	if (unit[0] >= UTF16_SURROGATE_HIGH_FIRST &&
+	    unit[0] <= UTF16_SURROGATE_HIGH_MAX) {
+		/* surrogate: needs a full high+low pair, i.e. all 4 bytes */
+		if (unit[0] > UTF16_SURROGATE_HIGH_LAST || len != 4)
+			return -1;
+		unit[1] = (output[(start+2) % 4] << 8) | output[(start+3) % 4];
+		if (unit[1] < UTF16_SURROGATE_LOW_FIRST ||
+		    unit[1] > UTF16_SURROGATE_LOW_LAST)
+			return -1;
+		ucs4 = UTF16_SURROGATE_BASE +
+			(((unit[0] & UTF16_SURROGATE_MASK) <<
+			  UTF16_SURROGATE_SHIFT) |
+			 (unit[1] & UTF16_SURROGATE_MASK));
+	} else {
+		/* a single code unit (not a single byte): consume 2 bytes */
+		ucs4 = unit[0];
+		*_pos = (start + 2) % 4;
+	}
+	uni_ucs4_to_utf8_c(ucs4, dest);
+	return 0;
+}
+
+/* Decode the modified-base64 run at *_src (the text just after '&') and
+   append it to dest as UTF-8. Returns 0 and moves *_src past the closing
+   '-', or -1 without touching *_src if the run is invalid. */
+static int mbase64_decode_to_utf8(string_t *dest, const char **_src)
+{
+	const char *src = *_src;
+	unsigned char input[4], output[4];
+	unsigned int outstart = 0, outpos = 0;
+
+	/* output[]: ring of UTF-16BE bytes; outstart = oldest, outpos = total */
+	while (*src != '-') {
+		/* check src[0] before reading src[1]: src[0] may be the NUL */
+		input[0] = imap_b64dec[(uint8_t)src[0]];
+		if (input[0] == 0xff)
+			return -1;
+		input[1] = imap_b64dec[(uint8_t)src[1]];
+		if (input[1] == 0xff)
+			return -1;
+
+		output[outpos % 4] = (input[0] << 2) | (input[1] >> 4);
+		if (++outpos % 4 == outstart &&
+		    utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
+			return -1;
+		/* the run may end after 2 or 3 characters of a group of 4 */
+		input[2] = imap_b64dec[(uint8_t)src[2]];
+		if (input[2] == 0xff) {
+			if (src[2] != '-')
+				return -1;
+			src += 2;
+			break;
+		}
+
+		output[outpos % 4] = (input[1] << 4) | (input[2] >> 2);
+		if (++outpos % 4 == outstart &&
+		    utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
+			return -1;
+
+		input[3] = imap_b64dec[(uint8_t)src[3]];
+		if (input[3] == 0xff) {
+			if (src[3] != '-')
+				return -1;
+			src += 3;
+			break;
+		}
+
+		output[outpos % 4] = ((input[2] << 6) & 0xc0) | input[3];
+		if (++outpos % 4 == outstart &&
+		    utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
+			return -1;
+		src += 4;
+	}
+	/* 1-3 bytes may be left; anything but one whole character is an error */
+	if (outstart != outpos % 4 &&
+	    utf16buf_to_utf8(dest, output, &outstart,
+			     (4 + outpos - outstart) % 4) < 0)
+		return -1;
+	/* found ending '-' */
+	*_src = src + 1;
+	return 0;
+}
+
+/* Append src to dest converted from IMAP's modified UTF-7 to UTF-8: "&-"
+   becomes '&' and "&<base64>-" the characters it encodes. Returns 0 if ok,
+   -1 if src is invalid (8bit bytes, bad base64, or a base64 run directly
+   followed by another); dest may then contain partial output. */
+int imap_utf7_to_utf8(const char *src, string_t *dest)
+{
+	const char *p;
+
+	for (p = src; *p != '&'; p++) {
+		if (*p == '\0') {
+			/* no IMAP-UTF-7 encoded characters */
+			str_append(dest, src);
+			return 0;
+		}
+		if ((unsigned char)*p >= 0x80)
+			return -1;
+	}
+
+	/* at least one encoded character */
+	str_append_n(dest, src, p-src);
+	while (*p != '\0') {
+		if ((unsigned char)*p >= 0x80) {
+			/* 8bit is as broken after the first '&' as before it */
+			return -1;
+		} else if (*p != '&') {
+			str_append_c(dest, *p++);
+		} else if (*++p == '-') {
+			str_append_c(dest, '&');
+			p++;
+		} else {
+			if (mbase64_decode_to_utf8(dest, &p) < 0)
+				return -1;
+			/* "&...-&...": a run may not directly follow another */
+			if (p[0] == '&' && p[1] != '-')
+				return -1;
+		}
+	}
+	return 0;
+}
diff -r 59fc12b2b08a -r 81c0fe5bd635 src/lib-imap/imap-utf7.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-imap/imap-utf7.h	Sat Nov 01 21:12:01 2008 +0200
@@ -0,0 +1,11 @@
+#ifndef IMAP_UTF7_H
+#define IMAP_UTF7_H
+
+/* Append src to dest converted from UTF-8 to IMAP-UTF-7. Returns 0 if ok,
+   -1 if src isn't valid UTF-8 (dest may already have partial output). */
+int imap_utf8_to_utf7(const char *src, string_t *dest);
+/* Append src to dest converted from IMAP-UTF-7 to UTF-8. Returns 0 if ok,
+   -1 if src isn't valid IMAP-UTF-7 (dest may already have partial output). */
+int imap_utf7_to_utf8(const char *src, string_t *dest);
+
+#endif
diff -r 59fc12b2b08a -r 81c0fe5bd635 src/tests/test-imap.c
--- a/src/tests/test-imap.c	Sat Nov 01 21:11:36 2008 +0200
+++ b/src/tests/test-imap.c	Sat Nov 01 21:12:01 2008 +0200
@@ -1,7 +1,10 @@
 /* Copyright (c) 2008 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "str.h"
+#include "unichar.h"
 #include "imap-match.h"
+#include "imap-utf7.h"
 #include "test-common.h"
 
 struct test_imap_match {
@@ -75,10 +78,99 @@
 	}
 }
 
+/* Tests both converters: known UTF-8 <-> mUTF-7 pairs (NULL = must fail),
+   round trips around the U+FFFF/U+10000 surrogate boundary, and rejection
+   of invalid input. 8bit test bytes are \x-escaped to survive re-encoding. */
+static void test_imap_utf7(void)
+{
+	static const char *to_utf7[] = {
+		"&&x&&", "&-&-x&-&-",
+		"~peter/mail/\xe5\x8f\xb0\xe5\x8c\x97/\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e",
+		"~peter/mail/&U,BTFw-/&ZeVnLIqe-",
+		"tiet\xc3\xa4j\xc3\xa4", "tiet&AOQ-j&AOQ-",
+		"p\xe4\xe4", NULL, /* Latin-1 bytes, not valid UTF-8 */
+		NULL
+	};
+	static const char *invalid_utf7[] = {
+		"&Jjo!",
+		"&U,BTFw-&ZeVnLIqe-",
+		NULL
+	};
+	string_t *src, *dest;
+	const char *orig_src;
+	unsigned int i, j;
+	unichar_t chr;
+	bool success, all_success = TRUE;
+
+	src = t_str_new(256);
+	dest = t_str_new(256);
+
+	for (i = 0; to_utf7[i] != NULL; i += 2) {
+		str_truncate(dest, 0);
+		if (imap_utf8_to_utf7(to_utf7[i], dest) < 0)
+			success = to_utf7[i+1] == NULL;
+		else
+			success = to_utf7[i+1] != NULL &&
+				strcmp(to_utf7[i+1], str_c(dest)) == 0;
+		if (!success) {
+			test_out(t_strdup_printf("imap_utf8_to_utf7(%u)", i/2),
+				 FALSE);
+			all_success = FALSE;
+		} else if (to_utf7[i+1] != NULL) {
+			str_truncate(dest, 0);
+			if (imap_utf7_to_utf8(to_utf7[i+1], dest) < 0 ||
+			    strcmp(to_utf7[i], str_c(dest)) != 0) {
+				test_out(t_strdup_printf("imap_utf7_to_utf8(%u)", i/2),
+					 FALSE);
+				all_success = FALSE;
+			}
+		}
+	}
+	if (all_success)
+		test_out("imap_utf8_to_utf7()", TRUE);
+
+	success = TRUE;
+	for (chr = 0xffff; chr <= 0x10010; chr++) {
+		for (i = 1; i <= 10; i++) {
+			str_truncate(src, 0);
+			str_truncate(dest, 0);
+			for (j = 0; j < i; j++) {
+				if (j % 3 == 0)
+					str_append_c(src, 'x');
+				if (j % 5 == 0)
+					str_append_c(src, '&');
+				uni_ucs4_to_utf8_c(chr, src);
+			}
+
+			orig_src = t_strdup(str_c(src));
+			str_truncate(src, 0);
+			success = imap_utf8_to_utf7(orig_src, dest) >= 0 &&
+				imap_utf7_to_utf8(str_c(dest), src) >= 0 &&
+				strcmp(str_c(src), orig_src) == 0;
+			if (!success)
+				goto end;
+		}
+	}
+end:
+	test_out("imap_utf7_to_utf8(reverse)", success);
+	all_success = TRUE; /* judge the invalid-input checks on their own */
+	for (i = 0; invalid_utf7[i] != NULL; i++) {
+		str_truncate(dest, 0);
+		if (imap_utf7_to_utf8(invalid_utf7[i], dest) == 0) {
+			test_out(t_strdup_printf("imap_utf7_to_utf8(invalid.%u)", i),
+				 FALSE);
+			all_success = FALSE;
+		}
+	}
+	if (all_success)
+		test_out("imap_utf7_to_utf8(invalid)", TRUE);
+}
+
 int main(void)
 {
 	test_init();
 
 	test_imap_match();
+	test_imap_utf7();
 	return test_deinit();
 }