view src/lib-charset/charset-iconv.c @ 6131:5f56b2eb32b3 HEAD

Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8 comparing.
author Timo Sirainen <tss@iki.fi>
date Fri, 20 Jul 2007 17:27:02 +0300
parents 0d3583b02a32
children d01522d276f6
line wrap: on
line source

/* Copyright (C) 2002-2007 Timo Sirainen */

#include "lib.h"
#include "buffer.h"
#include "unichar.h"
#include "charset-utf8.h"

#ifdef HAVE_ICONV

#include <iconv.h>
#include <ctype.h>

/* State of one charset -> UTF-8 translation, created by
   charset_to_utf8_begin() and released by charset_to_utf8_end(). */
struct charset_translation {
	/* iconv conversion descriptor. Set to (iconv_t)-1 when the source
	   charset is already UTF-8, in which case iconv is not used at all. */
	iconv_t cd;
	/* When set, the UTF-8 output is passed through
	   uni_utf8_to_decomposed_titlecase() before it is appended to the
	   destination buffer. */
	unsigned int ucase:1;
};

/* Begin translating text from the given charset to UTF-8.

   charset: name of the source charset, handed to iconv_open() unless
            charset_is_utf8() recognizes it as UTF-8 already.
   ucase: stored in the translation; when TRUE the converted output is
          additionally run through uni_utf8_to_decomposed_titlecase().
   unknown_charset_r: optional (may be NULL). Set to FALSE on entry and to
          TRUE if iconv_open() fails.

   Returns a newly allocated translation, or NULL if iconv_open() failed. */
struct charset_translation *
charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
{
	struct charset_translation *trans;
	/* (iconv_t)-1 doubles as the "input is already UTF-8" marker */
	iconv_t conv = (iconv_t)-1;

	if (unknown_charset_r != NULL)
		*unknown_charset_r = FALSE;

	if (!charset_is_utf8(charset)) {
		conv = iconv_open("UTF-8", charset);
		if (conv == (iconv_t)-1) {
			if (unknown_charset_r != NULL)
				*unknown_charset_r = TRUE;
			return NULL;
		}
	}

	trans = i_new(struct charset_translation, 1);
	trans->ucase = ucase;
	trans->cd = conv;
	return trans;
}

/* Release a translation created by charset_to_utf8_begin().

   The caller's pointer is cleared to NULL before the translation is freed.
   *_t must point to a valid translation; it is dereferenced
   unconditionally. */
void charset_to_utf8_end(struct charset_translation **_t)
{
	struct charset_translation *trans = *_t;

	*_t = NULL;

	if (trans->cd == (iconv_t)-1) {
		/* UTF-8 passthrough: no iconv descriptor was ever opened */
	} else {
		iconv_close(trans->cd);
	}
	i_free(trans);
}

/* Reset the translation's conversion state. Does nothing when the
   translation has no iconv descriptor (input is already UTF-8). */
void charset_to_utf8_reset(struct charset_translation *t)
{
	if (t->cd == (iconv_t)-1)
		return;

	/* calling iconv() with NULL input puts the descriptor back into its
	   initial shift state; the return value is of no interest here */
	(void)iconv(t->cd, NULL, NULL, NULL, NULL);
}

/* Perform a single conversion attempt of src into dest.

   t: translation state. If t->cd is (iconv_t)-1 the input is treated as
      UTF-8 already and iconv is bypassed.
   src, src_size: input bytes. On the iconv path *src_size is reduced to the
      number of input bytes iconv actually consumed. On the UTF-8
      passthrough path it is left unchanged.
   dest: buffer the UTF-8 output is appended to.
   result: always set; CHARSET_RET_OK, CHARSET_RET_INCOMPLETE_INPUT or
      CHARSET_RET_INVALID_INPUT.

   Returns TRUE when the caller is done with this input, FALSE when iconv
   ran out of output space (E2BIG) and the caller should call again with
   the remaining input. */
static bool
charset_to_utf8_try(struct charset_translation *t,
		    const unsigned char *src, size_t *src_size, buffer_t *dest,
		    enum charset_result *result)
{
	ICONV_CONST char *ic_srcbuf;
	char tmpbuf[8192], *ic_destbuf;
	size_t srcleft, destleft;
	bool ret = TRUE;

	if (t->cd == (iconv_t)-1) {
		/* no translation needed - copy the input to dest as-is, or
		   through the titlecase conversion if it was requested */
		*result = CHARSET_RET_OK;
		if (!t->ucase) {
			buffer_append(dest, src, *src_size);
			return TRUE;
		}

		if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
			*result = CHARSET_RET_INVALID_INPUT;
		return TRUE;
	}
	if (!t->ucase) {
		/* convert directly into dest: reserve all of its free space
		   now and give the unused tail back after iconv() */
		destleft = buffer_get_size(dest) - dest->used;
		if (destleft < *src_size) {
			/* The buffer is most likely too small to hold the
			   output, so increase it at least to the input size. */
			destleft = *src_size;
		}
		ic_destbuf = buffer_append_space_unsafe(dest, destleft);
	} else {
		/* convert into a temporary buffer first, since the output
		   still has to go through the titlecase conversion */
		destleft = sizeof(tmpbuf);
		ic_destbuf = tmpbuf;
	}

	srcleft = *src_size;
	ic_srcbuf = (ICONV_CONST char *) src;

	if (iconv(t->cd, &ic_srcbuf, &srcleft,
		  &ic_destbuf, &destleft) != (size_t)-1)
		*result = CHARSET_RET_OK;
	else if (errno == E2BIG) {
		/* set result just to avoid compiler warning */
		*result = CHARSET_RET_INCOMPLETE_INPUT;
		ret = FALSE;
	} else if (errno == EINVAL)
		*result = CHARSET_RET_INCOMPLETE_INPUT;
	else {
		/* should be EILSEQ. Don't return here: the space reserved
		   in dest above must still be given back, otherwise
		   dest->used would cover bytes that were never written.
		   Whatever was converted before the invalid sequence is
		   kept and *src_size reports how far we got. */
		*result = CHARSET_RET_INVALID_INPUT;
	}
	*src_size -= srcleft;

	if (!t->ucase) {
		/* give back the memory we didn't use */
		buffer_set_used_size(dest, dest->used - destleft);
	} else {
		size_t tmpsize = sizeof(tmpbuf) - destleft;

		/* we just converted data to UTF-8, it can't be invalid */
		if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0)
			i_unreached();
	}
	return ret;
}

/* Translate src to UTF-8, appending the output to dest.

   Calls charset_to_utf8_try() repeatedly, enlarging dest between attempts,
   until it reports that it is finished. On return *src_size is set to the
   sum of the input byte counts reported by the individual attempts.

   Returns the result of the last conversion attempt. */
enum charset_result
charset_to_utf8(struct charset_translation *t,
		const unsigned char *src, size_t *src_size, buffer_t *dest)
{
	enum charset_result result;
	size_t consumed = 0, chunk, saved_used;
	bool finished;

	do {
		chunk = *src_size - consumed;
		finished = charset_to_utf8_try(t, src + consumed, &chunk,
					       dest, &result);
		consumed += chunk;

		if (!finished) {
			/* force buffer to grow: request one byte more than
			   its current free space, then restore the used
			   size so nothing appears to have been appended */
			saved_used = dest->used;
			(void)buffer_append_space_unsafe(dest,
				buffer_get_size(dest) - saved_used + 1);
			buffer_set_used_size(dest, saved_used);
		}
	} while (!finished);

	*src_size = consumed;
	return result;
}

#endif