Mercurial > dovecot > original-hg > dovecot-1.2
changeset 9456:23abbf14279c HEAD
str_sanitize(): Don't break UTF-8 input.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 28 Oct 2009 13:50:57 -0400 |
parents | bd4a6f500c75 |
children | 778a6418f54b |
files | src/lib/str-sanitize.c |
diffstat | 1 files changed, 27 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib/str-sanitize.c	Tue Oct 27 22:47:25 2009 -0400
+++ b/src/lib/str-sanitize.c	Wed Oct 28 13:50:57 2009 -0400
@@ -1,32 +1,52 @@
 /* Copyright (c) 2004-2009 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "unichar.h"
 #include "str.h"
 #include "str-sanitize.h"
 
 static size_t str_sanitize_skip_start(const char *src, size_t max_len)
 {
+	unsigned int len;
+	unichar_t chr;
 	size_t i;
 
-	for (i = 0; i < max_len; i++) {
-		if (((unsigned char)src[i] & 0x7f) < 32)
+	for (i = 0; i < max_len; ) {
+		len = uni_utf8_char_bytes(src[i]);
+		if (uni_utf8_get_char(src+i, &chr) <= 0)
 			break;
+		if ((unsigned char)src[i] < 32)
+			break;
+		i += len;
 	}
 	return i;
 }
 
 void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
 {
+	unsigned int len;
+	unichar_t chr;
 	size_t i;
+	int ret;
 
-	i = str_sanitize_skip_start(src, max_len);
-	str_append_n(dest, src, i);
-
-	for (; i < max_len && src[i] != '\0'; i++) {
-		if (((unsigned char)src[i] & 0x7f) < 32)
+	for (i = 0; i < max_len && src[i] != '\0'; ) {
+		len = uni_utf8_char_bytes(src[i]);
+		ret = uni_utf8_get_char(src+i, &chr);
+		if (ret <= 0) {
+			/* invalid UTF-8 */
+			str_append_c(dest, '?');
+			if (ret == 0) {
+				/* input ended too early */
+				return;
+			}
+			i++;
+			continue;
+		}
+		if ((unsigned char)src[i] < 32)
 			str_append_c(dest, '?');
 		else
 			str_append_c(dest, src[i]);
+		i += len;
 	}
 
 	if (src[i] != '\0') {