Mercurial > dovecot > core-2.2
changeset 10212:f68c2cc1b32b HEAD
str_sanitize(): Don't break UTF-8 input.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 28 Oct 2009 13:50:55 -0400 |
parents | 104edcb89a70 |
children | 6c32cc350164 |
files | src/lib/str-sanitize.c src/lib/test-str-sanitize.c |
diffstat | 2 files changed, 38 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib/str-sanitize.c	Tue Oct 27 22:44:39 2009 -0400
+++ b/src/lib/str-sanitize.c	Wed Oct 28 13:50:55 2009 -0400
@@ -1,32 +1,52 @@
 /* Copyright (c) 2004-2009 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
+#include "unichar.h"
 #include "str.h"
 #include "str-sanitize.h"
 
 static size_t str_sanitize_skip_start(const char *src, size_t max_len)
 {
+	unsigned int len;
+	unichar_t chr;
 	size_t i;
 
-	for (i = 0; i < max_len; i++) {
-		if (((unsigned char)src[i] & 0x7f) < 32)
+	for (i = 0; i < max_len; ) {
+		len = uni_utf8_char_bytes(src[i]);
+		if (uni_utf8_get_char(src+i, &chr) <= 0)
 			break;
+		if ((unsigned char)src[i] < 32)
+			break;
+		i += len;
 	}
 	return i;
 }
 
 void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
 {
+	unsigned int len;
+	unichar_t chr;
 	size_t i;
+	int ret;
 
-	i = str_sanitize_skip_start(src, max_len);
-	str_append_n(dest, src, i);
-
-	for (; i < max_len && src[i] != '\0'; i++) {
-		if (((unsigned char)src[i] & 0x7f) < 32)
+	for (i = 0; i < max_len && src[i] != '\0'; ) {
+		len = uni_utf8_char_bytes(src[i]);
+		ret = uni_utf8_get_char(src+i, &chr);
+		if (ret <= 0) {
+			/* invalid UTF-8 */
+			str_append_c(dest, '?');
+			if (ret == 0) {
+				/* input ended too early */
+				return;
+			}
+			i++;
+			continue;
+		}
+		if ((unsigned char)src[i] < 32)
 			str_append_c(dest, '?');
 		else
 			str_append_c(dest, src[i]);
+		i += len;
 	}
 
 	if (src[i] != '\0') {
--- a/src/lib/test-str-sanitize.c	Tue Oct 27 22:44:39 2009 -0400
+++ b/src/lib/test-str-sanitize.c	Wed Oct 28 13:50:55 2009 -0400
@@ -17,7 +17,10 @@
 		{ "ab", 2 },
 		{ "abc", 2 },
 		{ "abcd", 3 },
-		{ "abcde", 4 }
+		{ "abcde", 4 },
+		{ "с", 10 },
+		{ "с", 1 },
+		{ "\001x\x1fy\x81", 10 }
 	};
 	static const char *output[] = {
 		NULL,
@@ -26,15 +29,18 @@
 		"ab",
 		"...",
 		"...",
-		"a..."
+		"a...",
+		"с",
+		"с",
+		"?x?y?"
 	};
 	const char *str;
 	unsigned int i;
-	bool success;
 
+	test_begin("str_sanitize");
 	for (i = 0; i < N_ELEMENTS(input); i++) {
 		str = str_sanitize(input[i].str, input[i].max_len);
-		success = null_strcmp(output[i], str) == 0;
-		test_out(t_strdup_printf("str_sanitize(%d)", i), success);
+		test_assert(null_strcmp(output[i], str) == 0);
 	}
+	test_end();
 }