Mercurial > dovecot > core-2.2
changeset 15574:7be4709aab5e
uni_utf8_*(): Treat overlong UTF8 sequences as invalid.
field | value |
---|---|
author | Timo Sirainen <tss@iki.fi> |
date | Fri, 04 Jan 2013 23:24:26 +0200 |
parents | 721aba9128b9 |
children | c91d142130be |
files | src/lib/Makefile.am src/lib/test-lib.c src/lib/test-lib.h src/lib/test-unichar.c src/lib/unichar.c |
diffstat | 5 files changed, 42 insertions(+), 15 deletions(-) |
line wrap: on
line diff
--- a/src/lib/Makefile.am Fri Jan 04 05:29:25 2013 +0200 +++ b/src/lib/Makefile.am Fri Jan 04 23:24:26 2013 +0200 @@ -273,6 +273,7 @@ test-str-find.c \ test-str-sanitize.c \ test-time-util.c \ + test-unichar.c \ test-utc-mktime.c \ test-var-expand.c
--- a/src/lib/test-lib.c Fri Jan 04 05:29:25 2013 +0200 +++ b/src/lib/test-lib.c Fri Jan 04 23:24:26 2013 +0200 @@ -30,6 +30,7 @@ test_str_find, test_str_sanitize, test_time_util, + test_unichar, test_utc_mktime, test_var_expand, NULL
--- a/src/lib/test-lib.h Fri Jan 04 05:29:25 2013 +0200 +++ b/src/lib/test-lib.h Fri Jan 04 23:24:26 2013 +0200 @@ -29,6 +29,7 @@ void test_str_find(void); void test_str_sanitize(void); void test_time_util(void); +void test_unichar(void); void test_utc_mktime(void); void test_var_expand(void);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/test-unichar.c Fri Jan 04 23:24:26 2013 +0200 @@ -0,0 +1,24 @@ +/* Copyright (c) 2007-2012 Dovecot authors, see the included COPYING file */ + +#include "test-lib.h" +#include "str.h" +#include "unichar.h" + +void test_unichar(void) +{ + static const char *overlong_utf8 = "\xf8\x80\x95\x81\xa1"; + unichar_t chr, chr2; + string_t *str = t_str_new(16); + + test_begin("unichars"); + for (chr = 0; chr <= 0x10ffff; chr++) { + str_truncate(str, 0); + uni_ucs4_to_utf8_c(chr, str); + test_assert(uni_utf8_str_is_valid(str_c(str))); + test_assert(uni_utf8_get_char(str_c(str), &chr2) > 0); + test_assert(chr2 == chr); + } + test_assert(!uni_utf8_str_is_valid(overlong_utf8)); + test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0); + test_end(); +}
--- a/src/lib/unichar.c Fri Jan 04 05:29:25 2013 +0200 +++ b/src/lib/unichar.c Fri Jan 04 23:24:26 2013 +0200 @@ -37,8 +37,10 @@ int uni_utf8_get_char_n(const void *_input, size_t max_len, unichar_t *chr_r) { + static unichar_t lowest_valid_chr_table[] = + { 0, 0, 0x80, 0x800, 0x10000, 0x20000, 0x40000 }; const unsigned char *input = _input; - unichar_t chr; + unichar_t chr, lowest_valid_chr; unsigned int i, len; int ret; @@ -75,10 +77,12 @@ return -1; } - if (len <= max_len) + if (len <= max_len) { + lowest_valid_chr = lowest_valid_chr_table[len]; ret = 1; - else { + } else { /* check first if the input is invalid before returning 0 */ + lowest_valid_chr = 0; ret = 0; len = max_len; } @@ -91,6 +95,10 @@ chr <<= 6; chr |= input[i] & 0x3f; } + if (chr < lowest_valid_chr) { + /* overlong encoding */ + return -1; + } *chr_r = chr; return ret; @@ -340,19 +348,11 @@ static inline unsigned int is_valid_utf8_seq(const unsigned char *input, unsigned int size) { - unsigned int i, len; - - len = uni_utf8_char_bytes(input[0]); - if (unlikely(len > size || len == 1)) - return 0; + unichar_t chr; - /* the rest of the chars should be in 0x80..0xbf range. - anything else is start of a sequence or invalid */ - for (i = 1; i < len; i++) { - if (unlikely(input[i] < 0x80 || input[i] > 0xbf)) - return 0; - } - return len; + if (uni_utf8_get_char_n(input, size, &chr) <= 0) + return 0; + return uni_utf8_char_bytes(input[0]); } static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,