Mercurial > dovecot > core-2.2
changeset 18801:2bf5c51738d4
lib-fts: Added fts_icu_lcase()
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 02 Jun 2015 21:54:52 +0300 |
parents | e2d42eab722f |
children | 26d9a4fcb0d4 |
files | src/lib-fts/fts-icu.c src/lib-fts/fts-icu.h src/lib-fts/test-fts-icu.c |
diffstat | 3 files changed, 81 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/fts-icu.c	Tue Jun 02 21:49:46 2015 +0300
+++ b/src/lib-fts/fts-icu.c	Tue Jun 02 21:54:52 2015 +0300
@@ -5,8 +5,30 @@
 #include "unichar.h"
 #include "fts-icu.h"
 
+#include <unicode/uchar.h>
+#include <unicode/ucasemap.h>
 #include <unicode/uclean.h>
 
+/* Case map shared by all fts_icu_lcase() calls. Opened on first use by
+   fts_icu_csm() and closed by fts_icu_deinit(). */
+static struct UCaseMap *icu_csm = NULL;
+
+/* Return the shared case map, opening it if it isn't open yet.
+   Calls i_fatal() if ICU fails to open it. */
+static struct UCaseMap *fts_icu_csm(void)
+{
+	UErrorCode err = U_ZERO_ERROR;
+
+	if (icu_csm != NULL)
+		return icu_csm;
+	icu_csm = ucasemap_open(NULL, U_FOLD_CASE_DEFAULT, &err);
+	if (U_FAILURE(err)) {
+		i_fatal("LibICU ucasemap_open() failed: %s",
+			u_errorName(err));
+	}
+	return icu_csm;
+}
+
 void fts_icu_utf8_to_utf16(buffer_t *dest_utf16, const char *src_utf8)
 {
 	UErrorCode err = U_ZERO_ERROR;
@@ -111,7 +133,46 @@
 	return 0;
 }
 
+/* Lowercase the NUL-terminated src_utf8 and append the result to dest_utf8.
+   Calls i_fatal() if ICU reports a failure. */
+void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8)
+{
+	struct UCaseMap *csm = fts_icu_csm();
+	size_t avail_bytes, dest_pos = dest_utf8->used;
+	char *dest_data;
+	int dest_full_len;
+	UErrorCode err = U_ZERO_ERROR;
+
+	/* First try with the space the buffer already has after dest_pos. */
+	avail_bytes = buffer_get_writable_size(dest_utf8) - dest_pos;
+	dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, avail_bytes);
+
+	dest_full_len = ucasemap_utf8ToLower(csm, dest_data, avail_bytes,
+					     src_utf8, -1, &err);
+	if (err == U_BUFFER_OVERFLOW_ERROR) {
+		/* Too small: dest_full_len is the length ICU needs, so get
+		   that much space and retry. */
+		err = U_ZERO_ERROR;
+		dest_data = buffer_get_space_unsafe(dest_utf8, dest_pos, dest_full_len);
+		dest_full_len = ucasemap_utf8ToLower(csm, dest_data, dest_full_len,
+						     src_utf8, -1, &err);
+		i_assert(err != U_BUFFER_OVERFLOW_ERROR);
+	}
+	if (U_FAILURE(err)) {
+		i_fatal("LibICU ucasemap_utf8ToLower() failed: %s",
+			u_errorName(err));
+	}
+	/* The result was written at dest_pos, so the bytes that were already
+	   in the buffer have to be counted in the new used size. */
+	buffer_set_used_size(dest_utf8, dest_pos + dest_full_len);
+}
+
 void fts_icu_deinit(void)
 {
+	if (icu_csm != NULL) {
+		ucasemap_close(icu_csm);
+		/* Don't leave a dangling pointer for fts_icu_csm() to return. */
+		icu_csm = NULL;
+	}
 	u_cleanup();
 }
--- a/src/lib-fts/fts-icu.h	Tue Jun 02 21:49:46 2015 +0300
+++ b/src/lib-fts/fts-icu.h	Tue Jun 02 21:54:52 2015 +0300
@@ -13,6 +13,8 @@
 int fts_icu_translate(buffer_t *dest_utf16, const UChar *src_utf16,
 		      unsigned int src_len, UTransliterator *transliterator,
 		      const char **error_r);
+/* Lowercase the given NUL-terminated UTF-8 string into dest_utf8. */
+void fts_icu_lcase(string_t *dest_utf8, const char *src_utf8);
 /* Free all the memory used by ICU functions. */
 void fts_icu_deinit(void);
 
--- a/src/lib-fts/test-fts-icu.c	Tue Jun 02 21:49:46 2015 +0300
+++ b/src/lib-fts/test-fts-icu.c	Tue Jun 02 21:54:52 2015 +0300
@@ -141,6 +141,35 @@
 	test_end();
 }
 
+static void test_fts_icu_lcase(void)
+{
+	const char *src = "aBcD\xC3\x84\xC3\xA4"; /* "aBcD", then U+00C4 and U+00E4 encoded as UTF-8 */
+	string_t *dest = t_str_new(64);
+
+	test_begin("fts_icu_lcase"); /* basic case: lowercase into a 64-byte string */
+	fts_icu_lcase(dest, src);
+	test_assert(strcmp(str_c(dest), "abcd\xC3\xA4\xC3\xA4") == 0); /* U+00C4 must become U+00E4; U+00E4 stays as is */
+	test_end();
+}
+
+static void test_fts_icu_lcase_resize(void)
+{
+	const char *src = "a\xC3\x84"; /* 3 bytes in; the expected lowercase result is also 3 bytes */
+	string_t *dest;
+	unsigned int i;
+
+	test_begin("fts_icu_lcase resize"); /* same conversion, starting from different buffer sizes */
+	for (i = 2; i <= 4; i++) { /* initial sizes: smaller than, equal to, and larger than the 3-byte result */
+		dest = t_str_new(i);
+		test_assert(buffer_get_size(dest) == i);
+		fts_icu_lcase(dest, src);
+		test_assert(strcmp(str_c(dest), "a\xC3\xA4") == 0);
+		test_assert(buffer_get_size(dest) == 4); /* presumably the 3 result bytes plus the NUL added by str_c() - TODO confirm */
+	}
+
+	test_end();
+}
+
 int main(void)
 {
 	static void (*test_functions[])(void) = {
@@ -150,9 +179,11 @@
 		test_fts_icu_utf16_to_utf8_resize,
 		test_fts_icu_translate,
 		test_fts_icu_translate_resize,
+		test_fts_icu_lcase,
+		test_fts_icu_lcase_resize,
 		NULL
 	};
 	int ret = test_run(test_functions);
-	u_cleanup();
+	fts_icu_deinit(); /* closes the case map opened by fts_icu_lcase() and then runs u_cleanup() */
 	return ret;
 }