Mercurial > dovecot > core-2.2
view src/lib/str-sanitize.c @ 22955:812e5c961328
fts: Indexing virtual mailbox didn't always index the last mails
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Thu, 03 May 2018 18:33:00 +0300 |
parents | a556724ce39b |
children |
line wrap: on
line source
/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "unichar.h"
#include "str.h"
#include "str-sanitize.h"

/* Returns the byte offset of the first position in src that would need
   sanitizing: an invalid or incomplete UTF-8 sequence, or a control
   character (byte value < 32). If no such position exists, returns the offset
   where the scan stopped, i.e. the end of the string or the max_bytes limit,
   whichever comes first. The result is never larger than max_bytes. */
static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
{
	unichar_t chr;
	size_t i;

	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);

		if (len <= 0)
			break;
		if ((unsigned char)src[i] < 32)
			break;
		i += len;
	}
	i_assert(i <= max_bytes);
	return i;
}

/* Same as str_sanitize_skip_start(), but the limit is given as a number of
   code points (max_chars) instead of bytes. The return value is still a byte
   offset into src. */
static size_t
str_sanitize_skip_start_utf8(const char *src, uintmax_t max_chars)
{
	unichar_t chr;
	uintmax_t c;
	size_t i;

	for (i = 0, c = 0; c < max_chars && src[i] != '\0'; ) {
		int len = uni_utf8_get_char(src+i, &chr);

		if (len <= 0)
			break;
		if ((unsigned char)src[i] < 32)
			break;
		c++;
		i += len;
	}
	i_assert(c <= max_chars);
	return i;
}

/* Removes the last character from dest: either a single ASCII byte, or a
   whole multi-byte UTF-8 sequence (trailing 10xxxxxx bytes plus the 11xxxxxx
   lead byte). Nothing at or before initial_pos is removed.

   initial_pos is a size_t so that it matches the type of the caller's
   str_len() result and is not narrowed on platforms where size_t is wider
   than unsigned int. */
static void str_sanitize_truncate_char(string_t *dest, size_t initial_pos)
{
	const unsigned char *data = str_data(dest);
	size_t len = str_len(dest);

	if (len == initial_pos)
		return;
	if ((data[len-1] & 0x80) == 0) {
		/* plain ASCII byte */
		str_truncate(dest, len-1);
		return;
	}
	/* truncate UTF-8 sequence. */
	while (len > 0 && (data[len-1] & 0xc0) == 0x80)
		len--;
	if (len > 0 && (data[len-1] & 0xc0) == 0xc0)
		len--;
	if (len >= initial_pos)
		str_truncate(dest, len);
}

/* Appends a sanitized copy of src to dest, reading at most max_bytes bytes
   of src. Control characters (< 32) and bytes that don't start a valid UTF-8
   sequence are each replaced with '?'. If src wasn't fully consumed, the
   appended part is shortened to at most max_bytes-3 bytes (removing whole
   characters) and "..." is appended. With max_bytes < 3 the appended part
   becomes just "...". */
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
	size_t initial_pos = str_len(dest);
	unichar_t chr;
	size_t i;

	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);

		if (len == 0)
			break; /* input ended too early */

		if (len < 0) {
			/* invalid UTF-8 */
			str_append_c(dest, '?');
			i++;
			continue;
		}
		if ((unsigned char)src[i] < 32)
			str_append_c(dest, '?');
		else
			str_append_n(dest, src+i, len);
		i += len;
	}

	if (src[i] != '\0') {
		/* not everything was copied - make room for "..." */
		if (max_bytes < 3)
			str_truncate(dest, initial_pos);
		else {
			while (str_len(dest) - initial_pos > max_bytes-3)
				str_sanitize_truncate_char(dest, initial_pos);
		}
		str_append(dest, "...");
	}
}

/* Appends a sanitized copy of src to dest, consuming at most max_cps valid
   code points of src. Control characters (< 32) and bytes that don't start a
   valid UTF-8 sequence are each replaced with
   UNICODE_REPLACEMENT_CHAR_UTF8. If src wasn't fully consumed, the most
   recently appended character is replaced with
   UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8. max_cps must be > 0.

   NOTE(review): invalid bytes are replaced without being counted against
   max_cps, so input containing many invalid bytes can produce more than
   max_cps output characters - confirm whether this is intended. */
void str_sanitize_append_utf8(string_t *dest, const char *src,
			      uintmax_t max_cps)
{
	/* Start from the current end of dest. If the very first sequence in
	   src is incomplete, the loop breaks before anything is appended and
	   the truncation below must not touch what dest already contained. */
	size_t last_pos = str_len(dest);
	unichar_t chr;
	uintmax_t c;
	size_t i;

	i_assert(max_cps > 0);

	for (i = 0, c = 0; c < max_cps && src[i] != '\0'; ) {
		int len = uni_utf8_get_char(src+i, &chr);

		if (len == 0)
			break; /* input ended too early */

		/* remember where this character begins, so it can be replaced
		   with the ellipsis if it turns out to be the last one that
		   fits */
		last_pos = str_len(dest);

		if (len < 0) {
			/* invalid UTF-8 */
			str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
			i++;
			continue;
		}
		if ((unsigned char)src[i] < 32)
			str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
		else
			str_append_n(dest, src+i, len);
		i += len;
		c++;
	}

	if (src[i] != '\0') {
		str_truncate(dest, last_pos);
		str_append(dest, UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8);
	}
}

/* Returns src sanitized as described for str_sanitize_append(). If src is
   NULL, NULL is returned. If src needs no changes and fits within max_bytes,
   src itself is returned without copying. Otherwise the result is the
   contents of a string created with t_str_new(). */
const char *str_sanitize(const char *src, size_t max_bytes)
{
	string_t *str;
	size_t i;

	if (src == NULL)
		return NULL;

	i = str_sanitize_skip_start(src, max_bytes);
	if (src[i] == '\0')
		return src;

	str = t_str_new(I_MIN(max_bytes, 256));
	str_sanitize_append(str, src, max_bytes);
	return str_c(str);
}

/* Returns src sanitized as described for str_sanitize_append_utf8(). If src
   is NULL, NULL is returned. If src needs no changes and fits within max_cps
   code points, src itself is returned without copying. Otherwise the result
   is the contents of a string created with t_str_new(). */
const char *str_sanitize_utf8(const char *src, uintmax_t max_cps)
{
	string_t *str;
	size_t i;

	if (src == NULL)
		return NULL;

	i = str_sanitize_skip_start_utf8(src, max_cps);
	if (src[i] == '\0')
		return src;

	str = t_str_new(I_MIN(max_cps, 256));
	str_sanitize_append_utf8(str, src, max_cps);
	return str_c(str);
}