view src/lib/str-sanitize.h @ 22922:a556724ce39b

lib: Implement str_sanitize_utf8(). Unlike str_sanitize(), this function truncates strings based on a UTF8 code point limit rather than a maximum size in bytes. Also, the Unicode replacement character is used to mark invalid/control characters and an ellipsis character is used to indicate the string truncation. For the normal str_sanitize() this is done using a question mark and triple dots respectively.
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Sat, 14 Apr 2018 02:05:51 +0200
parents 081c7da83d8f
children
line wrap: on
line source

#ifndef STR_SANITIZE_H
#define STR_SANITIZE_H

/* Append a sanitized copy of src to dest. Every control character in src is
   appended as '?'. If src is longer than max_bytes, it is truncated and "..."
   is appended to the end to mark the truncation. Note that src is treated as
   UTF-8 input, but max_bytes is a limit in bytes, not in UTF-8 characters.
   NOTE(review): whether the "..." marker counts against max_bytes is not
   stated here - confirm against the implementation. */
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes);
/* Append a sanitized copy of src to dest, using Unicode marker characters.
   Every control character in src is appended as the Unicode replacement
   character (U+FFFD). If src has more than max_cps Unicode code points, it is
   truncated and a horizontal ellipsis character (U+2026) is appended to the
   end to mark the truncation. Unlike str_sanitize_append(), the limit is
   counted in code points rather than in bytes.
   NOTE(review): presumably invalid UTF-8 input is replaced with U+FFFD as
   well - confirm against the implementation.
 */
void str_sanitize_append_utf8(string_t *dest, const char *src,
			      uintmax_t max_cps);
/* Return a sanitized version of src, with max_bytes as the size limit in
   bytes. If sanitizing changes nothing, the src pointer itself is returned.
   If src is NULL, NULL is returned.
   NOTE(review): where a modified result is stored, and how long it stays
   valid, is not documented here - confirm against the implementation. */
const char *str_sanitize(const char *src, size_t max_bytes);
/* The Unicode version of str_sanitize(). It uses str_sanitize_append_utf8()
   internally, so max_cps is a limit in Unicode code points rather than bytes.
   NOTE(review): presumably this also returns src unchanged when nothing needs
   sanitizing and NULL when src is NULL, like str_sanitize() - confirm. */
const char *str_sanitize_utf8(const char *src, uintmax_t max_cps);

#endif