view src/lib/str-sanitize.c @ 21322:5ab8dc1a4a6f

global: Change string position/length from unsigned int to size_t Mainly to avoid truncating >4GB strings, which might potentially cause some security holes. Normally there are other limits, which prevent such excessive strings from being created in the first place. I'm sure this didn't find everything. Maybe everything could be found with compiler warnings. -Wconversion kind of does it, but it gives way too many unnecessary warnings. These were mainly found with: grep " = strlen" egrep "unsigned int.*(size|len)"
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Mon, 12 Dec 2016 07:19:55 +0200
parents 0f22db71df7a
children 2e2563132d5f
line wrap: on
line source

/* Copyright (c) 2004-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "unichar.h"
#include "str.h"
#include "str-sanitize.h"

static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
{
	unichar_t chr;
	size_t i;

	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
		if (len <= 0)
			break;
		if ((unsigned char)src[i] < 32)
			break;
		i += len;
	}
	i_assert(i <= max_bytes);
	return i;
}

static void str_sanitize_truncate_char(string_t *dest, unsigned int initial_pos)
{
	const unsigned char *data = str_data(dest);
	size_t len = str_len(dest);

	if (len == initial_pos)
		return;
	if ((data[len-1] & 0x80) == 0) {
		str_truncate(dest, len-1);
		return;
	}
	/* truncate UTF-8 sequence. */
	while (len > 0 && (data[len-1] & 0xc0) == 0x80)
		len--;
	if (len > 0 && (data[len-1] & 0xc0) == 0xc0)
		len--;
	if (len >= initial_pos)
		str_truncate(dest, len);
}

void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
	size_t initial_pos = str_len(dest);
	unichar_t chr;
	size_t i;

	for (i = 0; i < max_bytes && src[i] != '\0'; ) {
		int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr);
		if (len == 0)
			break; /* input ended too early */

		if (len < 0) {
			/* invalid UTF-8 */
			str_append_c(dest, '?');
			i++;
			continue;
		}
		if ((unsigned char)src[i] < 32)
			str_append_c(dest, '?');
		else
			str_append_n(dest, src+i, len);
		i += len;
	}

	if (src[i] != '\0') {
		if (max_bytes < 3)
			str_truncate(dest, initial_pos);
		else {
			while (str_len(dest) - initial_pos > max_bytes-3)
				str_sanitize_truncate_char(dest, initial_pos);
		}
		str_append(dest, "...");
	}
}

const char *str_sanitize(const char *src, size_t max_bytes)
{
	string_t *str;
	size_t i;

	if (src == NULL)
		return NULL;

	i = str_sanitize_skip_start(src, max_bytes);
	if (src[i] == '\0')
		return src;

	str = t_str_new(I_MIN(max_bytes, 256));
	str_sanitize_append(str, src, max_bytes);
	return str_c(str);
}