Mercurial > dovecot > core-2.2
changeset 5520:f322edd67a4f HEAD
Added Boyer-Moore string searching.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 04 Apr 2007 11:17:03 +0300 |
parents | 7b6511e67476 |
children | baf3367d2450 |
files | src/lib/Makefile.am src/lib/str-find.c src/lib/str-find.h |
diffstat | 3 files changed, 186 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib/Makefile.am Wed Apr 04 09:15:31 2007 +0300 +++ b/src/lib/Makefile.am Wed Apr 04 11:17:03 2007 +0300 @@ -78,6 +78,7 @@ seq-range-array.c \ sha1.c \ str.c \ + str-find.c \ str-sanitize.c \ strescape.c \ strfuncs.c \ @@ -156,6 +157,7 @@ seq-range-array.h \ sha1.h \ str.h \ + str-find.h \ str-sanitize.h \ strescape.h \ strfuncs.h \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/str-find.c Wed Apr 04 11:17:03 2007 +0300 @@ -0,0 +1,167 @@ +/* Copyright (C) 2007 Timo Sirainen */ + +#include "lib.h" +#include "str-find.h" + +struct str_find_context { + pool_t pool; + unsigned char *key; + unsigned int key_len; + + unsigned int *matches; + unsigned int match_count; + + int badtab[UCHAR_MAX+1]; + int goodtab[]; +}; + +static void init_badtab(struct str_find_context *ctx) +{ + unsigned int i, len_1 = ctx->key_len - 1; + + for (i = 0; i <= UCHAR_MAX; i++) + ctx->badtab[i] = ctx->key_len; + + for (i = 0; i < len_1; i++) + ctx->badtab[ctx->key[i]] = len_1 - i; +} + +static void init_suffixes(struct str_find_context *ctx, unsigned int *suffixes) +{ + unsigned int len_1 = ctx->key_len - 1; + int f = 0, g, i; + + suffixes[len_1] = ctx->key_len; + g = len_1; + for (i = ctx->key_len - 2; i >= 0; i--) { + if (i > g && (int)suffixes[i + len_1 - f] < i - g) + suffixes[i] = suffixes[i + len_1 - f]; + else { + if (i < g) + g = i; + f = i; + while (g >= 0 && ctx->key[g] == ctx->key[g + len_1 - f]) + g--; + suffixes[i] = f - g; + } + } +} + +static void init_goodtab(struct str_find_context *ctx) +{ + unsigned int len_1 = ctx->key_len - 1; + unsigned int j, *suffixes; + int i; + + suffixes = t_buffer_get(ctx->key_len); + init_suffixes(ctx, suffixes); + + for (i = 0; i < (int)ctx->key_len; i++) + ctx->goodtab[i] = ctx->key_len; + + j = 0; + for (i = len_1; i >= -1; i--) { + if (i < 0 || suffixes[i] == (unsigned int)i + 1) { + for (; j < len_1 - i; j++) { + if (ctx->goodtab[j] == (int)ctx->key_len) + ctx->goodtab[j] = len_1 - i; + } + } + } + for (i = 0; i <= (int)ctx->key_len - 2; i++) + ctx->goodtab[len_1 - suffixes[i]] = len_1 - i; +} + +struct str_find_context *str_find_init(pool_t pool, const char *key) +{ + struct str_find_context *ctx; + unsigned int key_len = strlen(key); + + ctx = p_malloc(pool, sizeof(struct str_find_context) + + sizeof(ctx->goodtab[0]) * key_len); + ctx->pool = pool; + ctx->matches = p_malloc(pool, key_len); + ctx->key_len = key_len; + ctx->key = p_malloc(pool, key_len); + memcpy(ctx->key, key, key_len); + + init_goodtab(ctx); + init_badtab(ctx); + return ctx; +} + +void str_find_deinit(struct str_find_context **_ctx) +{ + struct str_find_context *ctx = *_ctx; + + *_ctx = NULL; + p_free(ctx->pool, ctx->matches); + p_free(ctx->pool, ctx->key); + p_free(ctx->pool, ctx); +} + +bool str_find_more(struct str_find_context *ctx, + const unsigned char *data, size_t size) +{ + unsigned int key_len = ctx->key_len; + unsigned int i, j, a, b; + int bad_value; + + for (i = j = 0; i < ctx->match_count; i++) { + a = ctx->matches[i]; + if (ctx->matches[i] + size >= key_len) { + /* we can fully determine this match now */ + for (; a < key_len; a++) { + if (ctx->key[a] != data[a - ctx->matches[i]]) + break; + } + + if (a == key_len) + return TRUE; + } else { + for (b = 0; b < size; b++) { + if (ctx->key[a+b] != data[b]) + break; + } + + if (b == size) + ctx->matches[j++] = a + size; + } + } + if (j > 0) { + i_assert(j + size < key_len); + ctx->match_count = j; + j = 0; + } else { + /* Boyer-Moore searching */ + j = 0; + while (j + key_len <= size) { + i = key_len - 1; + while (ctx->key[i] == data[i + j]) { + if (i == 0) + return TRUE; + i--; + } + + bad_value = ctx->badtab[data[i + j]] + i + 1 - key_len; + j += I_MAX(ctx->goodtab[i], bad_value); + } + i_assert(j <= size); + ctx->match_count = 0; + } + + for (; j < size; j++) { + for (i = j; i < size; i++) { + if (ctx->key[i-j] != data[i]) + break; + } + if (i == size) + ctx->matches[ctx->match_count++] = size - j; + } + return FALSE; +} + +void str_find_reset(struct str_find_context *ctx) +{ + ctx->match_count = 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib/str-find.h Wed Apr 04 11:17:03 2007 +0300 @@ -0,0 +1,17 @@ +#ifndef __STR_FIND_H +#define __STR_FIND_H + +struct str_find_context; + +struct str_find_context *str_find_init(pool_t pool, const char *key); +void str_find_deinit(struct str_find_context **ctx); + +/* Returns TRUE if key is found. It's possible to send the data in arbitrary + blocks and have the key still match. */ +bool str_find_more(struct str_find_context *ctx, + const unsigned char *data, size_t size); +/* Reset input data. The next str_find_more() call won't try to match the key + to earlier data. */ +void str_find_reset(struct str_find_context *ctx); + +#endif