Mercurial > dovecot > core-2.2
view src/lib-fts/fts-tokenizer.c @ 19604:c996bc091c6b
master: Do not close stdout if going foreground
This lets one use /dev/stdout for logging. Mainly
useful for testing purposes where we can generate
log output to stdout and use tee to write it to a
file for later examination.
author | Aki Tuomi <aki.tuomi@dovecot.fi> |
---|---|
date | Mon, 18 Jan 2016 15:50:23 +0200 |
parents | 0f22db71df7a |
children | 2e2563132d5f |
line wrap: on
line source
/* Copyright (c) 2014-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "istream.h"
#include "str.h"
#include "strfuncs.h"
#include "fts-tokenizer.h"
#include "fts-tokenizer-private.h"

/* Registry of the known tokenizer classes. The array is created on the
   first fts_tokenizer_register() call. */
static ARRAY(const struct fts_tokenizer *) fts_tokenizer_classes;

/* Register the built-in tokenizer classes, unless the registry has
   already been created. */
void fts_tokenizers_init(void)
{
	if (!array_is_created(&fts_tokenizer_classes)) {
		fts_tokenizer_register(fts_tokenizer_generic);
		fts_tokenizer_register(fts_tokenizer_email_address);
	}
}

/* Free the class registry, if it exists. */
void fts_tokenizers_deinit(void)
{
	if (array_is_created(&fts_tokenizer_classes))
		array_free(&fts_tokenizer_classes);
}

/* private */
/* Append tok_class to the registry, creating the registry if needed. */
void fts_tokenizer_register(const struct fts_tokenizer *tok_class)
{
	if (!array_is_created(&fts_tokenizer_classes))
		i_array_init(&fts_tokenizer_classes, FTS_TOKENIZER_CLASSES_NR);
	array_append(&fts_tokenizer_classes, &tok_class, 1);
}

/* private */
/* Remove the first registered class whose name equals tok_class->name.
   The registry is freed when its last entry is removed. Unregistering a
   class that is not in the registry hits i_unreached(). */
void fts_tokenizer_unregister(const struct fts_tokenizer *tok_class)
{
	const struct fts_tokenizer *const *tp;
	unsigned int idx;

	array_foreach(&fts_tokenizer_classes, tp) {
		if (strcmp((*tp)->name, tok_class->name) == 0) {
			idx = array_foreach_idx(&fts_tokenizer_classes, tp);
			array_delete(&fts_tokenizer_classes, idx, 1);
			if (array_count(&fts_tokenizer_classes) == 0)
				array_free(&fts_tokenizer_classes);
			/* the array was modified (and possibly freed), so
			   iteration must not continue */
			return;
		}
	}
	i_unreached();
}

/* Return the registered tokenizer class with the given name, or NULL if
   no registered class has that name. */
const struct fts_tokenizer *fts_tokenizer_find(const char *name)
{
	const struct fts_tokenizer *const *tp;

	array_foreach(&fts_tokenizer_classes, tp) {
		if (strcmp((*tp)->name, name) == 0)
			return *tp;
	}
	return NULL;
}

/* Return the tokenizer's name. */
const char *fts_tokenizer_name(const struct fts_tokenizer *tok)
{
	return tok->name;
}

/* Forget the previously seen input block, so that the next
   fts_tokenizer_next_self() call treats its data as a whole new block. */
static void fts_tokenizer_self_reset(struct fts_tokenizer *tok)
{
	tok->prev_data = NULL;
	tok->prev_size = 0;
	tok->prev_skip = 0;
	tok->prev_reply_finished = TRUE;
}

/* Create a tokenizer instance of class tok_class.

   parent:      optional tokenizer that receives every token produced by
                the new tokenizer as its input. A reference to it is
                taken; it is dropped in fts_tokenizer_unref().
   settings:    NULL, or a NULL-terminated array with an even number of
                strings. NULL is replaced with an empty array before it
                is passed to the class's create().
   tokenizer_r: set to the new tokenizer (refcount 1) on success, NULL on
                failure.
   error_r:     passed through to the class's create().

   Returns 0 on success, -1 if the class's create() failed. */
int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
			 struct fts_tokenizer *parent,
			 const char *const *settings,
			 struct fts_tokenizer **tokenizer_r,
			 const char **error_r)
{
	struct fts_tokenizer *tok;
	const char *empty_settings = NULL;

	i_assert(settings == NULL || str_array_length(settings) % 2 == 0);

	if (settings == NULL)
		settings = &empty_settings;

	if (tok_class->v->create(settings, &tok, error_r) < 0) {
		*tokenizer_r = NULL;
		return -1;
	}
	tok->refcount = 1;
	fts_tokenizer_self_reset(tok);
	if (parent != NULL) {
		fts_tokenizer_ref(parent);
		tok->parent = parent;
		/* holds a copy of our current token while the parent
		   tokenizer is processing it */
		tok->parent_input = buffer_create_dynamic(default_pool, 128);
	}

	*tokenizer_r = tok;
	return 0;
}

/* Add a reference to the tokenizer. */
void fts_tokenizer_ref(struct fts_tokenizer *tok)
{
	i_assert(tok->refcount > 0);
	tok->refcount++;
}

/* Drop a reference and set *_tok to NULL. When the last reference is
   dropped, the parent input buffer is freed, the reference to the parent
   is dropped and the tokenizer is destroyed via its class's destroy(). */
void fts_tokenizer_unref(struct fts_tokenizer **_tok)
{
	struct fts_tokenizer *tok = *_tok;

	i_assert(tok->refcount > 0);
	*_tok = NULL;

	if (--tok->refcount > 0)
		return;

	if (tok->parent_input != NULL)
		buffer_free(&tok->parent_input);
	if (tok->parent != NULL)
		fts_tokenizer_unref(&tok->parent);
	tok->v->destroy(tok);
}

/* Get the next token from this tokenizer only (parents are not involved).

   While a call returns > 0, the caller must keep passing the same
   data/size pair; the part of the block that was already consumed is
   skipped here before calling the implementation. Once the
   implementation returns 0, the state is reset and a new block may be
   given. The implementation's return value is returned unchanged. */
static int
fts_tokenizer_next_self(struct fts_tokenizer *tok,
			const unsigned char *data, size_t size,
			const char **token_r, const char **error_r)
{
	int ret = 0;
	size_t skip = 0;

	i_assert(tok->prev_reply_finished ||
		 (data == tok->prev_data && size == tok->prev_size));

	if (tok->prev_reply_finished) {
		/* whole new data */
		ret = tok->v->next(tok, data, size, &skip, token_r, error_r);
	} else {
		/* continuing previous data */
		const unsigned char *data_next;

		i_assert(tok->prev_skip <= size);
		/* data is NULL when finalizing (fts_tokenizer_final()
		   passes NULL with size 0) and a previous finalizing call
		   already returned a token. Pointer arithmetic on NULL,
		   even NULL + 0, is undefined behavior in C, so avoid it. */
		data_next = data == NULL ? NULL : data + tok->prev_skip;
		ret = tok->v->next(tok, data_next,
				   size - tok->prev_skip, &skip,
				   token_r, error_r);
	}

	if (ret > 0) {
		i_assert(skip <= size - tok->prev_skip);
		tok->prev_data = data;
		tok->prev_size = size;
		tok->prev_skip = tok->prev_skip + skip;
		tok->prev_reply_finished = FALSE;
	} else if (ret == 0) {
		/* we need a new data block */
		fts_tokenizer_self_reset(tok);
	}
	return ret;
}

/* Reset the tokenizer implementation's state via its class's reset() and
   forget the previously seen input block. */
void fts_tokenizer_reset(struct fts_tokenizer *tok)
{
	tok->v->reset(tok);
	fts_tokenizer_self_reset(tok);
}

/* Get the next token for the given input block.

   If the tokenizer has a parent (and skip_parents is not set), each token
   produced by this tokenizer is copied to parent_input and run through
   the parent: first as the parent's input data, then with a finalizing
   (NULL, 0) call. Only after the parent returns 0 for both is this
   tokenizer asked for its next token. The progress is tracked in
   tok->parent_state, which is advanced with ++ and therefore relies on
   the ADD_DATA, NEXT_OUTPUT and FINALIZE states being consecutive values.

   Returns > 0 when a (non-empty) token was stored to *token_r and 0 when
   a new data block is needed. Other values come unchanged from the
   tokenizer implementation (presumably an error with *error_r set). */
int fts_tokenizer_next(struct fts_tokenizer *tok,
		       const unsigned char *data, size_t size,
		       const char **token_r, const char **error_r)
{
	int ret;

	switch (tok->parent_state) {
	case FTS_TOKENIZER_PARENT_STATE_ADD_DATA:
		ret = fts_tokenizer_next_self(tok, data, size, token_r, error_r);
		if (ret <= 0 || tok->parent == NULL || tok->skip_parents)
			break;
		/* replace the parent's input with the token we just got */
		buffer_set_used_size(tok->parent_input, 0);
		buffer_append(tok->parent_input, *token_r, strlen(*token_r));
		tok->parent_state++;
		/* fall through */
	case FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT:
		ret = fts_tokenizer_next(tok->parent, tok->parent_input->data,
					 tok->parent_input->used,
					 token_r, error_r);
		if (ret != 0)
			break;
		tok->parent_state++;
		/* fall through */
	case FTS_TOKENIZER_PARENT_STATE_FINALIZE:
		ret = fts_tokenizer_next(tok->parent, NULL, 0, token_r, error_r);
		if (ret != 0)
			break;
		/* we're finished sending this token to parent tokenizer.
		   see if our own tokenizer has more tokens available */
		tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA;
		return fts_tokenizer_next(tok, data, size, token_r, error_r);
	default:
		i_unreached();
	}
	/* we must not be returning empty tokens */
	i_assert(ret <= 0 || (*token_r)[0] != '\0');
	return ret;
}

/* Finalize tokenizing: equivalent to fts_tokenizer_next() with NULL data
   and zero size. */
int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
			const char **error_r)
{
	return fts_tokenizer_next(tok, NULL, 0, token_r, error_r);
}