Mercurial > dovecot > core-2.2
changeset 7990:662172573fe1 HEAD
Initial code to support Apache Solr (Lucene indexing server).
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 11 Jul 2008 01:44:13 +0530 |
parents | 9369c0190ef0 |
children | b6c6070a38ac |
files | configure.in src/plugins/Makefile.am src/plugins/fts-solr/Makefile.am src/plugins/fts-solr/fts-backend-solr.c src/plugins/fts-solr/fts-solr-plugin.c src/plugins/fts-solr/fts-solr-plugin.h src/plugins/fts-solr/schema.xml src/plugins/fts-solr/solr-connection.c src/plugins/fts-solr/solr-connection.h src/plugins/fts/fts-storage.c |
diffstat | 10 files changed, 930 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/configure.in Thu Jul 10 22:38:31 2008 +0530 +++ b/configure.in Fri Jul 11 01:44:13 2008 +0530 @@ -263,6 +263,15 @@ want_lucene=no) AM_CONDITIONAL(BUILD_LUCENE, test "$want_lucene" = "yes") +AC_ARG_WITH(solr, +[ --with-solr Build with Solr full text search support], + if test x$withval = xno || test x$withval = xauto; then + want_solr=$withval + else + want_solr=yes + fi, + want_solr=no) + AC_ARG_WITH(ssl, [ --with-ssl=gnutls|openssl Build with GNUTLS or OpenSSL (default)], if test x$withval = xno; then @@ -2204,6 +2213,36 @@ fi AM_CONDITIONAL(HAVE_RQUOTA, test "$have_rquota" = "yes") +if test "$want_solr" != "no"; then + AC_CHECK_PROG(CURLCONFIG, curl-config, YES, NO) + if test $CURLCONFIG = YES; then + CURL_CFLAGS=`curl-config --cflags` + CURL_LIBS=`curl-config --libs` + + dnl libcurl found, also need libexpat + AC_CHECK_LIB(expat, XML_Parse, [ + AC_CHECK_HEADER(expat.h, [ + AC_SUBST(CURL_CFLAGS) + AC_SUBST(CURL_LIBS) + have_solr=yes + ], [ + if test $want_solr = yes; then + AC_ERROR([Can't build with Solr support: expat.h not found]) + fi + ]) + ], [ + if test $want_solr = yes; then + AC_ERROR([Can't build with Solr support: libexpat not found]) + fi + ]) + else + if test $want_solr = yes; then + AC_ERROR([Can't build with Solr support: curl-config not found]) + fi + fi +fi +AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes") + dnl ** dnl ** capabilities dnl ** @@ -2264,6 +2303,7 @@ src/plugins/expire/Makefile src/plugins/fts/Makefile src/plugins/fts-lucene/Makefile +src/plugins/fts-solr/Makefile src/plugins/fts-squat/Makefile src/plugins/lazy-expunge/Makefile src/plugins/mail-log/Makefile
--- a/src/plugins/Makefile.am Thu Jul 10 22:38:31 2008 +0530 +++ b/src/plugins/Makefile.am Fri Jul 11 01:44:13 2008 +0530 @@ -6,7 +6,11 @@ FTS_LUCENE = fts-lucene endif +if BUILD_SOLR +FTS_SOLR = fts-solr +endif + SUBDIRS = \ acl convert expire fts fts-squat lazy-expunge mail-log mbox-snarf \ quota imap-quota trash virtual \ - $(ZLIB) $(FTS_LUCENE) + $(ZLIB) $(FTS_LUCENE) $(FTS_SOLR)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/Makefile.am Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,32 @@ +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/lib \ + -I$(top_srcdir)/src/lib-mail \ + -I$(top_srcdir)/src/lib-index \ + -I$(top_srcdir)/src/lib-storage \ + -I$(top_srcdir)/src/plugins/fts $(CURL_CFLAGS) + +lib21_fts_solr_plugin_la_LDFLAGS = -module -avoid-version + +module_LTLIBRARIES = \ + lib21_fts_solr_plugin.la + +lib21_fts_solr_plugin_la_LIBADD = \ + $(CURL_LIBS) -lexpat + +lib21_fts_solr_plugin_la_SOURCES = \ + fts-backend-solr.c \ + fts-solr-plugin.c \ + solr-connection.c + +noinst_HEADERS = \ + fts-solr-plugin.h \ + solr-connection.h + +EXTRA_DIST = schema.xml + +install-exec-local: + for d in imap lda; do \ + $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \ + rm -f $(DESTDIR)$(moduledir)/$$d/lib21_fts_solr_plugin$(MODULE_SUFFIX); \ + $(LN_S) ../lib21_fts_solr_plugin$(MODULE_SUFFIX) $(DESTDIR)$(moduledir)/$$d; \ + done
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/fts-backend-solr.c Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,299 @@ +/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "mail-storage-private.h" +#include "solr-connection.h" +#include "fts-solr-plugin.h" + +#include <stdlib.h> +#include <curl/curl.h> + +struct solr_fts_backend_build_context { + struct fts_backend_build_context ctx; + + struct solr_connection_post *post; + uint32_t prev_uid, uid_validity; + string_t *cmd; + bool headers; +}; + +static struct solr_connection *solr_conn = NULL; + +static void solr_quote_str(string_t *dest, const char *str) +{ + solr_connection_quote_str(solr_conn, dest, str); +} + +static void xml_encode(string_t *dest, const char *str) +{ + for (; *str != '\0'; str++) { + switch (*str) { + case '&': + str_append(dest, "&amp;"); + break; + case '<': + str_append(dest, "&lt;"); + break; + case '>': + str_append(dest, "&gt;"); + break; + default: + str_append_c(dest, *str); + break; + } + } +} + +static struct fts_backend * +fts_backend_solr_init(struct mailbox *box ATTR_UNUSED) +{ + struct fts_backend *backend; + + if (solr_conn == NULL) + solr_conn = solr_connection_init(getenv("FTS_SOLR")); + + backend = i_new(struct fts_backend, 1); + *backend = fts_backend_solr; + return backend; +} + +static void fts_backend_solr_deinit(struct fts_backend *backend) +{ + i_free(backend); +} + +static int fts_backend_solr_get_last_uid(struct fts_backend *backend, + uint32_t *last_uid_r) +{ + struct mailbox_status status; + ARRAY_TYPE(seq_range) uids; + const struct seq_range *uidvals; + unsigned int count; + string_t *str; + + str = t_str_new(256); + str_append(str, "fl=uid&rows=1&sort=uid%20desc&q="); + + mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status); + str_printfa(str, "uidv:%u%%20box:", status.uidvalidity); + solr_quote_str(str, backend->box->name); + str_append(str, 
"%20user:"); + solr_quote_str(str, backend->box->storage->user); + + t_array_init(&uids, 1); + if (solr_connection_select(solr_conn, str_c(str), &uids) < 0) + return -1; + + uidvals = array_get(&uids, &count); + if (count == 0) { + /* nothing indexed yet for this mailbox */ + *last_uid_r = 0; + } else if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) { + *last_uid_r = uidvals[0].seq1; + } else { + i_error("fts_solr: Last UID lookup returned multiple rows"); + return -1; + } + return 0; +} + +static int +fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r, + struct fts_backend_build_context **ctx_r) +{ + struct solr_fts_backend_build_context *ctx; + struct mailbox_status status; + + *last_uid_r = (uint32_t)-1; + + ctx = i_new(struct solr_fts_backend_build_context, 1); + ctx->ctx.backend = backend; + ctx->post = solr_connection_post_begin(solr_conn); + ctx->cmd = str_new(default_pool, 256); + + mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status); + ctx->uid_validity = status.uidvalidity; + + *ctx_r = &ctx->ctx; + return 0; +} + +static int +fts_backend_solr_build_more(struct fts_backend_build_context *_ctx, + uint32_t uid, const unsigned char *data, + size_t size, bool headers) +{ + struct solr_fts_backend_build_context *ctx = + (struct solr_fts_backend_build_context *)_ctx; + struct mailbox *box = _ctx->backend->box; + string_t *cmd = ctx->cmd; + + /* body comes first, then headers */ + if (ctx->prev_uid != uid) { + /* uid changed */ + str_truncate(cmd, 0); + if (ctx->prev_uid == 0) + str_append(cmd, "<add>"); + else + str_append(cmd, "</field></doc>"); + ctx->prev_uid = uid; + + str_printfa(cmd, "<doc>" + "<field name=\"uid\">%u</field>" + "<field name=\"uidv\">%u</field>", + uid, ctx->uid_validity); + + str_append(cmd, "<field name=\"box\">"); + xml_encode(cmd, box->name); + str_append(cmd, "</field><field name=\"user\">"); + xml_encode(cmd, box->storage->user); + + str_printfa(cmd, "</field><field name=\"id\">%u/%u/", + 
uid, ctx->uid_validity); + xml_encode(cmd, box->storage->user); + str_append_c(cmd, '/'); + xml_encode(cmd, box->name); + str_append(cmd, "</field>"); + + ctx->headers = headers; + if (headers) { + str_append(cmd, "<field name=\"hdr\">"); + } else { + str_append(cmd, "<field name=\"body\">"); + } + solr_connection_post_more(ctx->post, str_data(cmd), + str_len(cmd)); + } else if (headers && !ctx->headers) { + str_truncate(cmd, 0); + str_append(cmd, "</field><field name=\"hdr\">"); + solr_connection_post_more(ctx->post, str_data(cmd), + str_len(cmd)); + } else { + i_assert(!(!headers && ctx->headers)); + } + + solr_connection_post_more(ctx->post, data, size); + return 0; +} + +static int +fts_backend_solr_build_deinit(struct fts_backend_build_context *_ctx) +{ + struct solr_fts_backend_build_context *ctx = + (struct solr_fts_backend_build_context *)_ctx; + int ret = 0; + + if (ctx->prev_uid != 0) { + str_truncate(ctx->cmd, 0); + str_append(ctx->cmd, "</field></doc></add>"); + solr_connection_post_more(ctx->post, str_data(ctx->cmd), + str_len(ctx->cmd)); + ret = solr_connection_end(ctx->post); + /* commit and wait until the documents we just indexed are + visible to the following search */ + if (solr_connection_post(solr_conn, + "<commit waitFlush=\"false\" " + "waitSearcher=\"true\"/>") < 0) + ret = -1; + } + str_free(&ctx->cmd); + i_free(ctx); + return ret; +} + +static void +fts_backend_solr_expunge(struct fts_backend *backend ATTR_UNUSED, + struct mail *mail) +{ + struct mailbox_status status; + + mailbox_get_status(mail->box, STATUS_UIDVALIDITY, &status); + + T_BEGIN { + string_t *cmd; + + cmd = t_str_new(256); + str_printfa(cmd, "<delete><id>%u/%u/", + mail->uid, status.uidvalidity); + xml_encode(cmd, mail->box->storage->user); + str_append_c(cmd, '/'); + xml_encode(cmd, mail->box->name); + str_append(cmd, "</id></delete>"); + + (void)solr_connection_post(solr_conn, str_c(cmd)); + } T_END; +} + +static void +fts_backend_solr_expunge_finish(struct fts_backend 
*backend ATTR_UNUSED, + struct mailbox *box ATTR_UNUSED, + bool committed ATTR_UNUSED) +{ +} + +static int fts_backend_solr_lock(struct fts_backend *backend ATTR_UNUSED) +{ + return 1; +} + +static void fts_backend_solr_unlock(struct fts_backend *backend ATTR_UNUSED) +{ +} + +static int +fts_backend_solr_lookup(struct fts_backend *backend, const char *key, + enum fts_lookup_flags flags, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + struct mailbox_status status; + string_t *str; + + i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0); + + str = t_str_new(256); + str_append(str, "fl=uid&q="); + if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) { + /* body only */ + i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0); + str_append(str, "body:"); + } else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) { + /* header only */ + str_append(str, "hdr:"); + } else { + /* both */ + str_append(str, "any:"); + } + solr_quote_str(str, key); + + mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status); + str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity); + solr_quote_str(str, backend->box->name); + str_append(str, "%20user:"); + solr_quote_str(str, backend->box->storage->user); + + array_clear(maybe_uids); + return solr_connection_select(solr_conn, str_c(str), definite_uids); +} + +struct fts_backend fts_backend_solr = { + MEMBER(name) "solr", + MEMBER(flags) 0, + + { + fts_backend_solr_init, + fts_backend_solr_deinit, + fts_backend_solr_get_last_uid, + fts_backend_solr_build_init, + fts_backend_solr_build_more, + fts_backend_solr_build_deinit, + fts_backend_solr_expunge, + fts_backend_solr_expunge_finish, + fts_backend_solr_lock, + fts_backend_solr_unlock, + fts_backend_solr_lookup, + NULL + } +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/fts-solr-plugin.c Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,16 @@ +/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "fts-solr-plugin.h" + +const char *fts_solr_plugin_version = PACKAGE_VERSION; + +void fts_solr_plugin_init(void) +{ + fts_backend_register(&fts_backend_solr); +} + +void fts_solr_plugin_deinit(void) +{ + fts_backend_unregister(fts_backend_solr.name); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/fts-solr-plugin.h Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,11 @@ +#ifndef FTS_SOLR_PLUGIN_H +#define FTS_SOLR_PLUGIN_H + +#include "fts-api-private.h" + +extern struct fts_backend fts_backend_solr; + +void fts_solr_plugin_init(void); +void fts_solr_plugin_deinit(void); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/schema.xml Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,50 @@ +<?xml version="1.0" encoding="UTF-8" ?> + +<schema name="dovecot" version="1.1"> + <types> + <!-- IMAP has 32bit unsigned ints but java ints are signed, so use longs --> + <fieldType name="string" class="solr.StrField" omitNorms="true"/> + <fieldType name="long" class="solr.LongField" omitNorms="true"/> + <fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/> + <fieldType name="float" class="solr.FloatField" omitNorms="true"/> + + <fieldType name="text" class="solr.TextField" positionIncrementGap="100"> + <analyzer type="index"> + <tokenizer class="solr.WhitespaceTokenizerFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> + </analyzer> + <analyzer type="query"> + <tokenizer class="solr.WhitespaceTokenizerFactory"/> + <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> + <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> + <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> + </analyzer> + </fieldType> + </types> + + + <fields> + <field name="id" type="string" indexed="true" stored="true" required="true" /> + <field name="uid" type="slong" indexed="true" stored="true" required="true" /> + 
<field name="uidv" type="long" indexed="true" stored="true" required="true" /> + <field name="box" type="string" indexed="true" stored="true" required="true" /> + <field name="user" type="string" indexed="true" stored="true" required="true" /> + <field name="hdr" type="text" indexed="true" stored="false" /> + <field name="body" type="text" indexed="true" stored="false" /> + <field name="any" type="text" indexed="true" stored="false" multiValued="true" /> + </fields> + + <copyField source="hdr" dest="any" /> + <copyField source="body" dest="any" /> + + <uniqueKey>id</uniqueKey> + <defaultSearchField>any</defaultSearchField> + <solrQueryParser defaultOperator="AND" /> +</schema>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/solr-connection.c Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,454 @@ +/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */ + +/* curl: 7.16.0 curl_multi_timeout */ + +#include "lib.h" +#include "str.h" +#include "strescape.h" +#include "solr-connection.h" + +#include <curl/curl.h> +#include <expat.h> + +enum solr_xml_response_state { + SOLR_XML_RESPONSE_STATE_ROOT, + SOLR_XML_RESPONSE_STATE_RESPONSE, + SOLR_XML_RESPONSE_STATE_RESULT, + SOLR_XML_RESPONSE_STATE_DOC, + SOLR_XML_RESPONSE_STATE_CONTENT +}; + +enum solr_xml_content_state { + SOLR_XML_CONTENT_STATE_NONE = 0, + SOLR_XML_CONTENT_STATE_UID, + SOLR_XML_CONTENT_STATE_SCORE +}; + +struct solr_lookup_xml_context { + enum solr_xml_response_state state; + enum solr_xml_content_state content_state; + int depth; + + ARRAY_TYPE(seq_range) *uids; +}; + +struct solr_connection_post { + struct solr_connection *conn; + const unsigned char *data; + size_t size, pos; + + unsigned int failed:1; +}; + +struct solr_connection { + CURL *curl; + CURLM *curlm; + + char curl_errorbuf[CURL_ERROR_SIZE]; + struct curl_slist *headers, *headers_post; + XML_Parser xml_parser; + + char *url; + + unsigned int debug:1; + unsigned int posting:1; + unsigned int xml_failed:1; +}; + +static void +solr_conn_init_settings(struct solr_connection *conn, const char *str) +{ + const char *const *tmp; + + if (str == NULL) + return; + + for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) { + if (strncmp(*tmp, "url=", 4) == 0) { + i_free(conn->url); + conn->url = i_strdup(*tmp + 4); + } else if (strcmp(*tmp, "debug") == 0) { + conn->debug = TRUE; + } else { + i_fatal("fts_solr: Invalid setting: %s", *tmp); + } + } + if (conn->url == NULL) + i_fatal("fts_solr: url setting missing"); +} + +static size_t +curl_output_func(void *data, size_t element_size, size_t nmemb, void *context) +{ + struct solr_connection_post *post = context; + size_t size = 
element_size * nmemb; + + /* @UNSAFE */ + if (size > post->size - post->pos) + size = post->size - post->pos; + + memcpy(data, post->data + post->pos, size); + post->pos += size; + return size; +} + +static int solr_xml_parse(struct solr_connection *conn, + const void *data, size_t size, bool done) +{ + enum XML_Error err; + int line; + + if (conn->xml_failed) + return -1; + + if (XML_Parse(conn->xml_parser, data, size, done)) + return 0; + + err = XML_GetErrorCode(conn->xml_parser); + if (err != XML_ERROR_FINISHED) { + line = XML_GetCurrentLineNumber(conn->xml_parser); + i_error("fts_solr: Invalid XML input at line %d: %s", + line, XML_ErrorString(err)); + conn->xml_failed = TRUE; + return -1; + } + return 0; +} + +static size_t +curl_input_func(void *data, size_t element_size, size_t nmemb, void *context) +{ + struct solr_connection *conn = context; + size_t size = element_size * nmemb; + + (void)solr_xml_parse(conn, data, size, FALSE); + return size; +} + +struct solr_connection *solr_connection_init(const char *settings) +{ + struct solr_connection *conn; + + conn = i_new(struct solr_connection, 1); + solr_conn_init_settings(conn, settings); + + conn->curlm = curl_multi_init(); + conn->curl = curl_easy_init(); + if (conn->curl == NULL || conn->curlm == NULL) { + i_fatal_status(FATAL_OUTOFMEM, + "fts_solr: Failed to allocate curl"); + } + + /* set global curl options */ + curl_easy_setopt(conn->curl, CURLOPT_ERRORBUFFER, conn->curl_errorbuf); + if (conn->debug) + curl_easy_setopt(conn->curl, CURLOPT_VERBOSE, 1L); + + curl_easy_setopt(conn->curl, CURLOPT_NOPROGRESS, 1L); + curl_easy_setopt(conn->curl, CURLOPT_NOSIGNAL, 1L); + curl_easy_setopt(conn->curl, CURLOPT_READFUNCTION, curl_output_func); + curl_easy_setopt(conn->curl, CURLOPT_WRITEFUNCTION, curl_input_func); + curl_easy_setopt(conn->curl, CURLOPT_WRITEDATA, conn); + + conn->headers = curl_slist_append(NULL, "Content-Type: text/xml"); + conn->headers_post = curl_slist_append(NULL, "Content-Type: text/xml"); 
+ conn->headers_post = curl_slist_append(conn->headers_post, + "Transfer-Encoding: chunked"); + conn->headers_post = curl_slist_append(conn->headers_post, + "Expect:"); + curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER, conn->headers); + + conn->xml_parser = XML_ParserCreate("UTF-8"); + if (conn->xml_parser == NULL) { + i_fatal_status(FATAL_OUTOFMEM, + "fts_solr: Failed to allocate XML parser"); + } + return conn; +} + +void solr_connection_deinit(struct solr_connection *conn) +{ + curl_slist_free_all(conn->headers); + curl_slist_free_all(conn->headers_post); + curl_multi_cleanup(conn->curlm); + curl_easy_cleanup(conn->curl); + i_free(conn->url); + i_free(conn); +} + +void solr_connection_quote_str(struct solr_connection *conn, string_t *dest, + const char *str) +{ + char *encoded; + + encoded = curl_easy_escape(conn->curl, str_escape(str), 0); + str_printfa(dest, "%%22%s%%22", encoded); + curl_free(encoded); +} + +static const char *attrs_get_name(const char **attrs) +{ + for (; *attrs != NULL; attrs += 2) { + if (strcmp(attrs[0], "name") == 0) + return attrs[1]; + } + return ""; +} + +static void +solr_lookup_xml_start(void *context, const char *name, const char **attrs) +{ + struct solr_lookup_xml_context *ctx = context; + const char *name_attr; + + i_assert(ctx->depth >= (int)ctx->state); + + ctx->depth++; + if (ctx->depth - 1 > (int)ctx->state) { + /* skipping over unwanted elements */ + return; + } + + /* response -> result -> doc */ + switch (ctx->state) { + case SOLR_XML_RESPONSE_STATE_ROOT: + if (strcmp(name, "response") == 0) + ctx->state++; + break; + case SOLR_XML_RESPONSE_STATE_RESPONSE: + if (strcmp(name, "result") == 0) + ctx->state++; + break; + case SOLR_XML_RESPONSE_STATE_RESULT: + if (strcmp(name, "doc") == 0) + ctx->state++; + break; + case SOLR_XML_RESPONSE_STATE_DOC: + name_attr = attrs_get_name(attrs); + if (strcmp(name_attr, "uid") == 0) + ctx->content_state = SOLR_XML_CONTENT_STATE_UID; + else if (strcmp(name_attr, "score") == 0) + 
ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE; + else + break; + ctx->state++; + break; + case SOLR_XML_RESPONSE_STATE_CONTENT: + break; + } +} + +static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED) +{ + struct solr_lookup_xml_context *ctx = context; + + i_assert(ctx->depth >= (int)ctx->state); + + if (ctx->depth == (int)ctx->state) { + ctx->state--; + ctx->content_state = SOLR_XML_CONTENT_STATE_NONE; + } + ctx->depth--; +} + +static void solr_lookup_xml_data(void *context, const char *str, int len) +{ + struct solr_lookup_xml_context *ctx = context; + uint32_t uid; + int i; + + switch (ctx->content_state) { + case SOLR_XML_CONTENT_STATE_NONE: + break; + case SOLR_XML_CONTENT_STATE_UID: + for (i = 0, uid = 0; i < len; i++) { + if (str[i] < '0' || str[i] > '9') + break; + uid = uid*10 + str[i]-'0'; + } + if (i != len) { + i_error("fts_solr: received invalid uid"); + break; + } + seq_range_array_add(ctx->uids, 0, uid); + break; + case SOLR_XML_CONTENT_STATE_SCORE: + /* FIXME */ + break; + } +} + +int solr_connection_select(struct solr_connection *conn, const char *query, + ARRAY_TYPE(seq_range) *uids) +{ + struct solr_lookup_xml_context solr_lookup_context; + string_t *str; + CURLcode ret; + + i_assert(!conn->posting); + + memset(&solr_lookup_context, 0, sizeof(solr_lookup_context)); + solr_lookup_context.uids = uids; + + conn->xml_failed = FALSE; + XML_SetElementHandler(conn->xml_parser, + solr_lookup_xml_start, solr_lookup_xml_end); + XML_SetCharacterDataHandler(conn->xml_parser, solr_lookup_xml_data); + XML_SetUserData(conn->xml_parser, &solr_lookup_context); + + str = t_str_new(256); + str_append(str, conn->url); + str_append(str, "select?"); + str_append(str, query); + + curl_easy_setopt(conn->curl, CURLOPT_URL, str_c(str)); + ret = curl_easy_perform(conn->curl); + if (ret != 0) { + i_error("fts_solr: HTTP GET failed: %s", + conn->curl_errorbuf); + return -1; + } + return solr_xml_parse(conn, NULL, 0, TRUE); +} + +struct 
solr_connection_post * +solr_connection_post_begin(struct solr_connection *conn) +{ + struct solr_connection_post *post; + CURLMcode merr; + string_t *str; + + post = i_new(struct solr_connection_post, 1); + post->conn = conn; + + i_assert(!conn->posting); + conn->posting = TRUE; + + curl_easy_setopt(conn->curl, CURLOPT_READDATA, post); + merr = curl_multi_add_handle(conn->curlm, conn->curl); + if (merr != CURLM_OK) { + i_error("fts_solr: curl_multi_add_handle() failed: %s", + curl_multi_strerror(merr)); + post->failed = TRUE; + } else { + str = t_str_new(256); + str_append(str, conn->url); + str_append(str, "update"); + + curl_easy_setopt(conn->curl, CURLOPT_URL, str_c(str)); + curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER, + conn->headers_post); + curl_easy_setopt(conn->curl, CURLOPT_POST, (long)1); + } + return post; +} + +void solr_connection_post_more(struct solr_connection_post *post, + const unsigned char *data, size_t size) +{ + fd_set fdread; + fd_set fdwrite; + fd_set fdexcep; + struct timeval timeout_tv; + long timeout; + CURLMcode merr; + int ret, handles, maxfd; + + i_assert(post->conn->posting); + + if (post->failed) + return; + + post->data = data; + post->size = size; + post->pos = 0; + + for (;;) { + merr = curl_multi_perform(post->conn->curlm, &handles); + if (merr == CURLM_CALL_MULTI_PERFORM) + continue; + if (merr != CURLM_OK) { + i_error("fts_solr: curl_multi_perform() failed: %s", + curl_multi_strerror(merr)); + break; + } + if ((post->pos == post->size && post->size != 0) || + (handles == 0 && post->size == 0)) { + /* everything sent successfully */ + return; + } + + /* everything wasn't sent - wait. just use select, + since libcurl interface is easiest with it. 
*/ + FD_ZERO(&fdread); + FD_ZERO(&fdwrite); + FD_ZERO(&fdexcep); + + merr = curl_multi_fdset(post->conn->curlm, &fdread, &fdwrite, + &fdexcep, &maxfd); + if (merr != CURLM_OK) { + i_error("fts_solr: curl_multi_fdset() failed: %s", + curl_multi_strerror(merr)); + break; + } + i_assert(maxfd >= 0); + + merr = curl_multi_timeout(post->conn->curlm, &timeout); + if (merr != CURLM_OK) { + i_error("fts_solr: curl_multi_timeout() failed: %s", + curl_multi_strerror(merr)); + break; + } + + if (timeout < 0) { + timeout_tv.tv_sec = 1; + timeout_tv.tv_usec = 0; + } else { + timeout_tv.tv_sec = timeout / 1000; + timeout_tv.tv_usec = (timeout % 1000) * 1000; + } + ret = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout_tv); + if (ret < 0) { + i_error("fts_solr: select() failed: %m"); + break; + } + } + post->failed = TRUE; +} + +int solr_connection_end(struct solr_connection_post *post) +{ + struct solr_connection *conn = post->conn; + long httpret; + int ret = post->failed ? -1 : 0; + + i_assert(conn->posting); + + solr_connection_post_more(post, NULL, 0); + + curl_easy_getinfo(post->conn->curl, CURLINFO_RESPONSE_CODE, &httpret); + if (httpret != 200 && ret == 0) { + i_error("fts_solr: Indexing failed with %ld", httpret); + ret = -1; + } + + curl_easy_setopt(conn->curl, CURLOPT_READDATA, NULL); + curl_easy_setopt(conn->curl, CURLOPT_POST, (long)0); + curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER, conn->headers); + + (void)curl_multi_remove_handle(conn->curlm, conn->curl); + i_free(post); + + conn->posting = FALSE; + return ret; +} + +int solr_connection_post(struct solr_connection *conn, const char *cmd) +{ + struct solr_connection_post *post; + + post = solr_connection_post_begin(conn); + solr_connection_post_more(post, (const unsigned char *)cmd, + strlen(cmd)); + return solr_connection_end(post); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/plugins/fts-solr/solr-connection.h Fri Jul 11 01:44:13 2008 +0530 @@ -0,0 +1,22 @@ +#ifndef SOLR_CONNECTION_H +#define SOLR_CONNECTION_H + +#include "seq-range-array.h" + +struct solr_connection *solr_connection_init(const char *settings); +void solr_connection_deinit(struct solr_connection *conn); + +void solr_connection_quote_str(struct solr_connection *conn, string_t *dest, + const char *str); + +int solr_connection_select(struct solr_connection *conn, const char *query, + ARRAY_TYPE(seq_range) *uids); +int solr_connection_post(struct solr_connection *conn, const char *cmd); + +struct solr_connection_post * +solr_connection_post_begin(struct solr_connection *conn); +void solr_connection_post_more(struct solr_connection_post *post, + const unsigned char *data, size_t size); +int solr_connection_end(struct solr_connection_post *post); + +#endif
--- a/src/plugins/fts/fts-storage.c Thu Jul 10 22:38:31 2008 +0530 +++ b/src/plugins/fts/fts-storage.c Fri Jul 11 01:44:13 2008 +0530 @@ -188,7 +188,7 @@ if (fts_backend_build_init(backend, &last_uid_locked, &build) < 0) return -1; - if (last_uid != last_uid_locked) { + if (last_uid != last_uid_locked && last_uid_locked != (uint32_t)-1) { /* changed, need to get again the sequences */ i_assert(last_uid < last_uid_locked);