changeset 7990:662172573fe1 HEAD

Initial code to support Apache Solr (Lucene indexing server).
author Timo Sirainen <tss@iki.fi>
date Fri, 11 Jul 2008 01:44:13 +0530
parents 9369c0190ef0
children b6c6070a38ac
files configure.in src/plugins/Makefile.am src/plugins/fts-solr/Makefile.am src/plugins/fts-solr/fts-backend-solr.c src/plugins/fts-solr/fts-solr-plugin.c src/plugins/fts-solr/fts-solr-plugin.h src/plugins/fts-solr/schema.xml src/plugins/fts-solr/solr-connection.c src/plugins/fts-solr/solr-connection.h src/plugins/fts/fts-storage.c
diffstat 10 files changed, 930 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/configure.in	Thu Jul 10 22:38:31 2008 +0530
+++ b/configure.in	Fri Jul 11 01:44:13 2008 +0530
@@ -263,6 +263,15 @@
 	want_lucene=no)
 AM_CONDITIONAL(BUILD_LUCENE, test "$want_lucene" = "yes")
 
+dnl Solr full text search backend; disabled unless --with-solr is given.
+dnl "no" and "auto" are taken as-is, any other value means "yes".
+AC_ARG_WITH(solr,
+[  --with-solr             Build with Solr full text search support],
+	if test x$withval = xno || test x$withval = xauto; then
+		want_solr=$withval
+	else
+		want_solr=yes
+	fi,
+	want_solr=no)
+
 AC_ARG_WITH(ssl,
 [  --with-ssl=gnutls|openssl Build with GNUTLS or OpenSSL (default)],
 	if test x$withval = xno; then
@@ -2204,6 +2213,36 @@
 fi
 AM_CONDITIONAL(HAVE_RQUOTA, test "$have_rquota" = "yes")
 
+dnl Solr support needs libcurl (located through curl-config) and libexpat
+dnl (library and header). A missing dependency is a configure error only
+dnl when want_solr is "yes"; otherwise have_solr simply stays unset and
+dnl the BUILD_SOLR conditional is false.
+if test "$want_solr" != "no"; then
+  AC_CHECK_PROG(CURLCONFIG, curl-config, YES, NO)
+  if test $CURLCONFIG = YES; then
+    CURL_CFLAGS=`curl-config --cflags`
+    CURL_LIBS=`curl-config --libs`
+    
+    dnl libcurl found, also need libexpat
+    AC_CHECK_LIB(expat, XML_Parse, [
+      AC_CHECK_HEADER(expat.h, [
+        AC_SUBST(CURL_CFLAGS)
+        AC_SUBST(CURL_LIBS)
+	have_solr=yes
+      ], [
+	if test $want_solr = yes; then
+	  AC_ERROR([Can't build with Solr support: expat.h not found])
+	fi
+      ])
+    ], [
+      if test $want_solr = yes; then
+	AC_ERROR([Can't build with Solr support: libexpat not found])
+      fi
+    ])
+  else
+    if test $want_solr = yes; then
+      AC_ERROR([Can't build with Solr support: curl-config not found])
+    fi
+  fi
+fi
+AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes")
+
 dnl **
 dnl ** capabilities
 dnl **
@@ -2264,6 +2303,7 @@
 src/plugins/expire/Makefile
 src/plugins/fts/Makefile
 src/plugins/fts-lucene/Makefile
+src/plugins/fts-solr/Makefile
 src/plugins/fts-squat/Makefile
 src/plugins/lazy-expunge/Makefile
 src/plugins/mail-log/Makefile
--- a/src/plugins/Makefile.am	Thu Jul 10 22:38:31 2008 +0530
+++ b/src/plugins/Makefile.am	Fri Jul 11 01:44:13 2008 +0530
@@ -6,7 +6,11 @@
 FTS_LUCENE = fts-lucene
 endif
 
+# Build the Solr FTS backend only when configure enabled BUILD_SOLR.
+# It has its own variable (expanded in SUBDIRS below) so that it does not
+# override the fts-lucene subdirectory selection.
+if BUILD_SOLR
+FTS_SOLR = fts-solr
+endif
+
 SUBDIRS = \
 	acl convert expire fts fts-squat lazy-expunge mail-log mbox-snarf \
 	quota imap-quota trash virtual \
-	$(ZLIB) $(FTS_LUCENE)
+	$(ZLIB) $(FTS_LUCENE) $(FTS_SOLR)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/Makefile.am	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,32 @@
+# Include paths for the Dovecot libraries used by the plugin, plus the
+# compiler flags reported by curl-config so <curl/curl.h> is found.
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-mail \
+	-I$(top_srcdir)/src/lib-index \
+	-I$(top_srcdir)/src/lib-storage \
+	-I$(top_srcdir)/src/plugins/fts \
+	$(CURL_CFLAGS)
+
+lib21_fts_solr_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+	lib21_fts_solr_plugin.la
+
+# Link against libcurl (linker flags from curl-config --libs) and expat.
+lib21_fts_solr_plugin_la_LIBADD = \
+	$(CURL_LIBS) -lexpat
+
+lib21_fts_solr_plugin_la_SOURCES = \
+	fts-backend-solr.c \
+	fts-solr-plugin.c \
+	solr-connection.c
+
+noinst_HEADERS = \
+	fts-solr-plugin.h \
+	solr-connection.h
+
+EXTRA_DIST = schema.xml
+
+# Make the plugin available in the imap and lda module subdirectories by
+# symlinking it there from the copy one directory up.
+install-exec-local:
+	for d in imap lda; do \
+	  $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
+	  rm -f $(DESTDIR)$(moduledir)/$$d/lib21_fts_solr_plugin$(MODULE_SUFFIX); \
+	  $(LN_S) ../lib21_fts_solr_plugin$(MODULE_SUFFIX) $(DESTDIR)$(moduledir)/$$d; \
+	done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,299 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "str.h"
+#include "mail-storage-private.h"
+#include "solr-connection.h"
+#include "fts-solr-plugin.h"
+
+#include <stdlib.h>
+#include <curl/curl.h>
+
+/* Build context for one indexing run: the generic FTS build context
+   followed by the state of the streamed Solr <add> request. */
+struct solr_fts_backend_build_context {
+	struct fts_backend_build_context ctx;
+
+	/* streaming POST opened by fts_backend_solr_build_init() */
+	struct solr_connection_post *post;
+	/* prev_uid: UID of the <doc> currently being written, 0 = none yet.
+	   uid_validity: the mailbox's UIDVALIDITY read in build_init */
+	uint32_t prev_uid, uid_validity;
+	/* scratch buffer for the XML fragments sent around the content */
+	string_t *cmd;
+	/* TRUE when the field currently left open is "hdr", FALSE for "body" */
+	bool headers;
+};
+
+/* Connection shared by all backends in this process; created on first
+   use in fts_backend_solr_init(). */
+static struct solr_connection *solr_conn = NULL;
+
+/* Append str to dest as a quoted, URL-encoded query value, using the
+   shared connection's quoting function. */
+static void solr_quote_str(string_t *dest, const char *str)
+{
+	solr_connection_quote_str(solr_conn, dest, str);
+}
+
+/* Append the NUL-terminated str to dest, writing '&', '<' and '>' as the
+   entity references &amp;, &lt; and &gt;. Every other character is
+   copied unchanged. */
+static void xml_encode(string_t *dest, const char *str)
+{
+	const char *p;
+
+	for (p = str; *p != '\0'; p++) {
+		if (*p == '&')
+			str_append(dest, "&amp;");
+		else if (*p == '<')
+			str_append(dest, "&lt;");
+		else if (*p == '>')
+			str_append(dest, "&gt;");
+		else
+			str_append_c(dest, *p);
+	}
+}
+
+/* Create a Solr backend instance. The first call also creates the shared
+   Solr connection, configured from the FTS_SOLR environment variable. */
+static struct fts_backend *
+fts_backend_solr_init(struct mailbox *box ATTR_UNUSED)
+{
+	struct fts_backend *solr_backend;
+
+	if (solr_conn == NULL) {
+		const char *settings = getenv("FTS_SOLR");
+
+		solr_conn = solr_connection_init(settings);
+	}
+
+	/* every instance starts as a copy of the backend template */
+	solr_backend = i_new(struct fts_backend, 1);
+	*solr_backend = fts_backend_solr;
+	return solr_backend;
+}
+
+/* Free a backend instance. The shared solr_conn is left untouched. */
+static void fts_backend_solr_deinit(struct fts_backend *backend)
+{
+	i_free(backend);
+}
+
+/* Ask Solr for the highest indexed UID of this user's mailbox with the
+   current UIDVALIDITY (one row, sorted by uid descending).
+   Returns 0 and sets *last_uid_r (0 when nothing is indexed yet), or -1
+   if the lookup failed or returned something other than a single UID. */
+static int fts_backend_solr_get_last_uid(struct fts_backend *backend,
+					 uint32_t *last_uid_r)
+{
+	struct mailbox_status status;
+	ARRAY_TYPE(seq_range) uids;
+	const struct seq_range *ranges;
+	unsigned int range_count;
+	string_t *query;
+
+	query = t_str_new(256);
+	str_append(query, "fl=uid&rows=1&sort=uid%20desc&q=");
+
+	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
+	str_printfa(query, "uidv:%u%%20box:", status.uidvalidity);
+	solr_quote_str(query, backend->box->name);
+	str_append(query, "%20user:");
+	solr_quote_str(query, backend->box->storage->user);
+
+	t_array_init(&uids, 1);
+	if (solr_connection_select(solr_conn, str_c(query), &uids) < 0)
+		return -1;
+
+	ranges = array_get(&uids, &range_count);
+	if (range_count == 0) {
+		/* nothing indexed yet for this mailbox */
+		*last_uid_r = 0;
+		return 0;
+	}
+	if (range_count != 1 || ranges[0].seq1 != ranges[0].seq2) {
+		i_error("fts_solr: Last UID lookup returned multiple rows");
+		return -1;
+	}
+	*last_uid_r = ranges[0].seq1;
+	return 0;
+}
+
+/* Start an indexing run: allocate the build context, open the streaming
+   POST to Solr and remember the mailbox's UIDVALIDITY.
+   *last_uid_r is always set to (uint32_t)-1. Always returns 0. */
+static int
+fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r,
+			    struct fts_backend_build_context **ctx_r)
+{
+	struct solr_fts_backend_build_context *build_ctx;
+	struct mailbox_status status;
+
+	build_ctx = i_new(struct solr_fts_backend_build_context, 1);
+	build_ctx->ctx.backend = backend;
+	build_ctx->post = solr_connection_post_begin(solr_conn);
+	build_ctx->cmd = str_new(default_pool, 256);
+
+	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
+	build_ctx->uid_validity = status.uidvalidity;
+
+	*last_uid_r = (uint32_t)-1;
+	*ctx_r = &build_ctx->ctx;
+	return 0;
+}
+
+/* Append size bytes of data to dest, writing the XML-special characters
+   '&', '<' and '>' as entity references. */
+static void
+xml_encode_data(string_t *dest, const unsigned char *data, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; i++) {
+		switch (data[i]) {
+		case '&':
+			str_append(dest, "&amp;");
+			break;
+		case '<':
+			str_append(dest, "&lt;");
+			break;
+		case '>':
+			str_append(dest, "&gt;");
+			break;
+		default:
+			str_append_c(dest, data[i]);
+			break;
+		}
+	}
+}
+
+/* Stream one piece of message content into the open <add> request.
+
+   When uid differs from the previous call, the previous <doc> (if any) is
+   closed and a new one is started with its uid, uidv, box, user and id
+   fields, followed by an opened "hdr" or "body" field. Switching from
+   body to headers within the same uid closes "body" and opens "hdr";
+   the opposite order is not allowed.
+
+   The content itself is XML-escaped before it is sent, since it is
+   written inside a <field> element. Always returns 0; send failures are
+   tracked by the post object. */
+static int
+fts_backend_solr_build_more(struct fts_backend_build_context *_ctx,
+			    uint32_t uid, const unsigned char *data,
+			    size_t size, bool headers)
+{
+	struct solr_fts_backend_build_context *ctx =
+		(struct solr_fts_backend_build_context *)_ctx;
+	struct mailbox *box = _ctx->backend->box;
+	string_t *cmd = ctx->cmd;
+
+	/* body comes first, then headers */
+	if (ctx->prev_uid != uid) {
+		/* uid changed */
+		str_truncate(cmd, 0);
+		if (ctx->prev_uid == 0)
+			str_append(cmd, "<add>");
+		else
+			str_append(cmd, "</field></doc>");
+		ctx->prev_uid = uid;
+
+		str_printfa(cmd, "<doc>"
+			    "<field name=\"uid\">%u</field>"
+			    "<field name=\"uidv\">%u</field>",
+			    uid, ctx->uid_validity);
+
+		str_append(cmd, "<field name=\"box\">");
+		xml_encode(cmd, box->name);
+		str_append(cmd, "</field><field name=\"user\">");
+		xml_encode(cmd, box->storage->user);
+
+		/* unique document key: uid/uidvalidity/user/mailbox */
+		str_printfa(cmd, "</field><field name=\"id\">%u/%u/",
+			    uid, ctx->uid_validity);
+		xml_encode(cmd, box->storage->user);
+		str_append_c(cmd, '/');
+		xml_encode(cmd, box->name);
+		str_append(cmd, "</field>");
+
+		ctx->headers = headers;
+		if (headers) {
+			str_append(cmd, "<field name=\"hdr\">");
+		} else {
+			str_append(cmd, "<field name=\"body\">");
+		}
+		solr_connection_post_more(ctx->post, str_data(cmd),
+					  str_len(cmd));
+	} else if (headers && !ctx->headers) {
+		str_truncate(cmd, 0);
+		str_append(cmd, "</field><field name=\"hdr\">");
+		solr_connection_post_more(ctx->post, str_data(cmd),
+					  str_len(cmd));
+	} else {
+		i_assert(!(!headers && ctx->headers));
+	}
+
+	/* A zero-sized post_more() is what solr_connection_end() uses to
+	   finish the request, so empty input must not be forwarded. */
+	if (size > 0) {
+		str_truncate(cmd, 0);
+		xml_encode_data(cmd, data, size);
+		solr_connection_post_more(ctx->post, str_data(cmd),
+					  str_len(cmd));
+	}
+	return 0;
+}
+
+/* Finish an indexing run and free the build context.
+   If at least one message was written, the open field, <doc> and <add>
+   are closed, the POST is ended and a commit is sent. Returns 0 on
+   success, -1 if ending the POST or the commit failed.
+
+   NOTE(review): when no message was written (prev_uid == 0) the POST
+   opened in build_init is not ended here - confirm whether that leaves
+   the connection in posting state. */
+static int
+fts_backend_solr_build_deinit(struct fts_backend_build_context *_ctx)
+{
+	static const char commit_cmd[] =
+		"<commit waitFlush=\"false\" waitSearcher=\"true\"/>";
+	struct solr_fts_backend_build_context *build_ctx =
+		(struct solr_fts_backend_build_context *)_ctx;
+	int result = 0;
+
+	if (build_ctx->prev_uid != 0) {
+		str_truncate(build_ctx->cmd, 0);
+		str_append(build_ctx->cmd, "</field></doc></add>");
+		solr_connection_post_more(build_ctx->post,
+					  str_data(build_ctx->cmd),
+					  str_len(build_ctx->cmd));
+		result = solr_connection_end(build_ctx->post);
+
+		/* commit and wait until the documents we just indexed are
+		   visible to the following search */
+		if (solr_connection_post(solr_conn, commit_cmd) < 0)
+			result = -1;
+	}
+
+	str_free(&build_ctx->cmd);
+	i_free(build_ctx);
+	return result;
+}
+
+/* Delete the expunged mail's document from Solr. The document id is
+   built the same way as when indexing: uid/uidvalidity/user/mailbox.
+   A failed delete is ignored. */
+static void
+fts_backend_solr_expunge(struct fts_backend *backend ATTR_UNUSED,
+			 struct mail *mail)
+{
+	struct mailbox *box = mail->box;
+	struct mailbox_status status;
+
+	mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
+
+	T_BEGIN {
+		string_t *del_cmd = t_str_new(256);
+
+		str_printfa(del_cmd, "<delete><id>%u/%u/",
+			    mail->uid, status.uidvalidity);
+		xml_encode(del_cmd, box->storage->user);
+		str_append_c(del_cmd, '/');
+		xml_encode(del_cmd, box->name);
+		str_append(del_cmd, "</id></delete>");
+
+		(void)solr_connection_post(solr_conn, str_c(del_cmd));
+	} T_END;
+}
+
+/* Nothing to do: each expunge is already sent to Solr individually by
+   fts_backend_solr_expunge(). */
+static void
+fts_backend_solr_expunge_finish(struct fts_backend *backend ATTR_UNUSED,
+				struct mailbox *box ATTR_UNUSED,
+				bool committed ATTR_UNUSED)
+{
+}
+
+/* No locking is done by this backend; always returns 1. */
+static int fts_backend_solr_lock(struct fts_backend *backend ATTR_UNUSED)
+{
+	return 1;
+}
+
+/* Counterpart of fts_backend_solr_lock(); nothing to release. */
+static void fts_backend_solr_unlock(struct fts_backend *backend ATTR_UNUSED)
+{
+}
+
+/* Search Solr for key in this user's mailbox with the current
+   UIDVALIDITY. Depending on flags the "body", "hdr" or "any" field is
+   searched. Matching UIDs are added to definite_uids; maybe_uids is
+   cleared. Inverted lookups are not supported (asserted).
+   Returns the result of solr_connection_select(). */
+static int
+fts_backend_solr_lookup(struct fts_backend *backend, const char *key,
+			enum fts_lookup_flags flags,
+			ARRAY_TYPE(seq_range) *definite_uids,
+			ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	struct mailbox_status status;
+	const char *field_prefix;
+	string_t *query;
+
+	i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);
+
+	if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
+		/* body only */
+		i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0);
+		field_prefix = "body:";
+	} else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) {
+		/* header only */
+		field_prefix = "hdr:";
+	} else {
+		/* both */
+		field_prefix = "any:";
+	}
+
+	query = t_str_new(256);
+	str_append(query, "fl=uid&q=");
+	str_append(query, field_prefix);
+	solr_quote_str(query, key);
+
+	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
+	str_printfa(query, "%%20uidv:%u%%20box:", status.uidvalidity);
+	solr_quote_str(query, backend->box->name);
+	str_append(query, "%20user:");
+	solr_quote_str(query, backend->box->storage->user);
+
+	array_clear(maybe_uids);
+	return solr_connection_select(solr_conn, str_c(query), definite_uids);
+}
+
+/* Backend definition registered by fts_solr_plugin_init(); also used as
+   the template that fts_backend_solr_init() copies for each instance.
+   The function table is positional.
+   NOTE(review): the trailing NULL leaves the last slot unimplemented -
+   check struct fts_backend's function table for which one it is. */
+struct fts_backend fts_backend_solr = {
+	MEMBER(name) "solr",
+	MEMBER(flags) 0,
+
+	{
+		fts_backend_solr_init,
+		fts_backend_solr_deinit,
+		fts_backend_solr_get_last_uid,
+		fts_backend_solr_build_init,
+		fts_backend_solr_build_more,
+		fts_backend_solr_build_deinit,
+		fts_backend_solr_expunge,
+		fts_backend_solr_expunge_finish,
+		fts_backend_solr_lock,
+		fts_backend_solr_unlock,
+		fts_backend_solr_lookup,
+		NULL
+	}
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/fts-solr-plugin.c	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,16 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "fts-solr-plugin.h"
+
+const char *fts_solr_plugin_version = PACKAGE_VERSION;
+
+/* Plugin entry point: register the "solr" FTS backend. */
+void fts_solr_plugin_init(void)
+{
+	fts_backend_register(&fts_backend_solr);
+}
+
+/* Plugin exit point: unregister the backend by its name. */
+void fts_solr_plugin_deinit(void)
+{
+	fts_backend_unregister(fts_backend_solr.name);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/fts-solr-plugin.h	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,11 @@
+#ifndef FTS_SOLR_PLUGIN_H
+#define FTS_SOLR_PLUGIN_H
+
+#include "fts-api-private.h"
+
+/* The Solr FTS backend definition (fts-backend-solr.c). */
+extern struct fts_backend fts_backend_solr;
+
+/* Plugin init/deinit: register and unregister the backend. */
+void fts_solr_plugin_init(void);
+void fts_solr_plugin_deinit(void);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/schema.xml	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+
+<schema name="dovecot" version="1.1">
+  <types>
+    <!-- IMAP has 32bit unsigned ints but java ints are signed, so use longs -->
+    <fieldType name="string" class="solr.StrField" omitNorms="true"/>
+    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
+    <fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/>
+    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
+
+    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+ </types>
+
+
+ <!-- Fields written by the fts-solr plugin for every indexed message:
+      id   = "uid/uidv/user/box", the unique document key
+      uid  = IMAP UID, uidv = mailbox UIDVALIDITY
+      box  = mailbox name, user = mailbox owner
+      hdr / body = message headers / body text (indexed, not stored)
+      any  = copy of hdr and body, see the copyField rules below -->
+ <fields>
+   <field name="id" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="uid" type="slong" indexed="true" stored="true" required="true" /> 
+   <field name="uidv" type="long" indexed="true" stored="true" required="true" /> 
+   <field name="box" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="user" type="string" indexed="true" stored="true" required="true" /> 
+   <field name="hdr" type="text" indexed="true" stored="false" /> 
+   <field name="body" type="text" indexed="true" stored="false" /> 
+   <field name="any" type="text" indexed="true" stored="false" multiValued="true" />
+ </fields>
+
+ <copyField source="hdr" dest="any" />
+ <copyField source="body" dest="any" />
+
+ <uniqueKey>id</uniqueKey>
+ <defaultSearchField>any</defaultSearchField>
+ <solrQueryParser defaultOperator="AND" />
+</schema>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/solr-connection.c	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,454 @@
+/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
+
+/* curl: 7.16.0 curl_multi_timeout */
+
+#include "lib.h"
+#include "str.h"
+#include "strescape.h"
+#include "solr-connection.h"
+
+#include <curl/curl.h>
+#include <expat.h>
+
+/* How far into the response -> result -> doc -> field hierarchy of a
+   select reply the parser has descended. The numeric values are compared
+   with the element depth by the start/end handlers, so the order of the
+   constants matters. */
+enum solr_xml_response_state {
+	SOLR_XML_RESPONSE_STATE_ROOT,
+	SOLR_XML_RESPONSE_STATE_RESPONSE,
+	SOLR_XML_RESPONSE_STATE_RESULT,
+	SOLR_XML_RESPONSE_STATE_DOC,
+	SOLR_XML_RESPONSE_STATE_CONTENT
+};
+
+/* Which field of a <doc> the character data currently belongs to,
+   selected by the element's "name" attribute. */
+enum solr_xml_content_state {
+	SOLR_XML_CONTENT_STATE_NONE = 0,
+	SOLR_XML_CONTENT_STATE_UID,
+	SOLR_XML_CONTENT_STATE_SCORE
+};
+
+/* Parser state for one select reply; passed to the expat handlers as
+   user data. */
+struct solr_lookup_xml_context {
+	enum solr_xml_response_state state;
+	enum solr_xml_content_state content_state;
+	/* number of currently open XML elements */
+	int depth;
+
+	/* result array the parsed UIDs are added to */
+	ARRAY_TYPE(seq_range) *uids;
+};
+
+/* State of one streaming POST. data/size describe the chunk handed to
+   the latest solr_connection_post_more() call and pos is how much of it
+   curl's read callback has consumed so far. */
+struct solr_connection_post {
+	struct solr_connection *conn;
+	const unsigned char *data;
+	size_t size, pos;
+
+	/* set once sending has failed; later chunks are then dropped */
+	unsigned int failed:1;
+};
+
+/* A connection to one Solr server: a single curl easy handle used for
+   both lookups and updates, plus the XML parser for lookup replies. */
+struct solr_connection {
+	CURL *curl;
+	/* multi handle the easy handle is attached to while posting */
+	CURLM *curlm;
+
+	char curl_errorbuf[CURL_ERROR_SIZE];
+	/* headers: default request headers, headers_post: headers for
+	   streaming (chunked) POSTs */
+	struct curl_slist *headers, *headers_post;
+	XML_Parser xml_parser;
+
+	/* base URL from the "url=" setting; "select?" / "update" is
+	   appended to it */
+	char *url;
+
+	unsigned int debug:1;
+	/* a streaming POST is in progress */
+	unsigned int posting:1;
+	/* parsing the current reply has failed */
+	unsigned int xml_failed:1;
+};
+
+/* Parse the space-separated settings string into conn.
+   Recognized settings are "url=<base url>" and "debug"; anything else is
+   a fatal error. The url setting is mandatory, also when no settings
+   string was given at all (str == NULL). */
+static void
+solr_conn_init_settings(struct solr_connection *conn, const char *str)
+{
+	const char *const *tmp;
+
+	/* treat missing settings as empty so the mandatory url check below
+	   still runs instead of leaving conn->url NULL */
+	if (str == NULL)
+		str = "";
+
+	for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) {
+		if (strncmp(*tmp, "url=", 4) == 0) {
+			/* a later url= overrides an earlier one */
+			i_free(conn->url);
+			conn->url = i_strdup(*tmp + 4);
+		} else if (strcmp(*tmp, "debug") == 0) {
+			conn->debug = TRUE;
+		} else {
+			i_fatal("fts_solr: Invalid setting: %s", *tmp);
+		}
+	}
+	if (conn->url == NULL)
+		i_fatal("fts_solr: url setting missing");
+}
+
+/* curl read callback: copy as much of the pending POST chunk as fits into
+   curl's buffer and advance the chunk position. Returns the number of
+   bytes copied, which is 0 once the chunk is fully consumed. */
+static size_t
+curl_output_func(void *data, size_t element_size, size_t nmemb, void *context)
+{
+	struct solr_connection_post *post = context;
+	size_t room = element_size * nmemb;
+	size_t left = post->size - post->pos;
+	size_t count = room > left ? left : room;
+
+	/* @UNSAFE */
+	memcpy(data, post->data + post->pos, count);
+	post->pos += count;
+	return count;
+}
+
+/* Feed a piece of the reply to the XML parser; done marks the final call.
+   Returns 0 on success and -1 on a parse error. After the first error
+   the connection is flagged and further input is rejected right away.
+   XML_ERROR_FINISHED is not treated as an error. */
+static int solr_xml_parse(struct solr_connection *conn,
+			  const void *data, size_t size, bool done)
+{
+	enum XML_Error xml_err;
+	int lineno;
+
+	if (conn->xml_failed)
+		return -1;
+	if (XML_Parse(conn->xml_parser, data, size, done))
+		return 0;
+
+	xml_err = XML_GetErrorCode(conn->xml_parser);
+	if (xml_err == XML_ERROR_FINISHED)
+		return 0;
+
+	lineno = XML_GetCurrentLineNumber(conn->xml_parser);
+	i_error("fts_solr: Invalid XML input at line %d: %s",
+		lineno, XML_ErrorString(xml_err));
+	conn->xml_failed = TRUE;
+	return -1;
+}
+
+/* curl write callback: pass the received reply data to the XML parser.
+   The full size is always returned, also when parsing failed; the
+   failure is remembered in conn->xml_failed instead. */
+static size_t
+curl_input_func(void *data, size_t element_size, size_t nmemb, void *context)
+{
+	struct solr_connection *conn = context;
+	size_t size = element_size * nmemb;
+
+	(void)solr_xml_parse(conn, data, size, FALSE);
+	return size;
+}
+
+/* Create a connection object from the given settings string: parse the
+   settings, allocate the curl handles and the XML parser and prepare the
+   header lists. Allocation failures are fatal. */
+struct solr_connection *solr_connection_init(const char *settings)
+{
+	struct solr_connection *conn;
+	struct curl_slist *post_headers;
+	CURL *easy;
+
+	conn = i_new(struct solr_connection, 1);
+	solr_conn_init_settings(conn, settings);
+
+	conn->curlm = curl_multi_init();
+	easy = conn->curl = curl_easy_init();
+	if (easy == NULL || conn->curlm == NULL) {
+		i_fatal_status(FATAL_OUTOFMEM,
+			       "fts_solr: Failed to allocate curl");
+	}
+
+	/* set global curl options */
+	curl_easy_setopt(easy, CURLOPT_ERRORBUFFER, conn->curl_errorbuf);
+	if (conn->debug)
+		curl_easy_setopt(easy, CURLOPT_VERBOSE, 1L);
+
+	curl_easy_setopt(easy, CURLOPT_NOPROGRESS, 1L);
+	curl_easy_setopt(easy, CURLOPT_NOSIGNAL, 1L);
+	curl_easy_setopt(easy, CURLOPT_READFUNCTION, curl_output_func);
+	curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, curl_input_func);
+	curl_easy_setopt(easy, CURLOPT_WRITEDATA, conn);
+
+	/* default headers, and the set used for streaming POSTs */
+	conn->headers = curl_slist_append(NULL, "Content-Type: text/xml");
+
+	post_headers = curl_slist_append(NULL, "Content-Type: text/xml");
+	post_headers = curl_slist_append(post_headers,
+					 "Transfer-Encoding: chunked");
+	post_headers = curl_slist_append(post_headers, "Expect:");
+	conn->headers_post = post_headers;
+
+	curl_easy_setopt(easy, CURLOPT_HTTPHEADER, conn->headers);
+
+	conn->xml_parser = XML_ParserCreate("UTF-8");
+	if (conn->xml_parser == NULL) {
+		i_fatal_status(FATAL_OUTOFMEM,
+			       "fts_solr: Failed to allocate XML parser");
+	}
+	return conn;
+}
+
+/* Free the connection and everything solr_connection_init() allocated
+   for it: header lists, curl handles, the XML parser and the URL. */
+void solr_connection_deinit(struct solr_connection *conn)
+{
+	curl_slist_free_all(conn->headers);
+	curl_slist_free_all(conn->headers_post);
+	curl_multi_cleanup(conn->curlm);
+	curl_easy_cleanup(conn->curl);
+	/* created with XML_ParserCreate() in solr_connection_init() */
+	XML_ParserFree(conn->xml_parser);
+	i_free(conn->url);
+	i_free(conn);
+}
+
+/* Append str to dest as a double-quoted query value: the string is first
+   escaped with str_escape(), then URL-encoded by curl and wrapped in
+   URL-encoded quotes (%22). */
+void solr_connection_quote_str(struct solr_connection *conn, string_t *dest,
+			       const char *str)
+{
+	const char *escaped = str_escape(str);
+	char *url_encoded;
+
+	url_encoded = curl_easy_escape(conn->curl, escaped, 0);
+	str_printfa(dest, "%%22%s%%22", url_encoded);
+	curl_free(url_encoded);
+}
+
+/* Return the value of the "name" attribute from expat's NULL-terminated
+   list of name/value pairs, or "" if there is no such attribute. */
+static const char *attrs_get_name(const char **attrs)
+{
+	unsigned int i;
+
+	for (i = 0; attrs[i] != NULL; i += 2) {
+		if (strcmp(attrs[i], "name") == 0)
+			return attrs[i + 1];
+	}
+	return "";
+}
+
+/* expat start-element handler for select replies.
+   Follows the response -> result -> doc -> field path: state advances by
+   one only when the element opened directly below the current level is
+   the expected one. Elements nested below anything else only increase
+   depth and are otherwise ignored. Inside a <doc>, an element whose
+   "name" attribute is "uid" or "score" selects the content state used
+   by the character data handler. */
+static void
+solr_lookup_xml_start(void *context, const char *name, const char **attrs)
+{
+	struct solr_lookup_xml_context *ctx = context;
+	const char *name_attr;
+
+	i_assert(ctx->depth >= (int)ctx->state);
+
+	ctx->depth++;
+	if (ctx->depth - 1 > (int)ctx->state) {
+		/* skipping over unwanted elements */
+		return;
+	}
+
+	/* response -> result -> doc */
+	switch (ctx->state) {
+	case SOLR_XML_RESPONSE_STATE_ROOT:
+		if (strcmp(name, "response") == 0)
+			ctx->state++;
+		break;
+	case SOLR_XML_RESPONSE_STATE_RESPONSE:
+		if (strcmp(name, "result") == 0)
+			ctx->state++;
+		break;
+	case SOLR_XML_RESPONSE_STATE_RESULT:
+		if (strcmp(name, "doc") == 0)
+			ctx->state++;
+		break;
+	case SOLR_XML_RESPONSE_STATE_DOC:
+		/* fields are identified by their name attribute, not by the
+		   element name */
+		name_attr = attrs_get_name(attrs);
+		if (strcmp(name_attr, "uid") == 0)
+			ctx->content_state = SOLR_XML_CONTENT_STATE_UID;
+		else if (strcmp(name_attr, "score") == 0)
+			ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE;
+		else 
+			break;
+		ctx->state++;
+		break;
+	case SOLR_XML_RESPONSE_STATE_CONTENT:
+		break;
+	}
+}
+
+/* expat end-element handler: when the element being closed is the one
+   that advanced the state (depth equals state), step the state back and
+   reset the content state. The depth is decreased for every element. */
+static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED)
+{
+	struct solr_lookup_xml_context *ctx = context;
+
+	i_assert(ctx->depth >= (int)ctx->state);
+
+	if (ctx->depth == (int)ctx->state) {
+		ctx->state--;
+		ctx->content_state = SOLR_XML_CONTENT_STATE_NONE;
+	}
+	ctx->depth--;
+}
+
+/* expat character data handler: inside a "uid" field the text is parsed
+   as a decimal number and added to the result array. Text containing a
+   non-digit is logged as an error and dropped. Text in any other content
+   state is ignored. */
+static void solr_lookup_xml_data(void *context, const char *str, int len)
+{
+	struct solr_lookup_xml_context *ctx = context;
+	uint32_t uid = 0;
+	int pos;
+
+	if (ctx->content_state != SOLR_XML_CONTENT_STATE_UID) {
+		/* NONE: not inside a field we care about.
+		   SCORE: FIXME */
+		return;
+	}
+
+	for (pos = 0; pos < len; pos++) {
+		if (str[pos] < '0' || str[pos] > '9') {
+			i_error("fts_solr: received invalid uid");
+			return;
+		}
+		uid = uid*10 + str[pos]-'0';
+	}
+	seq_range_array_add(ctx->uids, 0, uid);
+}
+
+/* Run a lookup: HTTP GET <url>select?<query> and parse the XML reply,
+   adding every returned UID to uids. query must already be URL-encoded.
+   Must not be called while a streaming POST is in progress.
+   Returns 0 on success, -1 if the request or the XML parsing failed. */
+int solr_connection_select(struct solr_connection *conn, const char *query,
+			   ARRAY_TYPE(seq_range) *uids)
+{
+	struct solr_lookup_xml_context solr_lookup_context;
+	string_t *str;
+	CURLcode ret;
+
+	i_assert(!conn->posting);
+
+	memset(&solr_lookup_context, 0, sizeof(solr_lookup_context));
+	solr_lookup_context.uids = uids;
+
+	conn->xml_failed = FALSE;
+	/* The parser is reused for every lookup. Without a reset it stays
+	   in the finished state of the previous reply and would silently
+	   ignore this one. The reset also clears the handlers and user
+	   data, so they are set again below. */
+	XML_ParserReset(conn->xml_parser, "UTF-8");
+	XML_SetElementHandler(conn->xml_parser,
+			      solr_lookup_xml_start, solr_lookup_xml_end);
+	XML_SetCharacterDataHandler(conn->xml_parser, solr_lookup_xml_data);
+	XML_SetUserData(conn->xml_parser, &solr_lookup_context);
+
+	str = t_str_new(256);
+	str_append(str, conn->url);
+	str_append(str, "select?");
+	str_append(str, query);
+
+	curl_easy_setopt(conn->curl, CURLOPT_URL, str_c(str));
+	ret = curl_easy_perform(conn->curl);
+	if (ret != 0) {
+		i_error("fts_solr: HTTP GET failed: %s",
+			conn->curl_errorbuf);
+		return -1;
+	}
+	/* tell the parser that the document is complete */
+	return solr_xml_parse(conn, NULL, 0, TRUE);
+}
+
+/* Begin a streaming POST to <url>update. The easy handle is attached to
+   the multi handle and switched to POST mode with the chunked-transfer
+   headers; nothing is sent until solr_connection_post_more() is called.
+   Only one POST may be in progress per connection (asserted).
+   Always returns a post object; if attaching the handle failed it is
+   marked as failed. Finish it with solr_connection_end(). */
+struct solr_connection_post *
+solr_connection_post_begin(struct solr_connection *conn)
+{
+	struct solr_connection_post *post;
+	CURLMcode merr;
+	string_t *str;
+
+	post = i_new(struct solr_connection_post, 1);
+	post->conn = conn;
+
+	i_assert(!conn->posting);
+	conn->posting = TRUE;
+
+	/* curl_output_func() reads the data to send from post */
+	curl_easy_setopt(conn->curl, CURLOPT_READDATA, post);
+	merr = curl_multi_add_handle(conn->curlm, conn->curl);
+	if (merr != CURLM_OK) {
+		i_error("fts_solr: curl_multi_add_handle() failed: %s",
+			curl_multi_strerror(merr));
+		post->failed = TRUE;
+	} else {
+		str = t_str_new(256);
+		str_append(str, conn->url);
+		str_append(str, "update");
+
+		curl_easy_setopt(conn->curl, CURLOPT_URL, str_c(str));
+		curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER,
+				 conn->headers_post);
+		curl_easy_setopt(conn->curl, CURLOPT_POST, (long)1);
+	}
+	return post;
+}
+
+/* Send one chunk of the POST body, blocking until curl has consumed all
+   of it. Calling this with size == 0 instead drives the transfer until
+   curl reports no running handles, which solr_connection_end() uses to
+   finish the request.
+   On any error the post is marked as failed and all later calls return
+   immediately without sending anything. */
+void solr_connection_post_more(struct solr_connection_post *post,
+			       const unsigned char *data, size_t size)
+{
+	fd_set fdread;
+	fd_set fdwrite;
+	fd_set fdexcep;
+	struct timeval timeout_tv;
+	long timeout;
+	CURLMcode merr;
+	int ret, handles, maxfd;
+
+	i_assert(post->conn->posting);
+
+	if (post->failed)
+		return;
+
+	/* make the chunk visible to curl_output_func() */
+	post->data = data;
+	post->size = size;
+	post->pos = 0;
+
+	for (;;) {
+		merr = curl_multi_perform(post->conn->curlm, &handles);
+		if (merr == CURLM_CALL_MULTI_PERFORM)
+			continue;
+		if (merr != CURLM_OK) {
+			i_error("fts_solr: curl_multi_perform() failed: %s",
+				curl_multi_strerror(merr));
+			break;
+		}
+		if ((post->pos == post->size && post->size != 0) ||
+		    (handles == 0 && post->size == 0)) {
+			/* everything sent successfully */
+			return;
+		}
+
+		/* everything wasn't sent - wait. just use select,
+		   since libcurl interface is easiest with it. */
+		FD_ZERO(&fdread);
+		FD_ZERO(&fdwrite);
+		FD_ZERO(&fdexcep);
+
+		merr = curl_multi_fdset(post->conn->curlm, &fdread, &fdwrite,
+					&fdexcep, &maxfd);
+		if (merr != CURLM_OK) {
+			i_error("fts_solr: curl_multi_fdset() failed: %s",
+				curl_multi_strerror(merr));
+			break;
+		}
+		i_assert(maxfd >= 0);
+
+		merr = curl_multi_timeout(post->conn->curlm, &timeout);
+		if (merr != CURLM_OK) {
+			i_error("fts_solr: curl_multi_timeout() failed: %s",
+				curl_multi_strerror(merr));
+			break;
+		}
+
+		/* timeout is in milliseconds; a negative value falls back
+		   to waiting one second */
+		if (timeout < 0) {
+			timeout_tv.tv_sec = 1;
+			timeout_tv.tv_usec = 0;
+		} else {
+			timeout_tv.tv_sec = timeout / 1000;
+			timeout_tv.tv_usec = (timeout % 1000) * 1000;
+		}
+		ret = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout_tv);
+		if (ret < 0) {
+			i_error("fts_solr: select() failed: %m");
+			break;
+		}
+	}
+	/* only reached through one of the error breaks above */
+	post->failed = TRUE;
+}
+
+/* Finish a streaming POST: flush the request, check the HTTP status,
+   restore the easy handle's default (non-POST) options, detach it from
+   the multi handle and free the post object.
+   Returns 0 if the post had not failed and the server answered 200,
+   otherwise -1. */
+int solr_connection_end(struct solr_connection_post *post)
+{
+	struct solr_connection *conn = post->conn;
+	long httpret;
+	/* failure state as it was before the final flush below */
+	int ret = post->failed ? -1 : 0;
+
+	i_assert(conn->posting);
+
+	/* zero-sized chunk: run the transfer to completion */
+	solr_connection_post_more(post, NULL, 0);
+
+	curl_easy_getinfo(post->conn->curl, CURLINFO_RESPONSE_CODE, &httpret);
+	if (httpret != 200 && ret == 0) {
+		i_error("fts_solr: Indexing failed with %ld", httpret);
+		ret = -1;
+	}
+
+	/* back to the settings used for lookups */
+	curl_easy_setopt(conn->curl, CURLOPT_READDATA, NULL);
+	curl_easy_setopt(conn->curl, CURLOPT_POST, (long)0);
+	curl_easy_setopt(conn->curl, CURLOPT_HTTPHEADER, conn->headers);
+
+	(void)curl_multi_remove_handle(conn->curlm, conn->curl);
+	i_free(post);
+
+	conn->posting = FALSE;
+	return ret;
+}
+
+/* Send the NUL-terminated cmd as the body of a single, complete POST.
+   Returns the result of solr_connection_end(). */
+int solr_connection_post(struct solr_connection *conn, const char *cmd)
+{
+	const unsigned char *body = (const unsigned char *)cmd;
+	struct solr_connection_post *post = solr_connection_post_begin(conn);
+
+	solr_connection_post_more(post, body, strlen(cmd));
+	return solr_connection_end(post);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-solr/solr-connection.h	Fri Jul 11 01:44:13 2008 +0530
@@ -0,0 +1,22 @@
+#ifndef SOLR_CONNECTION_H
+#define SOLR_CONNECTION_H
+
+#include "seq-range-array.h"
+
+/* Create a connection. settings is a space-separated list containing
+   "url=<base url>" and optionally "debug". */
+struct solr_connection *solr_connection_init(const char *settings);
+void solr_connection_deinit(struct solr_connection *conn);
+
+/* Append str to dest as an escaped, URL-encoded and quoted query value. */
+void solr_connection_quote_str(struct solr_connection *conn, string_t *dest,
+			       const char *str);
+
+/* Run a lookup with the given URL-encoded query and add the returned
+   UIDs to uids. Returns 0 on success, -1 on failure. */
+int solr_connection_select(struct solr_connection *conn, const char *query,
+			   ARRAY_TYPE(seq_range) *uids);
+/* Send cmd as one complete update request. Returns 0 on success,
+   -1 on failure. */
+int solr_connection_post(struct solr_connection *conn, const char *cmd);
+
+/* Streaming update request: begin, send any number of chunks with
+   post_more(), then finish with solr_connection_end(), which also frees
+   the post object and returns 0 on success, -1 on failure. */
+struct solr_connection_post *
+solr_connection_post_begin(struct solr_connection *conn);
+void solr_connection_post_more(struct solr_connection_post *post,
+			       const unsigned char *data, size_t size);
+int solr_connection_end(struct solr_connection_post *post);
+
+#endif
--- a/src/plugins/fts/fts-storage.c	Thu Jul 10 22:38:31 2008 +0530
+++ b/src/plugins/fts/fts-storage.c	Fri Jul 11 01:44:13 2008 +0530
@@ -188,7 +188,7 @@
 
 	if (fts_backend_build_init(backend, &last_uid_locked, &build) < 0)
 		return -1;
-	if (last_uid != last_uid_locked) {
+	if (last_uid != last_uid_locked && last_uid_locked != (uint32_t)-1) {
 		/* changed, need to get again the sequences */
 		i_assert(last_uid < last_uid_locked);