changeset 13224:a50ac3a840a9

fts-lucene: Added "doveadm dump" support for lucene index.
author Timo Sirainen <tss@iki.fi>
date Fri, 12 Aug 2011 17:27:05 +0300
parents 271bbac3047d
children 046090da4aaf
files src/plugins/fts-lucene/Makefile.am src/plugins/fts-lucene/doveadm-fts-lucene.c src/plugins/fts-lucene/lucene-wrapper.cc src/plugins/fts-lucene/lucene-wrapper.h
diffstat 4 files changed, 188 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/fts-lucene/Makefile.am	Fri Aug 12 17:40:55 2011 +0300
+++ b/src/plugins/fts-lucene/Makefile.am	Fri Aug 12 17:27:05 2011 +0300
@@ -1,13 +1,16 @@
+doveadm_moduledir = $(moduledir)/doveadm
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/lib \
 	-I$(top_srcdir)/src/lib-mail \
 	-I$(top_srcdir)/src/lib-index \
 	-I$(top_srcdir)/src/lib-storage \
 	-I$(top_srcdir)/src/plugins/fts \
-	-I/usr/lib
+	-I$(top_srcdir)/src/doveadm
 
 NOPLUGIN_LDFLAGS =
 lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version
+lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version
 
 module_LTLIBRARIES = \
 	lib21_fts_lucene_plugin.la
@@ -38,3 +41,9 @@
 	textcat.conf
 endif
 EXTRA_DIST = textcat.conf
+
+doveadm_module_LTLIBRARIES = \
+	lib20_doveadm_fts_lucene_plugin.la
+
+lib20_doveadm_fts_lucene_plugin_la_SOURCES = \
+	doveadm-fts-lucene.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c	Fri Aug 12 17:27:05 2011 +0300
@@ -0,0 +1,71 @@
+/* Copyright (c) 2011 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "hex-binary.h"
+#include "doveadm-dump.h"
+#include "doveadm-fts.h"
+#include "lucene-wrapper.h"
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+const char *doveadm_fts_lucene_plugin_version = DOVECOT_VERSION;
+
+void doveadm_fts_lucene_plugin_init(struct module *module);
+void doveadm_fts_lucene_plugin_deinit(void);
+
+/* Dumps the index at argv[1] as "<mailbox guid hex>: uid,uid,..." lines. */
+static void cmd_dump_fts_lucene(int argc ATTR_UNUSED, char *argv[])
+{
+	const struct lucene_index_record *rec;
+	struct lucene_index_iter *iter;
+	struct lucene_index *index;
+	mail_guid_128_t cur_guid;
+	bool seen_mailbox = FALSE;
+
+	memset(&cur_guid, 0, sizeof(cur_guid));
+	index = lucene_index_init(argv[1], NULL, NULL);
+	iter = lucene_index_iter_init(index);
+	while ((rec = lucene_index_iter_next(iter)) != NULL) {
+		if (memcmp(cur_guid, rec->mailbox_guid, sizeof(cur_guid)) != 0) {
+			/* mailbox GUID changed: finish the previous output line first */
+			if (seen_mailbox)
+				printf("\n");
+			seen_mailbox = TRUE;
+			memcpy(cur_guid, rec->mailbox_guid, sizeof(cur_guid));
+			printf("%s: ", binary_to_hex(cur_guid, sizeof(cur_guid)));
+		}
+		printf("%u,", rec->uid);
+	}
+	printf("\n");
+	if (lucene_index_iter_deinit(&iter) < 0)
+		i_error("Lucene index iteration failed");
+	lucene_index_deinit(index);
+}
+
+/* Returns TRUE if path looks like a Lucene index (path/segments.gen exists). */
+static bool test_dump_fts_lucene(const char *path)
+{
+	const char *p;
+	struct stat st;
+
+	/* Use the whole path, not only its last component: a bare basename
+	   would be looked up relative to the current working directory. */
+	p = t_strconcat(path, "/segments.gen", NULL);
+	return stat(p, &st) == 0;
+}
+
+struct doveadm_cmd_dump doveadm_cmd_dump_fts_lucene = {
+	"fts-lucene", /* presumably the dump type name - confirm in doveadm-dump.h */
+	test_dump_fts_lucene, /* checks for <path>/segments.gen */
+	cmd_dump_fts_lucene /* prints mailbox GUIDs and UIDs found in the index */
+};
+
+void doveadm_fts_lucene_plugin_init(struct module *module ATTR_UNUSED)
+{
+	doveadm_dump_register(&doveadm_cmd_dump_fts_lucene); /* add the fts-lucene dumper */
+}
+
+void doveadm_fts_lucene_plugin_deinit(void)
+{ /* no cleanup is performed here */
+}
--- a/src/plugins/fts-lucene/lucene-wrapper.cc	Fri Aug 12 17:40:55 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc	Fri Aug 12 17:27:05 2011 +0300
@@ -213,10 +213,9 @@
 }
 
 static int
-lucene_doc_get_uid(struct lucene_index *index, Document *doc,
-		   const TCHAR *field_name, uint32_t *uid_r)
+lucene_doc_get_uid(struct lucene_index *index, Document *doc, uint32_t *uid_r)
 {
-	Field *field = doc->getField(field_name);
+	Field *field = doc->getField(_T("uid"));
 	const TCHAR *uid = field == NULL ? NULL : field->stringValue();
 	if (uid == NULL) {
 		i_error("lucene: Corrupted FTS index %s: No UID for document",
@@ -253,7 +252,7 @@
 			uint32_t uid;
 
 			if (lucene_doc_get_uid(index, &hits->doc(i),
-					       _T("uid"), &uid) < 0) {
+					       &uid) < 0) {
 				ret = -1;
 				break;
 			}
@@ -516,25 +515,33 @@
 }
 
 static int
-rescan_open_mailbox(struct rescan_context *ctx, Document *doc)
+fts_lucene_get_mailbox_guid(struct lucene_index *index, Document *doc,
+			    mail_guid_128_t *guid_r)
 {
-	int ret;
-
 	Field *field = doc->getField(_T("box"));
 	const TCHAR *box_guid = field == NULL ? NULL : field->stringValue();
 	if (box_guid == NULL) {
 		i_error("lucene: Corrupted FTS index %s: No mailbox for document",
-			ctx->index->path);
-		return 0;
+			index->path);
+		return -1;
 	}
 
-	mail_guid_128_t guid;
-	if (wcharguid_to_guid(&guid, box_guid) < 0) {
+	if (wcharguid_to_guid(guid_r, box_guid) < 0) {
 		i_error("lucene: Corrupted FTS index %s: "
-			"box field not in expected format",
-			ctx->index->path);
+			"box field not in expected format", index->path);
+		return -1;
+	}
+	return 0;
+}
+
+static int
+rescan_open_mailbox(struct rescan_context *ctx, Document *doc)
+{
+	mail_guid_128_t guid;
+	int ret;
+
+	if (fts_lucene_get_mailbox_guid(ctx->index, doc, &guid) < 0)
 		return 0;
-	}
 
 	if (memcmp(guid, ctx->box_guid, sizeof(guid)) == 0) {
 		/* same as last one */
@@ -587,7 +594,7 @@
 {
 	uint32_t lucene_uid, idx_uid;
 
-	if (lucene_doc_get_uid(ctx->index, doc, _T("uid"), &lucene_uid) < 0)
+	if (lucene_doc_get_uid(ctx->index, doc, &lucene_uid) < 0)
 		return 0;
 
 	if (seq_range_array_iter_nth(&ctx->uids_iter, ctx->uids_iter_n,
@@ -917,7 +924,7 @@
 			uint32_t uid;
 
 			if (lucene_doc_get_uid(index, &hits->doc(i),
-					       _T("uid"), &uid) < 0) {
+					       &uid) < 0) {
 				ret = -1;
 				break;
 			}
@@ -1028,7 +1035,7 @@
 			}
 
 			if (lucene_doc_get_uid(index, &hits->doc(i),
-					       _T("uid"), &uid) < 0) {
+					       &uid) < 0) {
 				ret = -1;
 				break;
 			}
@@ -1049,6 +1056,7 @@
 		return -1;
 	}
 }
+
 int lucene_index_lookup_multi(struct lucene_index *index,
 			      struct hash_table *guids,
 			      struct mail_search_arg *args, bool and_args,
@@ -1076,3 +1084,70 @@
 	}
 	return 0;
 }
+
+struct lucene_index_iter {
+	struct lucene_index *index; /* index being iterated */
+	struct lucene_index_record rec; /* storage for the record returned by _next() */
+
+	Hits *hits; /* search result; NULL is treated as "no records" */
+	size_t i; /* position of the next hit to return */
+	bool failed; /* makes lucene_index_iter_deinit() return -1 */
+};
+
+/* Starts iterating documents matching box:*, sorted by "box", then "uid". */
+struct lucene_index_iter *
+lucene_index_iter_init(struct lucene_index *index)
+{
+	static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL };
+	struct lucene_index_iter *iter;
+	int ret;
+
+	iter = i_new(struct lucene_index_iter, 1);
+	iter->index = index;
+	if ((ret = lucene_index_open_search(index)) <= 0) {
+		if (ret < 0)
+			iter->failed = true;
+		return iter;
+	}
+	/* wildcard on "box" - presumably every document has this field */
+	Term term(_T("box"), _T("*"));
+	WildcardQuery query(&term);
+	Sort sort(sort_fields);
+	try {
+		iter->hits = index->searcher->search(&query, &sort);
+	} catch (CLuceneError &err) {
+		lucene_handle_error(index, err, "iter search");
+		iter->failed = true; /* reported by lucene_index_iter_deinit() */
+	}
+	return iter;
+}
+
+/* Returns the next record (overwritten by the next call), or NULL at end. */
+const struct lucene_index_record *
+lucene_index_iter_next(struct lucene_index_iter *iter)
+{
+	struct lucene_index_record *out = &iter->rec;
+
+	if (iter->hits == NULL || iter->i == iter->hits->length())
+		return NULL;
+	Document *doc = &iter->hits->doc(iter->i);
+	iter->i++;
+
+	/* lookup failures are deliberately ignored; rec was zeroed beforehand */
+	memset(out, 0, sizeof(*out));
+	(void)fts_lucene_get_mailbox_guid(iter->index, doc, &out->mailbox_guid);
+	(void)lucene_doc_get_uid(iter->index, doc, &out->uid);
+	return out;
+}
+
+/* Frees the iterator and clears *_iter. Returns -1 if it had failed, else 0. */
+int lucene_index_iter_deinit(struct lucene_index_iter **_iter)
+{
+	struct lucene_index_iter *it = *_iter;
+	const bool had_error = it->failed;
+	*_iter = NULL;
+	if (it->hits != NULL)
+		_CLDELETE(it->hits);
+	i_free(it);
+	return had_error ? -1 : 0;
+}
--- a/src/plugins/fts-lucene/lucene-wrapper.h	Fri Aug 12 17:40:55 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.h	Fri Aug 12 17:27:05 2011 +0300
@@ -4,8 +4,17 @@
 #include "fts-api-private.h"
 #include "mail-types.h"
 
+struct hash_table;
+struct mailbox_list;
+struct fts_expunge_log;
+
 #define MAILBOX_GUID_HEX_LENGTH (MAIL_GUID_128_SIZE*2)
 
+struct lucene_index_record {
+	mail_guid_128_t mailbox_guid; /* parsed from the document's "box" field */
+	uint32_t uid; /* parsed from the document's "uid" field */
+};
+
 struct lucene_index *lucene_index_init(const char *path,
 				       const char *textcat_dir,
 				       const char *textcat_conf);
@@ -37,6 +46,12 @@
 			      struct mail_search_arg *args, bool and_args,
 			      struct fts_multi_result *result);
 
+struct lucene_index_iter * /* begins iterating the records stored in index */
+lucene_index_iter_init(struct lucene_index *index);
+const struct lucene_index_record * /* next record, or NULL when there are no more */
+lucene_index_iter_next(struct lucene_index_iter *iter);
+int lucene_index_iter_deinit(struct lucene_index_iter **iter); /* frees; -1 if failed */
+
 /* internal: */
 void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
 			    wchar_t *dest, size_t destsize);