Mercurial > dovecot > core-2.2
changeset 13256:16d8a1c1543f
fts-lucene: Use WildcardQuery rather than RangeQuery to limit what UIDs to fetch.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 16 Aug 2011 19:46:06 +0300 |
parents | f6c534c1baee |
children | cddc09ae5e1d |
files | src/plugins/fts-lucene/lucene-wrapper.cc |
diffstat | 1 files changed, 50 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-lucene/lucene-wrapper.cc Tue Aug 16 19:19:10 2011 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.cc Tue Aug 16 19:46:06 2011 +0300 @@ -686,6 +686,21 @@ wguid_hex[i] = '\0'; } +static void +lucene_index_query_range_add(BooleanQuery *query, wchar_t *wuid, + wchar_t max_char) +{ + wchar_t i; + + for (i = wuid[0]; i <= max_char; i++) { + wuid[0] = i; + + Term *term = _CLNEW Term(_T("uid"), wuid); + query->add(_CLNEW TermQuery(term), true, BooleanClause::SHOULD); + _CLDECDELETE(term); + } +} + static int lucene_index_expunge_record(struct lucene_index *index, const struct fts_expunge_log_read_record *rec) @@ -699,15 +714,42 @@ range = array_get(&rec->uids, &count); BooleanQuery query; + BooleanQuery uids_query; - /* search for UIDs between lowest and highest expunged UID */ - wchar_t wuid1[MAX_INT_STRLEN], wuid2[MAX_INT_STRLEN]; - swprintf(wuid1, N_ELEMENTS(wuid1), L"%u", range[0].seq1); - swprintf(wuid2, N_ELEMENTS(wuid2), L"%u", range[count-1].seq2); - Term wuid1_term(_T("uid"), wuid1); - Term wuid2_term(_T("uid"), wuid2); - RangeQuery rq(&wuid1_term, &wuid2_term, true); - query.add(&rq, BooleanClause::MUST); + /* RangeQuery actually just adds each term within the range to the + search query, causing "too many clauses" at some point. + So use WildcardQuery to get something approximately true. */ + uint32_t seq1 = range[0].seq1, seq2 = range[count-1].seq2; + + if (seq2 / seq1 > 10) { + /* just iterate through everything */ + } else { + wchar_t wuid1[MAX_INT_STRLEN], wuid2[MAX_INT_STRLEN]; + unsigned int i; + + swprintf(wuid1, N_ELEMENTS(wuid1), L"%u", range[0].seq1); + swprintf(wuid2, N_ELEMENTS(wuid2), L"%u", range[count-1].seq2); + + for (i = 1; wuid1[i] != '\0'; i++) + wuid1[i] = '?'; + for (i = 1; wuid2[i] != '\0'; i++) + wuid2[i] = '?'; + + if (wcslen(wuid1) == wcslen(wuid2)) { + /* for example: 1???..9??? */ + lucene_index_query_range_add(&uids_query, + wuid1, wuid2[0]); + } else { + /* for example: 4?? .. 5??? */ + lucene_index_query_range_add(&uids_query, + wuid1, '9'); + wchar_t max = wuid2[0]; + wuid2[0] = '1'; + lucene_index_query_range_add(&uids_query, + wuid2, max); + } + query.add(&uids_query, BooleanClause::MUST); + } wchar_t wguid[MAILBOX_GUID_HEX_LENGTH + 1]; guid128_to_wguid(rec->mailbox_guid, wguid);