annotate src/plugins/fts-squat/squat-trie.c @ 5068:65a4e1e61555 HEAD

Some bugfixes. Moved header struct to a separate file.
author Timo Sirainen <tss@iki.fi>
date Wed, 24 Jan 2007 16:41:28 +0200
parents 8c1433eb2ba9
children 109ca861405f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2006 Timo Sirainen */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
4 #include "array.h"
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "bsearch-insert-pos.h"
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
6 #include "file-cache.h"
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
7 #include "file-lock.h"
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8 #include "istream.h"
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #include "ostream.h"
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
10 #include "read-full.h"
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
11 #include "write-full.h"
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
12 #include "mmap-util.h"
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
13 #include "unichar.h"
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
14 #include "squat-uidlist.h"
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
15 #include "squat-trie.h"
5068
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
16 #include "squat-trie-private.h"
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
17
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
18 #include <stdio.h>
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
19 #include <stdlib.h>
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
20 #include <unistd.h>
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
21 #include <fcntl.h>
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
22 #include <ctype.h>
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
23
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
/* 8bit character counter holds only 255, so we can't use 256. */
#define MAX_8BIT_CHAR_COUNT 255

/* NOTE(review): the users of this level threshold are outside this view;
   presumably it selects a fast path for 8bit characters - confirm. */
#define FAST_8BIT_LEVEL 2

/* NOTE(review): presumably the thresholds (a percentage and a minimum size
   in bytes) that decide when the trie file gets compressed - confirm
   against the code that uses them. */
#define TRIE_COMPRESS_PERCENTAGE 30
#define TRIE_COMPRESS_MIN_SIZE (1024*50)

/* NOTE(review): presumably the on-disk format version and the lock wait
   timeout in seconds - confirm. */
#define SQUAT_TRIE_VERSION 1
#define SQUAT_TRIE_LOCK_TIMEOUT 60

/* for non-x86 use memcpy() when accessing unaligned int* addresses */
#if defined(__i386__) || defined(__x86_64__)
# define ALLOW_UNALIGNED_ACCESS
#endif

/* Trie level that NODE_CHARS16() treats as the leaf level: at this level
   the children slots hold uint32_t values instead of node pointers.
   It also sizes prev_added[] in the build context (BLOCK_SIZE-1). */
#define BLOCK_SIZE 4
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
41
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Round size up to the next multiple of sizeof(void *).
   The mask is computed in size_t: building it as a 32bit unsigned int
   would zero-extend it on LP64 systems and silently clear the upper
   32 bits of any size that is 4 GB or larger. */
#define ALIGN(size) \
	(((size) + sizeof(void *)-1) & ~(sizeof(void *)-1))
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
44
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* State kept for one squat trie file.
   NOTE(review): the per-field notes below are inferred from names and types
   only; the code using them is outside this view - confirm before relying
   on them. */
struct squat_trie {
	/* presumably the path and descriptor of the trie file, plus the
	   device/inode it was opened with - confirm */
	char *filepath;
	int fd;
	dev_t dev;
	ino_t ino;

	/* locking state for the trie file */
	enum file_lock_method lock_method;
	struct file_lock *file_lock;
	int lock_count;
	int lock_type; /* F_RDLCK / F_WRLCK */

	/* presumably the read path used instead of mmap when
	   mmap_disable is set - confirm */
	struct file_cache *file_cache;
	uint32_t file_cache_modify_counter;

	void *mmap_base; /* NULL with mmap_disable=yes */
	const uint8_t *const_mmap_base;
	size_t mmap_size;

	const struct squat_trie_header *hdr;
	uint32_t uidvalidity;

	char *uidlist_filepath;
	struct squat_uidlist *uidlist;
	/* in-memory root node of the trie */
	struct trie_node *root;
	buffer_t *buf;

	unsigned int corrupted:1;
	unsigned int mmap_disable:1;
};
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
74
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
/* State carried through one trie build (see squat_trie_build_flush() and
   trie_write_node(), which both take this context).
   NOTE(review): field notes are inferred from names and types - confirm. */
struct squat_trie_build_context {
	struct squat_trie *trie;

	struct ostream *output;

	uint32_t prev_uid;
	/* number of valid entries in prev_added[] - presumably; confirm */
	unsigned int prev_added_size;
	/* holds at most BLOCK_SIZE-1 16bit characters; presumably the tail
	   of the previously added data - confirm */
	uint16_t prev_added[BLOCK_SIZE-1];

	unsigned int node_count;
	unsigned int deleted_space;

	unsigned int modified:1;
	unsigned int failed:1;
	unsigned int locked:1;
};
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
91
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
/* State carried through one trie compression (passed to
   squat_trie_compress_node()).
   NOTE(review): field notes are inferred from names and types - confirm. */
struct squat_trie_compress_context {
	struct squat_trie *trie;

	/* presumably the temporary file the compressed trie is written to,
	   with its output stream and descriptor - confirm */
	const char *tmp_path;
	struct ostream *output;
	int fd;

	struct squat_uidlist_compress_ctx *uidlist_ctx;

	unsigned int node_count;
};
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
103
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Header of an in-memory trie node. The variable-length character and
   children arrays described at the end of the struct are stored directly
   after the header and are reached through the NODE_*() macros below. */
struct trie_node {
	/* new characters have been added to this node */
	uint8_t resized:1;
	/* idx pointers have been updated */
	uint8_t modified:1;
	uint8_t chars_8bit_count;
	uint16_t chars_16bit_count;

	uint32_t file_offset;
	uint32_t orig_size;

	/* the node pointers are valid as long as their lowest bit is 0,
	   otherwise they're offsets to the trie file (>> 1).

	   in leaf nodes the children pointers are uint32_t uid_list_idx[]; */
	/* uint8_t 8bit_chars[chars_8bit_count]; */
	/* struct trie_node *children[chars_8bit_count]; */
	/* uint16_t 16bit_chars[chars_16bit_count]; */
	/* struct trie_node *children[chars_16bit_count]; */
};

/* Accessors for the arrays that trail a struct trie_node.

   Every expansion is fully parenthesized, so the result can be used in any
   expression context; without the outer parentheses NODE_CHARS8(node)[i]
   would apply the cast to (node + 1)[i] instead of indexing the array.

   "level" selects the size of the 8bit children slots: uint32_t at
   level == BLOCK_SIZE (leaf nodes), struct trie_node * otherwise.
   The arguments are evaluated more than once; don't pass expressions with
   side effects. */
#define NODE_CHARS8(node) \
	((uint8_t *)((node) + 1))
#define NODE_CHILDREN8(node) \
	((struct trie_node **) \
	 ((char *)((node) + 1) + \
	  ALIGN(sizeof(uint8_t) * ((node)->chars_8bit_count))))
#define NODE_CHARS16(node, level) \
	((uint16_t *)((char *)NODE_CHILDREN8(node) + \
		      ((node)->chars_8bit_count) * \
		      ((level) == BLOCK_SIZE ? \
		       sizeof(uint32_t) : sizeof(struct trie_node *))))
#define NODE_CHILDREN16(node, level) \
	((struct trie_node **) \
	 ((char *)NODE_CHARS16(node, level) + \
	  ALIGN(sizeof(uint16_t) * ((node)->chars_16bit_count))))
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
139
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
/* Forward declarations of file-local (static) helpers. */
static void free_node(struct trie_node *node, unsigned int level);
static void squat_trie_compress_chars8(struct trie_node *node);
static int
squat_trie_compress_node(struct squat_trie_compress_context *ctx,
			 struct trie_node *node, unsigned int level);
static int trie_write_node(struct squat_trie_build_context *ctx,
			   unsigned int level, struct trie_node *node);
static int
squat_trie_build_flush(struct squat_trie_build_context *ctx, bool finish);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
149
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Comparator taking a 16bit key and an 8bit array element.
   Returns key - element: negative, zero or positive when the key is
   smaller than, equal to or larger than the element. */
static int chr_8bit_cmp(const void *_key, const void *_chr)
{
	const int wanted = *(const uint16_t *)_key;
	const int element = *(const uint8_t *)_chr;

	return wanted - element;
}
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
157
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Comparator taking a 16bit key and a 16bit array element.
   Returns key - element: negative, zero or positive when the key is
   smaller than, equal to or larger than the element. */
static int chr_16bit_cmp(const void *_key, const void *_chr)
{
	const int wanted = *(const uint16_t *)_key;
	const int element = *(const uint16_t *)_chr;

	return wanted - element;
}
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
164
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
165 void _squat_trie_pack_num(buffer_t *buffer, uint32_t num)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
166 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
167 uint8_t c;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
168
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
169 /* number continues as long as the highest bit is set */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
170 while (num >= 0x80) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
171 c = (num & 0x7f) | 0x80;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
172 num >>= 7;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
173
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
174 buffer_append(buffer, &c, 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
175 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
176
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
177 c = num;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
178 buffer_append(buffer, &c, 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
179 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
180
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Decode one variable-length number (the format written by
   _squat_trie_pack_num(): 7 bits per byte, lowest bits first, highest bit
   set on every byte except the last) from the range [*p, end).

   On success *p is moved past the number and its value is returned.
   Returns 0 and leaves *p untouched if the data ends before the number
   does or if the number doesn't fit into 32 bits. */
uint32_t _squat_trie_unpack_num(const uint8_t **p, const uint8_t *end)
{
	const uint8_t *c = *p;
	uint32_t value = 0;
	unsigned int bits = 0;

	while (c != end && *c >= 0x80) {
		if (bits > 32-7) {
			/* a 32bit number has at most 4 continuation bytes.
			   stop here so that the shift count stays below the
			   width of the type. */
			return 0;
		}
		/* shift as unsigned 32bit, not as a promoted signed int */
		value |= (uint32_t)(*c & 0x7f) << bits;
		bits += 7;
		c++;
	}

	if (c == end) {
		/* last number shouldn't end with high bit */
		return 0;
	}
	if (bits > 32-7 && (*c >> (32 - bits)) != 0) {
		/* we have only 32bit numbers. the 5th byte may carry only
		   the 4 highest bits of the value. */
		return 0;
	}

	value |= (uint32_t)*c << bits;
	*p = c + 1;
	return value;
}
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
206
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
207 static const uint16_t *
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
208 data_normalize(const void *data, size_t size, buffer_t *dest)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
209 {
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
210 const unsigned char *src = data;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
211 size_t i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
212
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
213 buffer_set_used_size(dest, 0);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
214 for (i = 0; i < size; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
215 uint16_t chr;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
216
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
217 if (src[i] <= 32)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
218 chr = 0;
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
219 else if (src[i] <= 'z')
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
220 chr = i_toupper(src[i]) - 32;
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
221 else if (src[i] < 128)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
222 chr = src[i] - 32 - 26;
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
223 else {
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
224 /* UTF-8 input */
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
225 unichar_t uchr;
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
226
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
227 /* FIXME: can we do anything better than just
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
228 truncate with >16bit values? */
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
229 uchr = uni_utf8_get_char_len(src+i, size-i);
4902
92f7f19c8c58 Handle broken UTF8
Timo Sirainen <tss@iki.fi>
parents: 4901
diff changeset
230 if (uchr == (unichar_t)-1)
92f7f19c8c58 Handle broken UTF8
Timo Sirainen <tss@iki.fi>
parents: 4901
diff changeset
231 chr = 0;
92f7f19c8c58 Handle broken UTF8
Timo Sirainen <tss@iki.fi>
parents: 4901
diff changeset
232 else {
92f7f19c8c58 Handle broken UTF8
Timo Sirainen <tss@iki.fi>
parents: 4901
diff changeset
233 uchr -= 32 - 26;
92f7f19c8c58 Handle broken UTF8
Timo Sirainen <tss@iki.fi>
parents: 4901
diff changeset
234 chr = uchr < (uint16_t)-1 ? uchr : 0;
92f7f19c8c58 Handle broken UTF8
Timo Sirainen <tss@iki.fi>
parents: 4901
diff changeset
235 }
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
236 i += uni_utf8_skip[src[i] & 0xff] - 1;
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
237 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
238 buffer_append(dest, &chr, sizeof(chr));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
239 }
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
240
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
241 return dest->data;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
242 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
243
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
244 static void
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
245 squat_trie_set_syscall_error(struct squat_trie *trie, const char *function)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
246 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
247 i_error("%s failed with index search file %s: %m",
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
248 function, trie->filepath);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
249 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
250
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
251 void squat_trie_set_corrupted(struct squat_trie *trie, const char *reason)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
252 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
253 i_error("Corrupted index search file %s: %s", trie->filepath, reason);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
254
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
255 (void)unlink(trie->filepath);
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
256 (void)unlink(trie->uidlist_filepath);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
257 trie->corrupted = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
258 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
259
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Copy `count` 32bit values from `src_idx` into `children`.

   `src_idx` may be unaligned for uint32_t access, so it is never
   dereferenced directly; the bytes are copied with memcpy() instead. This
   is valid on every platform, so no ALLOW_UNALIGNED_ACCESS special-casing
   is needed. The values are copied verbatim, without byte order
   conversion.

   The source and destination are assumed not to overlap (the original
   unaligned path already relied on that by using memcpy()). */
static void
trie_map_node_save_leaf(const uint32_t *src_idx, unsigned int count,
			uint32_t *children)
{
	/* nothing to copy; also avoids passing possibly-NULL pointers to
	   memcpy() with a zero length */
	if (count == 0)
		return;

	memcpy(children, src_idx, count * sizeof(*children));
}
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
283
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
284 static void
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
285 trie_map_node_save_children(unsigned int level, const uint32_t *src_idx,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
286 unsigned int count, struct trie_node **children)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
287 {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
288 unsigned int i;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
289
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
290 if (level == BLOCK_SIZE) {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
291 trie_map_node_save_leaf(src_idx, count, (uint32_t *)children);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
292 return;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
293 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
294
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
295 #ifndef ALLOW_UNALIGNED_ACCESS
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
296 if ((POINTER_CAST_TO(src_idx, size_t) & (sizeof(uint32_t)-1)) == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
297 #endif
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
298 for (i = 0; i < count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
299 children[i] = src_idx[i] == 0 ? NULL :
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
300 POINTER_CAST(src_idx[i] | 1);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
301 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
302 #ifndef ALLOW_UNALIGNED_ACCESS
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
303 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
304 /* unaligned access */
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
305 const uint8_t *src_idx8 = (const uint8_t *)src_idx;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
306 uint32_t idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
307
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
308 for (i = 0; i < count; i++) {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
309 memcpy(&idx, src_idx8 + i * sizeof(uint32_t),
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
310 sizeof(idx));
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
311 children[i] = idx == 0 ? NULL : POINTER_CAST(idx | 1);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
312 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
313 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
314 #endif
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
315 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
316
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
317 static int trie_map_area(struct squat_trie *trie, uoff_t offset, size_t len)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
318 {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
319 ssize_t ret;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
320
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
321 if (trie->file_cache == NULL)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
322 return 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
323
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
324 ret = file_cache_read(trie->file_cache, offset, len);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
325 if (ret < 0) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
326 squat_trie_set_syscall_error(trie, "file_cache_read()");
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
327 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
328 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
329 trie->const_mmap_base =
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
330 file_cache_get_map(trie->file_cache, &trie->mmap_size);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
331 trie->hdr = (const void *)trie->const_mmap_base;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
332 return 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
333 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
334
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
335 static void
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
336 trie_map_fix_fast_node(struct trie_node *node, unsigned int chars8_count)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
337 {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
338 uint8_t *chars = NODE_CHARS8(node);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
339 struct trie_node **children = NODE_CHILDREN8(node);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
340 int i, j;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
341
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
342 i_assert(node->chars_8bit_count == MAX_8BIT_CHAR_COUNT);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
343
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
344 j = chars8_count - 1;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
345 for (i = node->chars_8bit_count - 1; i >= 0; i--) {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
346 if (j >= 0 && i == chars[j])
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
347 children[i] = children[j--];
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
348 else
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
349 children[i] = NULL;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
350 chars[i] = i;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
351 }
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
352 }
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
353
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
354 static int
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
355 trie_map_node(struct squat_trie *trie, uint32_t offset, unsigned int level,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
356 struct trie_node **node_r)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
357 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
358 struct trie_node *node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
359 const uint8_t *p, *end, *chars8_src, *chars16_src;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
360 uint32_t num, chars8_count, chars16_count;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
361 unsigned int chars8_offset, chars8_size, chars8_memsize;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
362 unsigned int chars16_offset, chars16_size, chars16_memsize;
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
363 unsigned int idx_size, alloced_chars8_count;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
364
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
365 i_assert(trie->fd != -1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
366
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
367 if (trie_map_area(trie, offset, 2+256) < 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
368 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
369
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
370 if (offset >= trie->mmap_size) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
371 squat_trie_set_corrupted(trie, "trie offset too large");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
372 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
373 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
374
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
375 p = trie->const_mmap_base + offset;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
376 end = trie->const_mmap_base + trie->mmap_size;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
377
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
378 /* get 8bit char count and check that it's valid */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
379 num = _squat_trie_unpack_num(&p, end);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
380 chars8_count = num >> 1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
381
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
382 chars8_offset = p - trie->const_mmap_base;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
383 chars8_size = chars8_count * (sizeof(uint8_t) + sizeof(uint32_t));
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
384
5068
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
385 if (trie_map_area(trie, chars8_offset, chars8_size + 8) < 0)
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
386 return -1;
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
387
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
388 if (chars8_count > MAX_8BIT_CHAR_COUNT ||
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
389 chars8_offset + chars8_size > trie->mmap_size) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
390 squat_trie_set_corrupted(trie, "trie offset broken");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
391 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
392 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
393
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
394 idx_size = level == BLOCK_SIZE ?
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
395 sizeof(uint32_t) : sizeof(struct trie_node *);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
396
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
397 alloced_chars8_count = level <= FAST_8BIT_LEVEL ?
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
398 MAX_8BIT_CHAR_COUNT : chars8_count;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
399 chars8_memsize = ALIGN(alloced_chars8_count * sizeof(uint8_t)) +
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
400 alloced_chars8_count * idx_size;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
401
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
402 if ((num & 1) == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
403 /* no 16bit chars */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
404 chars16_count = 0;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
405 chars16_memsize = 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
406 chars16_offset = 0;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
407 } else {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
408 /* get the 16bit char count */
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
409 p = trie->const_mmap_base + chars8_offset + chars8_size;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
410 end = trie->const_mmap_base + trie->mmap_size;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
411
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
412 chars16_count = _squat_trie_unpack_num(&p, end);
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
413 if (chars16_count > 65536) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
414 squat_trie_set_corrupted(trie, "trie offset broken");
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
415 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
416 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
417 chars16_offset = p - trie->const_mmap_base;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
418
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
419 /* map the required area size and make sure it exists */
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
420 chars16_size = chars16_count *
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
421 (sizeof(uint16_t) + sizeof(uint32_t));
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
422 if (trie_map_area(trie, chars16_offset, chars16_size) < 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
423 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
424
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
425 if (chars16_offset + chars16_size > trie->mmap_size) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
426 squat_trie_set_corrupted(trie, "trie offset broken");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
427 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
428 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
429
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
430 chars16_memsize = ALIGN(chars16_count * sizeof(uint16_t)) +
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
431 chars16_count * idx_size;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
432 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
433
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
434 node = i_malloc(sizeof(*node) + chars8_memsize + chars16_memsize);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
435 node->chars_8bit_count = alloced_chars8_count;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
436 node->chars_16bit_count = chars16_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
437 node->file_offset = offset;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
438
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
439 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
440 uint8_t *chars8 = NODE_CHARS8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
441 uint16_t *chars16 = NODE_CHARS16(node, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
442 struct trie_node **children8 = NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
443 struct trie_node **children16 = NODE_CHILDREN16(node, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
444 const uint32_t *src_idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
445 const void *end_offset;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
446
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
447 chars8_src = trie->const_mmap_base + chars8_offset;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
448 chars16_src = trie->const_mmap_base + chars16_offset;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
449
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
450 memcpy(chars8, chars8_src, sizeof(uint8_t) * chars8_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
451 memcpy(chars16, chars16_src, sizeof(uint16_t) * chars16_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
452
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
453 src_idx = CONST_PTR_OFFSET(chars8_src, chars8_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
454 trie_map_node_save_children(level, src_idx, chars8_count,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
455 children8);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
456
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
457 if (alloced_chars8_count != chars8_count)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
458 trie_map_fix_fast_node(node, chars8_count);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
459 if (chars16_count == 0)
5068
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
460 end_offset = &src_idx[chars8_count];
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
461 else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
462 src_idx = CONST_PTR_OFFSET(chars16_src,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
463 chars16_count *
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
464 sizeof(uint16_t));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
465 trie_map_node_save_children(level, src_idx,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
466 chars16_count, children16);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
467 end_offset = &src_idx[chars16_count];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
468 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
469
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
470 node->orig_size = ((const uint8_t *)end_offset -
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
471 trie->const_mmap_base) - offset;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
472 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
473
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
474 *node_r = node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
475 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
476 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
477
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
478 static void free_children(unsigned int level, struct trie_node **children,
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
479 unsigned int count)
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
480 {
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
481 unsigned int i;
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
482 uint32_t child_idx;
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
483
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
484 for (i = 0; i < count; i++) {
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
485 child_idx = POINTER_CAST_TO(children[i], size_t);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
486 if ((child_idx & 1) == 0 && children[i] != NULL)
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
487 free_node(children[i], level);
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
488 }
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
489 }
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
490
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
491 static void free_node(struct trie_node *node, unsigned int level)
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
492 {
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
493 if (level < BLOCK_SIZE) {
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
494 struct trie_node **children8 = NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
495 struct trie_node **children16 = NODE_CHILDREN16(node, level);
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
496
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
497 free_children(level + 1, children8, node->chars_8bit_count);
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
498 free_children(level + 1, children16, node->chars_16bit_count);
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
499 }
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
500 i_free(node);
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
501 }
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
502
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
503 static void squat_trie_unmap(struct squat_trie *trie)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
504 {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
505 if (trie->file_cache != NULL)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
506 file_cache_invalidate(trie->file_cache, 0, (uoff_t)-1);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
507
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
508 if (trie->mmap_base != NULL) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
509 if (munmap(trie->mmap_base, trie->mmap_size) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
510 squat_trie_set_syscall_error(trie, "munmap()");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
511 trie->mmap_base = NULL;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
512 }
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
513
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
514 trie->mmap_size = 0;
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
515 trie->hdr = NULL;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
516 trie->const_mmap_base = NULL;
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
517
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
518 if (trie->root != NULL) {
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
519 free_node(trie->root, 1);
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
520 trie->root = NULL;
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
521 }
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
522 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
523
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
524 static void trie_file_close(struct squat_trie *trie)
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
525 {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
526 if (trie->file_cache != NULL)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
527 file_cache_free(&trie->file_cache);
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
528 if (trie->file_lock != NULL)
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
529 file_lock_free(&trie->file_lock);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
530
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
531 squat_trie_unmap(trie);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
532 if (trie->fd != -1) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
533 if (close(trie->fd) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
534 squat_trie_set_syscall_error(trie, "close()");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
535 trie->fd = -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
536 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
537
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
538 trie->hdr = NULL;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
539 trie->corrupted = FALSE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
540 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
541
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
542 static int
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
543 trie_map_check_header(struct squat_trie *trie,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
544 const struct squat_trie_header *hdr, uoff_t file_size)
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
545 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
546 if (hdr->version != SQUAT_TRIE_VERSION)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
547 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
548
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
549 if (hdr->used_file_size > file_size) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
550 squat_trie_set_corrupted(trie, "used_file_size too large");
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
551 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
552 }
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
553 if (hdr->root_offset != 0 &&
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
554 (hdr->root_offset > file_size ||
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
555 hdr->root_offset < sizeof(*hdr))) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
556 squat_trie_set_corrupted(trie, "invalid root_offset");
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
557 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
558 }
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
559 if (hdr->uidvalidity != trie->uidvalidity) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
560 squat_trie_set_corrupted(trie, "uidvalidity changed");
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
561 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
562 }
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
563
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
564 return 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
565 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
566
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
567 static int squat_trie_file_was_modified(struct squat_trie *trie)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
568 {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
569 struct squat_trie_header hdr;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
570 int ret;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
571
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
572 ret = pread_full(trie->fd, &hdr.modify_counter,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
573 sizeof(hdr.modify_counter),
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
574 offsetof(struct squat_trie_header, modify_counter));
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
575 if (ret < 0) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
576 squat_trie_set_syscall_error(trie, "pread_full()");
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
577 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
578 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
579 if (ret == 0) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
580 /* broken file, treat as modified */
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
581 return 1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
582 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
583 return hdr.modify_counter == trie->file_cache_modify_counter ? 0 : 1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
584 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
585
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
586 static int squat_trie_map(struct squat_trie *trie)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
587 {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
588 const struct squat_trie_header *hdr;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
589 struct stat st;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
590 ssize_t ret;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
591
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
592 if (trie->hdr != NULL) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
593 if (!trie->mmap_disable) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
594 if (trie->hdr->used_file_size <= trie->mmap_size) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
595 /* everything is already mapped */
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
596 return 1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
597 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
598 } else {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
599 ret = squat_trie_file_was_modified(trie);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
600 if (ret <= 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
601 return ret < 0 ? -1 : 1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
602 }
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
603 }
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
604
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
605 if (fstat(trie->fd, &st) < 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
606 squat_trie_set_syscall_error(trie, "fstat()");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
607 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
608 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
609 trie->dev = st.st_dev;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
610 trie->ino = st.st_ino;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
611
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
612 squat_trie_unmap(trie);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
613
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
614 if (!trie->mmap_disable) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
615 trie->mmap_size = st.st_size;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
616 trie->mmap_base = mmap(NULL, trie->mmap_size,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
617 PROT_READ | PROT_WRITE,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
618 MAP_SHARED, trie->fd, 0);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
619 if (trie->mmap_base == MAP_FAILED) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
620 trie->mmap_size = 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
621 trie->mmap_base = NULL;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
622 squat_trie_set_syscall_error(trie, "mmap()");
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
623 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
624 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
625 trie->const_mmap_base = trie->mmap_base;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
626 } else {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
627 ret = file_cache_read(trie->file_cache, 0, sizeof(*trie->hdr));
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
628 if (ret < 0) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
629 squat_trie_set_syscall_error(trie, "file_cache_read()");
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
630 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
631 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
632 if ((size_t)ret < sizeof(*trie->hdr)) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
633 squat_trie_set_corrupted(trie, "file too small");
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
634 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
635 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
636 trie->const_mmap_base =
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
637 file_cache_get_map(trie->file_cache, &trie->mmap_size);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
638 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
639
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
640 hdr = (const void *)trie->const_mmap_base;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
641 if (trie_map_check_header(trie, hdr, st.st_size) < 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
642 return -1;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
643 trie->hdr = hdr;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
644 trie->file_cache_modify_counter = trie->hdr->modify_counter;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
645
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
646 if (trie->hdr->root_offset != 0) {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
647 if (trie_map_node(trie, trie->hdr->root_offset,
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
648 1, &trie->root) < 0)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
649 return 0;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
650 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
651 return 1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
652 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
653
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
654 static void trie_file_open_fd(struct squat_trie *trie, int fd)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
655 {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
656 struct stat st;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
657
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
658 if (fstat(fd, &st) < 0) {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
659 /* don't bother adding complexity by trying to handle this
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
660 error here. we'll break later anyway in easier error
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
661 handling paths. */
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
662 squat_trie_set_syscall_error(trie, "fstat()");
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
663 trie->ino = 0;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
664 } else {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
665 trie->dev = st.st_dev;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
666 trie->ino = st.st_ino;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
667 }
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
668 trie->fd = fd;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
669
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
670 if (trie->mmap_disable)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
671 trie->file_cache = file_cache_new(trie->fd);
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
672 }
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
673
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
674 static int trie_file_open(struct squat_trie *trie, bool create)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
675 {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
676 int fd;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
677
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
678 i_assert(trie->fd == -1);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
679
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
680 fd = open(trie->filepath, O_RDWR | (create ? O_CREAT : 0), 0660);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
681 if (fd == -1) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
682 if (errno == ENOENT)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
683 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
684
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
685 squat_trie_set_syscall_error(trie, "open()");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
686 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
687 }
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
688 trie_file_open_fd(trie, fd);
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
689 return 1;
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
690 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
691
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
692 static int trie_file_create_finish(struct squat_trie *trie)
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
693 {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
694 struct squat_trie_header hdr;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
695 struct stat st;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
696
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
697 if (fstat(trie->fd, &st) < 0) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
698 squat_trie_set_syscall_error(trie, "fstat()");
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
699 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
700 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
701
5010
8c1433eb2ba9 Compiler warning fixes
Timo Sirainen <tss@iki.fi>
parents: 4947
diff changeset
702 if (st.st_size <= (off_t)sizeof(hdr)) {
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
703 memset(&hdr, 0, sizeof(hdr));
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
704 hdr.version = SQUAT_TRIE_VERSION;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
705 hdr.uidvalidity = trie->uidvalidity;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
706 hdr.used_file_size = sizeof(hdr);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
707
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
708 if (pwrite_full(trie->fd, &hdr, sizeof(hdr), 0) < 0) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
709 squat_trie_set_syscall_error(trie, "pwrite_full()");
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
710 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
711 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
712 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
713
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
714 return 0;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
715 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
716
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
717 struct squat_trie *
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
718 squat_trie_open(const char *path, uint32_t uidvalidity,
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
719 enum file_lock_method lock_method, bool mmap_disable)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
720 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
721 struct squat_trie *trie;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
722
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
723 trie = i_new(struct squat_trie, 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
724 trie->fd = -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
725 trie->filepath = i_strdup(path);
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
726 trie->uidvalidity = uidvalidity;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
727 trie->lock_method = lock_method;
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
728 trie->mmap_disable = mmap_disable;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
729 trie->buf = buffer_create_dynamic(default_pool, 1024);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
730
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
731 trie->uidlist_filepath = i_strconcat(path, ".uids", NULL);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
732 trie->uidlist =
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
733 squat_uidlist_init(trie, trie->uidlist_filepath,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
734 uidvalidity, mmap_disable);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
735 return trie;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
736 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
737
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
738 void squat_trie_close(struct squat_trie *trie)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
739 {
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
740 squat_trie_unmap(trie);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
741 buffer_free(trie->buf);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
742 squat_uidlist_deinit(trie->uidlist);
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
743 i_free(trie->uidlist_filepath);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
744 i_free(trie->filepath);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
745 i_free(trie);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
746 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
747
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
748 int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *uid_r)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
749 {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
750 int ret;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
751
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
752 if (trie->fd == -1) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
753 if ((ret = trie_file_open(trie, FALSE)) < 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
754 return ret;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
755 if (ret == 0) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
756 *uid_r = 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
757 return 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
758 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
759 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
760
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
761 if (squat_trie_lock(trie, F_RDLCK) <= 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
762 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
763
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
764 ret = squat_uidlist_get_last_uid(trie->uidlist, uid_r);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
765 squat_trie_unlock(trie);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
766 return ret;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
767 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
768
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
769 static int squat_trie_is_file_stale(struct squat_trie *trie)
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
770 {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
771 struct stat st;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
772
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
773 if (stat(trie->filepath, &st) < 0) {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
774 if (errno == ENOENT)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
775 return 1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
776
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
777 squat_trie_set_syscall_error(trie, "stat()");
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
778 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
779 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
780
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
781 return st.st_ino == trie->ino &&
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
782 CMP_DEV_T(st.st_dev, trie->dev) ? 0 : 1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
783 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
784
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
785 static int
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
786 squat_trie_file_lock(struct squat_trie *trie, int fd, const char *path,
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
787 int lock_type, struct file_lock **lock_r)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
788 {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
789 int ret;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
790
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
791 ret = file_wait_lock(fd, path, lock_type, trie->lock_method,
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
792 SQUAT_TRIE_LOCK_TIMEOUT, lock_r);
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
793 if (ret == 0)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
794 squat_trie_set_syscall_error(trie, "file_wait_lock()");
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
795 return ret;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
796 }
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
797
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
798 int squat_trie_lock(struct squat_trie *trie, int lock_type)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
799 {
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
800 bool created = FALSE;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
801 int ret;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
802
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
803 i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
804
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
805 if (trie->lock_count > 0) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
806 /* read lock -> write lock would deadlock */
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
807 i_assert(trie->lock_type == lock_type || lock_type == F_RDLCK);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
808
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
809 trie->lock_count++;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
810 return 1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
811 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
812
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
813 if (trie->fd == -1 || trie->corrupted) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
814 trie_file_close(trie);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
815 if (lock_type == F_WRLCK) {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
816 if ((ret = trie_file_open(trie, FALSE)) < 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
817 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
818 if (ret == 0) {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
819 if (trie_file_open(trie, TRUE) < 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
820 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
821 created = TRUE;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
822 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
823 } else {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
824 if (trie_file_open(trie, FALSE) <= 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
825 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
826 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
827 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
828
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
829 for (;;) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
830 i_assert(trie->file_lock == NULL);
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
831 ret = squat_trie_file_lock(trie, trie->fd, trie->filepath,
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
832 lock_type, &trie->file_lock);
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
833 if (ret <= 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
834 return ret;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
835
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
836 /* if the trie has been compressed, we need to reopen the
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
837 file and try to lock again */
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
838 ret = squat_trie_is_file_stale(trie);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
839 if (ret == 0)
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
840 break;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
841
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
842 file_unlock(&trie->file_lock);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
843 if (ret < 0)
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
844 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
845
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
846 trie_file_close(trie);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
847 if (trie_file_open(trie, FALSE) <= 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
848 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
849 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
850
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
851 if (created) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
852 /* we possibly created this file. now that we've locked the
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
853 file, we can safely check if someone else already wrote the
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
854 header or if we should do it now */
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
855 if (trie_file_create_finish(trie) < 0) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
856 file_unlock(&trie->file_lock);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
857 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
858 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
859 }
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
860
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
861 if (squat_trie_map(trie) <= 0) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
862 file_unlock(&trie->file_lock);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
863 return -1;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
864 }
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
865 if (squat_uidlist_refresh(trie->uidlist) < 0) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
866 file_unlock(&trie->file_lock);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
867 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
868 }
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
869
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
870 trie->lock_count++;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
871 trie->lock_type = lock_type;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
872 return 1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
873 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
874
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
875 void squat_trie_unlock(struct squat_trie *trie)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
876 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
877 i_assert(trie->lock_count > 0);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
879 if (--trie->lock_count > 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
880 return;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
881
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
882 file_unlock(&trie->file_lock);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
883 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
884
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
885 static struct trie_node *
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
886 node_alloc(uint16_t chr, unsigned int level)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
887 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
888 struct trie_node *node;
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
889 unsigned int i, idx_size, idx_offset = sizeof(*node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
890
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
891 idx_size = level < BLOCK_SIZE ?
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
892 sizeof(struct trie_node *) : sizeof(uint32_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
893
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
894 if (level <= FAST_8BIT_LEVEL) {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
895 uint8_t *chars;
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
896 unsigned int chars16_count = chr >= MAX_8BIT_CHAR_COUNT ? 1 : 0;
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
897
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
898 node = i_malloc(sizeof(*node) +
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
899 ALIGN(MAX_8BIT_CHAR_COUNT) +
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
900 ALIGN(sizeof(uint16_t) * chars16_count) +
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
901 (MAX_8BIT_CHAR_COUNT + chars16_count) *
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
902 idx_size);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
903 node->chars_8bit_count = MAX_8BIT_CHAR_COUNT;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
904
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
905 chars = NODE_CHARS8(node);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
906 for (i = 0; i < MAX_8BIT_CHAR_COUNT; i++)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
907 chars[i] = i;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
908
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
909 if (chars16_count > 0) {
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
910 uint16_t *chars16 = NODE_CHARS16(node, 0);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
911
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
912 node->chars_16bit_count = chars16_count;
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
913 chars16[0] = chr;
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
914 }
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
915 } else if (chr < MAX_8BIT_CHAR_COUNT) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
916 uint8_t *chrp;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
917
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
918 idx_offset += ALIGN(sizeof(*chrp));
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
919 node = i_malloc(idx_offset + idx_size);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
920 node->chars_8bit_count = 1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
921
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
922 chrp = PTR_OFFSET(node, sizeof(*node));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
923 *chrp = chr;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
924 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
925 uint16_t *chrp;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
926
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
927 idx_offset += ALIGN(sizeof(*chrp));
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
928 node = i_malloc(idx_offset + idx_size);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
929 node->chars_16bit_count = 1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
930
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
931 chrp = PTR_OFFSET(node, sizeof(*node));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
932 *chrp = chr;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
933 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
934
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
935 node->modified = TRUE;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
936 node->resized = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
937 return node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
938 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
939
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
940 static struct trie_node *
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
941 node_realloc(struct trie_node *node, uint32_t char_idx, uint16_t chr,
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
942 unsigned int level)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
943 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
944 struct trie_node *new_node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
945 unsigned int old_size_8bit, old_size_16bit, old_idx_offset;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
946 unsigned int idx_size, old_size, new_size, new_idx_offset;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
947 unsigned int hole1_pos, hole2_pos, skip;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
948
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
949 idx_size = level < BLOCK_SIZE ?
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
950 sizeof(struct trie_node *) : sizeof(uint32_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
951
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
952 old_size_8bit = ALIGN(node->chars_8bit_count) +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
953 node->chars_8bit_count * idx_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
954 old_size_16bit = ALIGN(sizeof(uint16_t) * node->chars_16bit_count) +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
955 node->chars_16bit_count * idx_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
956 old_size = sizeof(*node) + old_size_8bit + old_size_16bit;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
957
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
958 if (chr < MAX_8BIT_CHAR_COUNT) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
959 new_idx_offset = sizeof(*node) +
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
960 ALIGN(node->chars_8bit_count + 1);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
961 new_size = new_idx_offset + old_size_16bit +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
962 (node->chars_8bit_count + 1) * idx_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
963 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
964 new_idx_offset = sizeof(*node) + old_size_8bit +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
965 ALIGN((node->chars_16bit_count + 1) * sizeof(uint16_t));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
966 new_size = new_idx_offset +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
967 (node->chars_16bit_count + 1) * idx_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
968 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
969
4897
30834ce4362f Use realloc instead of alloc+free.
Timo Sirainen <tss@iki.fi>
parents: 4890
diff changeset
970 new_node = t_buffer_get(new_size);
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
971 if (chr < MAX_8BIT_CHAR_COUNT) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
972 hole1_pos = sizeof(*node) + char_idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
973 old_idx_offset = sizeof(*node) + ALIGN(node->chars_8bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
974 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
975 hole1_pos = sizeof(*node) + old_size_8bit +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
976 char_idx * sizeof(uint16_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
977 old_idx_offset = sizeof(*node) + old_size_8bit +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
978 ALIGN(node->chars_16bit_count * sizeof(uint16_t));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
979 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
980 hole2_pos = old_idx_offset + idx_size * char_idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
981
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
982 /* 0..character position */
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
983 memcpy(new_node, node, hole1_pos);
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
984 if (chr < MAX_8BIT_CHAR_COUNT) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
985 uint8_t *chrp = PTR_OFFSET(new_node, hole1_pos);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
986 *chrp = chr;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
987 new_node->chars_8bit_count++;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
988
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
989 /* rest of the characters */
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
990 memcpy(PTR_OFFSET(new_node, hole1_pos + sizeof(uint8_t)),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
991 PTR_OFFSET(node, hole1_pos), old_idx_offset - hole1_pos);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
992 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
993 uint16_t *chrp = PTR_OFFSET(new_node, hole1_pos);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
994 *chrp = chr;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
995 new_node->chars_16bit_count++;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
996
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
997 /* rest of the characters */
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
998 memcpy(PTR_OFFSET(new_node, hole1_pos + sizeof(uint16_t)),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
999 PTR_OFFSET(node, hole1_pos), old_idx_offset - hole1_pos);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1000 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1001
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1002 /* indexes from 0 to character position */
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1003 memcpy(PTR_OFFSET(new_node, new_idx_offset),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1004 PTR_OFFSET(node, old_idx_offset),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1005 hole2_pos - old_idx_offset);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1006
4947
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1007 /* zero the inserted character index */
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1008 skip = char_idx * idx_size;
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1009 memset(PTR_OFFSET(new_node, new_idx_offset + skip), 0, idx_size);
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1010
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1011 /* rest of the indexes */
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1012 skip += idx_size;
8be1ce6cfeec Cleanups and fixes
Timo Sirainen <tss@iki.fi>
parents: 4902
diff changeset
1013 memcpy(PTR_OFFSET(new_node, new_idx_offset + skip),
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1014 PTR_OFFSET(node, hole2_pos),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1015 old_size - hole2_pos);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1016
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1017 new_node->resized = TRUE;
4897
30834ce4362f Use realloc instead of alloc+free.
Timo Sirainen <tss@iki.fi>
parents: 4890
diff changeset
1018
30834ce4362f Use realloc instead of alloc+free.
Timo Sirainen <tss@iki.fi>
parents: 4890
diff changeset
1019 node = i_realloc(node, 0, new_size);
30834ce4362f Use realloc instead of alloc+free.
Timo Sirainen <tss@iki.fi>
parents: 4890
diff changeset
1020 memcpy(node, new_node, new_size);
30834ce4362f Use realloc instead of alloc+free.
Timo Sirainen <tss@iki.fi>
parents: 4890
diff changeset
1021 return node;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1022 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1023
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1024 static int
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1025 trie_insert_node(struct squat_trie_build_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1026 struct trie_node **parent,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1027 const uint16_t *data, uint32_t uid, unsigned int level)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1028 {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1029 struct squat_trie *trie = ctx->trie;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1030 struct trie_node *node = *parent;
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1031 struct trie_node **children;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1032 uint32_t char_idx;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1033 bool modified = FALSE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1034 int ret;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1035
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1036 if (*data < MAX_8BIT_CHAR_COUNT) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1037 unsigned int count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1038
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1039 if (node == NULL) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1040 ctx->node_count++;
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
1041 node = *parent = node_alloc(*data, level);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1042 char_idx = level <= FAST_8BIT_LEVEL ? *data : 0;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1043 modified = TRUE;
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1044 } else if (level <= FAST_8BIT_LEVEL) {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1045 char_idx = *data;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1046 } else {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1047 uint8_t *chars = NODE_CHARS8(node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1048 uint8_t *pos;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1049
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1050 count = node->chars_8bit_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1051 pos = bsearch_insert_pos(data, chars, count,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1052 sizeof(chars[0]),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1053 chr_8bit_cmp);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1054 char_idx = pos - chars;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1055 if (char_idx == count || *pos != *data) {
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
1056 node = node_realloc(node, char_idx,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1057 *data, level);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1058 *parent = node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1059 modified = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1060 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1061 }
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1062 children = NODE_CHILDREN8(node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1063 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1064 unsigned int offset = sizeof(*node);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1065 unsigned int count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1066
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1067 if (node == NULL) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1068 ctx->node_count++;
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
1069 node = *parent = node_alloc(*data, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1070 char_idx = 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1071 modified = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1072 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1073 unsigned int idx_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1074 uint16_t *chars, *pos;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1075
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1076 idx_size = level < BLOCK_SIZE ?
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1077 sizeof(struct trie_node *) : sizeof(uint32_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1078 offset += ALIGN(node->chars_8bit_count) +
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1079 idx_size * node->chars_8bit_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1080 chars = PTR_OFFSET(node, offset);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1081
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1082 count = node->chars_16bit_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1083 pos = bsearch_insert_pos(data, chars, count,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1084 sizeof(chars[0]),
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1085 chr_16bit_cmp);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1086 char_idx = pos - chars;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1087 if (char_idx == count || *pos != *data) {
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
1088 node = node_realloc(node, char_idx,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1089 *data, level);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1090 *parent = node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1091 modified = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1092 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1093 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1094
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1095 children = NODE_CHILDREN16(node, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1096 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1097
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1098 if (level < BLOCK_SIZE) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1099 size_t child_idx = POINTER_CAST_TO(children[char_idx], size_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1100
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1101 if ((child_idx & 1) != 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1102 if (trie_map_node(trie, child_idx & ~1, level + 1,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1103 &children[char_idx]) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1104 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1105 }
5068
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1106
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1107 if (children[char_idx] == NULL)
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1108 node->resized = TRUE;
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1109
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1110 ret = trie_insert_node(ctx, &children[char_idx],
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1111 data + 1, uid, level + 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1112 if (ret < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1113 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1114 if (ret > 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1115 node->modified = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1116 } else {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1117 uint32_t *uid_lists = (uint32_t *)children;
5068
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1118
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1119 if (uid_lists[char_idx] == 0)
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1120 node->resized = TRUE;
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1121
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1122 if (squat_uidlist_add(trie->uidlist, &uid_lists[char_idx],
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1123 uid) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1124 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1125
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1126 node->modified = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1127 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1128 return modified ? 1 : 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1129 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1130
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1131 static uint32_t
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1132 trie_lookup_node(struct squat_trie *trie, struct trie_node *node,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1133 const uint16_t *data, unsigned int level)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1134 {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1135 struct trie_node **children;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1136 uint32_t char_idx;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1137
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1138 if (node == NULL)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1139 return 0;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1140
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1141 if (*data < MAX_8BIT_CHAR_COUNT) {
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1142 if (level <= FAST_8BIT_LEVEL)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1143 char_idx = *data;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1144 else {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1145 const uint8_t *chars, *pos;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1146 chars = NODE_CHARS8(node);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1147 pos = bsearch(data, chars, node->chars_8bit_count,
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1148 sizeof(chars[0]), chr_8bit_cmp);
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1149 if (pos == NULL || *pos != *data)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1150 return 0;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1151
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1152 char_idx = pos - chars;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1153 }
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1154 children = NODE_CHILDREN8(node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1155 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1156 const uint16_t *chars, *pos;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1157
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1158 chars = NODE_CHARS16(node, level);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1159 pos = bsearch(data, chars, node->chars_16bit_count,
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1160 sizeof(chars[0]), chr_16bit_cmp);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1161 if (pos == NULL || *pos != *data)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1162 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1163
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1164 char_idx = pos - chars;
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1165 children = NODE_CHILDREN16(node, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1166 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1167
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1168 if (level < BLOCK_SIZE) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1169 size_t child_idx = POINTER_CAST_TO(children[char_idx], size_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1170
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1171 if ((child_idx & 1) != 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1172 /* not mapped to memory yet. do it. */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1173 if (trie_map_node(trie, child_idx & ~1, level + 1,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1174 &children[char_idx]) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1175 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1176 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1177
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1178 return trie_lookup_node(trie, children[char_idx],
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1179 data + 1, level + 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1180 } else {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1181 const uint32_t *uid_lists = (const uint32_t *)children;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1182
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1183 return uid_lists[char_idx];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1184 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1185 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1186
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1187 static bool block_want_add(const uint16_t *data)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1188 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1189 unsigned int i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1190
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1191 /* skip all blocks that contain spaces or control characters.
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1192 no-one searches them anyway */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1193 for (i = 0; i < BLOCK_SIZE; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1194 if (data[i] == 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1195 return FALSE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1196 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1197 return TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1198 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1199
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1200 struct squat_trie_build_context *
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1201 squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1202 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1203 struct squat_trie_build_context *ctx;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1204
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1205 ctx = i_new(struct squat_trie_build_context, 1);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1206 ctx->trie = trie;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1207
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1208 if (squat_trie_lock(trie, F_WRLCK) <= 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1209 ctx->failed = TRUE;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1210 else {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1211 ctx->locked = TRUE;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1212 ctx->node_count = trie->hdr->node_count;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1213
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1214 if (squat_uidlist_get_last_uid(trie->uidlist, last_uid_r) < 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1215 ctx->failed = TRUE;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1216 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1217
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1218 if (ctx->failed)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1219 *last_uid_r = 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1220 return ctx;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1221 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1222
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1223 int squat_trie_build_deinit(struct squat_trie_build_context *ctx)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1224 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1225 int ret = ctx->failed ? -1 : 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1226
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1227 if (ret == 0)
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1228 ret = squat_trie_build_flush(ctx, TRUE);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1229
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1230 if (ctx->locked)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1231 squat_trie_unlock(ctx->trie);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1232
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1233 i_free(ctx);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1234 return ret;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1235 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1236
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1237 int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid,
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1238 const unsigned char *data, size_t size)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1239 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1240 const uint16_t *str;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1241 uint16_t buf[(BLOCK_SIZE-1)*2];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1242 unsigned int i, tmp_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1243
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1244 if (ctx->failed)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1245 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1246
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1247 t_push();
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1248 str = data_normalize(data, size, ctx->trie->buf);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1249
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1250 if (uid == ctx->prev_uid) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1251 /* @UNSAFE: continue from last block */
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1252 memcpy(buf, ctx->prev_added,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1253 sizeof(buf[0]) * ctx->prev_added_size);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1254 tmp_size = I_MIN(size, BLOCK_SIZE-1);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1255 memcpy(buf + ctx->prev_added_size, str,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1256 sizeof(buf[0]) * tmp_size);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1257
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1258 tmp_size += ctx->prev_added_size;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1259 for (i = 0; i + BLOCK_SIZE <= tmp_size; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1260 if (block_want_add(buf+i)) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1261 if (trie_insert_node(ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1262 &ctx->trie->root,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1263 buf + i, uid, 1) < 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1264 t_pop();
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1265 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1266 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1267 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1268 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1269
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1270 if (size < BLOCK_SIZE) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1271 ctx->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1272 memcpy(ctx->prev_added, buf + i,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1273 sizeof(buf[0]) * ctx->prev_added_size);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1274 t_pop();
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1275 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1276 }
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1277 } else if (squat_uidlist_want_flush(ctx->trie->uidlist)) {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1278 if (squat_trie_build_flush(ctx, FALSE) < 0) {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1279 ctx->failed = TRUE;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1280 t_pop();
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1281 return -1;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1282 }
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1283 str = data_normalize(data, size, ctx->trie->buf);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1284 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1285
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1286 ctx->prev_uid = uid;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1287 for (i = 0; i + BLOCK_SIZE <= size; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1288 if (block_want_add(str+i)) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1289 if (trie_insert_node(ctx, &ctx->trie->root,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1290 str + i, uid, 1) < 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1291 t_pop();
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1292 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1293 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1294 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1295 }
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1296 ctx->prev_added_size = I_MIN(size, BLOCK_SIZE-1);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1297 memcpy(ctx->prev_added, str + i,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1298 sizeof(ctx->prev_added[0]) * ctx->prev_added_size);
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1299
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1300 t_pop();
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1301 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1302 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1303
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1304 static void node_pack_children(buffer_t *buf, struct trie_node **children,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1305 unsigned int count)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1306 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1307 unsigned int i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1308 size_t child_idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1309 uint32_t idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1310
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1311 for (i = 0; i < count; i++) {
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1312 if (children[i] == NULL)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1313 continue;
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1314
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1315 child_idx = POINTER_CAST_TO(children[i], size_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1316 if ((child_idx & 1) != 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1317 idx = child_idx & ~1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1318 else
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1319 idx = children[i]->file_offset;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1320 buffer_append(buf, &idx, sizeof(idx));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1321 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1322 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1323
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1324 static void node_pack(buffer_t *buf, struct trie_node *node)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1325 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1326 uint8_t *chars8 = NODE_CHARS8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1327 uint16_t *chars16 = NODE_CHARS16(node, 0);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1328 struct trie_node **children8 = NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1329 struct trie_node **children16 = NODE_CHILDREN16(node, 0);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1330
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1331 buffer_set_used_size(buf, 0);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1332 _squat_trie_pack_num(buf, (node->chars_8bit_count << 1) |
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1333 (node->chars_16bit_count > 0 ? 1 : 0));
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1334 buffer_append(buf, chars8, node->chars_8bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1335 node_pack_children(buf, children8, node->chars_8bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1336
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1337 if (node->chars_16bit_count > 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1338 _squat_trie_pack_num(buf, node->chars_16bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1339 buffer_append(buf, chars16,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1340 sizeof(*chars16) * node->chars_16bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1341 node_pack_children(buf, children16, node->chars_16bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1342 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1343 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1344
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1345 static int node_leaf_finish(struct squat_trie *trie, struct trie_node *node)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1346 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1347 uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1348 uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1349 unsigned int i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1350
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1351 for (i = 0; i < node->chars_8bit_count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1352 if (squat_uidlist_finish_list(trie->uidlist, &idx8[i]) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1353 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1354 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1355 for (i = 0; i < node->chars_16bit_count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1356 if (squat_uidlist_finish_list(trie->uidlist, &idx16[i]) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1357 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1358 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1359 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1360 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1361
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1362 static void node_pack_leaf(buffer_t *buf, struct trie_node *node)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1363 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1364 uint8_t *chars8 = NODE_CHARS8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1365 uint16_t *chars16 = NODE_CHARS16(node, BLOCK_SIZE);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1366 uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1367 uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1368
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1369 buffer_set_used_size(buf, 0);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1370 _squat_trie_pack_num(buf, (node->chars_8bit_count << 1) |
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1371 (node->chars_16bit_count > 0 ? 1 : 0));
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1372 buffer_append(buf, chars8, node->chars_8bit_count);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1373 buffer_append(buf, idx8, sizeof(*idx8) * node->chars_8bit_count);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1374
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1375 if (node->chars_16bit_count > 0) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1376 _squat_trie_pack_num(buf, node->chars_16bit_count);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1377 buffer_append(buf, chars16,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1378 sizeof(*chars16) * node->chars_16bit_count);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1379 buffer_append(buf, idx16,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1380 sizeof(*idx16) * node->chars_16bit_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1381 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1382 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1383
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1384 static int
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1385 trie_write_node_children(struct squat_trie_build_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1386 unsigned int level, struct trie_node **children,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1387 unsigned int count)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1388 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1389 unsigned int i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1390 size_t child_idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1391
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1392 for (i = 0; i < count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1393 child_idx = POINTER_CAST_TO(children[i], size_t);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1394 if ((child_idx & 1) == 0 && children[i] != NULL) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1395 if (trie_write_node(ctx, level, children[i]) < 0)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1396 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1397 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1398 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1399 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1400 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1401
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1402 static int trie_write_node(struct squat_trie_build_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1403 unsigned int level, struct trie_node *node)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1404 {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1405 struct squat_trie *trie = ctx->trie;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1406 uoff_t offset;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1407
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1408 if (level < BLOCK_SIZE) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1409 struct trie_node **children8 = NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1410 struct trie_node **children16 = NODE_CHILDREN16(node, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1411
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1412 if (trie_write_node_children(ctx, level + 1,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1413 children8,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1414 node->chars_8bit_count) < 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1415 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1416 if (trie_write_node_children(ctx, level + 1,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1417 children16,
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1418 node->chars_16bit_count) < 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1419 return -1;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1420 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1421
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1422 if (!node->modified)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1423 return 0;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1424
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1425 if (level < BLOCK_SIZE) {
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1426 if (level <= FAST_8BIT_LEVEL)
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1427 squat_trie_compress_chars8(node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1428 node_pack(trie->buf, node);
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1429 } else {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1430 if (node_leaf_finish(trie, node) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1431 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1432
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1433 node_pack_leaf(trie->buf, node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1434 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1435
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1436 offset = ctx->output->offset;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1437 if ((offset & 1) != 0) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1438 o_stream_send(ctx->output, "", 1);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1439 offset++;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1440 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1441
5068
65a4e1e61555 Some bugfixes. Moved header struct to a separate file.
Timo Sirainen <tss@iki.fi>
parents: 5010
diff changeset
1442 if (node->resized && node->orig_size < trie->buf->used) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1443 /* append to end of file. the parent node is written later. */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1444 node->file_offset = offset;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1445 o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1446
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1447 ctx->deleted_space += node->orig_size;
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1448 } else {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1449 /* overwrite node's contents */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1450 i_assert(node->file_offset != 0);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1451 i_assert(trie->buf->used <= node->orig_size);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1452
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1453 /* FIXME: write only the indexes if !node->resized */
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1454 o_stream_seek(ctx->output, node->file_offset);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1455 o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1456 o_stream_seek(ctx->output, offset);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1457
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1458 ctx->deleted_space += trie->buf->used - node->orig_size;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1459 }
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1460
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1461 ctx->modified = TRUE;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1462 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1463 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1464
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1465 static int
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1466 trie_nodes_write(struct squat_trie_build_context *ctx, uint32_t *uidvalidity_r)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1467 {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1468 struct squat_trie *trie = ctx->trie;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1469 struct squat_trie_header hdr;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1470
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1471 hdr = *trie->hdr;
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1472 if (lseek(trie->fd, hdr.used_file_size, SEEK_SET) < 0) {
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1473 squat_trie_set_syscall_error(trie, "lseek()");
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1474 return -1;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1475 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1476
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1477 ctx->output = o_stream_create_file(trie->fd, default_pool, 0, FALSE);
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1478 if (hdr.used_file_size == 0) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1479 o_stream_send(ctx->output, &hdr, sizeof(hdr));
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1480 ctx->modified = TRUE;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1481 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1482
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1483 ctx->deleted_space = 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1484 if (trie_write_node(ctx, 1, trie->root) < 0)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1485 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1486
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1487 if (ctx->modified) {
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1488 /* update the header */
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1489 hdr.root_offset = trie->root->file_offset;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1490 hdr.used_file_size = ctx->output->offset;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1491 hdr.deleted_space += ctx->deleted_space;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1492 hdr.node_count = ctx->node_count;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1493 hdr.modify_counter++;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1494 o_stream_seek(ctx->output, 0);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1495 o_stream_send(ctx->output, &hdr, sizeof(hdr));
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1496 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1497
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1498 o_stream_destroy(&ctx->output);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1499 *uidvalidity_r = hdr.uidvalidity;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1500 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1501 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1502
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1503 static bool squat_trie_need_compress(struct squat_trie *trie,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1504 unsigned int current_message_count)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1505 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1506 uint32_t max_del_space;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1507
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1508 if (trie->hdr->used_file_size >= TRIE_COMPRESS_MIN_SIZE) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1509 /* see if we've reached the max. deleted space in file */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1510 max_del_space = trie->hdr->used_file_size / 100 *
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1511 TRIE_COMPRESS_PERCENTAGE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1512 if (trie->hdr->deleted_space > max_del_space)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1513 return TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1514 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1515
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1516 return squat_uidlist_need_compress(trie->uidlist,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1517 current_message_count);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1518 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1519
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1520 static int
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1521 squat_trie_build_flush(struct squat_trie_build_context *ctx, bool finish)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1522 {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1523 struct squat_trie *trie = ctx->trie;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1524 uint32_t uidvalidity;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1525
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1526 if (trie->root == NULL) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1527 /* nothing changed */
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1528 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1529 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1530
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1531 if (trie->corrupted)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1532 return -1;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1533
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1534 if (trie_nodes_write(ctx, &uidvalidity) < 0)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1535 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1536 if (squat_uidlist_flush(trie->uidlist, uidvalidity) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1537 return -1;
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1538
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1539 squat_trie_unmap(trie);
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1540 if (squat_trie_map(trie) <= 0)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1541 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1542
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
1543 if (squat_trie_need_compress(trie, (unsigned int)-1)) {
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1544 if (ctx->locked && finish) {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1545 squat_trie_unlock(ctx->trie);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1546 ctx->locked = FALSE;
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1547 }
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1548
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1549 if (squat_trie_compress(trie, NULL) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1550 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1551 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1552 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1553 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1554
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1555 static void squat_trie_compress_chars8(struct trie_node *node)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1556 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1557 uint8_t *chars = NODE_CHARS8(node);
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1558 uint16_t *chars16, *old_chars16 = NODE_CHARS16(node, 0);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1559 struct trie_node **child_src = NODE_CHILDREN8(node);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1560 struct trie_node **child_dest;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1561 unsigned int i, j, old_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1562
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1563 old_count = node->chars_8bit_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1564 for (i = j = 0; i < old_count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1565 if (child_src[i] != NULL)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1566 chars[j++] = chars[i];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1567 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1568
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1569 node->chars_8bit_count = j;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1570 child_dest = NODE_CHILDREN8(node);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1571
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1572 for (i = j = 0; i < old_count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1573 if (child_src[i] != NULL)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1574 child_dest[j++] = child_src[i];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1575 }
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1576
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1577 if (node->chars_16bit_count > 0) {
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1578 chars16 = NODE_CHARS16(node, 0);
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1579 memmove(chars16, old_chars16,
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1580 ALIGN(sizeof(*chars16) * node->chars_16bit_count) +
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1581 sizeof(*child_src) * node->chars_16bit_count);
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1582 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1583 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1584
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1585 static void squat_trie_compress_chars16(struct trie_node *node)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1586 {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1587 uint16_t *chars = NODE_CHARS16(node, 0);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1588 struct trie_node **child_src = NODE_CHILDREN16(node, 0);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1589 struct trie_node **child_dest;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1590 unsigned int i, j, old_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1591
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1592 old_count = node->chars_16bit_count;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1593 for (i = j = 0; i < old_count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1594 if (child_src[i] != NULL)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1595 chars[j++] = chars[i];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1596 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1597
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1598 node->chars_16bit_count = j;
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1599 child_dest = NODE_CHILDREN16(node, 0);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1600
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1601 for (i = j = 0; i < old_count; i++) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1602 if (child_src[i] != NULL)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1603 child_dest[j++] = child_src[i];
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1604 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1605 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1606
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1607 static void squat_trie_compress_leaf_chars8(struct trie_node *node)
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1608 {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1609 uint8_t *chars = NODE_CHARS8(node);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1610 uint32_t *child_src = (uint32_t *)NODE_CHILDREN8(node);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1611 uint32_t *child_dest;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1612 unsigned int i, j, old_count;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1613
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1614 old_count = node->chars_8bit_count;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1615 for (i = j = 0; i < old_count; i++) {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1616 if (child_src[i] != 0)
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1617 chars[j++] = chars[i];
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1618 }
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1619
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1620 node->chars_8bit_count = j;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1621 child_dest = (uint32_t *)NODE_CHILDREN8(node);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1622
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1623 for (i = j = 0; i < old_count; i++) {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1624 if (child_src[i] != 0)
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1625 child_dest[j++] = child_src[i];
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1626 }
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1627 }
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1628
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1629 static void squat_trie_compress_leaf_chars16(struct trie_node *node)
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1630 {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1631 uint16_t *chars = NODE_CHARS16(node, BLOCK_SIZE);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1632 uint32_t *child_src = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1633 uint32_t *child_dest;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1634 unsigned int i, j, old_count;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1635
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1636 old_count = node->chars_16bit_count;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1637 for (i = j = 0; i < old_count; i++) {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1638 if (child_src[i] != 0)
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1639 chars[j++] = chars[i];
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1640 }
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1641
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1642 node->chars_16bit_count = j;
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1643 child_dest = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1644
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1645 for (i = j = 0; i < old_count; i++) {
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1646 if (child_src[i] != 0)
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1647 child_dest[j++] = child_src[i];
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1648 }
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1649 }
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1650
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1651 static int
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1652 squat_trie_compress_children(struct squat_trie_compress_context *ctx,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1653 struct trie_node **children, unsigned int count,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1654 unsigned int level)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1655 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1656 struct trie_node *child_node;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1657 size_t child_idx;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1658 unsigned int i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1659 int ret = 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1660 bool need_char_compress = FALSE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1661
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1662 for (i = 0; i < count; i++) {
4901
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1663 if (children[i] == NULL) {
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1664 need_char_compress = TRUE;
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1665 continue;
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1666 }
5e604d6f391e Only ASCII texts were actually indexed. Now UTF-8 input is properly
Timo Sirainen <tss@iki.fi>
parents: 4898
diff changeset
1667
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1668 child_idx = POINTER_CAST_TO(children[i], size_t);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1669 i_assert((child_idx & 1) != 0);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1670 child_idx &= ~1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1671
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1672 if (trie_map_node(ctx->trie, child_idx, level, &child_node) < 0)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1673 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1674
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1675 ret = squat_trie_compress_node(ctx, child_node, level);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1676 if (child_node->file_offset != 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1677 children[i] = POINTER_CAST(child_node->file_offset | 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1678 else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1679 children[i] = NULL;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1680 need_char_compress = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1681 }
4887
463a4ebba685 Another try at memory reduction / leak fixes
Timo Sirainen <tss@iki.fi>
parents: 4886
diff changeset
1682 i_free(child_node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1683
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1684 if (ret < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1685 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1686 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1687 return need_char_compress ? 0 : 1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1688 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1689
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1690 static int
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1691 squat_trie_compress_leaf_uidlist(struct squat_trie_compress_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1692 struct trie_node *node)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1693 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1694 uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1695 uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1696 unsigned int i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1697 int ret;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1698 bool compress_chars = FALSE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1699
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1700 for (i = 0; i < node->chars_8bit_count; i++) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1701 ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx8[i]);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1702 if (ret < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1703 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1704 if (ret == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1705 idx8[i] = 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1706 compress_chars = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1707 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1708 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1709 if (compress_chars) {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1710 squat_trie_compress_leaf_chars8(node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1711 compress_chars = FALSE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1712 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1713 for (i = 0; i < node->chars_16bit_count; i++) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1714 ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx16[i]);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1715 if (ret < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1716 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1717 if (ret == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1718 idx16[i] = 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1719 compress_chars = TRUE;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1720 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1721 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1722 if (compress_chars) {
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1723 squat_trie_compress_leaf_chars16(node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1724 node->chars_16bit_count = i;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1725 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1726 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1727 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1728
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1729 static int
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1730 squat_trie_compress_node(struct squat_trie_compress_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1731 struct trie_node *node, unsigned int level)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1732 {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1733 struct squat_trie *trie = ctx->trie;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1734 int ret;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1735
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1736 if (level == BLOCK_SIZE) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1737 if (squat_trie_compress_leaf_uidlist(ctx, node))
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1738 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1739
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1740 if (node->chars_8bit_count == 0 &&
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1741 node->chars_16bit_count == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1742 /* everything expunged */
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1743 ctx->node_count--;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1744 node->file_offset = 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1745 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1746 }
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1747 node_pack_leaf(trie->buf, node);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1748 } else {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1749 struct trie_node **children8 = NODE_CHILDREN8(node);
4890
cb3b4153136c 64bit and big endian fixes
Timo Sirainen <tss@iki.fi>
parents: 4887
diff changeset
1750 struct trie_node **children16 = NODE_CHILDREN16(node, 0);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1751
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1752 if ((ret = squat_trie_compress_children(ctx, children8,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1753 node->chars_8bit_count,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1754 level + 1)) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1755 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1756 if (ret == 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1757 squat_trie_compress_chars8(node);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1758 if ((ret = squat_trie_compress_children(ctx, children16,
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1759 node->chars_16bit_count,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1760 level + 1)) < 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1761 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1762 if (ret == 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1763 squat_trie_compress_chars16(node);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1764
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1765 if (node->chars_8bit_count == 0 &&
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1766 node->chars_16bit_count == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1767 /* everything expunged */
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1768 ctx->node_count--;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1769 node->file_offset = 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1770 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1771 }
4898
07a038b57946 Optimization.
Timo Sirainen <tss@iki.fi>
parents: 4897
diff changeset
1772
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1773 node_pack(trie->buf, node);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1774 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1775
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1776 if ((ctx->output->offset & 1) != 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1777 o_stream_send(ctx->output, "", 1);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1778 node->file_offset = ctx->output->offset;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1779
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1780 o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1781 return 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1782 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1783
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1784 static int squat_trie_compress_init(struct squat_trie_compress_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1785 struct squat_trie *trie)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1786 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1787 struct squat_trie_header hdr;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1788
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1789 memset(ctx, 0, sizeof(*ctx));
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1790
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1791 ctx->tmp_path = t_strconcat(trie->filepath, ".tmp", NULL);
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1792 ctx->fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1793 if (ctx->fd == -1) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1794 i_error("open(%s, O_CREAT) failed: %m", ctx->tmp_path);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1795 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1796 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1797
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1798 ctx->trie = trie;
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1799 ctx->output = o_stream_create_file(ctx->fd, default_pool, 0, FALSE);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1800 ctx->node_count = trie->hdr->node_count;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1801
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1802 /* write a dummy header first */
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1803 memset(&hdr, 0, sizeof(hdr));
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1804 o_stream_send(ctx->output, &hdr, sizeof(hdr));
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1805 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1806 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1807
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1808 static void
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1809 squat_trie_compress_write_header(struct squat_trie_compress_context *ctx,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1810 struct trie_node *root_node)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1811 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1812 struct squat_trie_header hdr;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1813
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1814 memset(&hdr, 0, sizeof(hdr));
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1815 hdr.version = SQUAT_TRIE_VERSION;
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1816 hdr.uidvalidity = ctx->trie->uidvalidity;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1817 hdr.root_offset = root_node->file_offset;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1818 hdr.used_file_size = ctx->output->offset;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1819 hdr.node_count = ctx->node_count;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1820
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1821 o_stream_seek(ctx->output, 0);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1822 o_stream_send(ctx->output, &hdr, sizeof(hdr));
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1823 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1824
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1825 int squat_trie_compress(struct squat_trie *trie,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1826 const ARRAY_TYPE(seq_range) *existing_uids)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1827 {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1828 struct squat_trie_compress_context ctx;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1829 struct trie_node *node;
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1830 struct file_lock *file_lock = NULL;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1831 unsigned int orig_lock_count;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1832 int ret;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1833
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1834 orig_lock_count = trie->lock_count;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1835 if (squat_trie_lock(trie, F_WRLCK) <= 0)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1836 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1837
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1838 if (squat_trie_compress_init(&ctx, trie) < 0) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1839 squat_trie_unlock(trie);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1840 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1841 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1842
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1843 ret = trie_map_node(trie, trie->hdr->root_offset, 1, &node);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1844 if (ret == 0) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1845 /* do the compression */
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1846 ctx.uidlist_ctx = squat_uidlist_compress_begin(trie->uidlist,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1847 existing_uids);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1848 if ((ret = squat_trie_compress_node(&ctx, node, 1)) < 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1849 squat_uidlist_compress_rollback(&ctx.uidlist_ctx);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1850 else {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1851 ret = squat_uidlist_compress_commit(&ctx.uidlist_ctx);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1852
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1853 squat_trie_compress_write_header(&ctx, node);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1854 }
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1855 }
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1856
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1857 if (ret == 0 && orig_lock_count > 0) {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1858 /* lock the file before renaming so we can keep it locked. */
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1859 if (squat_trie_file_lock(trie, ctx.fd, ctx.tmp_path, F_WRLCK,
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1860 &file_lock) <= 0)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1861 ret = -1;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1862 }
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1863
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1864 if (ret == 0) {
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1865 if (rename(ctx.tmp_path, trie->filepath) < 0) {
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1866 i_error("rename(%s, %s) failed: %m",
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1867 ctx.tmp_path, trie->filepath);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1868 ret = -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1869 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1870 }
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1871
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1872 o_stream_destroy(&ctx.output);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1873 squat_trie_unlock(trie);
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1874
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1875 if (ret < 0) {
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1876 if (file_lock != NULL)
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1877 file_lock_free(&file_lock);
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1878 (void)close(ctx.fd);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1879 (void)unlink(ctx.tmp_path);
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1880 } else {
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1881 trie_file_close(trie);
4886
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1882 trie_file_open_fd(trie, ctx.fd);
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1883
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1884 trie->file_lock = file_lock;
ae2114f11a0e Memory leak fixes. Also when building a large mailbox flush once in a while
Timo Sirainen <tss@iki.fi>
parents: 4883
diff changeset
1885 if (squat_trie_map(trie) <= 0)
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1886 return -1;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1887 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1888 return ret;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1889 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1890
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1891 int squat_trie_mark_having_expunges(struct squat_trie *trie,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1892 const ARRAY_TYPE(seq_range) *existing_uids,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1893 unsigned int current_message_count)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1894 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1895 bool compress;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1896 int ret;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1897
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1898 if ((ret = squat_trie_lock(trie, F_RDLCK)) <= 0)
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1899 return ret;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1900 compress = squat_trie_need_compress(trie, current_message_count);
4883
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1901 squat_trie_unlock(trie);
d8adbe93c969 Added support for mmap_disable=yes and several other fixes.
Timo Sirainen <tss@iki.fi>
parents: 4879
diff changeset
1902
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1903 ret = squat_uidlist_mark_having_expunges(trie->uidlist, compress);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1904
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1905 if (compress)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1906 ret = squat_trie_compress(trie, existing_uids);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1907 return ret;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1908 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1909
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1910 size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1911 {
4879
Timo Sirainen <tss@iki.fi>
parents: 4878
diff changeset
1912 *count_r = trie->hdr == NULL ? 0 : trie->hdr->node_count;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1913
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1914 return trie->mmap_size;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1915 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1916
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1917 static int squat_trie_lookup_init(struct squat_trie *trie, const char *str,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1918 const uint16_t **data_r, unsigned int *len_r)
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1919 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1920 const uint16_t *data;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1921 unsigned int len = strlen(str);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1922
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1923 if (len < BLOCK_SIZE)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1924 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1925
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1926 data = data_normalize(str, len, trie->buf);
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1927
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1928 /* skip the blocks that can't exist */
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1929 while (!block_want_add(data + len - BLOCK_SIZE)) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1930 if (--len < BLOCK_SIZE)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1931 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1932 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1933
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1934 if (squat_trie_lock(trie, F_RDLCK) <= 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1935 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1936
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1937 *data_r = data;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1938 *len_r = len;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1939 return 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1940 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1941
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1942 static int
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1943 squat_trie_lookup_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1944 const uint16_t *data, unsigned int len)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1945 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1946 uint32_t list;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1947
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1948 list = trie_lookup_node(trie, trie->root, data + len - BLOCK_SIZE, 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1949 if (list == 0)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1950 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1951
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1952 if (squat_uidlist_get(trie->uidlist, list, result) < 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1953 squat_trie_set_corrupted(trie, "uidlist offset broken");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1954 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1955 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1956 while (len > BLOCK_SIZE) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1957 len--;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1958
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1959 if (!block_want_add(data + len - BLOCK_SIZE))
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1960 continue;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1961
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1962 list = trie_lookup_node(trie, trie->root,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1963 data + len - BLOCK_SIZE, 1);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1964 if (list == 0) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1965 array_clear(result);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1966 return 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1967 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1968 if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1969 squat_trie_set_corrupted(trie, "uidlist offset broken");
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1970 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1971 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1972 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1973 return array_count(result) > 0 ? 1 : 0;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1974 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1975
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1976 int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1977 const char *str)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1978 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1979 const uint16_t *data;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1980 unsigned int len;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1981 int ret;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1982
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1983 if (squat_trie_lookup_init(trie, str, &data, &len) < 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1984 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1985
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1986 ret = squat_trie_lookup_locked(trie, result, data, len);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1987 squat_trie_unlock(trie);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1988 return ret;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1989 }
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1990
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1991 static int
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1992 squat_trie_filter_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1993 const uint16_t *data, unsigned int len)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1994 {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1995 uint32_t list;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1996
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1997 for (; len >= BLOCK_SIZE; len--) {
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1998 if (!block_want_add(data + len - BLOCK_SIZE))
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
1999 continue;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2000
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2001 list = trie_lookup_node(trie, trie->root,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2002 data + len - BLOCK_SIZE, 1);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2003 if (list == 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2004 array_clear(result);
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2005 return 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2006 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2007 if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2008 squat_trie_set_corrupted(trie, "uidlist offset broken");
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2009 return -1;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2010 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2011 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2012 return array_count(result) > 0 ? 1 : 0;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2013 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2014
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2015 int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2016 const char *str)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2017 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2018 const uint16_t *data;
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2019 unsigned int len;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2020 int ret;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2021
4878
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2022 if (squat_trie_lookup_init(trie, str, &data, &len) < 0)
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2023 return -1;
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2024 ret = squat_trie_filter_locked(trie, result, data, len);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2025 squat_trie_unlock(trie);
88a91d9a867b Fixes. Should be pretty much working now.
Timo Sirainen <tss@iki.fi>
parents: 4855
diff changeset
2026 return ret;
4855
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2027 }
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2028
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2029 struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie)
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2030 {
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2031 return trie->uidlist;
5bc593f1a8f6 Added "squat" full text search indexer backend. Its name and basic ideas
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2032 }