Mercurial > dovecot > core-2.2
annotate src/lib-fts/fts-language.c @ 22656:1789bf2a1e01
director: Make sure HOST-RESET-USERS isn't used with max_moving_users=0
The reset command would just hang in that case. doveadm would never have
sent this, so this is just an extra sanity check.
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Sun, 05 Nov 2017 23:51:56 +0200 |
parents | 2e2563132d5f |
children | cb108f786fb4 |
rev | line source |
---|---|
21390
2e2563132d5f
Updated copyright notices to include the year 2017.
Stephan Bosch <stephan.bosch@dovecot.fi>
parents:
19552
diff
changeset
|
1 /* Copyright (c) 2014-2017 Dovecot authors, see the included COPYING file */ |
18414 | 2 |
3 #include "lib.h" | |
4 #include "array.h" | |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
5 #include "llist.h" |
18414 | 6 #include "fts-language.h" |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
7 |
18414 | 8 |
9 #ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H | |
10 # include <libexttextcat/textcat.h> | |
18426
50ef619ce58a
lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents:
18417
diff
changeset
|
11 #elif defined (HAVE_FTS_EXTTEXTCAT) |
18414 | 12 # include <textcat.h> |
13 #endif | |
14 | |
15 #ifndef TEXTCAT_RESULT_UNKNOWN /* old textcat.h has typos */ | |
16 # ifdef TEXTCAT_RESULT_UNKOWN | |
17 # define TEXTCAT_RESULT_UNKNOWN TEXTCAT_RESULT_UNKOWN | |
18 # endif | |
19 #endif | |
20 | |
21 #define DETECT_STR_MAX_LEN 200 | |
22 | |
/* A set of wanted languages together with the textcat detector state
   that belongs to it.

   pool            - pool the list itself is allocated from; unreferenced
                     in fts_language_list_deinit()
   languages       - pointers to the languages added with
                     fts_language_list_add()
   textcat_config  - copy of the "fts_language_config" setting, or NULL to
                     use the built-in default path
   textcat_datadir - copy of the "fts_language_data" setting, or NULL to
                     use the built-in default path
   textcat_handle  - handle returned by special_textcat_Init(); stays NULL
                     until the first detection initializes it
   textcat_failed  - set once textcat initialization has failed, so that
                     it is not attempted again for this list */
23 struct fts_language_list { | |
24 pool_t pool; | |
25 ARRAY_TYPE(fts_language) languages; | |
26 const char *textcat_config; | |
27 const char *textcat_datadir; | |
28 void *textcat_handle; | |
29 bool textcat_failed; | |
30 }; | |
31 | |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
/* Pool created by fts_languages_init(). The fts_languages array and the
   entries added by fts_language_register() are allocated from it. */
32 pool_t fts_languages_pool; |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
/* Pointers to every known language: the built-in ones appended by
   fts_languages_init() and any appended by fts_language_register(). */
33 ARRAY_TYPE(fts_language) fts_languages; |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
34 |
19373
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
35 /* ISO 639-1 alpha 2 codes for languages */ |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
36 const struct fts_language fts_languages_builtin [] = { |
19373
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
37 { "da" }, /* Danish */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
38 { "de" }, /* German */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
39 { "en" }, /* English */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
40 { "es" }, /* Spanish */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
41 { "fi" }, /* Finnish */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
42 { "fr" }, /* French */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
43 { "it" }, /* Italian */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
44 { "nl" }, /* Dutch */ |
19374
301d48ef7398
lib-fts: Add Norwegian.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19373
diff
changeset
|
45 { "no" }, /* Both Bokmal and Nynorsk are detected as Norwegian */ |
19373
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
46 { "pt" }, /* Portuguese */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
47 { "ro" }, /* Romanian */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
48 { "ru" }, /* Russian */ |
f31fadf622f2
lib-fts: Add comment to language names.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19372
diff
changeset
|
49 { "sv" } /* Swedish */ |
18414 | 50 }; |
51 | |
52 const struct fts_language fts_language_data = { | |
53 "data" | |
54 }; | |
55 | |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
56 void fts_languages_init(void) |
18414 | 57 { |
58 unsigned int i; | |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
59 const struct fts_language *lp; |
18414 | 60 |
19379
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
61 fts_languages_pool = pool_alloconly_create("fts_language", |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
62 sizeof(fts_languages_builtin)); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
63 p_array_init(&fts_languages, fts_languages_pool, |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
64 N_ELEMENTS(fts_languages_builtin)); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
65 for (i = 0; i < N_ELEMENTS(fts_languages_builtin); i++){ |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
66 lp = &fts_languages_builtin[i]; |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
67 array_append(&fts_languages, &lp, 1); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
68 } |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
69 } |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
70 |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
71 void fts_languages_deinit(void) |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
72 { |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
73 if (fts_languages_pool != NULL) |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
74 pool_unref(&fts_languages_pool); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
75 } |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
76 |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
77 void fts_language_register(const char *name) |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
78 { |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
79 struct fts_language *lang; |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
80 |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
81 if (fts_language_find(name) != NULL) |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
82 return; |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
83 |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
84 lang = p_new(fts_languages_pool, struct fts_language, 1); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
85 lang->name = p_strdup(fts_languages_pool, name); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
86 array_append(&fts_languages, (const struct fts_language **)&lang, 1); |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
87 } |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
88 |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
89 const struct fts_language *fts_language_find(const char *name) |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
90 { |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
91 const struct fts_language *const *langp = NULL; |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
92 |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
93 array_foreach(&fts_languages, langp) { |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
94 if (strcmp((*langp)->name, name) == 0) |
92aa48461150
lib-fts: Added fts_language_register() to register more languages in plugins.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
19374
diff
changeset
|
95 return *langp; |
18414 | 96 } |
97 return NULL; | |
98 } | |
99 | |
18608
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
100 int fts_language_list_init(const char *const *settings, |
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
101 struct fts_language_list **list_r, |
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
102 const char **error_r) |
18414 | 103 { |
104 struct fts_language_list *lp; | |
105 pool_t pool; | |
106 unsigned int i; | |
18608
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
107 const char *conf = NULL, *data = NULL; |
18414 | 108 |
109 for (i = 0; settings[i] != NULL; i += 2) { | |
110 const char *key = settings[i], *value = settings[i+1]; | |
111 | |
18608
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
112 if (strcmp(key, "fts_language_config") == 0) |
18414 | 113 conf = value; |
18608
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
114 else if (strcmp(key, "fts_language_data") == 0) |
18414 | 115 data = value; |
18608
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
116 else { |
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
117 *error_r = t_strdup_printf("Unknown setting: %s", key); |
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
118 return -1; |
18414 | 119 } |
120 } | |
121 | |
122 pool = pool_alloconly_create("fts_language_list", 128); | |
123 lp = p_new(pool, struct fts_language_list, 1); | |
124 lp->pool = pool; | |
125 if (conf != NULL) | |
126 lp->textcat_config = p_strdup(pool, conf); | |
127 else | |
128 lp->textcat_config = NULL; | |
129 if (data != NULL) | |
130 lp->textcat_datadir = p_strdup(pool, data); | |
131 else | |
132 lp->textcat_datadir = NULL; | |
133 p_array_init(&lp->languages, pool, 32); | |
18608
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
134 *list_r = lp; |
1fc7ae2640b0
lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents:
18426
diff
changeset
|
135 return 0; |
18414 | 136 } |
137 | |
138 void fts_language_list_deinit(struct fts_language_list **list) | |
139 { | |
140 struct fts_language_list *lp = *list; | |
141 | |
142 *list = NULL; | |
18426
50ef619ce58a
lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents:
18417
diff
changeset
|
143 #ifdef HAVE_FTS_EXTTEXTCAT |
18414 | 144 if (lp->textcat_handle != NULL) |
145 textcat_Done(lp->textcat_handle); | |
146 #endif | |
147 pool_unref(&lp->pool); | |
148 } | |
149 | |
150 static const struct fts_language * | |
151 fts_language_list_find(struct fts_language_list *list, const char *name) | |
152 { | |
153 const struct fts_language *const *langp; | |
154 | |
155 array_foreach(&list->languages, langp) { | |
156 if (strcmp((*langp)->name, name) == 0) | |
157 return *langp; | |
158 } | |
159 return NULL; | |
160 } | |
161 | |
162 void fts_language_list_add(struct fts_language_list *list, | |
163 const struct fts_language *lang) | |
164 { | |
165 i_assert(fts_language_list_find(list, lang->name) == NULL); | |
166 array_append(&list->languages, &lang, 1); | |
167 } | |
168 | |
169 bool fts_language_list_add_names(struct fts_language_list *list, | |
170 const char *names, | |
171 const char **unknown_name_r) | |
172 { | |
173 const char *const *langs; | |
174 const struct fts_language *lang; | |
175 | |
176 for (langs = t_strsplit_spaces(names, ", "); *langs != NULL; langs++) { | |
177 lang = fts_language_find(*langs); | |
178 if (lang == NULL) { | |
179 /* unknown language */ | |
180 *unknown_name_r = *langs; | |
181 return FALSE; | |
182 } | |
183 if (fts_language_list_find(list, lang->name) == NULL) | |
184 fts_language_list_add(list, lang); | |
185 } | |
186 return TRUE; | |
187 } | |
188 | |
189 const ARRAY_TYPE(fts_language) * | |
190 fts_language_list_get_all(struct fts_language_list *list) | |
191 { | |
192 return &list->languages; | |
193 } | |
194 | |
195 const struct fts_language * | |
196 fts_language_list_get_first(struct fts_language_list *list) | |
197 { | |
198 const struct fts_language *const *langp; | |
199 | |
200 langp = array_idx(&list->languages, 0); | |
201 return *langp; | |
202 } | |
203 | |
18426
50ef619ce58a
lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents:
18417
diff
changeset
|
#ifdef HAVE_FTS_EXTTEXTCAT
/* Find the first textcat candidate whose language is in list.

   candp holds candp_len candidates in the order textcat returned them.
   Returns TRUE with *lang_r set to the list entry for the first candidate
   that matches. Returns FALSE when none matches; *lang_r is then NULL if
   at least one candidate was examined. */
static bool fts_language_match_lists(struct fts_language_list *list,
				     candidate_t *candp, int candp_len,
				     const struct fts_language **lang_r)
{
	int idx = 0;

	while (idx < candp_len) {
		/* name is <lang>-<optional country or characterset>-<encoding>
		   eg, fi--utf8 or pt-PT-utf8, so keep only the part before
		   the first '-' */
		const char *code = t_strcut(candp[idx].name, '-');

		/* For Norwegian we treat both bokmal and nynorsk as "no". */
		if (strcmp(code, "nb") == 0 || strcmp(code, "nn") == 0)
			code = "no";

		*lang_r = fts_language_list_find(list, code);
		if (*lang_r != NULL)
			return TRUE;
		idx++;
	}
	return FALSE;
}
#endif
225 | |
18426
50ef619ce58a
lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents:
18417
diff
changeset
|
#ifdef HAVE_FTS_EXTTEXTCAT
/* Create the textcat handle for list if it does not exist yet.

   The list's textcat_config and textcat_datadir are used when set;
   otherwise the paths default to TEXTCAT_DATADIR"/fpdb.conf" and
   TEXTCAT_DATADIR"/". Returns 0 when a handle is available. Returns -1 on
   failure; the failure is logged once and remembered in textcat_failed,
   so later calls fail immediately without retrying. */
static int fts_language_textcat_init(struct fts_language_list *list)
{
	const char *config_path = TEXTCAT_DATADIR"/fpdb.conf";
	const char *data_dir = TEXTCAT_DATADIR"/";

	if (list->textcat_handle != NULL)
		return 0;
	if (list->textcat_failed)
		return -1;

	/* explicit settings override the compiled-in defaults */
	if (list->textcat_config != NULL)
		config_path = list->textcat_config;
	if (list->textcat_datadir != NULL)
		data_dir = list->textcat_datadir;

	list->textcat_handle = special_textcat_Init(config_path, data_dir);
	if (list->textcat_handle != NULL) {
		/* The textcat minimum document size could be set here. It
		   currently defaults to 3. UTF8 is enabled by default. */
		return 0;
	}

	i_error("special_textcat_Init(%s, %s) failed",
		config_path, data_dir);
	list->textcat_failed = TRUE;
	return -1;
}
#endif
254 | |
255 static enum fts_language_result | |
256 fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED, | |
257 const unsigned char *text ATTR_UNUSED, | |
258 size_t size ATTR_UNUSED, | |
259 const struct fts_language **lang_r ATTR_UNUSED) | |
260 { | |
18426
50ef619ce58a
lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents:
18417
diff
changeset
|
261 #ifdef HAVE_FTS_EXTTEXTCAT |
18414 | 262 candidate_t *candp; /* textcat candidate result array pointer */ |
263 int cnt; | |
264 bool match = FALSE; | |
265 | |
266 if (fts_language_textcat_init(list) < 0) | |
267 return FTS_LANGUAGE_RESULT_ERROR; | |
268 | |
269 candp = textcat_GetClassifyFullOutput(list->textcat_handle); | |
270 if (candp == NULL) | |
271 i_fatal_status(FATAL_OUTOFMEM, "textcat_GetCLassifyFullOutput failed: malloc() returned NULL"); | |
272 cnt = textcat_ClassifyFull(list->textcat_handle, (const void *)text, | |
273 I_MIN(size, DETECT_STR_MAX_LEN), candp); | |
274 if (cnt > 0) { | |
275 T_BEGIN { | |
276 match = fts_language_match_lists(list, candp, cnt, lang_r); | |
277 } T_END; | |
278 textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp); | |
279 if (match) | |
280 return FTS_LANGUAGE_RESULT_OK; | |
281 else | |
282 return FTS_LANGUAGE_RESULT_UNKNOWN; | |
283 } else { | |
284 textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp); | |
285 switch (cnt) { | |
286 case TEXTCAT_RESULT_SHORT: | |
287 i_assert(size < DETECT_STR_MAX_LEN); | |
288 return FTS_LANGUAGE_RESULT_SHORT; | |
289 case TEXTCAT_RESULT_UNKNOWN: | |
290 return FTS_LANGUAGE_RESULT_UNKNOWN; | |
291 default: | |
292 i_unreached(); | |
293 } | |
294 } | |
295 #else | |
296 return FTS_LANGUAGE_RESULT_UNKNOWN; | |
297 #endif | |
298 } | |
299 | |
300 enum fts_language_result | |
301 fts_language_detect(struct fts_language_list *list, | |
302 const unsigned char *text ATTR_UNUSED, | |
303 size_t size ATTR_UNUSED, | |
304 const struct fts_language **lang_r) | |
305 { | |
306 i_assert(array_count(&list->languages) > 0); | |
307 | |
308 /* if there's only a single wanted language, return it always. */ | |
309 if (array_count(&list->languages) == 1) { | |
310 const struct fts_language *const *langp = | |
311 array_idx(&list->languages, 0); | |
312 *lang_r = *langp; | |
313 return FTS_LANGUAGE_RESULT_OK; | |
314 } | |
315 return fts_language_detect_textcat(list, text, size, lang_r); | |
316 } |