annotate src/lib-fts/fts-language.c @ 18608:1fc7ae2640b0

lib-fts: fts_language_list_init() API changed to return errors.
author Timo Sirainen <tss@iki.fi>
date Sat, 09 May 2015 19:14:07 +0300
parents 50ef619ce58a
children e3603730b2df
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
4 #include "array.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "fts-language.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6 #include "strfuncs.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
7 #include "llist.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
10 # include <libexttextcat/textcat.h>
18426
50ef619ce58a lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents: 18417
diff changeset
11 #elif defined (HAVE_FTS_EXTTEXTCAT)
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
12 # include <textcat.h>
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
13 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
14
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
15 #ifndef TEXTCAT_RESULT_UNKNOWN /* old textcat.h has typos */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
16 # ifdef TEXTCAT_RESULT_UNKOWN
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
17 # define TEXTCAT_RESULT_UNKNOWN TEXTCAT_RESULT_UNKOWN
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
18 # endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
19 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
20
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
21 #define DETECT_STR_MAX_LEN 200
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
22
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
23 struct fts_language_list {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
24 pool_t pool;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
25 ARRAY_TYPE(fts_language) languages;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
26 const char *textcat_config;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
27 const char *textcat_datadir;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
28 void *textcat_handle;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
29 bool textcat_failed;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
30 };
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
31
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
32 const struct fts_language fts_languages[] = {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
33 { "en" },
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
34 { "fi" },
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
35 { "fr" },
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
36 { "de" }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
37 };
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
38
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
39 const struct fts_language fts_language_data = {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
40 "data"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
41 };
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
42
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
43 const struct fts_language *fts_language_find(const char *name)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
44 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
45 unsigned int i;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
46
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
47 for (i = 0; i < N_ELEMENTS(fts_languages); i++) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
48 if (strcmp(fts_languages[i].name, name) == 0)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
49 return &fts_languages[i];
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
50 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
51 return NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
52 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
53
18608
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
54 int fts_language_list_init(const char *const *settings,
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
55 struct fts_language_list **list_r,
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
56 const char **error_r)
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
57 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
58 struct fts_language_list *lp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
59 pool_t pool;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
60 unsigned int i;
18608
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
61 const char *conf = NULL, *data = NULL;
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
62
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
63 for (i = 0; settings[i] != NULL; i += 2) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
64 const char *key = settings[i], *value = settings[i+1];
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
65
18608
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
66 if (strcmp(key, "fts_language_config") == 0)
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
67 conf = value;
18608
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
68 else if (strcmp(key, "fts_language_data") == 0)
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
69 data = value;
18608
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
70 else {
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
71 *error_r = t_strdup_printf("Unknown setting: %s", key);
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
72 return -1;
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
73 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
74 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
75
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
76 pool = pool_alloconly_create("fts_language_list", 128);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
77 lp = p_new(pool, struct fts_language_list, 1);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
78 lp->pool = pool;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
79 if (conf != NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
80 lp->textcat_config = p_strdup(pool, conf);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
81 else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
82 lp->textcat_config = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
83 if (data != NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
84 lp->textcat_datadir = p_strdup(pool, data);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
85 else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
86 lp->textcat_datadir = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
87 p_array_init(&lp->languages, pool, 32);
18608
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
88 *list_r = lp;
1fc7ae2640b0 lib-fts: fts_language_list_init() API changed to return errors.
Timo Sirainen <tss@iki.fi>
parents: 18426
diff changeset
89 return 0;
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
90 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
91
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
92 void fts_language_list_deinit(struct fts_language_list **list)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
93 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
94 struct fts_language_list *lp = *list;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
95
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
96 *list = NULL;
18426
50ef619ce58a lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents: 18417
diff changeset
97 #ifdef HAVE_FTS_EXTTEXTCAT
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
98 if (lp->textcat_handle != NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
99 textcat_Done(lp->textcat_handle);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
100 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
101 pool_unref(&lp->pool);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
102 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
103
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
104 static const struct fts_language *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
105 fts_language_list_find(struct fts_language_list *list, const char *name)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
106 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
107 const struct fts_language *const *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
108
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
109 array_foreach(&list->languages, langp) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
110 if (strcmp((*langp)->name, name) == 0)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
111 return *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
112 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
113 return NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
114 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
115
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
116 void fts_language_list_add(struct fts_language_list *list,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
117 const struct fts_language *lang)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
118 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
119 i_assert(fts_language_list_find(list, lang->name) == NULL);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
120 array_append(&list->languages, &lang, 1);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
121 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
122
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
123 bool fts_language_list_add_names(struct fts_language_list *list,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
124 const char *names,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
125 const char **unknown_name_r)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
126 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
127 const char *const *langs;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
128 const struct fts_language *lang;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
129
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
130 for (langs = t_strsplit_spaces(names, ", "); *langs != NULL; langs++) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
131 lang = fts_language_find(*langs);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
132 if (lang == NULL) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
133 /* unknown language */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
134 *unknown_name_r = *langs;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
135 return FALSE;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
136 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
137 if (fts_language_list_find(list, lang->name) == NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
138 fts_language_list_add(list, lang);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
139 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
140 return TRUE;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
141 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
142
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
143 const ARRAY_TYPE(fts_language) *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
144 fts_language_list_get_all(struct fts_language_list *list)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
145 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
146 return &list->languages;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
147 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
148
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
149 const struct fts_language *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
150 fts_language_list_get_first(struct fts_language_list *list)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
151 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
152 const struct fts_language *const *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
153
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
154 langp = array_idx(&list->languages, 0);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
155 return *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
156 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
157
18426
50ef619ce58a lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents: 18417
diff changeset
#ifdef HAVE_FTS_EXTTEXTCAT
/* Goes through the candp_len textcat candidates in order and looks up each
   one's language from the list. Returns TRUE and stores the language to
   *lang_r for the first candidate that is in the list. Returns FALSE if no
   candidate is in the list; *lang_r then holds the NULL result of the last
   lookup, if any lookup was done. */
static bool fts_language_match_lists(struct fts_language_list *list,
				     candidate_t *candp, int candp_len,
				     const struct fts_language **lang_r)
{
	int idx;

	for (idx = 0; idx < candp_len; idx++) {
		/* name is <lang>-<optional country or characterset>-<encoding>
		   eg, fi--utf8 or pt-PT-utf8. Only the part before the
		   first '-' is looked up. */
		const char *lang_name = t_strcut(candp[idx].name, '-');

		*lang_r = fts_language_list_find(list, lang_name);
		if (*lang_r != NULL)
			return TRUE;
	}
	return FALSE;
}
#endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
175
18426
50ef619ce58a lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents: 18417
diff changeset
#ifdef HAVE_FTS_EXTTEXTCAT
/* Creates the list's textcat handle if it doesn't exist yet. The config
   path and data directory come from the list's settings, falling back to
   TEXTCAT_DATADIR"/fpdb.conf" and TEXTCAT_DATADIR"/".

   Returns 0 if the handle exists or was created. Returns -1 if creating it
   failed now or on an earlier call; the failure is logged only when it
   happens and the initialization isn't attempted again. */
static int fts_language_textcat_init(struct fts_language_list *list)
{
	const char *config_path = TEXTCAT_DATADIR"/fpdb.conf";
	const char *data_dir = TEXTCAT_DATADIR"/";

	if (list->textcat_handle != NULL)
		return 0;
	if (list->textcat_failed)
		return -1;

	if (list->textcat_config != NULL)
		config_path = list->textcat_config;
	if (list->textcat_datadir != NULL)
		data_dir = list->textcat_datadir;

	list->textcat_handle = special_textcat_Init(config_path, data_dir);
	if (list->textcat_handle != NULL) {
		/* The textcat minimum document size could be set here. It
		   currently defaults to 3. UTF8 is enabled by default. */
		return 0;
	}

	i_error("special_textcat_Init(%s, %s) failed",
		config_path, data_dir);
	list->textcat_failed = TRUE;
	return -1;
}
#endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
204
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
205 static enum fts_language_result
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
206 fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
207 const unsigned char *text ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
208 size_t size ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
209 const struct fts_language **lang_r ATTR_UNUSED)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
210 {
18426
50ef619ce58a lib-fts requires libexttextcat actually - don't even try to use textcat for it.
Timo Sirainen <tss@iki.fi>
parents: 18417
diff changeset
211 #ifdef HAVE_FTS_EXTTEXTCAT
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
212 candidate_t *candp; /* textcat candidate result array pointer */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
213 int cnt;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
214 bool match = FALSE;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
215
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
216 if (fts_language_textcat_init(list) < 0)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
217 return FTS_LANGUAGE_RESULT_ERROR;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
218
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
219 candp = textcat_GetClassifyFullOutput(list->textcat_handle);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
220 if (candp == NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
221 i_fatal_status(FATAL_OUTOFMEM, "textcat_GetCLassifyFullOutput failed: malloc() returned NULL");
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
222 cnt = textcat_ClassifyFull(list->textcat_handle, (const void *)text,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
223 I_MIN(size, DETECT_STR_MAX_LEN), candp);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
224 if (cnt > 0) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
225 T_BEGIN {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
226 match = fts_language_match_lists(list, candp, cnt, lang_r);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
227 } T_END;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
228 textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
229 if (match)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
230 return FTS_LANGUAGE_RESULT_OK;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
231 else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
232 return FTS_LANGUAGE_RESULT_UNKNOWN;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
233 } else {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
234 textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
235 switch (cnt) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
236 case TEXTCAT_RESULT_SHORT:
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
237 i_assert(size < DETECT_STR_MAX_LEN);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
238 return FTS_LANGUAGE_RESULT_SHORT;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
239 case TEXTCAT_RESULT_UNKNOWN:
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
240 return FTS_LANGUAGE_RESULT_UNKNOWN;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
241 default:
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
242 i_unreached();
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
243 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
244 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
245 #else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
246 return FTS_LANGUAGE_RESULT_UNKNOWN;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
247 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
248 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
249
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
250 enum fts_language_result
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
251 fts_language_detect(struct fts_language_list *list,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
252 const unsigned char *text ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
253 size_t size ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
254 const struct fts_language **lang_r)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
255 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
256 i_assert(array_count(&list->languages) > 0);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
257
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
258 /* if there's only a single wanted language, return it always. */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
259 if (array_count(&list->languages) == 1) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
260 const struct fts_language *const *langp =
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
261 array_idx(&list->languages, 0);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
262 *lang_r = *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
263 return FTS_LANGUAGE_RESULT_OK;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
264 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
265 return fts_language_detect_textcat(list, text, size, lang_r);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
266 }