annotate src/lib-fts/fts-language.c @ 18417:cf04173f3f69

lib-fts: Fixed default textcat datadir paths.
author Timo Sirainen <tss@iki.fi>
date Mon, 20 Apr 2015 17:01:12 +0300
parents 81e5b977e5c5
children 50ef619ce58a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18414
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
4 #include "array.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "fts-language.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6 #include "strfuncs.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
7 #include "llist.h"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
10 # include <libexttextcat/textcat.h>
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
11 # define HAVE_TEXTCAT
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
12 #elif defined (HAVE_LIBTEXTCAT_TEXTCAT_H)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
13 # include <libtextcat/textcat.h>
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
14 # define HAVE_TEXTCAT
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
15 #elif defined (HAVE_FTS_TEXTCAT)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
16 # include <textcat.h>
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
17 # define HAVE_TEXTCAT
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
18 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
19
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Old textcat.h versions misspell TEXTCAT_RESULT_UNKNOWN as
   TEXTCAT_RESULT_UNKOWN. When only the misspelled name exists, alias the
   correct name to it so the rest of this file can use one spelling. */
#if !defined(TEXTCAT_RESULT_UNKNOWN) && defined(TEXTCAT_RESULT_UNKOWN)
#  define TEXTCAT_RESULT_UNKNOWN TEXTCAT_RESULT_UNKOWN
#endif

/* Upper limit for how many bytes of the input text are given to textcat
   for language detection. */
#define DETECT_STR_MAX_LEN 200
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
27
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
28 struct fts_language_list {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
29 pool_t pool;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
30 ARRAY_TYPE(fts_language) languages;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
31 const char *textcat_config;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
32 const char *textcat_datadir;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
33 void *textcat_handle;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
34 bool textcat_failed;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
35 };
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
36
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
37 const struct fts_language fts_languages[] = {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
38 { "en" },
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
39 { "fi" },
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
40 { "fr" },
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
41 { "de" }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
42 };
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
43
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
44 const struct fts_language fts_language_data = {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
45 "data"
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
46 };
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
47
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
48 const struct fts_language *fts_language_find(const char *name)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
49 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
50 unsigned int i;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
51
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
52 for (i = 0; i < N_ELEMENTS(fts_languages); i++) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
53 if (strcmp(fts_languages[i].name, name) == 0)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
54 return &fts_languages[i];
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
55 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
56 return NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
57 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
58
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
59 struct fts_language_list *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
60 fts_language_list_init(const char *const *settings)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
61 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
62 struct fts_language_list *lp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
63 pool_t pool;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
64 unsigned int i;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
65 const char *conf = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
66 const char *data = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
67
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
68 for (i = 0; settings[i] != NULL; i += 2) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
69 const char *key = settings[i], *value = settings[i+1];
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
70
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
71 if (strcmp(key, "fts_language_config") == 0) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
72 conf = value;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
73 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
74 else if (strcmp(key, "fts_language_data") == 0) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
75 data = value;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
76 } else {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
77 i_debug("Unknown setting: %s", key);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
78 return NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
79 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
80 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
81
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
82 pool = pool_alloconly_create("fts_language_list", 128);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
83 lp = p_new(pool, struct fts_language_list, 1);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
84 lp->pool = pool;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
85 if (conf != NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
86 lp->textcat_config = p_strdup(pool, conf);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
87 else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
88 lp->textcat_config = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
89 if (data != NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
90 lp->textcat_datadir = p_strdup(pool, data);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
91 else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
92 lp->textcat_datadir = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
93 p_array_init(&lp->languages, pool, 32);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
94 return lp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
95 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
96
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
97 void fts_language_list_deinit(struct fts_language_list **list)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
98 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
99 struct fts_language_list *lp = *list;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
100
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
101 *list = NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
102 #ifdef HAVE_TEXTCAT
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
103 if (lp->textcat_handle != NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
104 textcat_Done(lp->textcat_handle);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
105 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
106 pool_unref(&lp->pool);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
107 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
108
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
109 static const struct fts_language *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
110 fts_language_list_find(struct fts_language_list *list, const char *name)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
111 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
112 const struct fts_language *const *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
113
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
114 array_foreach(&list->languages, langp) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
115 if (strcmp((*langp)->name, name) == 0)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
116 return *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
117 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
118 return NULL;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
119 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
120
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
121 void fts_language_list_add(struct fts_language_list *list,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
122 const struct fts_language *lang)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
123 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
124 i_assert(fts_language_list_find(list, lang->name) == NULL);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
125 array_append(&list->languages, &lang, 1);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
126 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
127
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
128 bool fts_language_list_add_names(struct fts_language_list *list,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
129 const char *names,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
130 const char **unknown_name_r)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
131 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
132 const char *const *langs;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
133 const struct fts_language *lang;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
134
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
135 for (langs = t_strsplit_spaces(names, ", "); *langs != NULL; langs++) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
136 lang = fts_language_find(*langs);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
137 if (lang == NULL) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
138 /* unknown language */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
139 *unknown_name_r = *langs;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
140 return FALSE;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
141 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
142 if (fts_language_list_find(list, lang->name) == NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
143 fts_language_list_add(list, lang);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
144 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
145 return TRUE;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
146 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
147
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
148 const ARRAY_TYPE(fts_language) *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
149 fts_language_list_get_all(struct fts_language_list *list)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
150 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
151 return &list->languages;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
152 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
153
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
154 const struct fts_language *
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
155 fts_language_list_get_first(struct fts_language_list *list)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
156 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
157 const struct fts_language *const *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
158
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
159 langp = array_idx(&list->languages, 0);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
160 return *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
161 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
162
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
#ifdef HAVE_TEXTCAT
/* Go through the first candp_len textcat candidates in order and pick
   the first one whose language is in the list.

   Returns TRUE and sets *lang_r to the matching language. Returns FALSE
   if no candidate matched; *lang_r has then been set to NULL, unless
   candp_len wasn't positive, in which case *lang_r is left untouched. */
static bool fts_language_match_lists(struct fts_language_list *list,
				     candidate_t *candp, int candp_len,
				     const struct fts_language **lang_r)
{
	int idx = 0;

	while (idx < candp_len) {
		/* name is <lang>-<optional country or characterset>-<encoding>
		   eg, fi--utf8 or pt-PT-utf8. Only the <lang> part before
		   the first '-' is compared. */
		const char *lang_name = t_strcut(candp[idx].name, '-');

		*lang_r = fts_language_list_find(list, lang_name);
		if (*lang_r != NULL)
			return TRUE;
		idx++;
	}
	return FALSE;
}
#endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
180
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
#ifdef HAVE_TEXTCAT
/* Make sure the list has a textcat handle, creating it on the first call.

   The configuration file and data directory come from the list's
   settings; when a setting is NULL, TEXTCAT_DATADIR"/fpdb.conf" and
   TEXTCAT_DATADIR"/" are used respectively.

   Returns 0 if a handle is available, -1 if not. A failure is logged once
   and remembered in list->textcat_failed, so later calls return -1
   immediately without retrying. */
static int fts_language_textcat_init(struct fts_language_list *list)
{
	const char *config_path = TEXTCAT_DATADIR"/fpdb.conf";
	const char *data_dir = TEXTCAT_DATADIR"/";

	if (list->textcat_handle != NULL) {
		/* already initialized */
		return 0;
	}
	if (list->textcat_failed) {
		/* failed earlier - don't try again */
		return -1;
	}

	/* explicitly configured paths override the defaults */
	if (list->textcat_config != NULL)
		config_path = list->textcat_config;
	if (list->textcat_datadir != NULL)
		data_dir = list->textcat_datadir;

	list->textcat_handle = special_textcat_Init(config_path, data_dir);
	if (list->textcat_handle != NULL) {
		/* The textcat minimum document size could be set here. It
		   currently defaults to 3. UTF8 is enabled by default. */
		return 0;
	}

	i_error("special_textcat_Init(%s, %s) failed",
		config_path, data_dir);
	list->textcat_failed = TRUE;
	return -1;
}
#endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
209
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
210 static enum fts_language_result
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
211 fts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
212 const unsigned char *text ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
213 size_t size ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
214 const struct fts_language **lang_r ATTR_UNUSED)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
215 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
216 #ifdef HAVE_TEXTCAT
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
217 candidate_t *candp; /* textcat candidate result array pointer */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
218 int cnt;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
219 bool match = FALSE;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
220
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
221 if (fts_language_textcat_init(list) < 0)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
222 return FTS_LANGUAGE_RESULT_ERROR;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
223
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
224 candp = textcat_GetClassifyFullOutput(list->textcat_handle);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
225 if (candp == NULL)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
226 i_fatal_status(FATAL_OUTOFMEM, "textcat_GetCLassifyFullOutput failed: malloc() returned NULL");
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
227 cnt = textcat_ClassifyFull(list->textcat_handle, (const void *)text,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
228 I_MIN(size, DETECT_STR_MAX_LEN), candp);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
229 if (cnt > 0) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
230 T_BEGIN {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
231 match = fts_language_match_lists(list, candp, cnt, lang_r);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
232 } T_END;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
233 textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
234 if (match)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
235 return FTS_LANGUAGE_RESULT_OK;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
236 else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
237 return FTS_LANGUAGE_RESULT_UNKNOWN;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
238 } else {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
239 textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
240 switch (cnt) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
241 case TEXTCAT_RESULT_SHORT:
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
242 i_assert(size < DETECT_STR_MAX_LEN);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
243 return FTS_LANGUAGE_RESULT_SHORT;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
244 case TEXTCAT_RESULT_UNKNOWN:
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
245 return FTS_LANGUAGE_RESULT_UNKNOWN;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
246 default:
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
247 i_unreached();
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
248 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
249 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
250 #else
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
251 return FTS_LANGUAGE_RESULT_UNKNOWN;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
252 #endif
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
253 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
254
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
255 enum fts_language_result
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
256 fts_language_detect(struct fts_language_list *list,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
257 const unsigned char *text ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
258 size_t size ATTR_UNUSED,
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
259 const struct fts_language **lang_r)
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
260 {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
261 i_assert(array_count(&list->languages) > 0);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
262
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
263 /* if there's only a single wanted language, return it always. */
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
264 if (array_count(&list->languages) == 1) {
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
265 const struct fts_language *const *langp =
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
266 array_idx(&list->languages, 0);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
267 *lang_r = *langp;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
268 return FTS_LANGUAGE_RESULT_OK;
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
269 }
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
270 return fts_language_detect_textcat(list, text, size, lang_r);
81e5b977e5c5 Initial import for lib-fts.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
271 }