changeset 20248:e0c290c2166c

lib-fts: Move stopwords to subdirectory. All files incluided in dist are explicitly mentioned. The whole subdirectory 'stopwords' could also be distributed, but that is more error prone.
author Teemu Huovila <teemu.huovila@dovecot.fi>
date Mon, 30 May 2016 11:54:26 +0300
parents 7a06e0c5a1fc
children dc6814d5683d
files src/lib-fts/Makefile.am src/lib-fts/stopwords/stopwords_en.txt src/lib-fts/stopwords/stopwords_fi.txt src/lib-fts/stopwords/stopwords_fr.txt src/lib-fts/stopwords/stopwords_no.txt src/lib-fts/stopwords/stopwords_sv.txt src/lib-fts/stopwords_en.txt src/lib-fts/stopwords_fi.txt src/lib-fts/stopwords_fr.txt src/lib-fts/stopwords_no.txt src/lib-fts/stopwords_sv.txt
diffstat 11 files changed, 662 insertions(+), 662 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/Makefile.am	Thu Jun 02 00:52:37 2016 +0300
+++ b/src/lib-fts/Makefile.am	Mon May 30 11:54:26 2016 +0300
@@ -12,15 +12,15 @@
 	$(LIBICU_CFLAGS) \
 	-DUDHRDIR=\""$(top_srcdir)/src/lib-fts"\" \
 	-DDATADIR=\"$(pkgdatadir)\" \
-	-DTEST_STOPWORDS_DIR=\""$(top_srcdir)/src/lib-fts"\"
+	-DTEST_STOPWORDS_DIR=\""$(top_srcdir)/src/lib-fts/stopwords"\"
 
 stopwordsdir = $(datadir)/${PACKAGE_TARNAME}/stopwords
 dist_stopwords_DATA = \
-	stopwords_en.txt \
-	stopwords_fi.txt \
-	stopwords_fr.txt \
-	stopwords_no.txt \
-	stopwords_sv.txt
+	stopwords/stopwords_en.txt \
+	stopwords/stopwords_fi.txt \
+	stopwords/stopwords_fr.txt \
+	stopwords/stopwords_no.txt \
+	stopwords/stopwords_sv.txt
 
 BUILT_SOURCES = word-boundary-data.c word-break-data.c
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/stopwords/stopwords_en.txt	Mon May 30 11:54:26 2016 +0300
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/stopwords/stopwords_fi.txt	Mon May 30 11:54:26 2016 +0300
@@ -0,0 +1,95 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ 
+| forms of BE
+
+olla
+olen
+olet
+on
+olemme
+olette
+ovat
+ole        | negative form
+
+oli
+olisi
+olisit
+olisin
+olisimme
+olisitte
+olisivat
+olit
+olin
+olimme
+olitte
+olivat
+ollut
+olleet
+
+en         | negation
+et
+ei
+emme
+ette
+eivät
+
+|Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans
+minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I
+sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you
+hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she
+me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we
+te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you
+he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they
+
+tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this
+tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that
+se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it
+nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these
+nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those
+ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they
+
+kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
+ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl)
+mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what
+mitkä                                                                                    | (pl)
+
+joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which
+jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl)
+
+| conjunctions
+
+että   | that
+ja     | and
+jos    | if
+koska  | because
+kuin   | than
+mutta  | but
+niin   | so
+sekä   | and
+sillä  | for
+tai    | or
+vaan   | but
+vai    | or
+vaikka | although
+
+
+| prepositions
+
+kanssa  | with
+mukaan  | according to
+noin    | about
+poikki  | across
+yli     | over, across
+
+| other
+
+kun    | when
+niin   | so
+nyt    | now
+itse   | self
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/stopwords/stopwords_fr.txt	Mon May 30 11:54:26 2016 +0300
@@ -0,0 +1,184 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au             |  a + le
+aux            |  a + les
+avec           |  with
+ce             |  this
+ces            |  these
+dans           |  with
+de             |  of
+des            |  de + les
+du             |  de + le
+elle           |  she
+en             |  `of them' etc
+et             |  and
+eux            |  them
+il             |  he
+je             |  I
+la             |  the
+le             |  the
+leur           |  their
+lui            |  him
+ma             |  my (fem)
+mais           |  but
+me             |  me
+même           |  same; as in moi-même (myself) etc
+mes            |  me (pl)
+moi            |  me
+mon            |  my (masc)
+ne             |  not
+nos            |  our (pl)
+notre          |  our
+nous           |  we
+on             |  one
+ou             |  where
+par            |  by
+pas            |  not
+pour           |  for
+qu             |  que before vowel
+que            |  that
+qui            |  who
+sa             |  his, her (fem)
+se             |  oneself
+ses            |  his (pl)
+son            |  his, her (masc)
+sur            |  on
+ta             |  thy (fem)
+te             |  thee
+tes            |  thy (pl)
+toi            |  thee
+ton            |  thy (masc)
+tu             |  thou
+un             |  a
+une            |  a
+vos            |  your (pl)
+votre          |  your
+vous           |  you
+
+               |  single letter forms
+
+c              |  c'
+d              |  d'
+j              |  j'
+l              |  l'
+à              |  to, at
+m              |  m'
+n              |  n'
+s              |  s'
+t              |  t'
+y              |  there
+
+               | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+               | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+               | Later additions (from Jean-Christophe Deschamps)
+ceci           |  this
+cela           |  that
+celà           |  that
+cet            |  this
+cette          |  this
+ici            |  here
+ils            |  they
+les            |  the (pl)
+leurs          |  their (pl)
+quel           |  which
+quels          |  which
+quelle         |  which
+quelles        |  which
+sans           |  without
+soi            |  oneself
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/stopwords/stopwords_no.txt	Mon May 30 11:54:26 2016 +0300
@@ -0,0 +1,192 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
+
+og             | and
+i              | in
+jeg            | I
+det            | it/this/that
+at             | to (w. inf.)
+en             | a/an
+et             | a/an
+den            | it/this/that
+til            | to
+er             | is/am/are
+som            | who/that
+på             | on
+de             | they / you(formal)
+med            | with
+han            | he
+av             | of
+ikke           | not
+ikkje          | not *
+der            | there
+så             | so
+var            | was/were
+meg            | me
+seg            | you
+men            | but
+ett            | one
+har            | have
+om             | about
+vi             | we
+min            | my
+mitt           | my
+ha             | have
+hadde          | had
+hun            | she
+nå             | now
+over           | over
+da             | when/as
+ved            | by/know
+fra            | from
+du             | you
+ut             | out
+sin            | your
+dem            | them
+oss            | us
+opp            | up
+man            | you/one
+kan            | can
+hans           | his
+hvor           | where
+eller          | or
+hva            | what
+skal           | shall/must
+selv           | self (reflective)
+sjøl           | self (reflective)
+her            | here
+alle           | all
+vil            | will
+bli            | become
+ble            | became
+blei           | became *
+blitt          | have become
+kunne          | could
+inn            | in
+når            | when
+være           | be
+kom            | come
+noen           | some
+noe            | some
+ville          | would
+dere           | you
+som            | who/which/that
+deres          | their/theirs
+kun            | only/just
+ja             | yes
+etter          | after
+ned            | down
+skulle         | should
+denne          | this
+for            | for/because
+deg            | you
+si             | hers/his
+sine           | hers/his
+sitt           | hers/his
+mot            | against
+å              | to
+meget          | much
+hvorfor        | why
+dette          | this
+disse          | these/those
+uten           | without
+hvordan        | how
+ingen          | none
+din            | your
+ditt           | your
+blir           | become
+samme          | same
+hvilken        | which
+hvilke         | which (plural)
+sånn           | such a
+inni           | inside/within
+mellom         | between
+vår            | our
+hver           | each
+hvem           | who
+vors           | us/ours
+hvis           | whose
+både           | both
+bare           | only/just
+enn            | than
+fordi          | as/because
+før            | before
+mange          | many
+også           | also
+slik           | just
+vært           | been
+være           | to be
+båe            | both *
+begge          | both
+siden          | since
+dykk           | your *
+dykkar         | yours *
+dei            | they *
+deira          | them *
+deires         | theirs *
+deim           | them *
+di             | your (fem.) *
+då             | as/when *
+eg             | I *
+ein            | a/an *
+eit            | a/an *
+eitt           | a/an *
+elles          | or *
+honom          | he *
+hjå            | at *
+ho             | she *
+hoe            | she *
+henne          | her
+hennar         | her/hers
+hennes         | hers
+hoss           | how *
+hossen         | how *
+ikkje          | not *
+ingi           | noone *
+inkje          | noone *
+korleis        | how *
+korso          | how *
+kva            | what/which *
+kvar           | where *
+kvarhelst      | where *
+kven           | who/whom *
+kvi            | why *
+kvifor         | why *
+me             | we *
+medan          | while *
+mi             | my *
+mine           | my *
+mykje          | much *
+no             | now *
+nokon          | some (masc./neut.) *
+noka           | some (fem.) *
+nokor          | some *
+noko           | some *
+nokre          | some *
+si             | his/hers *
+sia            | since *
+sidan          | since *
+so             | so *
+somt           | some *
+somme          | some *
+um             | about*
+upp            | up *
+vere           | be *
+vore           | was *
+verte          | become *
+vort           | become *
+varte          | became *
+vart           | became *
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/stopwords/stopwords_sv.txt	Mon May 30 11:54:26 2016 +0300
@@ -0,0 +1,131 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ |  så = so, but also seed. These are indicated clearly below.
+
+och            | and
+det            | it, this/that
+att            | to (with infinitive)
+i              | in, at
+en             | a
+jag            | I
+hon            | she
+som            | who, that
+han            | he
+på             | on
+den            | it, this/that
+med            | with
+var            | where, each
+sig            | him(self) etc
+för            | for
+så             | so (also: seed)
+till           | to
+är             | is
+men            | but
+ett            | a
+om             | if; around, about
+hade           | had
+de             | they, these/those
+av             | of
+icke           | not, no
+mig            | me
+du             | you
+henne          | her
+då             | then, when
+sin            | his
+nu             | now
+har            | have
+inte           | inte någon = no one
+hans           | his
+honom          | him
+skulle         | 'sake'
+hennes         | her
+där            | there
+min            | my
+man            | one (pronoun)
+ej             | nor
+vid            | at, by, on (also: vast)
+kunde          | could
+något          | some etc
+från           | from, off
+ut             | out
+när            | when
+efter          | after, behind
+upp            | up
+vi             | we
+dem            | them
+vara           | be
+vad            | what
+över           | over
+än             | than
+dig            | you
+kan            | can
+sina           | his
+här            | here
+ha             | have
+mot            | towards
+alla           | all
+under          | under (also: wonder)
+någon          | some etc
+eller          | or (else)
+allt           | all
+mycket         | much
+sedan          | since
+ju             | why
+denna          | this/that
+själv          | myself, yourself etc
+detta          | this/that
+åt             | to
+utan           | without
+varit          | was
+hur            | how
+ingen          | no
+mitt           | my
+ni             | you
+bli            | to be, become
+blev           | from bli
+oss            | us
+din            | thy
+dessa          | these/those
+några          | some etc
+deras          | their
+blir           | from bli
+mina           | my
+samma          | (the) same
+vilken         | who, that
+er             | you, your
+sådan          | such a
+vår            | our
+blivit         | from bli
+dess           | its
+inom           | within
+mellan         | between
+sådant         | such a
+varför         | why
+varje          | each
+vilka          | who, that
+ditt           | thy
+vem            | who
+vilket         | who, that
+sitta          | his
+sådana         | such a
+vart           | each
+dina           | thy
+vars           | whose
+vårt           | our
+våra           | our
+ert            | your
+era            | your
+vilkas         | whose
+
--- a/src/lib-fts/stopwords_en.txt	Thu Jun 02 00:52:37 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
--- a/src/lib-fts/stopwords_fi.txt	Thu Jun 02 00:52:37 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- |  - Encoding was converted to UTF-8.
- |  - This notice was added.
- 
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole        | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en         | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans
-minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I
-sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you
-hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she
-me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we
-te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you
-he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they
-
-tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this
-tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that
-se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it
-nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these
-nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those
-ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they
-
-kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl)
-mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what
-mitkä                                                                                    | (pl)
-
-joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which
-jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl)
-
-| conjunctions
-
-että   | that
-ja     | and
-jos    | if
-koska  | because
-kuin   | than
-mutta  | but
-niin   | so
-sekä   | and
-sillä  | for
-tai    | or
-vaan   | but
-vai    | or
-vaikka | although
-
-
-| prepositions
-
-kanssa  | with
-mukaan  | according to
-noin    | about
-poikki  | across
-yli     | over, across
-
-| other
-
-kun    | when
-niin   | so
-nyt    | now
-itse   | self
-
--- a/src/lib-fts/stopwords_fr.txt	Thu Jun 02 00:52:37 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- |  - Encoding was converted to UTF-8.
- |  - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au             |  a + le
-aux            |  a + les
-avec           |  with
-ce             |  this
-ces            |  these
-dans           |  with
-de             |  of
-des            |  de + les
-du             |  de + le
-elle           |  she
-en             |  `of them' etc
-et             |  and
-eux            |  them
-il             |  he
-je             |  I
-la             |  the
-le             |  the
-leur           |  their
-lui            |  him
-ma             |  my (fem)
-mais           |  but
-me             |  me
-même           |  same; as in moi-même (myself) etc
-mes            |  me (pl)
-moi            |  me
-mon            |  my (masc)
-ne             |  not
-nos            |  our (pl)
-notre          |  our
-nous           |  we
-on             |  one
-ou             |  where
-par            |  by
-pas            |  not
-pour           |  for
-qu             |  que before vowel
-que            |  that
-qui            |  who
-sa             |  his, her (fem)
-se             |  oneself
-ses            |  his (pl)
-son            |  his, her (masc)
-sur            |  on
-ta             |  thy (fem)
-te             |  thee
-tes            |  thy (pl)
-toi            |  thee
-ton            |  thy (masc)
-tu             |  thou
-un             |  a
-une            |  a
-vos            |  your (pl)
-votre          |  your
-vous           |  you
-
-               |  single letter forms
-
-c              |  c'
-d              |  d'
-j              |  j'
-l              |  l'
-à              |  to, at
-m              |  m'
-n              |  n'
-s              |  s'
-t              |  t'
-y              |  there
-
-               | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
-               | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
-               | Later additions (from Jean-Christophe Deschamps)
-ceci           |  this
-cela           |  that
-celà           |  that
-cet            |  this
-cette          |  this
-ici            |  here
-ils            |  they
-les            |  the (pl)
-leurs          |  their (pl)
-quel           |  which
-quels          |  which
-quelle         |  which
-quelles        |  which
-sans           |  without
-soi            |  oneself
-
--- a/src/lib-fts/stopwords_no.txt	Thu Jun 02 00:52:37 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,192 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- |  - Encoding was converted to UTF-8.
- |  - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
-
-og             | and
-i              | in
-jeg            | I
-det            | it/this/that
-at             | to (w. inf.)
-en             | a/an
-et             | a/an
-den            | it/this/that
-til            | to
-er             | is/am/are
-som            | who/that
-på             | on
-de             | they / you(formal)
-med            | with
-han            | he
-av             | of
-ikke           | not
-ikkje          | not *
-der            | there
-så             | so
-var            | was/were
-meg            | me
-seg            | you
-men            | but
-ett            | one
-har            | have
-om             | about
-vi             | we
-min            | my
-mitt           | my
-ha             | have
-hadde          | had
-hun            | she
-nå             | now
-over           | over
-da             | when/as
-ved            | by/know
-fra            | from
-du             | you
-ut             | out
-sin            | your
-dem            | them
-oss            | us
-opp            | up
-man            | you/one
-kan            | can
-hans           | his
-hvor           | where
-eller          | or
-hva            | what
-skal           | shall/must
-selv           | self (reflective)
-sjøl           | self (reflective)
-her            | here
-alle           | all
-vil            | will
-bli            | become
-ble            | became
-blei           | became *
-blitt          | have become
-kunne          | could
-inn            | in
-når            | when
-være           | be
-kom            | come
-noen           | some
-noe            | some
-ville          | would
-dere           | you
-som            | who/which/that
-deres          | their/theirs
-kun            | only/just
-ja             | yes
-etter          | after
-ned            | down
-skulle         | should
-denne          | this
-for            | for/because
-deg            | you
-si             | hers/his
-sine           | hers/his
-sitt           | hers/his
-mot            | against
-å              | to
-meget          | much
-hvorfor        | why
-dette          | this
-disse          | these/those
-uten           | without
-hvordan        | how
-ingen          | none
-din            | your
-ditt           | your
-blir           | become
-samme          | same
-hvilken        | which
-hvilke         | which (plural)
-sånn           | such a
-inni           | inside/within
-mellom         | between
-vår            | our
-hver           | each
-hvem           | who
-vors           | us/ours
-hvis           | whose
-både           | both
-bare           | only/just
-enn            | than
-fordi          | as/because
-før            | before
-mange          | many
-også           | also
-slik           | just
-vært           | been
-være           | to be
-båe            | both *
-begge          | both
-siden          | since
-dykk           | your *
-dykkar         | yours *
-dei            | they *
-deira          | them *
-deires         | theirs *
-deim           | them *
-di             | your (fem.) *
-då             | as/when *
-eg             | I *
-ein            | a/an *
-eit            | a/an *
-eitt           | a/an *
-elles          | or *
-honom          | he *
-hjå            | at *
-ho             | she *
-hoe            | she *
-henne          | her
-hennar         | her/hers
-hennes         | hers
-hoss           | how *
-hossen         | how *
-ikkje          | not *
-ingi           | noone *
-inkje          | noone *
-korleis        | how *
-korso          | how *
-kva            | what/which *
-kvar           | where *
-kvarhelst      | where *
-kven           | who/whom *
-kvi            | why *
-kvifor         | why *
-me             | we *
-medan          | while *
-mi             | my *
-mine           | my *
-mykje          | much *
-no             | now *
-nokon          | some (masc./neut.) *
-noka           | some (fem.) *
-nokor          | some *
-noko           | some *
-nokre          | some *
-si             | his/hers *
-sia            | since *
-sidan          | since *
-so             | so *
-somt           | some *
-somme          | some *
-um             | about*
-upp            | up *
-vere           | be *
-vore           | was *
-verte          | become *
-vort           | become *
-varte          | became *
-vart           | became *
-
--- a/src/lib-fts/stopwords_sv.txt	Thu Jun 02 00:52:37 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- |  - Encoding was converted to UTF-8.
- |  - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- |  så = so, but also seed. These are indicated clearly below.
-
-och            | and
-det            | it, this/that
-att            | to (with infinitive)
-i              | in, at
-en             | a
-jag            | I
-hon            | she
-som            | who, that
-han            | he
-på             | on
-den            | it, this/that
-med            | with
-var            | where, each
-sig            | him(self) etc
-för            | for
-så             | so (also: seed)
-till           | to
-är             | is
-men            | but
-ett            | a
-om             | if; around, about
-hade           | had
-de             | they, these/those
-av             | of
-icke           | not, no
-mig            | me
-du             | you
-henne          | her
-då             | then, when
-sin            | his
-nu             | now
-har            | have
-inte           | inte någon = no one
-hans           | his
-honom          | him
-skulle         | 'sake'
-hennes         | her
-där            | there
-min            | my
-man            | one (pronoun)
-ej             | nor
-vid            | at, by, on (also: vast)
-kunde          | could
-något          | some etc
-från           | from, off
-ut             | out
-när            | when
-efter          | after, behind
-upp            | up
-vi             | we
-dem            | them
-vara           | be
-vad            | what
-över           | over
-än             | than
-dig            | you
-kan            | can
-sina           | his
-här            | here
-ha             | have
-mot            | towards
-alla           | all
-under          | under (also: wonder)
-någon          | some etc
-eller          | or (else)
-allt           | all
-mycket         | much
-sedan          | since
-ju             | why
-denna          | this/that
-själv          | myself, yourself etc
-detta          | this/that
-åt             | to
-utan           | without
-varit          | was
-hur            | how
-ingen          | no
-mitt           | my
-ni             | you
-bli            | to be, become
-blev           | from bli
-oss            | us
-din            | thy
-dessa          | these/those
-några          | some etc
-deras          | their
-blir           | from bli
-mina           | my
-samma          | (the) same
-vilken         | who, that
-er             | you, your
-sådan          | such a
-vår            | our
-blivit         | from bli
-dess           | its
-inom           | within
-mellan         | between
-sådant         | such a
-varför         | why
-varje          | each
-vilka          | who, that
-ditt           | thy
-vem            | who
-vilket         | who, that
-sitta          | his
-sådana         | such a
-vart           | each
-dina           | thy
-vars           | whose
-vårt           | our
-våra           | our
-ert            | your
-era            | your
-vilkas         | whose
-