diff --git a/.travis.yml b/.travis.yml index 4d750d05..d78ff4b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,4 +24,4 @@ before_script: - export TZ=Europe/Berlin script: - - sbt ++$TRAVIS_SCALA_VERSION ";project root ;scalafmtCheckAll ;make ;test" + - sbt -J-XX:+UseG1GC ++$TRAVIS_SCALA_VERSION ";project root ;scalafmtCheckAll ;make ;test" diff --git a/docker/joex-base.dockerfile b/docker/joex-base.dockerfile index 0baa1973..8ebad224 100644 --- a/docker/joex-base.dockerfile +++ b/docker/joex-base.dockerfile @@ -15,6 +15,7 @@ RUN apk add --no-cache openjdk11-jre \ tesseract-ocr \ tesseract-ocr-data-deu \ tesseract-ocr-data-fra \ + tesseract-ocr-data-ita \ unpaper \ wkhtmltopdf \ libreoffice \ diff --git a/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala b/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala new file mode 100644 index 00000000..a1b426e5 --- /dev/null +++ b/modules/analysis/src/main/scala/docspell/analysis/NlpSettings.scala @@ -0,0 +1,7 @@ +package docspell.analysis + +import java.nio.file.Path + +import docspell.common._ + +case class NlpSettings(lang: Language, highRecall: Boolean, regexNer: Option[Path]) diff --git a/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala b/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala index a9234027..c2deafce 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala @@ -10,13 +10,13 @@ import docspell.analysis.date.DateFind import docspell.analysis.nlp._ import docspell.common._ -import edu.stanford.nlp.pipeline.StanfordCoreNLP +import org.log4s.getLogger trait TextAnalyser[F[_]] { def annotate( logger: Logger[F], - settings: StanfordNerSettings, + settings: NlpSettings, cacheKey: Ident, text: String ): F[TextAnalyser.Result] @@ -24,6 +24,7 @@ trait TextAnalyser[F[_]] { def classifier: TextClassifier[F] } object TextAnalyser { + private[this] val logger = getLogger case class Result(labels: Vector[NerLabel], dates: Vector[NerDateLabel]) { @@ -41,13 +42,13 @@ object TextAnalyser { new TextAnalyser[F] { def annotate( logger: Logger[F], - settings: StanfordNerSettings, + settings: NlpSettings, cacheKey: Ident, text: String ): F[TextAnalyser.Result] = for { input <- textLimit(logger, text) - tags0 <- stanfordNer(Nlp.Input(cacheKey, settings, input)) + tags0 <- stanfordNer(Nlp.Input(cacheKey, settings, logger, input)) tags1 <- contactNer(input) dates <- dateNer(settings.lang, input) list = tags0 ++ tags1 @@ -77,31 +78,36 @@ object TextAnalyser { } ) + /** Provides the nlp pipeline based on the configuration. 
*/ private object Nlp { - def apply[F[_]: Concurrent: Timer: BracketThrow]( cfg: TextAnalysisConfig.NlpConfig - ): F[Input => F[Vector[NerLabel]]] = + ): F[Input[F] => F[Vector[NerLabel]]] = cfg.mode match { - case NlpMode.Full => - PipelineCache.full(cfg.clearInterval).map(cache => full(cache)) - case NlpMode.Basic => - PipelineCache.basic(cfg.clearInterval).map(cache => basic(cache)) case NlpMode.Disabled => - Applicative[F].pure(_ => Vector.empty[NerLabel].pure[F]) + Logger.log4s(logger).info("NLP is disabled as defined in config.") *> + Applicative[F].pure(_ => Vector.empty[NerLabel].pure[F]) + case _ => + PipelineCache(cfg.clearInterval)( + Annotator[F](cfg.mode), + Annotator.clearCaches[F] + ) + .map(annotate[F]) } - final case class Input(key: Ident, settings: StanfordNerSettings, text: String) + final case class Input[F[_]]( + key: Ident, + settings: NlpSettings, + logger: Logger[F], + text: String + ) - def full[F[_]: BracketThrow]( - cache: PipelineCache[F, StanfordCoreNLP] - )(input: Input): F[Vector[NerLabel]] = - StanfordNerAnnotator.nerAnnotate(input.key.id, cache)(input.settings, input.text) - - def basic[F[_]: BracketThrow]( - cache: PipelineCache[F, BasicCRFAnnotator.Annotator] - )(input: Input): F[Vector[NerLabel]] = - BasicCRFAnnotator.nerAnnotate(input.key.id, cache)(input.settings, input.text) + def annotate[F[_]: BracketThrow]( + cache: PipelineCache[F] + )(input: Input[F]): F[Vector[NerLabel]] = + cache + .obtain(input.key.id, input.settings) + .use(ann => ann.nerAnnotate(input.logger)(input.text)) } } diff --git a/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala b/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala index 90fcd8cd..5feb8b57 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala @@ -41,23 +41,30 @@ object DateFind { } object SimpleDate { - val p0 = (readYear >> readMonth >> readDay).map { case ((y, m), d) => - List(SimpleDate(y, m, d)) + def pattern0(lang: Language) = (readYear >> readMonth(lang) >> readDay).map { + case ((y, m), d) => + List(SimpleDate(y, m, d)) } - val p1 = (readDay >> readMonth >> readYear).map { case ((d, m), y) => - List(SimpleDate(y, m, d)) + def pattern1(lang: Language) = (readDay >> readMonth(lang) >> readYear).map { + case ((d, m), y) => + List(SimpleDate(y, m, d)) } - val p2 = (readMonth >> readDay >> readYear).map { case ((m, d), y) => - List(SimpleDate(y, m, d)) + def pattern2(lang: Language) = (readMonth(lang) >> readDay >> readYear).map { + case ((m, d), y) => + List(SimpleDate(y, m, d)) } // ymd ✔, ydm, dmy ✔, dym, myd, mdy ✔ def fromParts(parts: List[Word], lang: Language): List[SimpleDate] = { + val p0 = pattern0(lang) + val p1 = pattern1(lang) + val p2 = pattern2(lang) val p = lang match { case Language.English => p2.alt(p1).map(t => t._1 ++ t._2).or(p2).or(p0).or(p1) - case Language.German => p1.or(p0).or(p2) - case Language.French => p1.or(p0).or(p2) + case Language.German => p1.or(p0).or(p2) + case Language.French => p1.or(p0).or(p2) + case Language.Italian => p1.or(p0).or(p2) } p.read(parts) match { case Result.Success(sds, _) => @@ -76,9 +83,11 @@ object DateFind { } ) - def readMonth: Reader[Int] = + def readMonth(lang: Language): Reader[Int] = Reader.readFirst(w => - Some(months.indexWhere(_.contains(w.value))).filter(_ >= 0).map(_ + 1) + Some(MonthName.getAll(lang).indexWhere(_.contains(w.value))) + .filter(_ >= 0) + .map(_ + 1) ) def readDay: Reader[Int] = @@ -150,20 
+159,5 @@ object DateFind { Failure } } - - private val months = List( - List("jan", "january", "januar", "01"), - List("feb", "february", "februar", "02"), - List("mar", "march", "märz", "marz", "03"), - List("apr", "april", "04"), - List("may", "mai", "05"), - List("jun", "june", "juni", "06"), - List("jul", "july", "juli", "07"), - List("aug", "august", "08"), - List("sep", "september", "09"), - List("oct", "october", "oktober", "10"), - List("nov", "november", "11"), - List("dec", "december", "dezember", "12") - ) } } diff --git a/modules/analysis/src/main/scala/docspell/analysis/date/MonthName.scala b/modules/analysis/src/main/scala/docspell/analysis/date/MonthName.scala new file mode 100644 index 00000000..cf61cd72 --- /dev/null +++ b/modules/analysis/src/main/scala/docspell/analysis/date/MonthName.scala @@ -0,0 +1,101 @@ +package docspell.analysis.date + +import docspell.common.Language + +object MonthName { + + def getAll(lang: Language): List[List[String]] = + merge(numbers, forLang(lang)) + + private def merge(n0: List[List[String]], ns: List[List[String]]*): List[List[String]] = + ns.foldLeft(n0) { (res, el) => + res.zip(el).map({ case (a, b) => a ++ b }) + } + + private def forLang(lang: Language): List[List[String]] = + lang match { + case Language.English => + english + case Language.German => + german + case Language.French => + french + case Language.Italian => + italian + } + + private val numbers = List( + List("01"), + List("02"), + List("03"), + List("04"), + List("05"), + List("06"), + List("07"), + List("08"), + List("09"), + List("10"), + List("11"), + List("12") + ) + + private val english = List( + List("jan", "january"), + List("feb", "february"), + List("mar", "march"), + List("apr", "april"), + List("may"), + List("jun", "june"), + List("jul", "july"), + List("aug", "august"), + List("sept", "september"), + List("oct", "october"), + List("nov", "november"), + List("dec", "december") + ) + + private val german = List( + List("jan", "januar"), + List("feb", "februar"), + List("märz"), + List("apr", "april"), + List("mai"), + List("juni"), + List("juli"), + List("aug", "august"), + List("sept", "september"), + List("okt", "oktober"), + List("nov", "november"), + List("dez", "dezember") + ) + + private val french = List( + List("janv", "janvier"), + List("févr", "fevr", "février", "fevrier"), + List("mars"), + List("avril"), + List("mai"), + List("juin"), + List("juil", "juillet"), + List("aout", "août"), + List("sept", "septembre"), + List("oct", "octobre"), + List("nov", "novembre"), + List("dec", "déc", "décembre", "decembre") + ) + + private val italian = List( + List("genn", "gennaio"), + List("febbr", "febbraio"), + List("mar", "marzo"), + List("apr", "aprile"), + List("magg", "maggio"), + List("giugno"), + List("luglio"), + List("ag", "agosto"), + List("sett", "settembre"), + List("ott", "ottobre"), + List("nov", "novembre"), + List("dic", "dicembre") + ) +} diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/Annotator.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/Annotator.scala new file mode 100644 index 00000000..d509805a --- /dev/null +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/Annotator.scala @@ -0,0 +1,98 @@ +package docspell.analysis.nlp + +import cats.effect.Sync +import cats.implicits._ +import cats.{Applicative, FlatMap} + +import docspell.analysis.NlpSettings +import docspell.common._ + +import edu.stanford.nlp.pipeline.StanfordCoreNLP + +/** Analyses a text to mark certain parts with a `NerLabel`. 
  */
+trait Annotator[F[_]] { self =>
+  def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]]
+
+  def ++(next: Annotator[F])(implicit F: FlatMap[F]): Annotator[F] =
+    new Annotator[F] {
+      def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]] =
+        for {
+          n0 <- self.nerAnnotate(logger)(text)
+          n1 <- next.nerAnnotate(logger)(text)
+        } yield (n0 ++ n1).distinct
+    }
+}
+
+object Annotator {
+
+  /** Creates an annotator according to the given `mode` and `settings`.
+    *
+    * There are the following ways:
+    *
+    * - disabled: it returns a no-op annotator that always gives an empty list
+    * - full: the complete stanford pipeline is used
+    * - basic: only the ner classifier is used
+    * - regexonly: only the regexner annotator is run (a regexNer file is required)
+    *
+    * Additionally, if there is a regexNer-file specified, the regexner annotator is
+    * also run. In case the full pipeline is used, this is already included.
+    */
+  def apply[F[_]: Sync](mode: NlpMode)(settings: NlpSettings): Annotator[F] =
+    mode match {
+      case NlpMode.Disabled =>
+        Annotator.none[F]
+      case NlpMode.Full =>
+        StanfordNerSettings.fromNlpSettings(settings) match {
+          case Some(ss) =>
+            Annotator.pipeline(StanfordNerAnnotator.makePipeline(ss))
+          case None =>
+            Annotator.none[F]
+        }
+      case NlpMode.Basic =>
+        StanfordNerSettings.fromNlpSettings(settings) match {
+          case Some(StanfordNerSettings.Full(lang, _, Some(file))) =>
+            Annotator.basic(BasicCRFAnnotator.Cache.getAnnotator(lang)) ++
+              Annotator.pipeline(StanfordNerAnnotator.regexNerPipeline(file))
+          case Some(StanfordNerSettings.Full(lang, _, None)) =>
+            Annotator.basic(BasicCRFAnnotator.Cache.getAnnotator(lang))
+          case Some(StanfordNerSettings.RegexOnly(file)) =>
+            Annotator.pipeline(StanfordNerAnnotator.regexNerPipeline(file))
+          case None =>
+            Annotator.none[F]
+        }
+      case NlpMode.RegexOnly =>
+        settings.regexNer match {
+          case Some(file) =>
+            Annotator.pipeline(StanfordNerAnnotator.regexNerPipeline(file))
+          case None =>
+            Annotator.none[F]
+        }
+    }
+
+  def none[F[_]: Applicative]: Annotator[F] =
+    new Annotator[F] {
+      def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]] =
+        logger.debug("Running empty annotator. NLP not supported.") *>
+          Vector.empty[NerLabel].pure[F]
+    }
+
+  def basic[F[_]: Sync](ann: BasicCRFAnnotator.Annotator): Annotator[F] =
+    new Annotator[F] {
+      def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]] =
+        Sync[F].delay(
+          BasicCRFAnnotator.nerAnnotate(ann)(text)
+        )
+    }
+
+  def pipeline[F[_]: Sync](cp: StanfordCoreNLP): Annotator[F] =
+    new Annotator[F] {
+      def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]] =
+        Sync[F].delay(StanfordNerAnnotator.nerAnnotate(cp, text))
+    }
+
+  def clearCaches[F[_]: Sync]: F[Unit] =
+    Sync[F].delay {
+      StanfordCoreNLP.clearAnnotatorPool()
+      BasicCRFAnnotator.Cache.clearCache()
+    }
+}
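The `++` combinator above is what the Basic mode relies on: CRF labels and regexner labels are simply concatenated and de-duplicated. A minimal sketch of that composition, assuming a hypothetical `regexFile` in Stanford's regexner TSV format and that the bundled CRF models are on the classpath:

```scala
import java.nio.file.Paths
import cats.effect.IO
import docspell.analysis.nlp.{Annotator, BasicCRFAnnotator, StanfordNerAnnotator}
import docspell.common.Language

// Hypothetical mapping file; in practice its content is derived from an address book.
val regexFile = Paths.get("/tmp/regexner.txt")

// Mirrors the NlpMode.Basic branch of Annotator.apply: both annotators run,
// and `++` merges their results with .distinct.
val combined: Annotator[IO] =
  Annotator.basic[IO](BasicCRFAnnotator.Cache.getAnnotator(Language.German)) ++
    Annotator.pipeline[IO](StanfordNerAnnotator.regexNerPipeline(regexFile))
```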
NLP not supported.") *> + Vector.empty[NerLabel].pure[F] + } + + def basic[F[_]: Sync](ann: BasicCRFAnnotator.Annotator): Annotator[F] = + new Annotator[F] { + def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]] = + Sync[F].delay( + BasicCRFAnnotator.nerAnnotate(ann)(text) + ) + } + + def pipeline[F[_]: Sync](cp: StanfordCoreNLP): Annotator[F] = + new Annotator[F] { + def nerAnnotate(logger: Logger[F])(text: String): F[Vector[NerLabel]] = + Sync[F].delay(StanfordNerAnnotator.nerAnnotate(cp, text)) + + } + + def clearCaches[F[_]: Sync]: F[Unit] = + Sync[F].delay { + StanfordCoreNLP.clearAnnotatorPool() + BasicCRFAnnotator.Cache.clearCache() + } +} diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/BasicCRFAnnotator.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/BasicCRFAnnotator.scala index a6fb6af0..76ffe7c6 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/BasicCRFAnnotator.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/BasicCRFAnnotator.scala @@ -7,9 +7,7 @@ import java.util.zip.GZIPInputStream import scala.jdk.CollectionConverters._ import scala.util.Using -import cats.Applicative -import cats.effect.BracketThrow - +import docspell.common.Language.NLPLanguage import docspell.common._ import edu.stanford.nlp.ie.AbstractSequenceClassifier @@ -30,14 +28,6 @@ object BasicCRFAnnotator { type Annotator = AbstractSequenceClassifier[CoreLabel] - def nerAnnotate[F[_]: BracketThrow]( - cacheKey: String, - cache: PipelineCache[F, Annotator] - )(settings: StanfordNerSettings, text: String): F[Vector[NerLabel]] = - cache - .obtain(cacheKey, settings) - .use(crf => Applicative[F].pure(nerAnnotate(crf)(text))) - def nerAnnotate(nerClassifier: Annotator)(text: String): Vector[NerLabel] = nerClassifier .classify(text) @@ -52,7 +42,7 @@ object BasicCRFAnnotator { }) .toVector - private def makeClassifier(lang: Language): Annotator = { + def makeAnnotator(lang: NLPLanguage): Annotator = { logger.info(s"Creating ${lang.name} Stanford NLP NER-only classifier...") val ner = classifierResource(lang) Using(new GZIPInputStream(ner.openStream())) { in => @@ -60,7 +50,7 @@ object BasicCRFAnnotator { }.fold(throw _, identity) } - private def classifierResource(lang: Language): URL = { + private def classifierResource(lang: NLPLanguage): URL = { def check(name: String): URL = Option(getClass.getResource(name)) match { case None => @@ -79,11 +69,11 @@ object BasicCRFAnnotator { } final class Cache { - private[this] lazy val germanNerClassifier = makeClassifier(Language.German) - private[this] lazy val englishNerClassifier = makeClassifier(Language.English) - private[this] lazy val frenchNerClassifier = makeClassifier(Language.French) + private[this] lazy val germanNerClassifier = makeAnnotator(Language.German) + private[this] lazy val englishNerClassifier = makeAnnotator(Language.English) + private[this] lazy val frenchNerClassifier = makeAnnotator(Language.French) - def forLang(language: Language): Annotator = + def forLang(language: NLPLanguage): Annotator = language match { case Language.French => frenchNerClassifier case Language.German => germanNerClassifier @@ -95,7 +85,7 @@ object BasicCRFAnnotator { private[this] val cacheRef = new AtomicReference[Cache](new Cache) - def getAnnotator(language: Language): Annotator = + def getAnnotator(language: NLPLanguage): Annotator = cacheRef.get().forLang(language) def clearCache(): Unit = diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/PipelineCache.scala 
b/modules/analysis/src/main/scala/docspell/analysis/nlp/PipelineCache.scala index 2b567548..3b38da22 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/PipelineCache.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/PipelineCache.scala @@ -3,14 +3,13 @@ package docspell.analysis.nlp import scala.concurrent.duration.{Duration => _, _} import cats.Applicative -import cats.data.Kleisli import cats.effect._ import cats.effect.concurrent.Ref import cats.implicits._ +import docspell.analysis.NlpSettings import docspell.common._ -import edu.stanford.nlp.pipeline.StanfordCoreNLP import org.log4s.getLogger /** Creating the StanfordCoreNLP pipeline is quite expensive as it @@ -20,58 +19,32 @@ import org.log4s.getLogger * * **This is an internal API** */ -trait PipelineCache[F[_], A] { +trait PipelineCache[F[_]] { - def obtain(key: String, settings: StanfordNerSettings): Resource[F, A] + def obtain(key: String, settings: NlpSettings): Resource[F, Annotator[F]] } object PipelineCache { private[this] val logger = getLogger - def none[F[_]: Applicative, A]( - creator: Kleisli[F, StanfordNerSettings, A] - ): PipelineCache[F, A] = - new PipelineCache[F, A] { - def obtain( - ignored: String, - settings: StanfordNerSettings - ): Resource[F, A] = - Resource.liftF(creator.run(settings)) - } - - def apply[F[_]: Concurrent: Timer, A](clearInterval: Duration)( - creator: StanfordNerSettings => A, + def apply[F[_]: Concurrent: Timer](clearInterval: Duration)( + creator: NlpSettings => Annotator[F], release: F[Unit] - ): F[PipelineCache[F, A]] = + ): F[PipelineCache[F]] = for { - data <- Ref.of(Map.empty[String, Entry[A]]) + data <- Ref.of(Map.empty[String, Entry[Annotator[F]]]) cacheClear <- CacheClearing.create(data, clearInterval, release) - } yield new Impl[F, A](data, creator, cacheClear) + _ <- Logger.log4s(logger).info("Creating nlp pipeline cache") + } yield new Impl[F](data, creator, cacheClear) - def full[F[_]: Concurrent: Timer]( - clearInterval: Duration - ): F[PipelineCache[F, StanfordCoreNLP]] = - apply(clearInterval)( - StanfordNerAnnotator.makePipeline, - StanfordNerAnnotator.clearPipelineCaches - ) - - def basic[F[_]: Concurrent: Timer]( - clearInterval: Duration - ): F[PipelineCache[F, BasicCRFAnnotator.Annotator]] = - apply(clearInterval)( - settings => BasicCRFAnnotator.Cache.getAnnotator(settings.lang), - Sync[F].delay(BasicCRFAnnotator.Cache.clearCache()) - ) - - final private class Impl[F[_]: Sync, A]( - data: Ref[F, Map[String, Entry[A]]], - creator: StanfordNerSettings => A, + final private class Impl[F[_]: Sync]( + data: Ref[F, Map[String, Entry[Annotator[F]]]], + creator: NlpSettings => Annotator[F], cacheClear: CacheClearing[F] - ) extends PipelineCache[F, A] { + ) extends PipelineCache[F] { - def obtain(key: String, settings: StanfordNerSettings): Resource[F, A] = + def obtain(key: String, settings: NlpSettings): Resource[F, Annotator[F]] = for { _ <- cacheClear.withCache id <- Resource.liftF(makeSettingsId(settings)) @@ -83,10 +56,10 @@ object PipelineCache { private def getOrCreate( key: String, id: String, - cache: Map[String, Entry[A]], - settings: StanfordNerSettings, - creator: StanfordNerSettings => A - ): (Map[String, Entry[A]], A) = + cache: Map[String, Entry[Annotator[F]]], + settings: NlpSettings, + creator: NlpSettings => Annotator[F] + ): (Map[String, Entry[Annotator[F]]], Annotator[F]) = cache.get(key) match { case Some(entry) => if (entry.id == id) (cache, entry.value) @@ -105,7 +78,7 @@ object PipelineCache { (cache.updated(key, e), nlp) 
} - private def makeSettingsId(settings: StanfordNerSettings): F[String] = { + private def makeSettingsId(settings: NlpSettings): F[String] = { val base = settings.copy(regexNer = None).toString val size: F[Long] = settings.regexNer match { diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala index 46a614d1..75fe9d36 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/Properties.scala @@ -1,9 +1,11 @@ package docspell.analysis.nlp +import java.nio.file.Path import java.util.{Properties => JProps} import docspell.analysis.nlp.Properties.Implicits._ import docspell.common._ +import docspell.common.syntax.FileSyntax._ object Properties { @@ -17,18 +19,21 @@ object Properties { p } - def forSettings(settings: StanfordNerSettings): JProps = { - val regexNerFile = settings.regexNer - .map(p => p.normalize().toAbsolutePath().toString()) - settings.lang match { - case Language.German => - Properties.nerGerman(regexNerFile, settings.highRecall) - case Language.English => - Properties.nerEnglish(regexNerFile) - case Language.French => - Properties.nerFrench(regexNerFile, settings.highRecall) + def forSettings(settings: StanfordNerSettings): JProps = + settings match { + case StanfordNerSettings.Full(lang, highRecall, regexNer) => + val regexNerFile = regexNer.map(p => p.absolutePathAsString) + lang match { + case Language.German => + Properties.nerGerman(regexNerFile, highRecall) + case Language.English => + Properties.nerEnglish(regexNerFile) + case Language.French => + Properties.nerFrench(regexNerFile, highRecall) + } + case StanfordNerSettings.RegexOnly(path) => + Properties.regexNerOnly(path) } - } def nerGerman(regexNerMappingFile: Option[String], highRecall: Boolean): JProps = Properties( @@ -76,6 +81,11 @@ object Properties { "ner.model" -> "edu/stanford/nlp/models/ner/french-wikiner-4class.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz" ).withRegexNer(regexNerMappingFile).withHighRecall(highRecall) + def regexNerOnly(regexNerMappingFile: Path): JProps = + Properties( + "annotators" -> "tokenize,ssplit" + ).withRegexNer(Some(regexNerMappingFile.absolutePathAsString)) + object Implicits { implicit final class JPropsOps(val p: JProps) extends AnyVal { diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala index 37b54b40..2ec4e802 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordNerAnnotator.scala @@ -1,8 +1,9 @@ package docspell.analysis.nlp +import java.nio.file.Path + import scala.jdk.CollectionConverters._ -import cats.Applicative import cats.effect._ import docspell.common._ @@ -24,24 +25,24 @@ object StanfordNerAnnotator { * a new classifier must be created. It will then replace the * previous one. 
   */
-  def nerAnnotate[F[_]: BracketThrow](
-      cacheKey: String,
-      cache: PipelineCache[F, StanfordCoreNLP]
-  )(settings: StanfordNerSettings, text: String): F[Vector[NerLabel]] =
-    cache
-      .obtain(cacheKey, settings)
-      .use(crf => Applicative[F].pure(nerAnnotate(crf, text)))
-
   def nerAnnotate(nerClassifier: StanfordCoreNLP, text: String): Vector[NerLabel] = {
     val doc = new CoreDocument(text)
     nerClassifier.annotate(doc)
     doc.tokens().asScala.collect(Function.unlift(LabelConverter.toNerLabel)).toVector
   }
 
-  def makePipeline(settings: StanfordNerSettings): StanfordCoreNLP = {
-    logger.info(s"Creating ${settings.lang.name} Stanford NLP NER classifier...")
-    new StanfordCoreNLP(Properties.forSettings(settings))
-  }
+  def makePipeline(settings: StanfordNerSettings): StanfordCoreNLP =
+    settings match {
+      case s: StanfordNerSettings.Full =>
+        logger.info(s"Creating ${s.lang.name} Stanford NLP NER classifier...")
+        new StanfordCoreNLP(Properties.forSettings(settings))
+      case StanfordNerSettings.RegexOnly(path) =>
+        logger.info(s"Creating regexNer-only Stanford NLP NER classifier...")
+        regexNerPipeline(path)
+    }
+
+  def regexNerPipeline(regexNerFile: Path): StanfordCoreNLP =
+    new StanfordCoreNLP(Properties.regexNerOnly(regexNerFile))
 
   def clearPipelineCaches[F[_]: Sync]: F[Unit] =
     Sync[F].delay {
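`makePipeline` now dispatches on the settings variant, so a pipeline without any statistical model becomes possible. A sketch of the RegexOnly path (the file path is hypothetical; the file format is Stanford's regexner TSV, as in the test further down):

```scala
import java.nio.file.Paths
import docspell.analysis.nlp.{StanfordNerAnnotator, StanfordNerSettings}

// Hypothetical mapping file; only tokenize/ssplit plus the regexner
// annotator run, per Properties.regexNerOnly.
val mappings = Paths.get("/tmp/names.txt")
val pipeline = StanfordNerAnnotator.makePipeline(StanfordNerSettings.RegexOnly(mappings))
val labels   = StanfordNerAnnotator.nerAnnotate(pipeline, "Hello Andrea Rossi, can you.")
```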
+ */ + case class Full( + lang: NLPLanguage, + highRecall: Boolean, + regexNer: Option[Path] + ) extends StanfordNerSettings + + /** Not all languages are supported with predefined statistical models. This allows to provide regexps only. + */ + case class RegexOnly(regexNerFile: Path) extends StanfordNerSettings + + def fromNlpSettings(ns: NlpSettings): Option[StanfordNerSettings] = + NLPLanguage.all + .find(nl => nl == ns.lang) + .map(nl => Full(nl, ns.highRecall, ns.regexNer)) + .orElse(ns.regexNer.map(nrf => RegexOnly(nrf))) +} diff --git a/modules/analysis/src/test/scala/docspell/analysis/nlp/BaseCRFAnnotatorSuite.scala b/modules/analysis/src/test/scala/docspell/analysis/nlp/BaseCRFAnnotatorSuite.scala index 0abab7e9..2f0cab57 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/nlp/BaseCRFAnnotatorSuite.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/nlp/BaseCRFAnnotatorSuite.scala @@ -1,12 +1,13 @@ package docspell.analysis.nlp +import docspell.common.Language.NLPLanguage import minitest.SimpleTestSuite import docspell.files.TestFiles import docspell.common._ object BaseCRFAnnotatorSuite extends SimpleTestSuite { - def annotate(language: Language): String => Vector[NerLabel] = + def annotate(language: NLPLanguage): String => Vector[NerLabel] = BasicCRFAnnotator.nerAnnotate(BasicCRFAnnotator.Cache.getAnnotator(language)) test("find english ner labels") { diff --git a/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala b/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala index 1704ef1b..416cdff7 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/nlp/StanfordNerAnnotatorSuite.scala @@ -1,8 +1,12 @@ package docspell.analysis.nlp +import java.nio.file.Paths + +import cats.effect.IO import minitest.SimpleTestSuite import docspell.files.TestFiles import docspell.common._ +import docspell.common.syntax.FileSyntax._ import edu.stanford.nlp.pipeline.StanfordCoreNLP object StanfordNerAnnotatorSuite extends SimpleTestSuite { @@ -68,4 +72,36 @@ object StanfordNerAnnotatorSuite extends SimpleTestSuite { assertEquals(labels, expect) StanfordCoreNLP.clearAnnotatorPool() } + + test("regexner-only annotator") { + val regexNerContent = + s"""(?i)volantino ag${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3 + |(?i)volantino${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3 + |(?i)ag${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3 + |(?i)andrea rossi${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2 + |(?i)andrea${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2 + |(?i)rossi${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2 + |""".stripMargin + + File + .withTempDir[IO](Paths.get("target"), "test-regex-ner") + .use { dir => + for { + out <- File.writeString[IO](dir / "regex.txt", regexNerContent) + ann = StanfordNerAnnotator.makePipeline(StanfordNerSettings.RegexOnly(out)) + labels = StanfordNerAnnotator.nerAnnotate(ann, "Hello Andrea Rossi, can you.") + _ <- IO( + assertEquals( + labels, + Vector( + NerLabel("Andrea", NerTag.Person, 6, 12), + NerLabel("Rossi", NerTag.Person, 13, 18) + ) + ) + ) + } yield () + } + .unsafeRunSync() + StanfordCoreNLP.clearAnnotatorPool() + } } diff --git a/modules/common/src/main/scala/docspell/common/Language.scala b/modules/common/src/main/scala/docspell/common/Language.scala index 92c32f4b..f18d4adf 100644 --- a/modules/common/src/main/scala/docspell/common/Language.scala +++ 
b/modules/common/src/main/scala/docspell/common/Language.scala @@ -1,5 +1,7 @@ package docspell.common +import cats.data.NonEmptyList + import io.circe.{Decoder, Encoder} sealed trait Language { self: Product => @@ -11,28 +13,41 @@ sealed trait Language { self: Product => def iso3: String + val allowsNLP: Boolean = false + private[common] def allNames = Set(name, iso3, iso2) } object Language { + sealed trait NLPLanguage extends Language with Product { + override val allowsNLP = true + } + object NLPLanguage { + val all: NonEmptyList[NLPLanguage] = NonEmptyList.of(German, English, French) + } - case object German extends Language { + case object German extends NLPLanguage { val iso2 = "de" val iso3 = "deu" } - case object English extends Language { + case object English extends NLPLanguage { val iso2 = "en" val iso3 = "eng" } - case object French extends Language { + case object French extends NLPLanguage { val iso2 = "fr" val iso3 = "fra" } - val all: List[Language] = List(German, English, French) + case object Italian extends Language { + val iso2 = "it" + val iso3 = "ita" + } + + val all: List[Language] = List(German, English, French, Italian) def fromString(str: String): Either[String, Language] = { val lang = str.toLowerCase diff --git a/modules/common/src/main/scala/docspell/common/NlpMode.scala b/modules/common/src/main/scala/docspell/common/NlpMode.scala index 36ebf7db..013b2275 100644 --- a/modules/common/src/main/scala/docspell/common/NlpMode.scala +++ b/modules/common/src/main/scala/docspell/common/NlpMode.scala @@ -6,16 +6,18 @@ sealed trait NlpMode { self: Product => self.productPrefix } object NlpMode { - case object Full extends NlpMode - case object Basic extends NlpMode - case object Disabled extends NlpMode + case object Full extends NlpMode + case object Basic extends NlpMode + case object RegexOnly extends NlpMode + case object Disabled extends NlpMode def fromString(name: String): Either[String, NlpMode] = name.toLowerCase match { - case "full" => Right(Full) - case "basic" => Right(Basic) - case "disabled" => Right(Disabled) - case _ => Left(s"Unknown nlp-mode: $name") + case "full" => Right(Full) + case "basic" => Right(Basic) + case "regexonly" => Right(RegexOnly) + case "disabled" => Right(Disabled) + case _ => Left(s"Unknown nlp-mode: $name") } def unsafeFromString(name: String): NlpMode = diff --git a/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala b/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala new file mode 100644 index 00000000..6eef143b --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/syntax/FileSyntax.scala @@ -0,0 +1,20 @@ +package docspell.common.syntax + +import java.nio.file.Path + +trait FileSyntax { + + implicit final class PathOps(p: Path) { + + def absolutePath: Path = + p.normalize().toAbsolutePath + + def absolutePathAsString: String = + absolutePath.toString + + def /(next: String): Path = + p.resolve(next) + } +} + +object FileSyntax extends FileSyntax diff --git a/modules/common/src/main/scala/docspell/common/syntax/package.scala b/modules/common/src/main/scala/docspell/common/syntax/package.scala index 77e17039..8d512741 100644 --- a/modules/common/src/main/scala/docspell/common/syntax/package.scala +++ b/modules/common/src/main/scala/docspell/common/syntax/package.scala @@ -2,6 +2,11 @@ package docspell.common package object syntax { - object all extends EitherSyntax with StreamSyntax with StringSyntax with LoggerSyntax + object all + extends EitherSyntax + with StreamSyntax + with 
StringSyntax + with LoggerSyntax + with FileSyntax } diff --git a/modules/files/src/test/resources/examples/letter-ita.txt b/modules/files/src/test/resources/examples/letter-ita.txt new file mode 100644 index 00000000..cca09122 --- /dev/null +++ b/modules/files/src/test/resources/examples/letter-ita.txt @@ -0,0 +1,13 @@ +Pontremoli, 9 aprile 2013 + +Spettabile Villa Albicocca +Via Francigena, 9 +55100 Pontetetto (LU) + +Oggetto: Prenotazione + +Gentile Direttore, + +Vorrei prenotare una camera matrimoniale ……. + +In attesa di una Sua pronta risposta, La saluto cordialmente diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala index 2306a44d..345f4665 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala @@ -24,6 +24,7 @@ object Field { val content_de = Field("content_de") val content_en = Field("content_en") val content_fr = Field("content_fr") + val content_it = Field("content_it") val itemName = Field("itemName") val itemNotes = Field("itemNotes") val folderId = Field("folder") @@ -36,6 +37,8 @@ object Field { Field.content_en case Language.French => Field.content_fr + case Language.Italian => + Field.content_it } implicit val jsonEncoder: Encoder[Field] = diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala index 1e3b09b3..0b7e6e31 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala @@ -40,6 +40,7 @@ object SolrQuery { Field.content_de, Field.content_en, Field.content_fr, + Field.content_it, Field.itemName, Field.itemNotes, Field.attachmentName diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala index 3deba577..769919bd 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala @@ -63,6 +63,12 @@ object SolrSetup { solrEngine, "Index all from database", FtsMigration.Result.indexAll.pure[F] + ), + FtsMigration[F]( + 7, + solrEngine, + "Add content_it field", + addContentItField.map(_ => FtsMigration.Result.reIndexAll) ) ) @@ -72,6 +78,9 @@ object SolrSetup { def addContentFrField: F[Unit] = addTextField(Some(Language.French))(Field.content_fr) + def addContentItField: F[Unit] = + addTextField(Some(Language.Italian))(Field.content_it) + def setupCoreSchema: F[Unit] = { val cmds0 = List( @@ -90,13 +99,15 @@ object SolrSetup { ) .traverse(addTextField(None)) - val cntLang = Language.all.traverse { + val cntLang = List(Language.German, Language.English, Language.French).traverse { case l @ Language.German => addTextField(l.some)(Field.content_de) case l @ Language.English => addTextField(l.some)(Field.content_en) case l @ Language.French => addTextField(l.some)(Field.content_fr) + case _ => + ().pure[F] } cmds0 *> cmds1 *> cntLang *> ().pure[F] @@ -125,6 +136,9 @@ object SolrSetup { case Some(Language.French) => run(DeleteField.command(DeleteField(field))).attempt *> run(AddField.command(AddField.textFR(field))) + case Some(Language.Italian) => + run(DeleteField.command(DeleteField(field))).attempt *> + run(AddField.command(AddField.textIT(field))) } } } @@ -161,6 +175,9 @@ object SolrSetup { def textFR(field: Field): AddField = AddField(field, "text_fr", 
true, true, false)
+
+    def textIT(field: Field): AddField =
+      AddField(field, "text_it", true, true, false)
   }
 
   case class DeleteField(name: Field)
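The reference.conf section below documents the new `nlp.mode` setting. The accepted strings map one-to-one onto `NlpMode.fromString` from this diff; a quick sketch:

```scala
import docspell.common.NlpMode

// The four values accepted below, per NlpMode.fromString.
List("full", "basic", "regexonly", "disabled").map(NlpMode.fromString)
// => List(Right(Full), Right(Basic), Right(RegexOnly), Right(Disabled))

NlpMode.fromString("regexner") // => Left("Unknown nlp-mode: regexner")
```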
diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf
index 583b40b1..a495ea5a 100644
--- a/modules/joex/src/main/resources/reference.conf
+++ b/modules/joex/src/main/resources/reference.conf
@@ -277,7 +277,39 @@ docspell.joex {
       # files.
       working-dir = ${java.io.tmpdir}"/docspell-analysis"
 
-    nlp-config {
+    nlp {
+      # The mode for configuring NLP models:
+      #
+      # 1. full – builds the complete pipeline
+      # 2. basic - builds only the ner annotator
+      # 3. regexonly - matches each entry in your address book via regexps
+      # 4. disabled - doesn't use any stanford-nlp feature
+      #
+      # The full and basic variants rely on pre-built language models
+      # that are available for only three languages at the moment:
+      # German, English and French.
+      #
+      # Memory usage varies greatly among the languages. German has
+      # quite large models that require about 1G heap. So joex should
+      # run with -Xmx1500M at least when using mode=full.
+      #
+      # The basic variant does a quite good job for German and
+      # English. It might be worse for French, always depending on the
+      # type of text that is analysed. Joex should run with about 600M
+      # heap; here again the German language uses the most.
+      #
+      # The regexonly variant doesn't depend on a language. It roughly
+      # works by converting all entries in your address book into
+      # regexps and matches each one against the text. This can get
+      # memory intensive, too, when the address book grows large. This
+      # is included in the full and basic modes by default, but can be
+      # used independently by setting mode=regexonly.
+      #
+      # When mode=disabled, the whole nlp pipeline is disabled and you
+      # won't get any suggestions. Only what the classifier returns
+      # (if enabled).
+      mode = full
+
       # The StanfordCoreNLP library caches language models which
       # requires quite some amount of memory. Setting this interval to a
       # positive duration, the cache is cleared after this amount of
@@ -287,37 +319,28 @@ docspell.joex {
       # This has only any effect, if mode != disabled.
       clear-interval = "15 minutes"
 
-      # The mode for configuring NLP models. Currently 3 are available:
-      #
-      # 1. full – builds the complete pipeline, run with -Xmx1500M or more
-      # 2. basic - builds only the ner annotator, run with -Xmx600M or more
-      # 3. disabled - doesn't use any stanford-nlp feature
-      #
-      # The basic variant does a quite good job for German and
-      # English. It might be worse for French, always depending on the
-      # type of text that is analysed.
-      mode = full
-    }
+      regex-ner {
+        # Whether to enable custom NER annotation. This uses the
+        # address book of a collective as input for NER tagging (to
+        # automatically find correspondent and concerned entities). If
+        # the address book is large, this can be quite memory
+        # intensive and also makes text analysis much slower. But it
+        # improves accuracy and can be used independently of the
+        # language. If this is set to 0, it is effectively disabled
+        # and NER tagging uses only statistical models (that also work
+        # quite well, but are restricted to the languages mentioned
+        # above).
+        #
+        # Note, this is only relevant if nlp.mode is not "disabled".
+        max-entries = 1000
 
-    regex-ner {
-      # Whether to enable custom NER annotation. This uses the address
-      # book of a collective as input for NER tagging (to automatically
-      # find correspondent and concerned entities). If the address book
-      # is large, this can be quite memory intensive and also makes text
-      # analysis slower. But it greatly improves accuracy. If this is
-      # false, NER tagging uses only statistical models (that also work
-      # quite well).
-      #
-      # This setting might be moved to the collective settings in the
-      # future.
-      #
-      # Note, this is only relevant if nlp-config.mode = full.
-      enabled = true
-
-      # The NER annotation uses a file of patterns that is derived from
-      # a collective's address book. This is is the time how long this
-      # file will be kept until a check for a state change is done.
-      file-cache-time = "1 minute"
+        # The NER annotation uses a file of patterns that is derived
+        # from a collective's address book. This determines how long
+        # this data will be kept until a check for a state change
+        # is done.
+        file-cache-time = "1 minute"
+      }
     }
 
     # Settings for doing document classification.
diff --git a/modules/joex/src/main/scala/docspell/joex/Config.scala b/modules/joex/src/main/scala/docspell/joex/Config.scala
index 5b2bccc5..4ad72d7c 100644
--- a/modules/joex/src/main/scala/docspell/joex/Config.scala
+++ b/modules/joex/src/main/scala/docspell/joex/Config.scala
@@ -60,15 +60,14 @@ object Config {
   case class TextAnalysis(
       maxLength: Int,
       workingDir: Path,
-      nlpConfig: TextAnalysisConfig.NlpConfig,
-      regexNer: RegexNer,
+      nlp: NlpConfig,
       classification: Classification
   ) {
 
     def textAnalysisConfig: TextAnalysisConfig =
       TextAnalysisConfig(
         maxLength,
-        nlpConfig,
+        TextAnalysisConfig.NlpConfig(nlp.clearInterval, nlp.mode),
         TextClassifierConfig(
           workingDir,
           NonEmptyList
@@ -78,10 +77,16 @@ object Config {
       )
 
     def regexNerFileConfig: RegexNerFile.Config =
-      RegexNerFile.Config(regexNer.enabled, workingDir, regexNer.fileCacheTime)
+      RegexNerFile.Config(
+        nlp.regexNer.maxEntries,
+        workingDir,
+        nlp.regexNer.fileCacheTime
+      )
   }
 
-  case class RegexNer(enabled: Boolean, fileCacheTime: Duration)
+  case class NlpConfig(mode: NlpMode, clearInterval: Duration, regexNer: RegexNer)
+
+  case class RegexNer(maxEntries: Int, fileCacheTime: Duration)
 
   case class Classification(
       enabled: Boolean,
diff --git a/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala b/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala
index 24e7f6ae..56e48012 100644
--- a/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala
+++ b/modules/joex/src/main/scala/docspell/joex/analysis/RegexNerFile.scala
@@ -29,7 +29,7 @@ trait RegexNerFile[F[_]] {
 object RegexNerFile {
   private[this] val logger = getLogger
 
-  case class Config(enabled: Boolean, directory: Path, minTime: Duration)
+  case class Config(maxEntries: Int, directory: Path, minTime: Duration)
 
   def apply[F[_]: Concurrent: ContextShift](
       cfg: Config,
@@ -49,7 +49,7 @@ object RegexNerFile {
   ) extends RegexNerFile[F] {
 
     def makeFile(collective: Ident): F[Option[Path]] =
-      if (cfg.enabled) doMakeFile(collective)
+      if (cfg.maxEntries > 0) doMakeFile(collective)
       else (None: Option[Path]).pure[F]
 
     def doMakeFile(collective: Ident): F[Option[Path]] =
@@ -127,7 +127,7 @@ object RegexNerFile {
       for {
         _ <-
           logger.finfo(s"Generating custom NER file for collective '${collective.id}'")
-        names <- store.transact(QCollective.allNames(collective))
+        names <- store.transact(QCollective.allNames(collective, cfg.maxEntries))
         nerFile = NerFile(collective, lastUpdate, now)
         _ <- update(nerFile, NerFile.mkNerConfig(names))
       } yield nerFile
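In the processing step below, these settings come together: the item's language seeds the `NlpSettings`, and `RegexNerFile.makeFile` contributes the address-book regexNer file (or `None` when `max-entries` is 0). A condensed sketch of that flow, with `text` standing in for the extracted attachment text:

```scala
// Condensed from TextAnalysis.annotateAttachment below.
val settings = NlpSettings(ctx.args.meta.language, false, None)
for {
  customNer <- nerFile.makeFile(ctx.args.meta.collective) // Option[Path]
  sett       = settings.copy(regexNer = customNer)
  result    <- analyser.annotate(ctx.logger, sett, ctx.args.meta.collective, text)
} yield result
```

diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala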
b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala index 1fd2401a..f336132d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala @@ -4,9 +4,8 @@ import cats.data.OptionT import cats.effect._ import cats.implicits._ -import docspell.analysis.TextAnalyser import docspell.analysis.classifier.{ClassifierModel, TextClassifier} -import docspell.analysis.nlp.StanfordNerSettings +import docspell.analysis.{NlpSettings, TextAnalyser} import docspell.common._ import docspell.joex.Config import docspell.joex.analysis.RegexNerFile @@ -54,7 +53,7 @@ object TextAnalysis { analyser: TextAnalyser[F], nerFile: RegexNerFile[F] )(rm: RAttachmentMeta): F[(RAttachmentMeta, AttachmentDates)] = { - val settings = StanfordNerSettings(ctx.args.meta.language, false, None) + val settings = NlpSettings(ctx.args.meta.language, false, None) for { customNer <- nerFile.makeFile(ctx.args.meta.collective) sett = settings.copy(regexNer = customNer) diff --git a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala index b9fe40c7..84caa840 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala @@ -1,10 +1,8 @@ package docspell.store.queries -import cats.data.OptionT import fs2.Stream -import docspell.common.ContactKind -import docspell.common.{Direction, Ident} +import docspell.common._ import docspell.store.qb.DSL._ import docspell.store.qb._ import docspell.store.records._ @@ -17,6 +15,7 @@ object QCollective { private val t = RTag.as("t") private val ro = ROrganization.as("o") private val rp = RPerson.as("p") + private val re = REquipment.as("e") private val rc = RContact.as("c") private val i = RItem.as("i") @@ -25,13 +24,37 @@ object QCollective { val empty = Names(Vector.empty, Vector.empty, Vector.empty) } - def allNames(collective: Ident): ConnectionIO[Names] = - (for { - orgs <- OptionT.liftF(ROrganization.findAllRef(collective, None, _.name)) - pers <- OptionT.liftF(RPerson.findAllRef(collective, None, _.name)) - equp <- OptionT.liftF(REquipment.findAll(collective, None, _.name)) - } yield Names(orgs.map(_.name), pers.map(_.name), equp.map(_.name))) - .getOrElse(Names.empty) + def allNames(collective: Ident, maxEntries: Int): ConnectionIO[Names] = { + val created = Column[Timestamp]("created", TableDef("")) + union( + Select( + select(ro.name.s, lit(1).as("kind"), ro.created.as(created)), + from(ro), + ro.cid === collective + ), + Select( + select(rp.name.s, lit(2).as("kind"), rp.created.as(created)), + from(rp), + rp.cid === collective + ), + Select( + select(re.name.s, lit(3).as("kind"), re.created.as(created)), + from(re), + re.cid === collective + ) + ).orderBy(created.desc) + .limit(Batch.limit(maxEntries)) + .build + .query[(String, Int)] + .streamWithChunkSize(maxEntries) + .fold(Names.empty) { case (names, (name, kind)) => + if (kind == 1) names.copy(org = names.org :+ name) + else if (kind == 2) names.copy(pers = names.pers :+ name) + else names.copy(equip = names.equip :+ name) + } + .compile + .lastOrError + } case class InsightData( incoming: Int, diff --git a/modules/webapp/src/main/elm/Data/Language.elm b/modules/webapp/src/main/elm/Data/Language.elm index 40fe5eb2..c7e04b7b 100644 --- a/modules/webapp/src/main/elm/Data/Language.elm +++ b/modules/webapp/src/main/elm/Data/Language.elm @@ -11,6 
+11,7 @@ type Language
     = German
     | English
    | French
+    | Italian
 
 
 fromString : String -> Maybe Language
@@ -24,6 +25,8 @@ fromString str =
     else if str == "fra" || str == "fr" || str == "french" then
         Just French
 
+    else if str == "ita" || str == "it" || str == "italian" then
+        Just Italian
+
     else
         Nothing
 
@@ -40,6 +43,9 @@ toIso3 lang =
         French ->
             "fra"
 
+        Italian ->
+            "ita"
+
 
 toName : Language -> String
 toName lang =
@@ -53,7 +59,10 @@ toName lang =
         French ->
             "French"
 
+        Italian ->
+            "Italian"
+
 
 all : List Language
 all =
-    [ German, English, French ]
+    [ German, English, French, Italian ]
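The Elm decoder must stay in sync with the Scala side, which matches against `allNames` (name, iso3 and iso2). A quick sketch against `Language.fromString` from this diff, assuming `name` is the lowercased constructor name:

```scala
import docspell.common.Language

// Same inputs the Elm fromString accepts; all should resolve server-side too.
List("ita", "it", "italian").map(Language.fromString)
// => List(Right(Italian), Right(Italian), Right(Italian))
```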
diff --git a/nix/module-joex.nix b/nix/module-joex.nix
index 373a6aed..aae8d835 100644
--- a/nix/module-joex.nix
+++ b/nix/module-joex.nix
@@ -98,9 +98,13 @@ let
       };
       text-analysis = {
         max-length = 10000;
-        regex-ner = {
-          enabled = true;
-          file-cache-time = "1 minute";
+        nlp = {
+          mode = "full";
+          clear-interval = "15 minutes";
+          regex-ner = {
+            max-entries = 1000;
+            file-cache-time = "1 minute";
+          };
         };
         classification = {
           enabled = true;
@@ -118,7 +122,6 @@ let
           ];
         };
         working-dir = "/tmp/docspell-analysis";
-        clear-stanford-nlp-interval = "15 minutes";
       };
       processing = {
         max-due-date-years = 10;
@@ -772,47 +775,96 @@ in {
                   files.
                 '';
               };
-              clear-stanford-nlp-interval = mkOption {
-                type = types.str;
-                default = defaults.text-analysis.clear-stanford-nlp-interval;
-                description = ''
-                  Idle time after which the NLP caches are cleared to free
-                  memory. If <= 0 clearing the cache is disabled.
-                '';
-              };
-              regex-ner = mkOption {
+              nlp = mkOption {
                 type = types.submodule({
                   options = {
-                    enabled = mkOption {
-                      type = types.bool;
-                      default = defaults.text-analysis.regex-ner.enabled;
+                    mode = mkOption {
+                      type = types.str;
+                      default = defaults.text-analysis.nlp.mode;
                       description = ''
-                        Whether to enable custom NER annotation. This uses the address
-                        book of a collective as input for NER tagging (to automatically
-                        find correspondent and concerned entities). If the address book
-                        is large, this can be quite memory intensive and also makes text
-                        analysis slower. But it greatly improves accuracy. If this is
-                        false, NER tagging uses only statistical models (that also work
-                        quite well).
-
-                        This setting might be moved to the collective settings in the
-                        future.
+                        The mode for configuring NLP models:
+
+                        1. full – builds the complete pipeline
+                        2. basic - builds only the ner annotator
+                        3. regexonly - matches each entry in your address book via regexps
+                        4. disabled - doesn't use any stanford-nlp feature
+
+                        The full and basic variants rely on pre-built language models
+                        that are available for only three languages at the moment:
+                        German, English and French.
+
+                        Memory usage varies greatly among the languages. German has
+                        quite large models that require about 1G heap. So joex should
+                        run with -Xmx1500M at least when using mode=full.
+
+                        The basic variant does a quite good job for German and
+                        English. It might be worse for French, always depending on the
+                        type of text that is analysed. Joex should run with about 600M
+                        heap; here again the German language uses the most.
+
+                        The regexonly variant doesn't depend on a language. It roughly
+                        works by converting all entries in your address book into
+                        regexps and matches each one against the text. This can get
+                        memory intensive, too, when the address book grows large. This
+                        is included in the full and basic modes by default, but can be
+                        used independently by setting mode=regexonly.
+
+                        When mode=disabled, the whole nlp pipeline is disabled and you
+                        won't get any suggestions. Only what the classifier returns
+                        (if enabled).
                       '';
                     };
-                    file-cache-time = mkOption {
+
+                    clear-interval = mkOption {
                       type = types.str;
-                      default = defaults.text-analysis.ner-file-cache-time;
+                      default = defaults.text-analysis.nlp.clear-interval;
                       description = ''
-                        The NER annotation uses a file of patterns that is derived from
-                        a collective's address book. This is is the time how long this
-                        file will be kept until a check for a state change is done.
+                        Idle time after which the NLP caches are cleared to free
+                        memory. If <= 0 clearing the cache is disabled.
                       '';
                     };
+
+                    regex-ner = mkOption {
+                      type = types.submodule({
+                        options = {
+                          max-entries = mkOption {
+                            type = types.int;
+                            default = defaults.text-analysis.nlp.regex-ner.max-entries;
+                            description = ''
+                              Whether to enable custom NER annotation. This uses the
+                              address book of a collective as input for NER tagging (to
+                              automatically find correspondent and concerned entities). If
+                              the address book is large, this can be quite memory
+                              intensive and also makes text analysis much slower. But it
+                              improves accuracy and can be used independently of the
+                              language. If this is set to 0, it is effectively disabled
+                              and NER tagging uses only statistical models (that also work
+                              quite well, but are restricted to the languages mentioned
+                              above).
+
+                              Note, this is only relevant if nlp.mode is not
+                              "disabled".
+                            '';
+                          };
+                          file-cache-time = mkOption {
+                            type = types.str;
+                            default = defaults.text-analysis.nlp.regex-ner.file-cache-time;
+                            description = ''
+                              The NER annotation uses a file of patterns that is derived
+                              from a collective's address book. This determines how long
+                              this file will be kept until a check for a state change is
+                              done.
+                            '';
+                          };
+                        };
+                      });
+                      default = defaults.text-analysis.nlp.regex-ner;
+                      description = "";
+                    };
                   };
                 });
-                default = defaults.text-analysis.regex-ner;
-                description = "";
+                default = defaults.text-analysis.nlp;
+                description = "Configure NLP";
               };
 
               classification = mkOption {