Mirror of https://github.com/TheAnachronism/docspell.git (synced 2025-06-22 02:18:26 +00:00)
Update scalafmt settings
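Every hunk below is a formatting-only change: column-aligned =, <- and -> tokens collapse to a single space, and short scaladoc comments fold onto one line. A plausible sketch of the corresponding .scalafmt.conf change (the option names are real scalafmt 3 settings, but the exact values are assumptions, not taken from the commit):

    # .scalafmt.conf (sketch; values are assumed)
    version = "3.0.0"          # assumed scalafmt 3 upgrade
    align.preset = none        # stop aligning =, <- and -> into columns
    docstrings.oneline = fold  # fold short "/** ... */" docs onto one line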
@@ -54,7 +54,7 @@ object TextAnalyser {
         tags0 <- stanfordNer(Nlp.Input(cacheKey, settings, logger, input))
         tags1 <- contactNer(input)
         dates <- dateNer(settings.lang, input)
-        list  = tags0 ++ tags1
+        list = tags0 ++ tags1
         spans = NerLabelSpan.build(list)
       } yield Result(spans ++ list, dates)
@@ -31,10 +31,10 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
       .withTempDir(cfg.workingDir, "trainclassifier")
       .use { dir =>
         for {
-          rawData   <- writeDataFile(dir, data)
-          _         <- logger.debug(s"Learning from ${rawData.count} items.")
+          rawData <- writeDataFile(dir, data)
+          _ <- logger.debug(s"Learning from ${rawData.count} items.")
           trainData <- splitData(logger, rawData)
-          scores    <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
+          scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
           sorted = scores.sortBy(-_.score)
           res <- handler(sorted.head.model)
         } yield res
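The traverse call above trains one candidate model per classifier config and collects every score before sortBy(-_.score) picks the winner (descending order, so the head is the best). A reduced sketch of that select-best step, using stand-in types that are not part of the diff:

    import cats.effect.IO
    import cats.syntax.traverse._

    // Stand-ins for the real config and score types (illustration only).
    final case class Config(name: String)
    final case class Scored(score: Double, model: String)

    // Train every candidate, then keep the highest-scoring model. Like the
    // original, this assumes at least one config (head throws on empty lists).
    def selectBest(train: Config => IO[Scored], configs: List[Config]): IO[Scored] =
      configs.traverse(train).map(_.sortBy(-_.score).head)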
@@ -77,7 +77,7 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
     } yield res

   def splitData(logger: Logger[F], in: RawData): F[TrainData] = {
-    val f     = if (cfg.classifierConfigs.size > 1) 0.15 else 0.0
+    val f = if (cfg.classifierConfigs.size > 1) 0.15 else 0.0
     val nTest = (in.count * f).toLong

     val td =
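For example, with more than one classifier config and 1000 training items this gives f = 0.15 and nTest = 150; with a single config, f = 0.0 and no items are held out for testing.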
@@ -142,8 +142,8 @@ final class StanfordTextClassifier[F[_]: Async](cfg: TextClassifierConfig)
       props: Map[String, String]
   ): Map[String, String] =
     prepend("2.", props) ++ Map(
-      "trainFile"   -> trainData.train.absolutePathAsString,
-      "testFile"    -> trainData.test.absolutePathAsString,
+      "trainFile" -> trainData.train.absolutePathAsString,
+      "testFile" -> trainData.test.absolutePathAsString,
       "serializeTo" -> trainData.modelFile.absolutePathAsString
     ).toList
@@ -33,7 +33,7 @@ object Contact {
     if (atIdx <= 0 || str.indexOf('@', atIdx + 1) > 0) false
     else {
       val name = str.substring(0, atIdx)
-      val dom  = str.substring(atIdx + 1)
+      val dom = str.substring(atIdx + 1)
       Domain.isDomain(dom) && name.forall(c => !c.isWhitespace)
     }
   }
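Read in isolation, the check rejects a missing or leading '@' and any second '@', then validates the domain part and a whitespace-free local part. A standalone sketch of the same logic (Domain.isDomain is passed in as a function here because its implementation is not part of this hunk):

    // Sketch of the check above; isDomain stands in for Domain.isDomain.
    def isMailAddress(str: String, isDomain: String => Boolean): Boolean = {
      val atIdx = str.indexOf('@')
      // "host", "@host" and "a@b@c" are all rejected up front
      if (atIdx <= 0 || str.indexOf('@', atIdx + 1) > 0) false
      else {
        val name = str.substring(0, atIdx)
        val dom = str.substring(atIdx + 1)
        isDomain(dom) && name.forall(c => !c.isWhitespace)
      }
    }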
@@ -14,8 +14,7 @@ private[analysis] object Tld {
   def endsWithTld(str: String): Boolean =
     findTld(str).isDefined

-  /** Some selected TLDs.
-    */
+  /** Some selected TLDs. */
   private[this] val known = List(
     ".com",
     ".org",
@@ -177,17 +177,17 @@ object DateFind {

   object Result {
     final case class Success[A](value: A, rest: List[Word]) extends Result[A] {
-      val toOption                                 = Some(value)
+      val toOption = Some(value)
       def flatMap[B](f: A => Result[B]): Result[B] = f(value)
-      def map[B](f: A => B): Result[B]             = Success(f(value), rest)
+      def map[B](f: A => B): Result[B] = Success(f(value), rest)
       def next[B](r: Reader[B]): Result[(A, B)] =
         r.read(rest).map(b => (value, b))
     }
     final case object Failure extends Result[Nothing] {
-      val toOption                                        = None
+      val toOption = None
       def flatMap[B](f: Nothing => Result[B]): Result[B] = this
-      def map[B](f: Nothing => B): Result[B]              = this
-      def next[B](r: Reader[B]): Result[(Nothing, B)]     = this
+      def map[B](f: Nothing => B): Result[B] = this
+      def next[B](r: Reader[B]): Result[(Nothing, B)] = this
     }

     implicit def resultSemigroup[A: Semigroup]: Semigroup[Result[A]] =
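Success and Failure form a minimal parser-combinator result over a word list: map and flatMap thread the unconsumed words along, and next sequences a second reader. A hypothetical composition based only on the signatures visible above (assuming Reader[B] exposes read(List[Word]): Result[B], as the call r.read(rest) suggests):

    // Hypothetical helper: run ra, then run rb on whatever words remain.
    def both[A, B](ra: Reader[A], rb: Reader[B])(words: List[Word]): Result[(A, B)] =
      ra.read(words).next(rb)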
@@ -74,9 +74,9 @@ object BasicCRFAnnotator {
   }

   final class Cache {
-    private[this] lazy val germanNerClassifier  = makeAnnotator(Language.German)
+    private[this] lazy val germanNerClassifier = makeAnnotator(Language.German)
     private[this] lazy val englishNerClassifier = makeAnnotator(Language.English)
-    private[this] lazy val frenchNerClassifier  = makeAnnotator(Language.French)
+    private[this] lazy val frenchNerClassifier = makeAnnotator(Language.French)

     def forLang(language: NLPLanguage): Annotator =
       language match {
@@ -38,9 +38,9 @@ object PipelineCache {
       release: F[Unit]
   ): F[PipelineCache[F]] =
     for {
-      data       <- Ref.of(Map.empty[String, Entry[Annotator[F]]])
+      data <- Ref.of(Map.empty[String, Entry[Annotator[F]]])
       cacheClear <- CacheClearing.create(data, clearInterval, release)
-      _          <- Logger.log4s(logger).info("Creating nlp pipeline cache")
+      _ <- Logger.log4s(logger).info("Creating nlp pipeline cache")
     } yield new Impl[F](data, creator, cacheClear)

   final private class Impl[F[_]: Async](
@@ -51,7 +51,7 @@ object PipelineCache {

     def obtain(key: String, settings: NlpSettings): Resource[F, Annotator[F]] =
       for {
-        _   <- cacheClear.withCache
+        _ <- cacheClear.withCache
         id <- Resource.eval(makeSettingsId(settings))
         nlp <- Resource.eval(
           data.modify(cache => getOrCreate(key, id, cache, settings, creator))
@@ -73,13 +73,13 @@ object PipelineCache {
             s"StanfordNLP settings changed for key $key. Creating new classifier"
           )
           val nlp = creator(settings)
-          val e   = Entry(id, nlp)
+          val e = Entry(id, nlp)
           (cache.updated(key, e), nlp)
         }

       case None =>
         val nlp = creator(settings)
-        val e   = Entry(id, nlp)
+        val e = Entry(id, nlp)
         (cache.updated(key, e), nlp)
     }
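Both branches run inside data.modify, so the lookup-or-create is atomic on the Ref. A reduced sketch of the same pattern outside this file (cats-effect 3 API; names simplified, not the file's exact code):

    import cats.effect.{IO, Ref}

    // Atomic get-or-create on a Ref-backed map: modify returns the updated
    // map together with the value handed back to the caller.
    def getOrCreate[V](ref: Ref[IO, Map[String, V]], key: String)(mk: => V): IO[V] =
      ref.modify { m =>
        m.get(key) match {
          case Some(v) => (m, v)
          case None =>
            val v = mk
            (m.updated(key, v), v)
        }
      }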
@@ -114,7 +114,7 @@ object PipelineCache {
       release: F[Unit]
   ): F[CacheClearing[F]] =
     for {
-      counter  <- Ref.of(0L)
+      counter <- Ref.of(0L)
       cleaning <- Ref.of(None: Option[Fiber[F, Throwable, Unit]])
       log = Logger.log4s(logger)
       result <-
@@ -44,47 +44,47 @@ object Properties {

   def nerGerman(regexNerMappingFile: Option[String], highRecall: Boolean): JProps =
     Properties(
-      "annotators" -> "tokenize,ssplit,mwt,pos,lemma,ner",
-      "tokenize.language" -> "de",
-      "mwt.mappingFile" -> "edu/stanford/nlp/models/mwt/german/german-mwt.tsv",
-      "pos.model" -> "edu/stanford/nlp/models/pos-tagger/german-ud.tagger",
-      "ner.statisticalOnly" -> "true",
-      "ner.rulesOnly" -> "false",
+      "annotators" -> "tokenize,ssplit,mwt,pos,lemma,ner",
+      "tokenize.language" -> "de",
+      "mwt.mappingFile" -> "edu/stanford/nlp/models/mwt/german/german-mwt.tsv",
+      "pos.model" -> "edu/stanford/nlp/models/pos-tagger/german-ud.tagger",
+      "ner.statisticalOnly" -> "true",
+      "ner.rulesOnly" -> "false",
       "ner.applyFineGrained" -> "false",
       "ner.applyNumericClassifiers" -> "false", //only english supported, not needed currently
       "ner.useSUTime" -> "false", //only english, unused in docspell
-      "ner.language" -> "de",
+      "ner.language" -> "de",
       "ner.model" -> "edu/stanford/nlp/models/ner/german.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz"
     ).withRegexNer(regexNerMappingFile).withHighRecall(highRecall)

   def nerEnglish(regexNerMappingFile: Option[String]): JProps =
     Properties(
-      "annotators" -> "tokenize,ssplit,pos,lemma,ner",
+      "annotators" -> "tokenize,ssplit,pos,lemma,ner",
       "tokenize.language" -> "en",
       "pos.model" -> "edu/stanford/nlp/models/pos-tagger/english-left3words-distsim.tagger",
-      "ner.statisticalOnly" -> "true",
-      "ner.rulesOnly" -> "false",
-      "ner.applyFineGrained" -> "false",
+      "ner.statisticalOnly" -> "true",
+      "ner.rulesOnly" -> "false",
+      "ner.applyFineGrained" -> "false",
       "ner.applyNumericClassifiers" -> "false",
-      "ner.useSUTime" -> "false",
-      "ner.language" -> "en",
+      "ner.useSUTime" -> "false",
+      "ner.language" -> "en",
       "ner.model" -> "edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz"
     ).withRegexNer(regexNerMappingFile)

   def nerFrench(regexNerMappingFile: Option[String], highRecall: Boolean): JProps =
     Properties(
-      "annotators" -> "tokenize,ssplit,mwt,pos,lemma,ner",
+      "annotators" -> "tokenize,ssplit,mwt,pos,lemma,ner",
       "tokenize.language" -> "fr",
-      "mwt.mappingFile" -> "edu/stanford/nlp/models/mwt/french/french-mwt.tsv",
-      "mwt.pos.model" -> "edu/stanford/nlp/models/mwt/french/french-mwt.tagger",
+      "mwt.mappingFile" -> "edu/stanford/nlp/models/mwt/french/french-mwt.tsv",
+      "mwt.pos.model" -> "edu/stanford/nlp/models/mwt/french/french-mwt.tagger",
       "mwt.statisticalMappingFile" -> "edu/stanford/nlp/models/mwt/french/french-mwt-statistical.tsv",
-      "pos.model" -> "edu/stanford/nlp/models/pos-tagger/french-ud.tagger",
-      "ner.statisticalOnly" -> "true",
-      "ner.rulesOnly" -> "false",
+      "pos.model" -> "edu/stanford/nlp/models/pos-tagger/french-ud.tagger",
+      "ner.statisticalOnly" -> "true",
+      "ner.rulesOnly" -> "false",
       "ner.applyFineGrained" -> "false",
       "ner.applyNumericClassifiers" -> "false",
-      "ner.useSUTime" -> "false",
-      "ner.language" -> "de",
+      "ner.useSUTime" -> "false",
+      "ner.language" -> "de",
       "ner.model" -> "edu/stanford/nlp/models/ner/french-wikiner-4class.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz"
     ).withRegexNer(regexNerMappingFile).withHighRecall(highRecall)
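These property sets are presumably handed to CoreNLP to build the annotation pipeline; the call site is not part of this diff. A sketch, assuming JProps is an alias for java.util.Properties (the standard StanfordCoreNLP constructor takes one):

    import edu.stanford.nlp.pipeline.StanfordCoreNLP

    // Hypothetical: build a German NER pipeline from the properties above.
    val german = new StanfordCoreNLP(Properties.nerGerman(None, highRecall = false))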
@@ -8,15 +8,14 @@ package docspell.analysis.split

 import fs2.Stream

-/** Splits text into words.
-  */
+/** Splits text into words. */
 object TextSplitter {
   private[this] val trimChars =
     ".,…_[]^!<>=&ſ/{}*?()-:#$|~`+%\\\"'; \t\r\n".toSet

   def split[F[_]](str: String, sep: Set[Char], start: Int = 0): Stream[F, Word] = {
     val indexes = sep.map(c => str.indexOf(c.toInt)).filter(_ >= 0)
-    val index   = if (indexes.isEmpty) -1 else indexes.min
+    val index = if (indexes.isEmpty) -1 else indexes.min

     if (index < 0) Stream.emit(Word(str, start, start + str.length))
     else if (index == 0) split(str.substring(1), sep, start + 1)
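A small usage sketch (a pure fs2 stream, so no effect type is needed; the expected output assumes the truncated else branch emits the prefix word and recurses past the separator):

    import fs2.Pure

    // Split on space and comma; begin/end are offsets into the input string.
    val words = TextSplitter.split[Pure]("Hello, world", Set(' ', ',')).toList
    // likely: List(Word("Hello", 0, 5), Word("world", 7, 12))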
@@ -7,9 +7,9 @@
 package docspell.analysis.split

 case class Word(value: String, begin: Int, end: Int) {
-  def isEmpty: Boolean  = value.isEmpty
+  def isEmpty: Boolean = value.isEmpty
   def nonEmpty: Boolean = !isEmpty
-  def length: Int       = value.length
+  def length: Int = value.length

   def trimLeft(chars: Set[Char]): Word = {
     val v = value.dropWhile(chars.contains)
@@ -91,19 +91,19 @@ class StanfordNerAnnotatorSuite extends FunSuite {

     val regexNerContent =
       s"""(?i)volantino ag${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3
-         |(?i)volantino${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3
-         |(?i)ag${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3
-         |(?i)andrea rossi${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2
-         |(?i)andrea${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2
-         |(?i)rossi${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2
-         |""".stripMargin
+         |(?i)volantino${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3
+         |(?i)ag${"\t"}ORGANIZATION${"\t"}LOCATION,PERSON,MISC${"\t"}3
+         |(?i)andrea rossi${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2
+         |(?i)andrea${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2
+         |(?i)rossi${"\t"}PERSON${"\t"}LOCATION,MISC${"\t"}2
+         |""".stripMargin

     File
       .withTempDir[IO](File.path(Paths.get("target")), "test-regex-ner")
       .use { dir =>
         for {
           out <- File.writeString[IO](dir / "regex.txt", regexNerContent)
-          ann    = StanfordNerAnnotator.makePipeline(StanfordNerSettings.RegexOnly(out))
+          ann = StanfordNerAnnotator.makePipeline(StanfordNerSettings.RegexOnly(out))
           labels = StanfordNerAnnotator.nerAnnotate(ann, "Hello Andrea Rossi, can you.")
           _ <- IO(
             assertEquals(