From f02f15e5bd1e3ebac255b6c2b1a10f1b69c4199c Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Wed, 13 Jan 2021 21:26:44 +0100 Subject: [PATCH] Move blocker into constructor of text analyser --- .../main/scala/docspell/analysis/TextAnalyser.scala | 11 +++++------ .../src/main/scala/docspell/joex/JoexAppImpl.scala | 2 +- .../docspell/joex/learn/LearnClassifierTask.scala | 3 +-- .../scala/docspell/joex/process/TextAnalysis.scala | 2 +- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala b/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala index 8ec4854e..c319b784 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/TextAnalyser.scala @@ -21,7 +21,7 @@ trait TextAnalyser[F[_]] { text: String ): F[TextAnalyser.Result] - def classifier(blocker: Blocker)(implicit CS: ContextShift[F]): TextClassifier[F] + def classifier: TextClassifier[F] } object TextAnalyser { @@ -31,8 +31,9 @@ object TextAnalyser { labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString)) } - def create[F[_]: Concurrent: Timer]( - cfg: TextAnalysisConfig + def create[F[_]: Concurrent: Timer: ContextShift]( + cfg: TextAnalysisConfig, + blocker: Blocker ): Resource[F, TextAnalyser[F]] = Resource .liftF(PipelineCache[F](cfg.clearStanfordPipelineInterval)) @@ -53,9 +54,7 @@ object TextAnalyser { spans = NerLabelSpan.build(list) } yield Result(spans ++ list, dates) - def classifier(blocker: Blocker)(implicit - CS: ContextShift[F] - ): TextClassifier[F] = + def classifier: TextClassifier[F] = new StanfordTextClassifier[F](cfg.classifier, blocker) private def textLimit(logger: Logger[F], text: String): F[String] = diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index 51fed2bc..cdbb5a50 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -97,7 +97,7 @@ object JoexAppImpl { upload <- OUpload(store, queue, cfg.files, joex) fts <- createFtsClient(cfg)(httpClient) itemOps <- OItem(store, fts, queue, joex) - analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig) + analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig, blocker) regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, blocker, store) javaEmil = JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug)) diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala index c3d6e3f9..535b7f0d 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala @@ -41,8 +41,7 @@ object LearnClassifierTask { sett.category.getOrElse("") ) _ <- OptionT.liftF( - analyser - .classifier(blocker) + analyser.classifier .trainClassifier[Unit](ctx.logger, data)(Kleisli(handleModel(ctx, blocker))) ) } yield ()) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala index 6864b390..4a868d47 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala @@ -44,7 +44,7 @@ object TextAnalysis { e <- s _ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}") v = t.toVector - tag <- predictTag(ctx, cfg, item.metas, analyser.classifier(ctx.blocker)).value + tag <- predictTag(ctx, cfg, item.metas, analyser.classifier).value } yield item .copy(metas = v.map(_._1), dateLabels = v.map(_._2)) .appendTags(tag.toSeq)