mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-07 15:45:59 +00:00
Move blocker into constructor of text analyser
This commit is contained in:
parent
ffbec3502f
commit
f02f15e5bd
@ -21,7 +21,7 @@ trait TextAnalyser[F[_]] {
|
|||||||
text: String
|
text: String
|
||||||
): F[TextAnalyser.Result]
|
): F[TextAnalyser.Result]
|
||||||
|
|
||||||
def classifier(blocker: Blocker)(implicit CS: ContextShift[F]): TextClassifier[F]
|
def classifier: TextClassifier[F]
|
||||||
}
|
}
|
||||||
object TextAnalyser {
|
object TextAnalyser {
|
||||||
|
|
||||||
@ -31,8 +31,9 @@ object TextAnalyser {
|
|||||||
labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString))
|
labels ++ dates.map(dl => dl.label.copy(label = dl.date.toString))
|
||||||
}
|
}
|
||||||
|
|
||||||
def create[F[_]: Concurrent: Timer](
|
def create[F[_]: Concurrent: Timer: ContextShift](
|
||||||
cfg: TextAnalysisConfig
|
cfg: TextAnalysisConfig,
|
||||||
|
blocker: Blocker
|
||||||
): Resource[F, TextAnalyser[F]] =
|
): Resource[F, TextAnalyser[F]] =
|
||||||
Resource
|
Resource
|
||||||
.liftF(PipelineCache[F](cfg.clearStanfordPipelineInterval))
|
.liftF(PipelineCache[F](cfg.clearStanfordPipelineInterval))
|
||||||
@ -53,9 +54,7 @@ object TextAnalyser {
|
|||||||
spans = NerLabelSpan.build(list)
|
spans = NerLabelSpan.build(list)
|
||||||
} yield Result(spans ++ list, dates)
|
} yield Result(spans ++ list, dates)
|
||||||
|
|
||||||
def classifier(blocker: Blocker)(implicit
|
def classifier: TextClassifier[F] =
|
||||||
CS: ContextShift[F]
|
|
||||||
): TextClassifier[F] =
|
|
||||||
new StanfordTextClassifier[F](cfg.classifier, blocker)
|
new StanfordTextClassifier[F](cfg.classifier, blocker)
|
||||||
|
|
||||||
private def textLimit(logger: Logger[F], text: String): F[String] =
|
private def textLimit(logger: Logger[F], text: String): F[String] =
|
||||||
|
@ -97,7 +97,7 @@ object JoexAppImpl {
|
|||||||
upload <- OUpload(store, queue, cfg.files, joex)
|
upload <- OUpload(store, queue, cfg.files, joex)
|
||||||
fts <- createFtsClient(cfg)(httpClient)
|
fts <- createFtsClient(cfg)(httpClient)
|
||||||
itemOps <- OItem(store, fts, queue, joex)
|
itemOps <- OItem(store, fts, queue, joex)
|
||||||
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
|
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig, blocker)
|
||||||
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, blocker, store)
|
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, blocker, store)
|
||||||
javaEmil =
|
javaEmil =
|
||||||
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
||||||
|
@ -41,8 +41,7 @@ object LearnClassifierTask {
|
|||||||
sett.category.getOrElse("")
|
sett.category.getOrElse("")
|
||||||
)
|
)
|
||||||
_ <- OptionT.liftF(
|
_ <- OptionT.liftF(
|
||||||
analyser
|
analyser.classifier
|
||||||
.classifier(blocker)
|
|
||||||
.trainClassifier[Unit](ctx.logger, data)(Kleisli(handleModel(ctx, blocker)))
|
.trainClassifier[Unit](ctx.logger, data)(Kleisli(handleModel(ctx, blocker)))
|
||||||
)
|
)
|
||||||
} yield ())
|
} yield ())
|
||||||
|
@ -44,7 +44,7 @@ object TextAnalysis {
|
|||||||
e <- s
|
e <- s
|
||||||
_ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}")
|
_ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}")
|
||||||
v = t.toVector
|
v = t.toVector
|
||||||
tag <- predictTag(ctx, cfg, item.metas, analyser.classifier(ctx.blocker)).value
|
tag <- predictTag(ctx, cfg, item.metas, analyser.classifier).value
|
||||||
} yield item
|
} yield item
|
||||||
.copy(metas = v.map(_._1), dateLabels = v.map(_._2))
|
.copy(metas = v.map(_._1), dateLabels = v.map(_._2))
|
||||||
.appendTags(tag.toSeq)
|
.appendTags(tag.toSeq)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user