Adapt to new logging API
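The commit swaps log4s-based logging (org.log4s.getLogger, wrapped at each call site via Logger.log4s) for docspell's own logging module. Three names recur in the hunks below: docspell.logging.Logger (the logger type), docspell.logging.getLogger[F] (an effectful logger whose methods return F[Unit]), and docspell.logging.unsafeLogger (a side-effecting logger for object-level code). A minimal sketch of that surface, with signatures inferred from the call sites in this diff rather than taken from the real module:

import cats.Id
import cats.effect.Sync

// Sketch only: the real docspell.logging API has more levels and structure.
trait Logger[F[_]] {
  def info(msg: => String): F[Unit]
}

object logging {
  // Effectful: each log call is a value in F, composable with *> and for-comprehensions.
  def getLogger[F[_]: Sync]: Logger[F] =
    new Logger[F] {
      def info(msg: => String): F[Unit] = Sync[F].delay(println(s"INFO $msg"))
    }

  // Side-effecting: logs immediately; for code that runs outside any effect.
  def unsafeLogger: Logger[Id] =
    new Logger[Id] {
      def info(msg: => String): Unit = println(s"INFO $msg")
    }
}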
@@ -15,8 +15,7 @@ import docspell.analysis.contact.Contact
 import docspell.analysis.date.DateFind
 import docspell.analysis.nlp._
 import docspell.common._
-
-import org.log4s.getLogger
+import docspell.logging.Logger
 
 trait TextAnalyser[F[_]] {
 
@@ -30,7 +29,6 @@ trait TextAnalyser[F[_]] {
   def classifier: TextClassifier[F]
 }
 
 object TextAnalyser {
-  private[this] val logger = getLogger
 
   case class Result(labels: Vector[NerLabel], dates: Vector[NerDateLabel]) {
@@ -87,10 +85,11 @@
   private object Nlp {
     def apply[F[_]: Async](
         cfg: TextAnalysisConfig.NlpConfig
-    ): F[Input[F] => F[Vector[NerLabel]]] =
+    ): F[Input[F] => F[Vector[NerLabel]]] = {
+      val log = docspell.logging.getLogger[F]
       cfg.mode match {
         case NlpMode.Disabled =>
-          Logger.log4s(logger).info("NLP is disabled as defined in config.") *>
+          log.info("NLP is disabled as defined in config.") *>
             Applicative[F].pure(_ => Vector.empty[NerLabel].pure[F])
         case _ =>
           PipelineCache(cfg.clearInterval)(
@@ -99,6 +98,7 @@
           )
             .map(annotate[F])
       }
+    }
 
     final case class Input[F[_]](
         key: Ident,
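In the NlpMode.Disabled branch above, log.info(...) returns F[Unit] and so composes with *> just as the old Logger.log4s adapter did: sequencing F[Unit] *> F[A] yields F[A]. A self-contained illustration of the same shape; every name in it is invented for the example:

import cats.Applicative
import cats.effect.{IO, IOApp, Sync}
import cats.syntax.all._

object DisabledMode extends IOApp.Simple {
  // stand-in for the logger obtained via docspell.logging.getLogger[F]
  def info[F[_]: Sync](msg: String): F[Unit] = Sync[F].delay(println(msg))

  // same shape as Nlp.apply for NlpMode.Disabled: log once, then return a
  // function that always yields an empty result inside F
  def disabled[F[_]: Sync]: F[String => F[Vector[String]]] =
    info[F]("NLP is disabled as defined in config.") *>
      Applicative[F].pure(_ => Vector.empty[String].pure[F])

  def run: IO[Unit] =
    disabled[IO].flatMap(annotate => annotate("some text")).flatMap(r => IO(println(r)))
}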
@@ -17,6 +17,7 @@ import docspell.analysis.classifier.TextClassifier._
 import docspell.analysis.nlp.Properties
 import docspell.common._
 import docspell.common.syntax.FileSyntax._
+import docspell.logging.Logger
 
 import edu.stanford.nlp.classify.ColumnDataClassifier
 
@@ -10,7 +10,7 @@ import cats.data.Kleisli
 import fs2.Stream
 
 import docspell.analysis.classifier.TextClassifier.Data
 import docspell.common._
+import docspell.logging.Logger
 
 trait TextClassifier[F[_]] {
 
@@ -12,6 +12,7 @@ import cats.{Applicative, FlatMap}
 
 import docspell.analysis.NlpSettings
 import docspell.common._
+import docspell.logging.Logger
 
 import edu.stanford.nlp.pipeline.StanfordCoreNLP
 
@@ -19,14 +19,13 @@ import docspell.common._
 import edu.stanford.nlp.ie.AbstractSequenceClassifier
 import edu.stanford.nlp.ie.crf.CRFClassifier
 import edu.stanford.nlp.ling.{CoreAnnotations, CoreLabel}
-import org.log4s.getLogger
 
 /** This is only using the CRFClassifier without building an analysis pipeline. The
   * ner-classifier cannot use results from POS-tagging etc. and is therefore not as good
   * as the [[StanfordNerAnnotator]]. But it uses less memory, while still being not bad.
   */
 object BasicCRFAnnotator {
-  private[this] val logger = getLogger
+  private[this] val logger = docspell.logging.unsafeLogger
 
   // assert correct resource names
   NLPLanguage.all.toList.foreach(classifierResource)
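BasicCRFAnnotator takes the unsafe logger because its logging happens in plain, non-F code paths; the object initializer, for instance, runs at class load, where no F[Unit] can be sequenced. A standalone sketch of the distinction, with invented logger stand-ins:

import cats.effect.Sync
import cats.syntax.all._

object AnnotatorLike {
  // unsafe style: prints immediately; fine for one-off init-time diagnostics
  private[this] val logInfo: String => Unit = msg => println(s"INFO $msg")
  logInfo("checking classifier resources") // runs when the object is first touched

  // effectful style: the log line is deferred into F with the rest of the work
  def annotate[F[_]: Sync](text: String): F[Vector[String]] = {
    val info: String => F[Unit] = msg => Sync[F].delay(println(s"INFO $msg"))
    info(s"annotating ${text.length} chars") *> Sync[F].pure(Vector.empty[String])
  }
}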
@@ -15,8 +15,6 @@ import cats.implicits._
 import docspell.analysis.NlpSettings
 import docspell.common._
-
-import org.log4s.getLogger
 
 /** Creating the StanfordCoreNLP pipeline is quite expensive as it involves IO and
   * initializing large objects.
   *
@@ -31,17 +29,19 @@ trait PipelineCache[F[_]] {
 }
 
 object PipelineCache {
-  private[this] val logger = getLogger
+  private[this] val logger = docspell.logging.unsafeLogger
 
   def apply[F[_]: Async](clearInterval: Duration)(
       creator: NlpSettings => Annotator[F],
       release: F[Unit]
-  ): F[PipelineCache[F]] =
+  ): F[PipelineCache[F]] = {
+    val log = docspell.logging.getLogger[F]
     for {
       data <- Ref.of(Map.empty[String, Entry[Annotator[F]]])
       cacheClear <- CacheClearing.create(data, clearInterval, release)
-      _ <- Logger.log4s(logger).info("Creating nlp pipeline cache")
+      _ <- log.info("Creating nlp pipeline cache")
     } yield new Impl[F](data, creator, cacheClear)
+  }
 
   final private class Impl[F[_]: Async](
       data: Ref[F, Map[String, Entry[Annotator[F]]]],
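The apply change above follows a single shape: the expression body becomes a block, an effectful logger is obtained once up front, and the log line joins the for-comprehension as an ordinary F[Unit] step. A minimal standalone sketch of that shape; CacheLike and all of its members are invented for the illustration:

import cats.Functor
import cats.effect.{Async, Ref}
import cats.syntax.all._

final class CacheLike[F[_]: Functor] private (data: Ref[F, Map[String, Int]]) {
  def get(key: String): F[Option[Int]] = data.get.map(_.get(key))
}

object CacheLike {
  def apply[F[_]: Async]: F[CacheLike[F]] = {
    val logInfo: String => F[Unit] = msg => Async[F].delay(println(s"INFO $msg"))
    for {
      data <- Ref.of[F, Map[String, Int]](Map.empty)
      _ <- logInfo("Creating cache") // sequenced like log.info above
    } yield new CacheLike[F](data)
  }
}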
@@ -116,7 +116,7 @@ object PipelineCache {
       for {
         counter <- Ref.of(0L)
         cleaning <- Ref.of(None: Option[Fiber[F, Throwable, Unit]])
-        log = Logger.log4s(logger)
+        log = docspell.logging.getLogger[F]
         result <-
           if (interval.millis <= 0)
             log
@@ -145,7 +145,7 @@
       release: F[Unit]
   )(implicit F: Async[F])
       extends CacheClearing[F] {
-    private[this] val log = Logger.log4s[F](logger)
+    private[this] val log = docspell.logging.getLogger[F]
 
     def withCache: Resource[F, Unit] =
       Resource.make(counter.update(_ + 1) *> cancelClear)(_ =>
@@ -14,10 +14,9 @@ import fs2.io.file.Path
 import docspell.common._
 
 import edu.stanford.nlp.pipeline.{CoreDocument, StanfordCoreNLP}
-import org.log4s.getLogger
 
 object StanfordNerAnnotator {
-  private[this] val logger = getLogger
+  private[this] val logger = docspell.logging.unsafeLogger
 
   /** Runs named entity recognition on the given `text`.
     *
@@ -21,7 +21,7 @@ import docspell.common._
 import munit._
 
 class StanfordTextClassifierSuite extends FunSuite {
-  val logger = Logger.log4s[IO](org.log4s.getLogger)
+  val logger = docspell.logging.getLogger[IO]
 
   test("learn from data") {
     val cfg = TextClassifierConfig(File.path(Paths.get("target")), NonEmptyList.of(Map()))
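The test change applies the same recipe with F fixed to IO. A hedged example of that style in a plain munit FunSuite; the logger stand-in is invented, not docspell's actual test logger:

import cats.effect.IO
import cats.effect.unsafe.implicits.global
import munit.FunSuite

class LoggingStyleSuite extends FunSuite {
  // stand-in for docspell.logging.getLogger[IO]
  val logInfo: String => IO[Unit] = msg => IO.println(s"INFO $msg")

  test("log, then compute") {
    val program = logInfo("learn from data") *> IO.pure(41 + 1)
    assertEquals(program.unsafeRunSync(), 42)
  }
}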