From b12d965223eda87a1660ed4bcc4655596002be34 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Wed, 20 Jan 2021 00:40:58 +0100 Subject: [PATCH] Improve logging --- .../src/main/scala/docspell/joex/learn/Classify.scala | 4 +++- .../main/scala/docspell/joex/process/TextAnalysis.scala | 6 +++--- .../main/scala/docspell/joex/process/TextExtraction.scala | 8 ++++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala index 4c65556c..4d4c2676 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala @@ -25,8 +25,9 @@ object Classify { text: String )(cname: ClassifierName): F[Option[String]] = (for { - _ <- OptionT.liftF(logger.info(s"Guessing label for ${cname.name} …")) + _ <- OptionT.liftF(logger.info(s"Guessing label for ${cname.name} …")) model <- OptionT(store.transact(RClassifierModel.findByName(coll, cname.name))) + .flatTapNone(logger.debug("No classifier model found.")) modelData = store.bitpeace .get(model.fileId.id) @@ -40,6 +41,7 @@ object Classify { .drain .flatMap(_ => classifier.classify(logger, ClassifierModel(modelFile), text)) }).filter(_ != LearnClassifierTask.noClass) + .flatTapNone(logger.debug("Guessed: <none>")) _ <- OptionT.liftF(logger.debug(s"Guessed: ${cls}")) } yield cls).value diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala index a3c4edb5..33ec72d6 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala @@ -37,8 +37,7 @@ object TextAnalysis { _ <- t.traverse(m => ctx.store.transact(RAttachmentMeta.updateLabels(m._1.id, m._1.nerlabels)) ) - e <- s - _ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}") + v = 
t.toVector autoTagEnabled <- getActiveAutoTag(ctx, cfg) tag <- @@ -50,6 +49,8 @@ object TextAnalysis { predictItemEntities(ctx, cfg, item.metas, analyser.classifier) else MetaProposalList.empty.pure[F] + e <- s + _ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}") } yield item .copy( metas = v.map(_._1), @@ -109,7 +110,6 @@ object TextAnalysis { mtype: MetaProposalType ): F[Option[MetaProposal]] = for { - _ <- ctx.logger.debug(s"Guessing $mtype using classifier") label <- makeClassify(ctx, cfg, classifier)(text).apply(cname) } yield label.map(str => MetaProposal(mtype, Candidate(IdRef(Ident.unsafe(""), str), Set.empty)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index fee7d323..80b4b13e 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -46,10 +46,14 @@ object TextExtraction { ) _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)).toSeq: _*) dur <- start - _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}") + extractedTags = txt.flatMap(_.tags).distinct.toList + _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}.") + _ <- + if (extractedTags.isEmpty) ().pure[F] + else ctx.logger.debug(s"Found tags in file: $extractedTags") } yield item .copy(metas = txt.map(_.am)) - .appendTags(txt.flatMap(_.tags).distinct.toList) + .appendTags(extractedTags) } // -- helpers