From 27c24c128d179d0cb462cb9e48c46506c3772992 Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Wed, 20 Jan 2021 00:30:40 +0100
Subject: [PATCH] Store tags guessed with classifier in database

---
 .../docspell/joex/process/CreateItem.scala    |  6 ++-
 .../docspell/joex/process/FindProposal.scala  |  8 ++-
 .../docspell/joex/process/ItemData.scala      |  3 +-
 .../docspell/joex/process/LinkProposal.scala  |  2 +-
 .../docspell/joex/process/ReProcessItem.scala |  3 +-
 .../docspell/joex/process/SaveProposals.scala | 54 ++++++++++---------
 .../docspell/joex/process/SetGivenData.scala  |  3 +-
 .../docspell/joex/process/TextAnalysis.scala  |  4 +-
 .../store/records/RItemProposal.scala         | 20 +++----
 9 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala
index 8bc9ccc1..c24ad98c 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala
@@ -108,7 +108,8 @@ object CreateItem {
         fm.map(a => a.id -> a.fileId).toMap,
         MetaProposalList.empty,
         Nil,
-        None
+        MetaProposalList.empty,
+        Nil
       )
     }
 
@@ -168,7 +169,8 @@ object CreateItem {
           origMap,
           MetaProposalList.empty,
           Nil,
-          None
+          MetaProposalList.empty,
+          Nil
         )
       )
     }
diff --git a/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala
index 4f984b10..fa484772 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala
@@ -5,6 +5,7 @@ import java.time.ZoneId
 import cats.effect.Sync
 import cats.implicits._
 import cats.{Applicative, FlatMap}
+
 import docspell.analysis.contact._
 import docspell.common.MetaProposal.Candidate
 import docspell.common._
@@ -30,11 +31,8 @@ object FindProposal {
             processAttachment(cfg, rm, data.findDates(rm), ctx)
               .map(ml => rm.copy(proposals = ml))
           )
-        clp <- data.classifyProposals match {
-          case Some(cmp) => lookupClassifierProposals(ctx, cmp)
-          case None      => MetaProposalList.empty.pure[F]
-        }
-      } yield data.copy(metas = rmv, classifyProposals = clp.some)
+        clp <- lookupClassifierProposals(ctx, data.classifyProposals)
+      } yield data.copy(metas = rmv, classifyProposals = clp)
     }
 
   def lookupClassifierProposals[F[_]: Sync](
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala
index a151e8a6..f7f52fe5 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala
@@ -29,7 +29,8 @@ case class ItemData(
     // a list of tags (names or ids) attached to the item if they exist
     tags: List[String],
     // proposals obtained from the classifier
-    classifyProposals: Option[MetaProposalList]
+    classifyProposals: MetaProposalList,
+    classifyTags: List[String]
 ) {
 
   def findMeta(attachId: Ident): Option[RAttachmentMeta] =
diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala
index 6108e216..be8d34c8 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala
@@ -24,7 +24,7 @@ object LinkProposal {
           .flatten(data.metas.map(_.proposals))
           .filter(_.proposalType != MetaProposalType.DocDate)
           .sortByWeights
-          .fillEmptyFrom(data.classifyProposals.getOrElse(MetaProposalList.empty))
+          .fillEmptyFrom(data.classifyProposals)
 
         ctx.logger.info(s"Starting linking proposals") *>
           MetaProposalType.all
diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala
index db41e901..42db6033 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala
@@ -66,7 +66,8 @@ object ReProcessItem {
         asrcMap.view.mapValues(_.fileId).toMap,
         MetaProposalList.empty,
         Nil,
-        None
+        MetaProposalList.empty,
+        Nil
       )).getOrElseF(
         Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}"))
       )
diff --git a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala
index 060e718e..dfe4e1e2 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala
@@ -2,16 +2,18 @@ package docspell.joex.process
 
 import cats.effect.Sync
 import cats.implicits._
+
 import docspell.common._
-import docspell.joex.scheduler.Task
+import docspell.joex.scheduler.{Context, Task}
 import docspell.store.AddResult
 import docspell.store.records._
 
 /** Saves the proposals in the database
   */
 object SaveProposals {
+  type Args = ProcessItemArgs
 
-  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
+  def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] =
     Task { ctx =>
       for {
         _ <- ctx.logger.info("Storing proposals")
@@ -21,28 +23,32 @@ object SaveProposals {
               s"Storing attachment proposals: ${rm.proposals}"
             ) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
           )
-        _ <- data.classifyProposals match {
-          case Some(clp) =>
-            val itemId = data.item.id
-            ctx.logger.debug(s"Storing classifier proposals: $clp") *>
-              ctx.store
-                .add(
-                  RItemProposal.createNew(itemId, clp),
-                  RItemProposal.exists(itemId)
-                )
-                .flatMap({
-                  case AddResult.EntityExists(_) =>
-                    ctx.store.transact(RItemProposal.updateProposals(itemId, clp))
-                  case AddResult.Failure(ex) =>
-                    ctx.logger
-                      .warn(s"Could not store classifier proposals: ${ex.getMessage}") *>
-                      0.pure[F]
-                  case AddResult.Success =>
-                    1.pure[F]
-                })
-          case None =>
-            0.pure[F]
-        }
+        _ <-
+          if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F]
+          else saveItemProposal(ctx, data)
       } yield data
     }
+  /** Upserts the item's classifier proposals and tags; an AddResult.Failure is only logged. */
+  def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = {
+    def upsert(v: RItemProposal): F[Int] =
+      ctx.store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap {
+        case AddResult.Success => 1.pure[F]
+        case AddResult.EntityExists(_) =>
+          ctx.store.transact(RItemProposal.update(v))
+        case AddResult.Failure(ex) =>
+          ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0
+            .pure[F]
+      }
+    // Look up the tags matching classifyTags in the collective, then upsert the combined record.
+    for {
+      _ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}")
+      tags <- ctx.store.transact(
+        RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective)
+      )
+      tagRefs = tags.map(t => IdRef(t.tagId, t.name))
+      now <- Timestamp.current[F]
+      value = RItemProposal(data.item.id, data.classifyProposals, tagRefs.toList, now)
+      _ <- upsert(value)
+    } yield ()
+  }
 }
diff --git a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala
index 99348419..b668dbe9 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala
@@ -45,7 +45,8 @@ object SetGivenData {
     Task { ctx =>
       val itemId     = data.item.id
       val collective = ctx.args.meta.collective
-      val tags       = (ctx.args.meta.tags.getOrElse(Nil) ++ data.tags).distinct
+      val tags =
+        (ctx.args.meta.tags.getOrElse(Nil) ++ data.tags ++ data.classifyTags).distinct
       for {
         _ <- ctx.logger.info(s"Set tags from given data: ${tags}")
         e <- ops.linkTags(itemId, tags, collective).attempt
diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala
index a2561e07..a3c4edb5 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala
@@ -54,9 +54,9 @@ object TextAnalysis {
         .copy(
           metas = v.map(_._1),
           dateLabels = v.map(_._2),
-          classifyProposals = classProposals.some
+          classifyProposals = classProposals,
+          classifyTags = tag
         )
-        .appendTags(tag)
     }
 
   def annotateAttachment[F[_]: Sync](
diff --git a/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala b/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala
index 822404ce..c5d44cc0 100644
--- a/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala
@@ -1,7 +1,6 @@
 package docspell.store.records
 
 import cats.data.NonEmptyList
-//import cats.implicits._
 
 import docspell.common._
 import docspell.store.qb.DSL._
@@ -39,22 +38,23 @@ object RItemProposal {
       fr"${v.itemId},${v.classifyProposals},${v.classifyTags},${v.created}"
     )
 
+  def update(v: RItemProposal): ConnectionIO[Int] = // targets the row matching v.itemId
+    DML.update(
+      T,
+      T.itemId === v.itemId,
+      DML.set( // only proposals and tags are written; v.created is not part of the update
+        T.classifyProposals.setTo(v.classifyProposals),
+        T.classifyTags.setTo(v.classifyTags)
+      )
+    )
+
   def deleteByItem(itemId: Ident): ConnectionIO[Int] =
     DML.delete(T, T.itemId === itemId)
 
-  def createNew(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
-    for {
-      now <- Timestamp.current[ConnectionIO]
-      value = RItemProposal(itemId, proposals, Nil, now)
-      n <- insert(value)
-    } yield n
-
   def exists(itemId: Ident): ConnectionIO[Boolean] =
     Select(select(countAll), from(T), T.itemId === itemId).build
       .query[Int]
       .unique
       .map(_ > 0)
 
-  def updateProposals(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
-    DML.update(T, T.itemId === itemId, DML.set(T.classifyProposals.setTo(proposals)))
 }