From 9d83cb7fe461ef12823e2379b7658b92fd49c676 Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Tue, 19 Jan 2021 23:48:09 +0100
Subject: [PATCH] Store item based proposals in separate table

Classifier don't work on each attachment, but on all. So the results
must not be stored at an attachment. This reverts some previous
changes to put the classifier results for item entities into its own
table.
---
 .../joex/process/AttachmentPageCount.scala    |  1 -
 .../docspell/joex/process/SaveProposals.scala | 39 ++++++++----
 .../h2/V1.19.0__add_classify_meta.sql         | 10 +++-
 .../mariadb/V1.19.0__add_classify_meta.sql    | 10 +++-
 .../postgresql/V1.19.0__add_classify_meta.sql | 10 +++-
 .../docspell/store/impl/DoobieMeta.scala      |  3 +
 .../docspell/store/queries/QAttachment.scala  | 24 ++++----
 .../scala/docspell/store/queries/QItem.scala  |  3 +-
 .../store/records/RAttachmentMeta.scala       | 34 ++++-------
 .../store/records/RItemProposal.scala         | 60 +++++++++++++++++++
 10 files changed, 142 insertions(+), 52 deletions(-)
 create mode 100644 modules/store/src/main/scala/docspell/store/records/RItemProposal.scala

diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala
index 15678322..0373db8a 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala
@@ -84,7 +84,6 @@ object AttachmentPageCount {
                 Nil,
                 MetaProposalList.empty,
                 md.pageCount.some,
-                None,
                 None
               )
             )
diff --git a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala
index d8abf308..060e718e 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala
@@ -2,9 +2,9 @@ package docspell.joex.process
 
 import cats.effect.Sync
 import cats.implicits._
-
 import docspell.common._
 import docspell.joex.scheduler.Task
+import docspell.store.AddResult
 import docspell.store.records._
 
 /** Saves the proposals in the database
@@ -13,17 +13,36 @@ object SaveProposals {
 
   def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
     Task { ctx =>
-      ctx.logger.info("Storing proposals") *>
-        data.metas
+      for {
+        _ <- ctx.logger.info("Storing proposals")
+        _ <- data.metas
           .traverse(rm =>
             ctx.logger.debug(
-              s"Storing attachment proposals: ${rm.proposals} and ${data.classifyProposals}"
-            ) *>
-              ctx.store.transact(
-                RAttachmentMeta
-                  .updateProposals(rm.id, rm.proposals, data.classifyProposals)
-              )
+              s"Storing attachment proposals: ${rm.proposals}"
+            ) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
           )
-          .map(_ => data)
+        _ <- data.classifyProposals match {
+          case Some(clp) =>
+            val itemId = data.item.id
+            ctx.logger.debug(s"Storing classifier proposals: $clp") *>
+              ctx.store
+                .add(
+                  RItemProposal.createNew(itemId, clp),
+                  RItemProposal.exists(itemId)
+                )
+                .flatMap({
+                  case AddResult.EntityExists(_) =>
+                    ctx.store.transact(RItemProposal.updateProposals(itemId, clp))
+                  case AddResult.Failure(ex) =>
+                    ctx.logger
+                      .warn(s"Could not store classifier proposals: ${ex.getMessage}") *>
+                      0.pure[F]
+                  case AddResult.Success =>
+                    1.pure[F]
+                })
+          case None =>
+            0.pure[F]
+        }
+      } yield data
     }
 }
diff --git a/modules/store/src/main/resources/db/migration/h2/V1.19.0__add_classify_meta.sql b/modules/store/src/main/resources/db/migration/h2/V1.19.0__add_classify_meta.sql
index 2513dc8d..b1c6a6e4 100644
--- a/modules/store/src/main/resources/db/migration/h2/V1.19.0__add_classify_meta.sql
+++ b/modules/store/src/main/resources/db/migration/h2/V1.19.0__add_classify_meta.sql
@@ -1,3 +1,7 @@
-ALTER TABLE "attachmentmeta"
-ADD COLUMN "classify_proposals" text;
-
+CREATE TABLE "item_proposal" (
+  "itemid" varchar(254) not null primary key,
+  "classifier_proposals" text not null,
+  "classifier_tags" text not null,
+  "created" timestamp not null,
+  foreign key ("itemid") references "item"("itemid")
+);
diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.19.0__add_classify_meta.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.19.0__add_classify_meta.sql
index fdc3c9f0..08f947b3 100644
--- a/modules/store/src/main/resources/db/migration/mariadb/V1.19.0__add_classify_meta.sql
+++ b/modules/store/src/main/resources/db/migration/mariadb/V1.19.0__add_classify_meta.sql
@@ -1,3 +1,7 @@
-ALTER TABLE `attachmentmeta`
-ADD COLUMN (`classify_proposals` mediumtext);
-
+CREATE TABLE `item_proposal` (
+  `itemid` varchar(254) not null primary key,
+  `classifier_proposals` mediumtext not null,
+  `classifier_tags` mediumtext not null,
+  `created` timestamp not null,
+  foreign key (`itemid`) references `item`(`itemid`)
+);
diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.19.0__add_classify_meta.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.19.0__add_classify_meta.sql
index 2513dc8d..b1c6a6e4 100644
--- a/modules/store/src/main/resources/db/migration/postgresql/V1.19.0__add_classify_meta.sql
+++ b/modules/store/src/main/resources/db/migration/postgresql/V1.19.0__add_classify_meta.sql
@@ -1,3 +1,7 @@
-ALTER TABLE "attachmentmeta"
-ADD COLUMN "classify_proposals" text;
-
+CREATE TABLE "item_proposal" (
+  "itemid" varchar(254) not null primary key,
+  "classifier_proposals" text not null,
+  "classifier_tags" text not null,
+  "created" timestamp not null,
+  foreign key ("itemid") references "item"("itemid")
+);
diff --git a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala
index db60a19e..8952891f 100644
--- a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala
+++ b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala
@@ -86,6 +86,9 @@ trait DoobieMeta extends EmilDoobieMeta {
   implicit val metaItemProposalList: Meta[MetaProposalList] =
     jsonMeta[MetaProposalList]
 
+  implicit val metaIdRef: Meta[List[IdRef]] =
+    jsonMeta[List[IdRef]]
+
   implicit val metaLanguage: Meta[Language] =
     Meta[String].imap(Language.unsafe)(_.iso3)
 
diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala
index b1fb11b8..89c11faf 100644
--- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala
+++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala
@@ -21,6 +21,7 @@ object QAttachment {
   private val item = RItem.as("i")
   private val am   = RAttachmentMeta.as("am")
   private val c    = RCollective.as("c")
+  private val im   = RItemProposal.as("im")
 
   def deletePreview[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = {
     val findPreview =
@@ -118,24 +119,27 @@ object QAttachment {
     } yield ns.sum
 
   def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = {
-    val q = Select(
-      select(am.proposals, am.classifyProposals),
+    val qa = Select(
+      select(am.proposals),
       from(am)
         .innerJoin(a, a.id === am.id)
         .innerJoin(item, a.itemId === item.id),
       a.itemId === itemId && item.cid === coll
     ).build
 
+    val qi = Select(
+      select(im.classifyProposals),
+      from(im)
+        .innerJoin(item, item.id === im.itemId),
+      item.cid === coll && im.itemId === itemId
+    ).build
+
     for {
-      ml <- q.query[(MetaProposalList, Option[MetaProposalList])].to[Vector]
-      pairs = ml.foldLeft(
-        (Vector.empty[MetaProposalList], Vector.empty[MetaProposalList])
-      ) { case ((vl, vr), (m, o)) =>
-        (vl.appended(m), o.map(vr.appended).getOrElse(vr))
-      }
+      mla <- qa.query[MetaProposalList].to[Vector]
+      mli <- qi.query[MetaProposalList].to[Vector]
     } yield MetaProposalList
-      .flatten(pairs._1)
-      .fillEmptyFrom(MetaProposalList.flatten(pairs._2))
+      .flatten(mla)
+      .fillEmptyFrom(MetaProposalList.flatten(mli))
   }
 
   def getAttachmentMeta(
diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala
index 7de59437..7a53a192 100644
--- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala
+++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala
@@ -441,8 +441,9 @@ object QItem {
       tn <- store.transact(RTagItem.deleteItemTags(itemId))
       mn <- store.transact(RSentMail.deleteByItem(itemId))
       cf <- store.transact(RCustomFieldValue.deleteByItem(itemId))
+      im <- store.transact(RItemProposal.deleteByItem(itemId))
       n  <- store.transact(RItem.deleteByIdAndCollective(itemId, collective))
-    } yield tn + rn + n + mn + cf
+    } yield tn + rn + n + mn + cf + im
 
   private def findByFileIdsQuery(
       fileMetaIds: Nel[Ident],
diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala
index f201525c..5bc8feea 100644
--- a/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala
@@ -16,8 +16,7 @@ case class RAttachmentMeta(
     nerlabels: List[NerLabel],
     proposals: MetaProposalList,
     pages: Option[Int],
-    language: Option[Language],
-    classifyProposals: Option[MetaProposalList]
+    language: Option[Language]
 ) {
 
   def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
@@ -30,18 +29,17 @@ case class RAttachmentMeta(
 
 object RAttachmentMeta {
   def empty(attachId: Ident, lang: Language) =
-    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang), None)
+    RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang))
 
   final case class Table(alias: Option[String]) extends TableDef {
     val tableName = "attachmentmeta"
 
-    val id                = Column[Ident]("attachid", this)
-    val content           = Column[String]("content", this)
-    val nerlabels         = Column[List[NerLabel]]("nerlabels", this)
-    val proposals         = Column[MetaProposalList]("itemproposals", this)
-    val pages             = Column[Int]("page_count", this)
-    val language          = Column[Language]("language", this)
-    val classifyProposals = Column[MetaProposalList]("classify_proposals", this)
+    val id        = Column[Ident]("attachid", this)
+    val content   = Column[String]("content", this)
+    val nerlabels = Column[List[NerLabel]]("nerlabels", this)
+    val proposals = Column[MetaProposalList]("itemproposals", this)
+    val pages     = Column[Int]("page_count", this)
+    val language  = Column[Language]("language", this)
     val all =
       NonEmptyList.of[Column[_]](
         id,
@@ -49,8 +47,7 @@ object RAttachmentMeta {
         nerlabels,
         proposals,
         pages,
-        language,
-        classifyProposals
+        language
       )
   }
 
@@ -62,7 +59,7 @@ object RAttachmentMeta {
     DML.insert(
       T,
       T.all,
-      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language},${v.classifyProposals}"
+      fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language}"
     )
 
   def exists(attachId: Ident): ConnectionIO[Boolean] =
@@ -90,8 +87,7 @@ object RAttachmentMeta {
       DML.set(
         T.content.setTo(v.content),
         T.nerlabels.setTo(v.nerlabels),
-        T.proposals.setTo(v.proposals),
-        T.classifyProposals.setTo(v.classifyProposals)
+        T.proposals.setTo(v.proposals)
       )
     )
 
@@ -106,16 +102,12 @@ object RAttachmentMeta {
 
   def updateProposals(
       mid: Ident,
-      plist: MetaProposalList,
-      clist: Option[MetaProposalList]
+      plist: MetaProposalList
   ): ConnectionIO[Int] =
     DML.update(
       T,
       T.id === mid,
-      DML.set(
-        T.proposals.setTo(plist),
-        T.classifyProposals.setTo(clist)
-      )
+      DML.set(T.proposals.setTo(plist))
     )
 
   def updatePageCount(mid: Ident, pageCount: Option[Int]): ConnectionIO[Int] =
diff --git a/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala b/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala
new file mode 100644
index 00000000..822404ce
--- /dev/null
+++ b/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala
@@ -0,0 +1,60 @@
+package docspell.store.records
+
+import cats.data.NonEmptyList
+//import cats.implicits._
+
+import docspell.common._
+import docspell.store.qb.DSL._
+import docspell.store.qb._
+
+import doobie._
+import doobie.implicits._
+
+case class RItemProposal(
+    itemId: Ident,
+    classifyProposals: MetaProposalList,
+    classifyTags: List[IdRef],
+    created: Timestamp
+)
+
+object RItemProposal {
+  final case class Table(alias: Option[String]) extends TableDef {
+    val tableName = "item_proposal"
+
+    val itemId            = Column[Ident]("itemid", this)
+    val classifyProposals = Column[MetaProposalList]("classifier_proposals", this)
+    val classifyTags      = Column[List[IdRef]]("classifier_tags", this)
+    val created           = Column[Timestamp]("created", this)
+    val all               = NonEmptyList.of[Column[_]](itemId, classifyProposals, classifyTags, created)
+  }
+
+  val T = Table(None)
+  def as(alias: String): Table =
+    Table(Some(alias))
+
+  def insert(v: RItemProposal): ConnectionIO[Int] =
+    DML.insert(
+      T,
+      T.all,
+      fr"${v.itemId},${v.classifyProposals},${v.classifyTags},${v.created}"
+    )
+
+  def deleteByItem(itemId: Ident): ConnectionIO[Int] =
+    DML.delete(T, T.itemId === itemId)
+
+  def createNew(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
+    for {
+      now <- Timestamp.current[ConnectionIO]
+      value = RItemProposal(itemId, proposals, Nil, now)
+      n <- insert(value)
+    } yield n
+
+  def exists(itemId: Ident): ConnectionIO[Boolean] =
+    Select(select(countAll), from(T), T.itemId === itemId).build
+      .query[Int]
+      .unique
+      .map(_ > 0)
+
+  def updateProposals(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
+    DML.update(T, T.itemId === itemId, DML.set(T.classifyProposals.setTo(proposals)))
+}