Extend guessing tags to all tag categories

2025-08-05 02:24:52 +00:00 · 2021-01-18 13:35:53 +01:00
parent c5778880d9
commit 249f9e6e2a
18 changed files with 384 additions and 168 deletions
--- a/modules/store/src/main/resources/db/migration/h2/V1.17.1__classifier_model.sql
+++ b/modules/store/src/main/resources/db/migration/h2/V1.17.1__classifier_model.sql
@ -0,0 +1,21 @@
+-- New table "classifier_model": one row per ("cid", "name") pair (enforced
+-- by the unique constraint); "file_id" references a row in "filemeta".
+CREATE TABLE "classifier_model"(
+  "id" varchar(254) not null primary key,
+  "cid" varchar(254) not null,
+  "name" varchar(254) not null,
+  "file_id" varchar(254) not null,
+  "created" timestamp not null,
+  foreign key ("cid") references "collective"("cid"),
+  foreign key ("file_id") references "filemeta"("id"),
+  unique ("cid", "name")
+);
+
+-- Copy every "classifier_setting" row that references a file into the new
+-- table. The model name is the old category prefixed with 'tagcategory-'.
+-- No column list is given, so the select order must match the column order
+-- of the table created above.
+insert into "classifier_model"
+select random_uuid() as "id", "cid", concat('tagcategory-', "category") as "name", "file_id", "created"
+from "classifier_setting"
+where "file_id" is not null;
+
+-- "category" and "file_id" were carried over above; remove them from
+-- "classifier_setting".
+alter table "classifier_setting"
+drop column "category";
+
+alter table "classifier_setting"
+drop column "file_id";
--- a/modules/store/src/main/resources/db/migration/mariadb/V1.17.1__classifier_model.sql
+++ b/modules/store/src/main/resources/db/migration/mariadb/V1.17.1__classifier_model.sql
@ -0,0 +1,26 @@
+-- New table `classifier_model`: one row per (`cid`, `name`) pair (enforced
+-- by the unique constraint); `file_id` references a row in `filemeta`.
+CREATE TABLE `classifier_model`(
+  `id` varchar(254) not null primary key,
+  `cid` varchar(254) not null,
+  `name` varchar(254) not null,
+  `file_id` varchar(254) not null,
+  `created` timestamp not null,
+  foreign key (`cid`) references `collective`(`cid`),
+  foreign key (`file_id`) references `filemeta`(`id`),
+  unique (`cid`, `name`)
+);
+
+-- Copy every `classifier_setting` row that references a file into the new
+-- table. The model name is the old category prefixed with 'tagcategory-'.
+-- No column list is given, so the select order must match the column order
+-- of the table created above.
+insert into `classifier_model`
+select md5(rand()) as id, `cid`,concat('tagcategory-', `category`) as `name`, `file_id`, `created`
+from `classifier_setting`
+where `file_id` is not null;
+
+-- `category` was carried over above; remove it from `classifier_setting`.
+alter table `classifier_setting`
+drop column `category`;
+
+-- mariadb needs special treatment when dropping a column that is part
+-- of an index and foreign key
+-- NOTE(review): `classifier_setting_ibfk_2` is presumably the auto-generated
+-- name of the foreign key on `file_id` -- confirm against the migration that
+-- created `classifier_setting`.
+alter table `classifier_setting`
+drop constraint `classifier_setting_ibfk_2`;
+
+-- With the constraint gone, the column itself can be dropped.
+alter table `classifier_setting`
+drop column `file_id`;
--- a/modules/store/src/main/resources/db/migration/postgresql/V1.17.1__classifier_model.sql
+++ b/modules/store/src/main/resources/db/migration/postgresql/V1.17.1__classifier_model.sql
@ -0,0 +1,21 @@
+-- New table "classifier_model": one row per ("cid", "name") pair (enforced
+-- by the unique constraint); "file_id" references a row in "filemeta".
+CREATE TABLE "classifier_model"(
+  "id" varchar(254) not null primary key,
+  "cid" varchar(254) not null,
+  "name" varchar(254) not null,
+  "file_id" varchar(254) not null,
+  "created" timestamp not null,
+  foreign key ("cid") references "collective"("cid"),
+  foreign key ("file_id") references "filemeta"("id"),
+  unique ("cid", "name")
+);
+
+-- Copy every "classifier_setting" row that references a file into the new
+-- table. The model name is the old category prefixed with 'tagcategory-'.
+-- No column list is given, so the select order must match the column order
+-- of the table created above.
+insert into "classifier_model"
+select md5(random()::text) as id, "cid",'tagcategory-' || "category" as "name", "file_id", "created"
+from "classifier_setting"
+where "file_id" is not null;
+
+-- "category" and "file_id" were carried over above; remove them from
+-- "classifier_setting".
+alter table "classifier_setting"
+drop column "category";
+
+alter table "classifier_setting"
+drop column "file_id";
--- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala
+++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala
@ -543,11 +543,14 @@ object QItem {

  def findAllNewesFirst(
      collective: Ident,
-      chunkSize: Int
+      chunkSize: Int,
+      limit: Batch
  ): Stream[ConnectionIO, Ident] = {
+
    val i = RItem.as("i")
    Select(i.id.s, from(i), i.cid === collective && i.state === ItemState.confirmed)
      .orderBy(i.created.desc)
+      .limit(limit)
      .build
      .query[Ident]
      .streamWithChunkSize(chunkSize)
--- a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala
@ -0,0 +1,78 @@
+package docspell.store.records
+
+import cats.effect._
+import cats.data.NonEmptyList
+import cats.implicits._
+
+import docspell.common._
+import docspell.store.qb.DSL._
+import docspell.store.qb._
+
+import doobie._
+import doobie.implicits._
+
+/** A row of the `classifier_model` table.
+  *
+  * @param id      primary key of the row
+  * @param cid     the collective this model belongs to
+  * @param name    model name; looked up together with `cid`
+  * @param fileId  id of the file this model row points to
+  * @param created timestamp stored in the `created` column
+  */
+final case class RClassifierModel(
+    id: Ident,
+    cid: Ident,
+    name: String,
+    fileId: Ident,
+    created: Timestamp
+)
+
+/** Table definition and queries for [[RClassifierModel]]. */
+object RClassifierModel {
+
+  /** Builds a new record whose id comes from `Ident.randomId` and whose
+    * `created` value comes from `Timestamp.current`. Nothing is written to
+    * the database here.
+    */
+  def createNew[F[_]: Sync](
+      cid: Ident,
+      name: String,
+      fileId: Ident
+  ): F[RClassifierModel] =
+    Ident.randomId[F].flatMap { id =>
+      Timestamp.current[F].map(now => RClassifierModel(id, cid, name, fileId, now))
+    }
+
+  /** Column layout of the `classifier_model` table, optionally aliased. */
+  final case class Table(alias: Option[String]) extends TableDef {
+    val tableName = "classifier_model"
+
+    val id      = Column[Ident]("id", this)
+    val cid     = Column[Ident]("cid", this)
+    val name    = Column[String]("name", this)
+    val fileId  = Column[Ident]("file_id", this)
+    val created = Column[Timestamp]("created", this)
+
+    // The order here must line up with the value fragment used by `insert`.
+    val all = NonEmptyList.of[Column[_]](id, cid, name, fileId, created)
+  }
+
+  /** The table under the given alias. */
+  def as(alias: String): Table =
+    Table(Some(alias))
+
+  /** The table without an alias. */
+  val T = Table(None)
+
+  /** Inserts the given record, writing all columns. */
+  def insert(v: RClassifierModel): ConnectionIO[Int] = {
+    val values = fr"${v.id},${v.cid},${v.name},${v.fileId},${v.created}"
+    DML.insert(T, T.all, values)
+  }
+
+  /** Sets the file id of the row matching collective and name. If the update
+    * touched no row, a new record is created and inserted instead. Returns
+    * the update count, or the insert count when an insert was needed.
+    */
+  def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] = {
+    val sameModel = T.cid === coll && T.name === name
+    DML.update(T, sameModel, DML.set(T.fileId.setTo(fid))).flatMap { updated =>
+      if (updated != 0) updated.pure[ConnectionIO]
+      else createNew[ConnectionIO](coll, name, fid).flatMap(insert).map(updated + _)
+    }
+  }
+
+  /** Looks up the row with the given collective and name, if there is one. */
+  def findByName(cid: Ident, name: String): ConnectionIO[Option[RClassifierModel]] = {
+    val sameModel = T.cid === cid && T.name === name
+    Select(select(T.all), from(T), sameModel).build
+      .query[RClassifierModel]
+      .option
+  }
+
+  /** Loads all rows of the collective whose name is one of `names`. */
+  def findAllByName(
+      cid: Ident,
+      names: NonEmptyList[String]
+  ): ConnectionIO[List[RClassifierModel]] = {
+    val anyOfNames = T.cid === cid && T.name.in(names)
+    Select(select(T.all), from(T), anyOfNames).build
+      .query[RClassifierModel]
+      .to[List]
+  }
+}
--- a/modules/store/src/main/scala/docspell/store/records/RClassifierSetting.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RClassifierSetting.scala
@ -15,9 +15,7 @@ case class RClassifierSetting(
    cid: Ident,
    enabled: Boolean,
    schedule: CalEvent,
-    category: String,
    itemCount: Int,
-    fileId: Option[Ident],
    created: Timestamp
 ) {}

@ -28,12 +26,10 @@ object RClassifierSetting {
    val cid       = Column[Ident]("cid", this)
    val enabled   = Column[Boolean]("enabled", this)
    val schedule  = Column[CalEvent]("schedule", this)
-    val category  = Column[String]("category", this)
    val itemCount = Column[Int]("item_count", this)
-    val fileId    = Column[Ident]("file_id", this)
    val created   = Column[Timestamp]("created", this)
    val all = NonEmptyList
-      .of[Column[_]](cid, enabled, schedule, category, itemCount, fileId, created)
+      .of[Column[_]](cid, enabled, schedule, itemCount, created)
  }

  val T = Table(None)
@ -44,7 +40,7 @@ object RClassifierSetting {
    DML.insert(
      T,
      T.all,
-      fr"${v.cid},${v.enabled},${v.schedule},${v.category},${v.itemCount},${v.fileId},${v.created}"
+      fr"${v.cid},${v.enabled},${v.schedule},${v.itemCount},${v.created}"
    )

  def updateAll(v: RClassifierSetting): ConnectionIO[Int] =
@ -54,15 +50,10 @@ object RClassifierSetting {
      DML.set(
        T.enabled.setTo(v.enabled),
        T.schedule.setTo(v.schedule),
-        T.category.setTo(v.category),
-        T.itemCount.setTo(v.itemCount),
-        T.fileId.setTo(v.fileId)
+        T.itemCount.setTo(v.itemCount)
      )
    )

-  def updateFile(coll: Ident, fid: Ident): ConnectionIO[Int] =
-    DML.update(T, T.cid === coll, DML.set(T.fileId.setTo(fid)))
-
  def updateSettings(v: RClassifierSetting): ConnectionIO[Int] =
    for {
      n1 <- DML.update(
@ -71,8 +62,7 @@ object RClassifierSetting {
        DML.set(
          T.enabled.setTo(v.enabled),
          T.schedule.setTo(v.schedule),
-          T.itemCount.setTo(v.itemCount),
-          T.category.setTo(v.category)
+          T.itemCount.setTo(v.itemCount)
        )
      )
      n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO]
@ -89,8 +79,7 @@ object RClassifierSetting {
  case class Classifier(
      enabled: Boolean,
      schedule: CalEvent,
-      itemCount: Int,
-      category: Option[String]
+      itemCount: Int
  ) {

    def toRecord(coll: Ident, created: Timestamp): RClassifierSetting =
@ -98,15 +87,13 @@ object RClassifierSetting {
        coll,
        enabled,
        schedule,
-        category.getOrElse(""),
        itemCount,
-        None,
        created
      )
  }
  object Classifier {
    def fromRecord(r: RClassifierSetting): Classifier =
-      Classifier(r.enabled, r.schedule, r.itemCount, r.category.some)
+      Classifier(r.enabled, r.schedule, r.itemCount)
  }

 }
--- a/modules/store/src/main/scala/docspell/store/records/RCollective.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RCollective.scala
@ -89,8 +89,7 @@ object RCollective {
        c.integration.s,
        cs.enabled.s,
        cs.schedule.s,
-        cs.itemCount.s,
-        cs.category.s
+        cs.itemCount.s
      ),
      from(c).leftJoin(cs, cs.cid === c.id),
      c.id === coll
--- a/modules/store/src/main/scala/docspell/store/records/RTag.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RTag.scala
@ -148,6 +148,13 @@ object RTag {
    ).orderBy(T.name.asc).build.query[RTag].to[List]
  }

+  /** Lists the distinct tag categories of the given collective. A tag whose
+    * category column is null contributes `fallback` instead.
+    */
+  def listCategories(coll: Ident, fallback: String): ConnectionIO[List[String]] = {
+    val categoryOrFallback = coalesce(T.category.s, lit(fallback)).s
+    Select(categoryOrFallback, from(T), T.cid === coll).distinct.build
+      .query[String]
+      .to[List]
+  }
+
  def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] =
    DML.delete(T, T.tid === tagId && T.cid === coll)
 }