mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-06 23:25:58 +00:00
Store item-based proposals in a separate table
The classifier doesn't work on each attachment individually, but on all attachments of an item together, so its results must not be stored at an attachment. This reverts some previous changes in order to put the classifier results for item entities into their own table.
This commit is contained in:
parent
3ff9284a64
commit
9d83cb7fe4
@ -84,7 +84,6 @@ object AttachmentPageCount {
|
|||||||
Nil,
|
Nil,
|
||||||
MetaProposalList.empty,
|
MetaProposalList.empty,
|
||||||
md.pageCount.some,
|
md.pageCount.some,
|
||||||
None,
|
|
||||||
None
|
None
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -2,9 +2,9 @@ package docspell.joex.process
|
|||||||
|
|
||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.joex.scheduler.Task
|
import docspell.joex.scheduler.Task
|
||||||
|
import docspell.store.AddResult
|
||||||
import docspell.store.records._
|
import docspell.store.records._
|
||||||
|
|
||||||
/** Saves the proposals in the database
|
/** Saves the proposals in the database
|
||||||
@ -13,17 +13,36 @@ object SaveProposals {
|
|||||||
|
|
||||||
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
ctx.logger.info("Storing proposals") *>
|
for {
|
||||||
data.metas
|
_ <- ctx.logger.info("Storing proposals")
|
||||||
|
_ <- data.metas
|
||||||
.traverse(rm =>
|
.traverse(rm =>
|
||||||
ctx.logger.debug(
|
ctx.logger.debug(
|
||||||
s"Storing attachment proposals: ${rm.proposals} and ${data.classifyProposals}"
|
s"Storing attachment proposals: ${rm.proposals}"
|
||||||
) *>
|
) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
|
||||||
ctx.store.transact(
|
|
||||||
RAttachmentMeta
|
|
||||||
.updateProposals(rm.id, rm.proposals, data.classifyProposals)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.map(_ => data)
|
_ <- data.classifyProposals match {
|
||||||
|
case Some(clp) =>
|
||||||
|
val itemId = data.item.id
|
||||||
|
ctx.logger.debug(s"Storing classifier proposals: $clp") *>
|
||||||
|
ctx.store
|
||||||
|
.add(
|
||||||
|
RItemProposal.createNew(itemId, clp),
|
||||||
|
RItemProposal.exists(itemId)
|
||||||
|
)
|
||||||
|
.flatMap({
|
||||||
|
case AddResult.EntityExists(_) =>
|
||||||
|
ctx.store.transact(RItemProposal.updateProposals(itemId, clp))
|
||||||
|
case AddResult.Failure(ex) =>
|
||||||
|
ctx.logger
|
||||||
|
.warn(s"Could not store classifier proposals: ${ex.getMessage}") *>
|
||||||
|
0.pure[F]
|
||||||
|
case AddResult.Success =>
|
||||||
|
1.pure[F]
|
||||||
|
})
|
||||||
|
case None =>
|
||||||
|
0.pure[F]
|
||||||
|
}
|
||||||
|
} yield data
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
ALTER TABLE "attachmentmeta"
|
CREATE TABLE "item_proposal" (
|
||||||
ADD COLUMN "classify_proposals" text;
|
"itemid" varchar(254) not null primary key,
|
||||||
|
"classifier_proposals" text not null,
|
||||||
|
"classifier_tags" text not null,
|
||||||
|
"created" timestamp not null,
|
||||||
|
foreign key ("itemid") references "item"("itemid")
|
||||||
|
);
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
ALTER TABLE `attachmentmeta`
|
CREATE TABLE `item_proposal` (
|
||||||
ADD COLUMN (`classify_proposals` mediumtext);
|
`itemid` varchar(254) not null primary key,
|
||||||
|
`classifier_proposals` mediumtext not null,
|
||||||
|
`classifier_tags` mediumtext not null,
|
||||||
|
`created` timestamp not null,
|
||||||
|
foreign key (`itemid`) references `item`(`itemid`)
|
||||||
|
);
|
||||||
|
@ -1,3 +1,7 @@
|
|||||||
ALTER TABLE "attachmentmeta"
|
CREATE TABLE "item_proposal" (
|
||||||
ADD COLUMN "classify_proposals" text;
|
"itemid" varchar(254) not null primary key,
|
||||||
|
"classifier_proposals" text not null,
|
||||||
|
"classifier_tags" text not null,
|
||||||
|
"created" timestamp not null,
|
||||||
|
foreign key ("itemid") references "item"("itemid")
|
||||||
|
);
|
||||||
|
@ -86,6 +86,9 @@ trait DoobieMeta extends EmilDoobieMeta {
|
|||||||
implicit val metaItemProposalList: Meta[MetaProposalList] =
|
implicit val metaItemProposalList: Meta[MetaProposalList] =
|
||||||
jsonMeta[MetaProposalList]
|
jsonMeta[MetaProposalList]
|
||||||
|
|
||||||
|
implicit val metaIdRef: Meta[List[IdRef]] =
|
||||||
|
jsonMeta[List[IdRef]]
|
||||||
|
|
||||||
implicit val metaLanguage: Meta[Language] =
|
implicit val metaLanguage: Meta[Language] =
|
||||||
Meta[String].imap(Language.unsafe)(_.iso3)
|
Meta[String].imap(Language.unsafe)(_.iso3)
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@ object QAttachment {
|
|||||||
private val item = RItem.as("i")
|
private val item = RItem.as("i")
|
||||||
private val am = RAttachmentMeta.as("am")
|
private val am = RAttachmentMeta.as("am")
|
||||||
private val c = RCollective.as("c")
|
private val c = RCollective.as("c")
|
||||||
|
private val im = RItemProposal.as("im")
|
||||||
|
|
||||||
def deletePreview[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = {
|
def deletePreview[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = {
|
||||||
val findPreview =
|
val findPreview =
|
||||||
@ -118,24 +119,27 @@ object QAttachment {
|
|||||||
} yield ns.sum
|
} yield ns.sum
|
||||||
|
|
||||||
def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = {
|
def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = {
|
||||||
val q = Select(
|
val qa = Select(
|
||||||
select(am.proposals, am.classifyProposals),
|
select(am.proposals),
|
||||||
from(am)
|
from(am)
|
||||||
.innerJoin(a, a.id === am.id)
|
.innerJoin(a, a.id === am.id)
|
||||||
.innerJoin(item, a.itemId === item.id),
|
.innerJoin(item, a.itemId === item.id),
|
||||||
a.itemId === itemId && item.cid === coll
|
a.itemId === itemId && item.cid === coll
|
||||||
).build
|
).build
|
||||||
|
|
||||||
|
val qi = Select(
|
||||||
|
select(im.classifyProposals),
|
||||||
|
from(im)
|
||||||
|
.innerJoin(item, item.id === im.itemId),
|
||||||
|
item.cid === coll && im.itemId === itemId
|
||||||
|
).build
|
||||||
|
|
||||||
for {
|
for {
|
||||||
ml <- q.query[(MetaProposalList, Option[MetaProposalList])].to[Vector]
|
mla <- qa.query[MetaProposalList].to[Vector]
|
||||||
pairs = ml.foldLeft(
|
mli <- qi.query[MetaProposalList].to[Vector]
|
||||||
(Vector.empty[MetaProposalList], Vector.empty[MetaProposalList])
|
|
||||||
) { case ((vl, vr), (m, o)) =>
|
|
||||||
(vl.appended(m), o.map(vr.appended).getOrElse(vr))
|
|
||||||
}
|
|
||||||
} yield MetaProposalList
|
} yield MetaProposalList
|
||||||
.flatten(pairs._1)
|
.flatten(mla)
|
||||||
.fillEmptyFrom(MetaProposalList.flatten(pairs._2))
|
.fillEmptyFrom(MetaProposalList.flatten(mli))
|
||||||
}
|
}
|
||||||
|
|
||||||
def getAttachmentMeta(
|
def getAttachmentMeta(
|
||||||
|
@ -441,8 +441,9 @@ object QItem {
|
|||||||
tn <- store.transact(RTagItem.deleteItemTags(itemId))
|
tn <- store.transact(RTagItem.deleteItemTags(itemId))
|
||||||
mn <- store.transact(RSentMail.deleteByItem(itemId))
|
mn <- store.transact(RSentMail.deleteByItem(itemId))
|
||||||
cf <- store.transact(RCustomFieldValue.deleteByItem(itemId))
|
cf <- store.transact(RCustomFieldValue.deleteByItem(itemId))
|
||||||
|
im <- store.transact(RItemProposal.deleteByItem(itemId))
|
||||||
n <- store.transact(RItem.deleteByIdAndCollective(itemId, collective))
|
n <- store.transact(RItem.deleteByIdAndCollective(itemId, collective))
|
||||||
} yield tn + rn + n + mn + cf
|
} yield tn + rn + n + mn + cf + im
|
||||||
|
|
||||||
private def findByFileIdsQuery(
|
private def findByFileIdsQuery(
|
||||||
fileMetaIds: Nel[Ident],
|
fileMetaIds: Nel[Ident],
|
||||||
|
@ -16,8 +16,7 @@ case class RAttachmentMeta(
|
|||||||
nerlabels: List[NerLabel],
|
nerlabels: List[NerLabel],
|
||||||
proposals: MetaProposalList,
|
proposals: MetaProposalList,
|
||||||
pages: Option[Int],
|
pages: Option[Int],
|
||||||
language: Option[Language],
|
language: Option[Language]
|
||||||
classifyProposals: Option[MetaProposalList]
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
|
def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
|
||||||
@ -30,18 +29,17 @@ case class RAttachmentMeta(
|
|||||||
|
|
||||||
object RAttachmentMeta {
|
object RAttachmentMeta {
|
||||||
def empty(attachId: Ident, lang: Language) =
|
def empty(attachId: Ident, lang: Language) =
|
||||||
RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang), None)
|
RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang))
|
||||||
|
|
||||||
final case class Table(alias: Option[String]) extends TableDef {
|
final case class Table(alias: Option[String]) extends TableDef {
|
||||||
val tableName = "attachmentmeta"
|
val tableName = "attachmentmeta"
|
||||||
|
|
||||||
val id = Column[Ident]("attachid", this)
|
val id = Column[Ident]("attachid", this)
|
||||||
val content = Column[String]("content", this)
|
val content = Column[String]("content", this)
|
||||||
val nerlabels = Column[List[NerLabel]]("nerlabels", this)
|
val nerlabels = Column[List[NerLabel]]("nerlabels", this)
|
||||||
val proposals = Column[MetaProposalList]("itemproposals", this)
|
val proposals = Column[MetaProposalList]("itemproposals", this)
|
||||||
val pages = Column[Int]("page_count", this)
|
val pages = Column[Int]("page_count", this)
|
||||||
val language = Column[Language]("language", this)
|
val language = Column[Language]("language", this)
|
||||||
val classifyProposals = Column[MetaProposalList]("classify_proposals", this)
|
|
||||||
val all =
|
val all =
|
||||||
NonEmptyList.of[Column[_]](
|
NonEmptyList.of[Column[_]](
|
||||||
id,
|
id,
|
||||||
@ -49,8 +47,7 @@ object RAttachmentMeta {
|
|||||||
nerlabels,
|
nerlabels,
|
||||||
proposals,
|
proposals,
|
||||||
pages,
|
pages,
|
||||||
language,
|
language
|
||||||
classifyProposals
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,7 +59,7 @@ object RAttachmentMeta {
|
|||||||
DML.insert(
|
DML.insert(
|
||||||
T,
|
T,
|
||||||
T.all,
|
T.all,
|
||||||
fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language},${v.classifyProposals}"
|
fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def exists(attachId: Ident): ConnectionIO[Boolean] =
|
def exists(attachId: Ident): ConnectionIO[Boolean] =
|
||||||
@ -90,8 +87,7 @@ object RAttachmentMeta {
|
|||||||
DML.set(
|
DML.set(
|
||||||
T.content.setTo(v.content),
|
T.content.setTo(v.content),
|
||||||
T.nerlabels.setTo(v.nerlabels),
|
T.nerlabels.setTo(v.nerlabels),
|
||||||
T.proposals.setTo(v.proposals),
|
T.proposals.setTo(v.proposals)
|
||||||
T.classifyProposals.setTo(v.classifyProposals)
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -106,16 +102,12 @@ object RAttachmentMeta {
|
|||||||
|
|
||||||
def updateProposals(
|
def updateProposals(
|
||||||
mid: Ident,
|
mid: Ident,
|
||||||
plist: MetaProposalList,
|
plist: MetaProposalList
|
||||||
clist: Option[MetaProposalList]
|
|
||||||
): ConnectionIO[Int] =
|
): ConnectionIO[Int] =
|
||||||
DML.update(
|
DML.update(
|
||||||
T,
|
T,
|
||||||
T.id === mid,
|
T.id === mid,
|
||||||
DML.set(
|
DML.set(T.proposals.setTo(plist))
|
||||||
T.proposals.setTo(plist),
|
|
||||||
T.classifyProposals.setTo(clist)
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def updatePageCount(mid: Ident, pageCount: Option[Int]): ConnectionIO[Int] =
|
def updatePageCount(mid: Ident, pageCount: Option[Int]): ConnectionIO[Int] =
|
||||||
|
@ -0,0 +1,60 @@
|
|||||||
|
package docspell.store.records
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
//import cats.implicits._
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.store.qb.DSL._
|
||||||
|
import docspell.store.qb._
|
||||||
|
|
||||||
|
import doobie._
|
||||||
|
import doobie.implicits._
|
||||||
|
|
||||||
|
/** A record of the `item_proposal` table: proposals that are stored per item
  * instead of per attachment.
  *
  * @param itemId the id of the item this record belongs to
  * @param classifyProposals the proposal list stored for the item
  * @param classifyTags a list of id references; `createNew` in the companion
  *   always stores `Nil` here. NOTE(review): presumably tags suggested by the
  *   classifier -- confirm against the classifier task.
  * @param created the timestamp stored when the record is created
  */
case class RItemProposal(
|
||||||
|
itemId: Ident,
|
||||||
|
classifyProposals: MetaProposalList,
|
||||||
|
classifyTags: List[IdRef],
|
||||||
|
created: Timestamp
|
||||||
|
)
|
||||||
|
|
||||||
|
/** Table definition and database operations for [[RItemProposal]] records. */
object RItemProposal {
|
||||||
|
/** Describes the `item_proposal` table and its columns, optionally under an alias. */
final case class Table(alias: Option[String]) extends TableDef {
|
||||||
|
val tableName = "item_proposal"
|
||||||
|
|
||||||
|
val itemId = Column[Ident]("itemid", this)
|
||||||
|
val classifyProposals = Column[MetaProposalList]("classifier_proposals", this)
|
||||||
|
val classifyTags = Column[List[IdRef]]("classifier_tags", this)
|
||||||
|
val created = Column[Timestamp]("created", this)
|
||||||
|
// Column order here must match the order of the values in `insert`.
val all = NonEmptyList.of[Column[_]](itemId, classifyProposals, classifyTags, created)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** The table without an alias; used by the operations in this object. */
val T = Table(None)
|
||||||
|
/** Returns the table definition under the given alias, for use in queries with joins. */
def as(alias: String): Table =
|
||||||
|
Table(Some(alias))
|
||||||
|
|
||||||
|
/** Inserts the given record, supplying a value for every column in `T.all`.
  *
  * NOTE(review): the resulting `Int` is presumably the number of inserted
  * rows -- confirm against `DML.insert`.
  */
def insert(v: RItemProposal): ConnectionIO[Int] =
|
||||||
|
DML.insert(
|
||||||
|
T,
|
||||||
|
T.all,
|
||||||
|
fr"${v.itemId},${v.classifyProposals},${v.classifyTags},${v.created}"
|
||||||
|
)
|
||||||
|
|
||||||
|
/** Deletes the record whose `itemid` column equals the given item id. */
def deleteByItem(itemId: Ident): ConnectionIO[Int] =
|
||||||
|
DML.delete(T, T.itemId === itemId)
|
||||||
|
|
||||||
|
/** Creates and inserts a new record for the item, using the current time as
  * `created` and an empty list for `classifyTags`.
  */
def createNew(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
|
||||||
|
for {
|
||||||
|
now <- Timestamp.current[ConnectionIO]
|
||||||
|
value = RItemProposal(itemId, proposals, Nil, now)
|
||||||
|
n <- insert(value)
|
||||||
|
} yield n
|
||||||
|
|
||||||
|
/** Returns true if a record for the given item id is present (row count > 0). */
def exists(itemId: Ident): ConnectionIO[Boolean] =
|
||||||
|
Select(select(countAll), from(T), T.itemId === itemId).build
|
||||||
|
.query[Int]
|
||||||
|
.unique
|
||||||
|
.map(_ > 0)
|
||||||
|
|
||||||
|
/** Replaces the stored proposal list of the item's record. Only the
  * `classifier_proposals` column is set; `classifier_tags` and `created`
  * are left unchanged.
  */
def updateProposals(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
|
||||||
|
DML.update(T, T.itemId === itemId, DML.set(T.classifyProposals.setTo(proposals)))
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user