Store item based proposals in separate table

Classifier don't work on each attachment, but on all. So the results
must not be stored at an attachment. This reverts some previous
changes to put the classifier results for item entities into its own
table.
This commit is contained in:
Eike Kettner 2021-01-19 23:48:09 +01:00
parent 3ff9284a64
commit 9d83cb7fe4
10 changed files with 142 additions and 52 deletions

View File

@ -84,7 +84,6 @@ object AttachmentPageCount {
Nil,
MetaProposalList.empty,
md.pageCount.some,
None,
None
)
)

View File

@ -2,9 +2,9 @@ package docspell.joex.process
import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.store.AddResult
import docspell.store.records._
/** Saves the proposals in the database
@ -13,17 +13,36 @@ object SaveProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
ctx.logger.info("Storing proposals") *>
data.metas
for {
_ <- ctx.logger.info("Storing proposals")
_ <- data.metas
.traverse(rm =>
ctx.logger.debug(
s"Storing attachment proposals: ${rm.proposals} and ${data.classifyProposals}"
) *>
ctx.store.transact(
RAttachmentMeta
.updateProposals(rm.id, rm.proposals, data.classifyProposals)
)
s"Storing attachment proposals: ${rm.proposals}"
) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
)
.map(_ => data)
_ <- data.classifyProposals match {
case Some(clp) =>
val itemId = data.item.id
ctx.logger.debug(s"Storing classifier proposals: $clp") *>
ctx.store
.add(
RItemProposal.createNew(itemId, clp),
RItemProposal.exists(itemId)
)
.flatMap({
case AddResult.EntityExists(_) =>
ctx.store.transact(RItemProposal.updateProposals(itemId, clp))
case AddResult.Failure(ex) =>
ctx.logger
.warn(s"Could not store classifier proposals: ${ex.getMessage}") *>
0.pure[F]
case AddResult.Success =>
1.pure[F]
})
case None =>
0.pure[F]
}
} yield data
}
}

View File

@ -1,3 +1,7 @@
ALTER TABLE "attachmentmeta"
ADD COLUMN "classify_proposals" text;
CREATE TABLE "item_proposal" (
"itemid" varchar(254) not null primary key,
"classifier_proposals" text not null,
"classifier_tags" text not null,
"created" timestamp not null,
foreign key ("itemid") references "item"("itemid")
);

View File

@ -1,3 +1,7 @@
ALTER TABLE `attachmentmeta`
ADD COLUMN (`classify_proposals` mediumtext);
CREATE TABLE `item_proposal` (
`itemid` varchar(254) not null primary key,
`classifier_proposals` mediumtext not null,
`classifier_tags` mediumtext not null,
`created` timestamp not null,
foreign key (`itemid`) references `item`(`itemid`)
);

View File

@ -1,3 +1,7 @@
ALTER TABLE "attachmentmeta"
ADD COLUMN "classify_proposals" text;
CREATE TABLE "item_proposal" (
"itemid" varchar(254) not null primary key,
"classifier_proposals" text not null,
"classifier_tags" text not null,
"created" timestamp not null,
foreign key ("itemid") references "item"("itemid")
);

View File

@ -86,6 +86,9 @@ trait DoobieMeta extends EmilDoobieMeta {
implicit val metaItemProposalList: Meta[MetaProposalList] =
jsonMeta[MetaProposalList]
implicit val metaIdRef: Meta[List[IdRef]] =
jsonMeta[List[IdRef]]
implicit val metaLanguage: Meta[Language] =
Meta[String].imap(Language.unsafe)(_.iso3)

View File

@ -21,6 +21,7 @@ object QAttachment {
private val item = RItem.as("i")
private val am = RAttachmentMeta.as("am")
private val c = RCollective.as("c")
private val im = RItemProposal.as("im")
def deletePreview[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = {
val findPreview =
@ -118,24 +119,27 @@ object QAttachment {
} yield ns.sum
def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = {
val q = Select(
select(am.proposals, am.classifyProposals),
val qa = Select(
select(am.proposals),
from(am)
.innerJoin(a, a.id === am.id)
.innerJoin(item, a.itemId === item.id),
a.itemId === itemId && item.cid === coll
).build
val qi = Select(
select(im.classifyProposals),
from(im)
.innerJoin(item, item.id === im.itemId),
item.cid === coll && im.itemId === itemId
).build
for {
ml <- q.query[(MetaProposalList, Option[MetaProposalList])].to[Vector]
pairs = ml.foldLeft(
(Vector.empty[MetaProposalList], Vector.empty[MetaProposalList])
) { case ((vl, vr), (m, o)) =>
(vl.appended(m), o.map(vr.appended).getOrElse(vr))
}
mla <- qa.query[MetaProposalList].to[Vector]
mli <- qi.query[MetaProposalList].to[Vector]
} yield MetaProposalList
.flatten(pairs._1)
.fillEmptyFrom(MetaProposalList.flatten(pairs._2))
.flatten(mla)
.fillEmptyFrom(MetaProposalList.flatten(mli))
}
def getAttachmentMeta(

View File

@ -441,8 +441,9 @@ object QItem {
tn <- store.transact(RTagItem.deleteItemTags(itemId))
mn <- store.transact(RSentMail.deleteByItem(itemId))
cf <- store.transact(RCustomFieldValue.deleteByItem(itemId))
im <- store.transact(RItemProposal.deleteByItem(itemId))
n <- store.transact(RItem.deleteByIdAndCollective(itemId, collective))
} yield tn + rn + n + mn + cf
} yield tn + rn + n + mn + cf + im
private def findByFileIdsQuery(
fileMetaIds: Nel[Ident],

View File

@ -16,8 +16,7 @@ case class RAttachmentMeta(
nerlabels: List[NerLabel],
proposals: MetaProposalList,
pages: Option[Int],
language: Option[Language],
classifyProposals: Option[MetaProposalList]
language: Option[Language]
) {
def setContentIfEmpty(txt: Option[String]): RAttachmentMeta =
@ -30,18 +29,17 @@ case class RAttachmentMeta(
object RAttachmentMeta {
def empty(attachId: Ident, lang: Language) =
RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang), None)
RAttachmentMeta(attachId, None, Nil, MetaProposalList.empty, None, Some(lang))
final case class Table(alias: Option[String]) extends TableDef {
val tableName = "attachmentmeta"
val id = Column[Ident]("attachid", this)
val content = Column[String]("content", this)
val nerlabels = Column[List[NerLabel]]("nerlabels", this)
val proposals = Column[MetaProposalList]("itemproposals", this)
val pages = Column[Int]("page_count", this)
val language = Column[Language]("language", this)
val classifyProposals = Column[MetaProposalList]("classify_proposals", this)
val id = Column[Ident]("attachid", this)
val content = Column[String]("content", this)
val nerlabels = Column[List[NerLabel]]("nerlabels", this)
val proposals = Column[MetaProposalList]("itemproposals", this)
val pages = Column[Int]("page_count", this)
val language = Column[Language]("language", this)
val all =
NonEmptyList.of[Column[_]](
id,
@ -49,8 +47,7 @@ object RAttachmentMeta {
nerlabels,
proposals,
pages,
language,
classifyProposals
language
)
}
@ -62,7 +59,7 @@ object RAttachmentMeta {
DML.insert(
T,
T.all,
fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language},${v.classifyProposals}"
fr"${v.id},${v.content},${v.nerlabels},${v.proposals},${v.pages},${v.language}"
)
def exists(attachId: Ident): ConnectionIO[Boolean] =
@ -90,8 +87,7 @@ object RAttachmentMeta {
DML.set(
T.content.setTo(v.content),
T.nerlabels.setTo(v.nerlabels),
T.proposals.setTo(v.proposals),
T.classifyProposals.setTo(v.classifyProposals)
T.proposals.setTo(v.proposals)
)
)
@ -106,16 +102,12 @@ object RAttachmentMeta {
def updateProposals(
mid: Ident,
plist: MetaProposalList,
clist: Option[MetaProposalList]
plist: MetaProposalList
): ConnectionIO[Int] =
DML.update(
T,
T.id === mid,
DML.set(
T.proposals.setTo(plist),
T.classifyProposals.setTo(clist)
)
DML.set(T.proposals.setTo(plist))
)
def updatePageCount(mid: Ident, pageCount: Option[Int]): ConnectionIO[Int] =

View File

@ -0,0 +1,60 @@
package docspell.store.records
import cats.data.NonEmptyList
//import cats.implicits._
import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import doobie._
import doobie.implicits._
case class RItemProposal(
itemId: Ident,
classifyProposals: MetaProposalList,
classifyTags: List[IdRef],
created: Timestamp
)
object RItemProposal {
final case class Table(alias: Option[String]) extends TableDef {
val tableName = "item_proposal"
val itemId = Column[Ident]("itemid", this)
val classifyProposals = Column[MetaProposalList]("classifier_proposals", this)
val classifyTags = Column[List[IdRef]]("classifier_tags", this)
val created = Column[Timestamp]("created", this)
val all = NonEmptyList.of[Column[_]](itemId, classifyProposals, classifyTags, created)
}
val T = Table(None)
def as(alias: String): Table =
Table(Some(alias))
def insert(v: RItemProposal): ConnectionIO[Int] =
DML.insert(
T,
T.all,
fr"${v.itemId},${v.classifyProposals},${v.classifyTags},${v.created}"
)
def deleteByItem(itemId: Ident): ConnectionIO[Int] =
DML.delete(T, T.itemId === itemId)
def createNew(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
for {
now <- Timestamp.current[ConnectionIO]
value = RItemProposal(itemId, proposals, Nil, now)
n <- insert(value)
} yield n
def exists(itemId: Ident): ConnectionIO[Boolean] =
Select(select(countAll), from(T), T.itemId === itemId).build
.query[Int]
.unique
.map(_ > 0)
def updateProposals(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
DML.update(T, T.itemId === itemId, DML.set(T.classifyProposals.setTo(proposals)))
}