Store tags guessed with classifier in database

This commit is contained in:
Eike Kettner 2021-01-20 00:30:40 +01:00
parent 9d83cb7fe4
commit 27c24c128d
9 changed files with 56 additions and 47 deletions

View File

@ -108,7 +108,8 @@ object CreateItem {
fm.map(a => a.id -> a.fileId).toMap,
MetaProposalList.empty,
Nil,
None
MetaProposalList.empty,
Nil
)
}
@ -168,7 +169,8 @@ object CreateItem {
origMap,
MetaProposalList.empty,
Nil,
None
MetaProposalList.empty,
Nil
)
)
}

View File

@ -5,6 +5,7 @@ import java.time.ZoneId
import cats.effect.Sync
import cats.implicits._
import cats.{Applicative, FlatMap}
import docspell.analysis.contact._
import docspell.common.MetaProposal.Candidate
import docspell.common._
@ -30,11 +31,8 @@ object FindProposal {
processAttachment(cfg, rm, data.findDates(rm), ctx)
.map(ml => rm.copy(proposals = ml))
)
clp <- data.classifyProposals match {
case Some(cmp) => lookupClassifierProposals(ctx, cmp)
case None => MetaProposalList.empty.pure[F]
}
} yield data.copy(metas = rmv, classifyProposals = clp.some)
clp <- lookupClassifierProposals(ctx, data.classifyProposals)
} yield data.copy(metas = rmv, classifyProposals = clp)
}
def lookupClassifierProposals[F[_]: Sync](

View File

@ -29,7 +29,8 @@ case class ItemData(
// a list of tags (names or ids) attached to the item if they exist
tags: List[String],
// proposals obtained from the classifier
classifyProposals: Option[MetaProposalList]
classifyProposals: MetaProposalList,
classifyTags: List[String]
) {
def findMeta(attachId: Ident): Option[RAttachmentMeta] =

View File

@ -24,7 +24,7 @@ object LinkProposal {
.flatten(data.metas.map(_.proposals))
.filter(_.proposalType != MetaProposalType.DocDate)
.sortByWeights
.fillEmptyFrom(data.classifyProposals.getOrElse(MetaProposalList.empty))
.fillEmptyFrom(data.classifyProposals)
ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all

View File

@ -66,7 +66,8 @@ object ReProcessItem {
asrcMap.view.mapValues(_.fileId).toMap,
MetaProposalList.empty,
Nil,
None
MetaProposalList.empty,
Nil
)).getOrElseF(
Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}"))
)

View File

@ -2,16 +2,18 @@ package docspell.joex.process
import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
import docspell.joex.scheduler.{Context, Task}
import docspell.store.AddResult
import docspell.store.records._
/** Saves the proposals in the database: the proposals of each
  * attachment and the classifier results (proposals and tags) of the item.
  */
object SaveProposals {
type Args = ProcessItemArgs
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] =
Task { ctx =>
for {
_ <- ctx.logger.info("Storing proposals")
@ -21,28 +23,32 @@ object SaveProposals {
s"Storing attachment proposals: ${rm.proposals}"
) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals))
)
_ <- data.classifyProposals match {
case Some(clp) =>
val itemId = data.item.id
ctx.logger.debug(s"Storing classifier proposals: $clp") *>
ctx.store
.add(
RItemProposal.createNew(itemId, clp),
RItemProposal.exists(itemId)
)
.flatMap({
case AddResult.EntityExists(_) =>
ctx.store.transact(RItemProposal.updateProposals(itemId, clp))
case AddResult.Failure(ex) =>
ctx.logger
.warn(s"Could not store classifier proposals: ${ex.getMessage}") *>
0.pure[F]
case AddResult.Success =>
1.pure[F]
})
case None =>
0.pure[F]
}
_ <-
if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F]
else saveItemProposal(ctx, data)
} yield data
}
/** Stores the classifier results of the item as one `RItemProposal` record.
  *
  * The names/ids in `data.classifyTags` are resolved via
  * `RTag.findAllByNameOrId` for the collective given in the job arguments and
  * kept as `IdRef`s next to `data.classifyProposals`. The record is inserted,
  * or updated if one already exists for the item. An `AddResult.Failure` is
  * only logged as a warning; it does not fail the task.
  */
def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = {
  // Insert the record, falling back to an update when the item already has one.
  def upsert(v: RItemProposal): F[Int] =
    ctx.store
      .add(RItemProposal.insert(v), RItemProposal.exists(v.itemId))
      .flatMap {
        case AddResult.EntityExists(_) =>
          ctx.store.transact(RItemProposal.update(v))
        case AddResult.Failure(ex) =>
          ctx.logger
            .warn(s"Could not store item proposals: ${ex.getMessage}")
            .map(_ => 0)
        case AddResult.Success =>
          1.pure[F]
      }

  val collective = ctx.args.meta.collective
  for {
    _ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}")
    found <- ctx.store.transact(RTag.findAllByNameOrId(data.classifyTags, collective))
    refs = found.map(t => IdRef(t.tagId, t.name)).toList
    created <- Timestamp.current[F]
    _ <- upsert(RItemProposal(data.item.id, data.classifyProposals, refs, created))
  } yield ()
}
}

View File

@ -45,7 +45,8 @@ object SetGivenData {
Task { ctx =>
val itemId = data.item.id
val collective = ctx.args.meta.collective
val tags = (ctx.args.meta.tags.getOrElse(Nil) ++ data.tags).distinct
val tags =
(ctx.args.meta.tags.getOrElse(Nil) ++ data.tags ++ data.classifyTags).distinct
for {
_ <- ctx.logger.info(s"Set tags from given data: ${tags}")
e <- ops.linkTags(itemId, tags, collective).attempt

View File

@ -54,9 +54,9 @@ object TextAnalysis {
.copy(
metas = v.map(_._1),
dateLabels = v.map(_._2),
classifyProposals = classProposals.some
classifyProposals = classProposals,
classifyTags = tag
)
.appendTags(tag)
}
def annotateAttachment[F[_]: Sync](

View File

@ -1,7 +1,6 @@
package docspell.store.records
import cats.data.NonEmptyList
//import cats.implicits._
import docspell.common._
import docspell.store.qb.DSL._
@ -39,22 +38,23 @@ object RItemProposal {
fr"${v.itemId},${v.classifyProposals},${v.classifyTags},${v.created}"
)
/** Overwrites the classifier proposals and classifier tags of the record
  * whose item id equals `v.itemId`. The `created` value of `v` is not written.
  *
  * @return the `Int` produced by `DML.update` (presumably the number of
  *         affected rows)
  */
def update(v: RItemProposal): ConnectionIO[Int] = {
  val assignments = DML.set(
    T.classifyProposals.setTo(v.classifyProposals),
    T.classifyTags.setTo(v.classifyTags)
  )
  DML.update(T, T.itemId === v.itemId, assignments)
}
/** Deletes the proposal record(s) stored for the given item id. */
def deleteByItem(itemId: Ident): ConnectionIO[Int] = {
  val byItem = T.itemId === itemId
  DML.delete(T, byItem)
}
/** Inserts a new record for the item holding the given proposals, an empty
  * tag list and the current timestamp as creation time.
  */
def createNew(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] =
  Timestamp
    .current[ConnectionIO]
    .flatMap(created => insert(RItemProposal(itemId, proposals, Nil, created)))
/** Checks whether at least one record exists for the given item id by
  * counting the matching rows.
  */
def exists(itemId: Ident): ConnectionIO[Boolean] = {
  val countQuery = Select(select(countAll), from(T), T.itemId === itemId).build
  countQuery.query[Int].unique.map(count => count > 0)
}
/** Replaces only the classifier proposals of the record for the given item
  * id; no other column is set.
  */
def updateProposals(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] = {
  val assignment = DML.set(T.classifyProposals.setTo(proposals))
  DML.update(T, T.itemId === itemId, assignment)
}
}