diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 8bc9ccc1..c24ad98c 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -108,7 +108,8 @@ object CreateItem { fm.map(a => a.id -> a.fileId).toMap, MetaProposalList.empty, Nil, - None + MetaProposalList.empty, + Nil ) } @@ -168,7 +169,8 @@ object CreateItem { origMap, MetaProposalList.empty, Nil, - None + MetaProposalList.empty, + Nil ) ) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala index 4f984b10..fa484772 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/FindProposal.scala @@ -5,6 +5,7 @@ import java.time.ZoneId import cats.effect.Sync import cats.implicits._ import cats.{Applicative, FlatMap} + import docspell.analysis.contact._ import docspell.common.MetaProposal.Candidate import docspell.common._ @@ -30,11 +31,8 @@ object FindProposal { processAttachment(cfg, rm, data.findDates(rm), ctx) .map(ml => rm.copy(proposals = ml)) ) - clp <- data.classifyProposals match { - case Some(cmp) => lookupClassifierProposals(ctx, cmp) - case None => MetaProposalList.empty.pure[F] - } - } yield data.copy(metas = rmv, classifyProposals = clp.some) + clp <- lookupClassifierProposals(ctx, data.classifyProposals) + } yield data.copy(metas = rmv, classifyProposals = clp) } def lookupClassifierProposals[F[_]: Sync]( diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index a151e8a6..f7f52fe5 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -29,7 +29,8 @@ case class ItemData( // a list of tags (names or ids) attached to the item if they exist tags: List[String], // proposals obtained from the classifier - classifyProposals: Option[MetaProposalList] + classifyProposals: MetaProposalList, + classifyTags: List[String] ) { def findMeta(attachId: Ident): Option[RAttachmentMeta] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index 6108e216..be8d34c8 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -24,7 +24,7 @@ object LinkProposal { .flatten(data.metas.map(_.proposals)) .filter(_.proposalType != MetaProposalType.DocDate) .sortByWeights - .fillEmptyFrom(data.classifyProposals.getOrElse(MetaProposalList.empty)) + .fillEmptyFrom(data.classifyProposals) ctx.logger.info(s"Starting linking proposals") *> MetaProposalType.all diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index db41e901..42db6033 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -66,7 +66,8 @@ object ReProcessItem { asrcMap.view.mapValues(_.fileId).toMap, MetaProposalList.empty, Nil, - None + MetaProposalList.empty, + Nil )).getOrElseF( Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}")) ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala index 060e718e..dfe4e1e2 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SaveProposals.scala @@ -2,16 +2,18 @@ package docspell.joex.process import cats.effect.Sync import cats.implicits._ + import docspell.common._ -import docspell.joex.scheduler.Task +import docspell.joex.scheduler.{Context, Task} import docspell.store.AddResult import docspell.store.records._ /** Saves the proposals in the database */ object SaveProposals { + type Args = ProcessItemArgs - def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + def apply[F[_]: Sync](data: ItemData): Task[F, Args, ItemData] = Task { ctx => for { _ <- ctx.logger.info("Storing proposals") @@ -21,28 +23,32 @@ object SaveProposals { s"Storing attachment proposals: ${rm.proposals}" ) *> ctx.store.transact(RAttachmentMeta.updateProposals(rm.id, rm.proposals)) ) - _ <- data.classifyProposals match { - case Some(clp) => - val itemId = data.item.id - ctx.logger.debug(s"Storing classifier proposals: $clp") *> - ctx.store - .add( - RItemProposal.createNew(itemId, clp), - RItemProposal.exists(itemId) - ) - .flatMap({ - case AddResult.EntityExists(_) => - ctx.store.transact(RItemProposal.updateProposals(itemId, clp)) - case AddResult.Failure(ex) => - ctx.logger - .warn(s"Could not store classifier proposals: ${ex.getMessage}") *> - 0.pure[F] - case AddResult.Success => - 1.pure[F] - }) - case None => - 0.pure[F] - } + _ <- + if (data.classifyProposals.isEmpty && data.classifyTags.isEmpty) 0.pure[F] + else saveItemProposal(ctx, data) } yield data } + + def saveItemProposal[F[_]: Sync](ctx: Context[F, Args], data: ItemData): F[Unit] = { + def upsert(v: RItemProposal): F[Int] = + ctx.store.add(RItemProposal.insert(v), RItemProposal.exists(v.itemId)).flatMap { + case AddResult.Success => 1.pure[F] + case AddResult.EntityExists(_) => + ctx.store.transact(RItemProposal.update(v)) + case AddResult.Failure(ex) => + ctx.logger.warn(s"Could not store item proposals: ${ex.getMessage}") *> 0 + .pure[F] + } + + for { + _ <- ctx.logger.debug(s"Storing classifier proposals: ${data.classifyProposals}") + tags <- ctx.store.transact( + RTag.findAllByNameOrId(data.classifyTags, ctx.args.meta.collective) + ) + tagRefs = tags.map(t => IdRef(t.tagId, t.name)) + now <- Timestamp.current[F] + value = RItemProposal(data.item.id, data.classifyProposals, tagRefs.toList, now) + _ <- upsert(value) + } yield () + } } diff --git a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala index 99348419..b668dbe9 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala @@ -45,7 +45,8 @@ object SetGivenData { Task { ctx => val itemId = data.item.id val collective = ctx.args.meta.collective - val tags = (ctx.args.meta.tags.getOrElse(Nil) ++ data.tags).distinct + val tags = + (ctx.args.meta.tags.getOrElse(Nil) ++ data.tags ++ data.classifyTags).distinct for { _ <- ctx.logger.info(s"Set tags from given data: ${tags}") e <- ops.linkTags(itemId, tags, collective).attempt diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala index a2561e07..a3c4edb5 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextAnalysis.scala @@ -54,9 +54,9 @@ object TextAnalysis { .copy( metas = v.map(_._1), dateLabels = v.map(_._2), - classifyProposals = classProposals.some + classifyProposals = classProposals, + classifyTags = tag ) - .appendTags(tag) } def annotateAttachment[F[_]: Sync]( diff --git a/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala b/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala index 822404ce..c5d44cc0 100644 --- a/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala +++ b/modules/store/src/main/scala/docspell/store/records/RItemProposal.scala @@ -1,7 +1,6 @@ package docspell.store.records import cats.data.NonEmptyList -//import cats.implicits._ import docspell.common._ import docspell.store.qb.DSL._ @@ -39,22 +38,23 @@ object RItemProposal { fr"${v.itemId},${v.classifyProposals},${v.classifyTags},${v.created}" ) + def update(v: RItemProposal): ConnectionIO[Int] = + DML.update( + T, + T.itemId === v.itemId, + DML.set( + T.classifyProposals.setTo(v.classifyProposals), + T.classifyTags.setTo(v.classifyTags) + ) + ) + def deleteByItem(itemId: Ident): ConnectionIO[Int] = DML.delete(T, T.itemId === itemId) - def createNew(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] = - for { - now <- Timestamp.current[ConnectionIO] - value = RItemProposal(itemId, proposals, Nil, now) - n <- insert(value) - } yield n - def exists(itemId: Ident): ConnectionIO[Boolean] = Select(select(countAll), from(T), T.itemId === itemId).build .query[Int] .unique .map(_ > 0) - def updateProposals(itemId: Ident, proposals: MetaProposalList): ConnectionIO[Int] = - DML.update(T, T.itemId === itemId, DML.set(T.classifyProposals.setTo(proposals))) }