diff --git a/modules/common/src/main/scala/docspell/common/MetaProposalList.scala b/modules/common/src/main/scala/docspell/common/MetaProposalList.scala index 33865484..1f34648d 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposalList.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposalList.scala @@ -40,6 +40,11 @@ case class MetaProposalList private (proposals: List[MetaProposal]) { def change(f: MetaProposal => MetaProposal): MetaProposalList = new MetaProposalList(proposals.map(f)) + def replace(mp: MetaProposal): MetaProposalList = { + val next = proposals.filter(_.proposalType != mp.proposalType) + MetaProposalList(mp :: next) + } + def filter(f: MetaProposal => Boolean): MetaProposalList = new MetaProposalList(proposals.filter(f)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala new file mode 100644 index 00000000..06c85ec0 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/process/CrossCheckProposals.scala @@ -0,0 +1,68 @@ +package docspell.joex.process + +import cats.data.NonEmptyList +import cats.data.OptionT +import cats.effect.Sync +import cats.implicits._ + +import docspell.common._ +import docspell.joex.scheduler.Task + +/** After candidates have been determined, the set is reduced by doing + * some cross checks. For example: if an organization is suggested as + * correspondent, the correspondent person must be linked to that + * organization. So this *removes all* person candidates that are not + * linked to the first organization candidate (which will be linked + * to the item). 
+ */ +object CrossCheckProposals { + + def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = + Task { ctx => + val proposals = data.finalProposals + val corrOrg = proposals.find(MetaProposalType.CorrOrg) + (for { + orgRef <- OptionT.fromOption[F](corrOrg) + persRefs <- OptionT.liftF(EvalProposals.findOrganizationRelation(data, ctx)) + clProps <- OptionT.liftF( + personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(orgRef) + ) + atProps <- OptionT.liftF { + data.metas.traverse(ra => + personOrgCheck[F](ctx.logger, ra.proposals, persRefs)(orgRef).map(nl => + ra.copy(proposals = nl) + ) + ) + } + } yield data.copy(classifyProposals = clProps, metas = atProps)).getOrElse(data) + } + + def personOrgCheck[F[_]: Sync]( + logger: Logger[F], + mpl: MetaProposalList, + persRefs: Map[Ident, PersonRef] + )( + corrOrg: MetaProposal + ): F[MetaProposalList] = { + val orgId = corrOrg.values.head.ref.id + mpl.find(MetaProposalType.CorrPerson) match { + case Some(ppl) => + val list = ppl.values.filter(c => + persRefs.get(c.ref.id).exists(_.organization == Some(orgId)) + ) + + if (ppl.values.toList == list) mpl.pure[F] + else + logger.debug( + "Removing person suggestion, because it doesn't match organization" + ) *> NonEmptyList + .fromList(list) + .map(nel => mpl.replace(MetaProposal(ppl.proposalType, nel))) + .getOrElse(mpl.filter(_.proposalType != ppl.proposalType)) + .pure[F] + + case None => + mpl.pure[F] + } + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala index 772e9c03..8846b27a 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala @@ -28,7 +28,9 @@ object EvalProposals { ctx: Context[F, _] ): F[Map[Ident, PersonRef]] = { val corrPersIds = data.metas - .flatMap(_.proposals.find(MetaProposalType.CorrPerson)) + .map(_.proposals) 
+ .appended(data.classifyProposals) + .flatMap(_.find(MetaProposalType.CorrPerson)) .flatMap(_.values.toList.map(_.ref.id)) .toSet ctx.store diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index f7f52fe5..f19a4244 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -33,6 +33,17 @@ case class ItemData( classifyTags: List[String] ) { + /** Sort by weight; the order of equal weights is not important, just + * choose one, the others are then suggestions. + * Doc-date is only set when given explicitly, not from "guessing". + */ + def finalProposals: MetaProposalList = + MetaProposalList + .flatten(metas.map(_.proposals)) + .filter(_.proposalType != MetaProposalType.DocDate) + .sortByWeights + .fillEmptyFrom(classifyProposals) + def findMeta(attachId: Ident): Option[RAttachmentMeta] = metas.find(_.id == attachId) diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index be8d34c8..6fa15978 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -17,14 +17,7 @@ object LinkProposal { .map(_ => data) else Task { ctx => - // sort by weight; order of equal weights is not important, just - // choose one others are then suggestions - // doc-date is only set when given explicitely, not from "guessing" - val proposals = MetaProposalList - .flatten(data.metas.map(_.proposals)) - .filter(_.proposalType != MetaProposalType.DocDate) - .sortByWeights - .fillEmptyFrom(data.classifyProposals) + val proposals = data.finalProposals ctx.logger.info(s"Starting linking proposals") *> MetaProposalType.all diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala 
b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index c119b467..1ba548de 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -43,6 +43,7 @@ object ProcessItem { TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item) .flatMap(FindProposal[F](cfg.textAnalysis)) .flatMap(EvalProposals[F]) + .flatMap(CrossCheckProposals[F]) .flatMap(SaveProposals[F]) private def processAttachments0[F[_]: ConcurrentEffect: ContextShift](