Remove person suggestion if it doesn't match with organization

This commit is contained in:
Eike Kettner 2021-02-15 22:52:08 +01:00
parent eb308cfa85
commit d99ce76d89
6 changed files with 89 additions and 9 deletions

View File

@ -40,6 +40,11 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
/** Creates a new list in which every proposal has been transformed
  * by the given function `f`. The number and order of elements is
  * the same as in this list.
  */
def change(f: MetaProposal => MetaProposal): MetaProposalList = {
  val updated = proposals.map(f)
  new MetaProposalList(updated)
}
/** Puts `mp` in place of all proposals of the same type: every
  * existing proposal whose type equals the type of `mp` is dropped
  * and `mp` is prepended to the remaining ones.
  */
def replace(mp: MetaProposal): MetaProposalList = {
  val others = proposals.filterNot(_.proposalType == mp.proposalType)
  MetaProposalList(mp :: others)
}
/** Creates a new list containing only those proposals for which the
  * predicate `f` returns `true`.
  */
def filter(f: MetaProposal => Boolean): MetaProposalList = {
  val kept = proposals.filter(f)
  new MetaProposalList(kept)
}

View File

@ -0,0 +1,68 @@
package docspell.joex.process
import cats.data.NonEmptyList
import cats.data.OptionT
import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
/** After candidates have been determined, the set is reduced by doing
  * some cross checks. For example: if an organization is suggested as
  * correspondent, the correspondent person must be linked to that
  * organization. So this *removes all* person candidates that are not
  * linked to the first organization candidate (which will be linked
  * to the item).
  */
object CrossCheckProposals {

  /** Creates the task doing the cross check for the given item data.
    *
    * If the final proposals contain no correspondent organization,
    * the data is returned unchanged. Otherwise the person/organization
    * relations are looked up and `personOrgCheck` is applied to the
    * classifier proposals and to the proposals of every attachment.
    */
  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
    Task { ctx =>
      data.finalProposals.find(MetaProposalType.CorrOrg) match {
        case None =>
          // nothing to check against
          data.pure[F]

        case Some(orgProposal) =>
          EvalProposals.findOrganizationRelation(data, ctx).flatMap { personRefs =>
            def check(mpl: MetaProposalList): F[MetaProposalList] =
              personOrgCheck[F](ctx.logger, mpl, personRefs)(orgProposal)

            for {
              classified <- check(data.classifyProposals)
              attachMetas <- data.metas.traverse { meta =>
                check(meta.proposals).map(checked => meta.copy(proposals = checked))
              }
            } yield data.copy(classifyProposals = classified, metas = attachMetas)
          }
      }
    }

  /** Reduces the correspondent-person proposal in `mpl` to those
    * candidates whose entry in `persRefs` references the organization
    * of the first candidate of `corrOrg`.
    *
    * The list is returned as is, if it has no correspondent-person
    * proposal or if all its candidates match. Otherwise a debug
    * message is logged and the proposal is either replaced by one
    * containing only the matching candidates, or removed completely
    * when no candidate matches.
    */
  def personOrgCheck[F[_]: Sync](
      logger: Logger[F],
      mpl: MetaProposalList,
      persRefs: Map[Ident, PersonRef]
  )(
      corrOrg: MetaProposal
  ): F[MetaProposalList] = {
    val orgId = corrOrg.values.head.ref.id

    mpl.find(MetaProposalType.CorrPerson).fold(mpl.pure[F]) { persons =>
      // candidates without an entry in persRefs are treated as non-matching
      val matching = persons.values.filter { cand =>
        persRefs.get(cand.ref.id) match {
          case Some(pref) => pref.organization == Some(orgId)
          case None       => false
        }
      }

      if (persons.values.toList == matching) mpl.pure[F]
      else {
        val reduced = NonEmptyList.fromList(matching) match {
          case Some(nel) => mpl.replace(MetaProposal(persons.proposalType, nel))
          case None      => mpl.filter(_.proposalType != persons.proposalType)
        }
        logger.debug(
          "Removing person suggestion, because it doesn't match organization"
        ) *> reduced.pure[F]
      }
    }
  }
}

View File

@ -28,7 +28,9 @@ object EvalProposals {
ctx: Context[F, _]
): F[Map[Ident, PersonRef]] = {
val corrPersIds = data.metas
.flatMap(_.proposals.find(MetaProposalType.CorrPerson))
.map(_.proposals)
.appended(data.classifyProposals)
.flatMap(_.find(MetaProposalType.CorrPerson))
.flatMap(_.values.toList.map(_.ref.id))
.toSet
ctx.store

View File

@ -33,6 +33,17 @@ case class ItemData(
classifyTags: List[String]
) {
/** Combines the proposals of all attachments into a single list,
  * sorted by weight. The order of equal weights is not important:
  * just one is chosen, the others are then suggestions.
  *
  * Doc-date proposals are removed, because the doc-date is only set
  * when given explicitly, not from "guessing". At last, the result is
  * passed to `fillEmptyFrom` together with the classifier proposals.
  */
def finalProposals: MetaProposalList = {
  val perAttachment = metas.map(_.proposals)
  val withoutDocDate = MetaProposalList
    .flatten(perAttachment)
    .filter(mp => mp.proposalType != MetaProposalType.DocDate)
  withoutDocDate.sortByWeights.fillEmptyFrom(classifyProposals)
}
/** Looks up the first attachment meta data whose id equals the
  * given `attachId`; `None` if there is no such element.
  */
def findMeta(attachId: Ident): Option[RAttachmentMeta] =
  metas.find(meta => meta.id == attachId)

View File

@ -17,14 +17,7 @@ object LinkProposal {
.map(_ => data)
else
Task { ctx =>
// sort by weight; order of equal weights is not important, just
// choose one others are then suggestions
// doc-date is only set when given explicitely, not from "guessing"
val proposals = MetaProposalList
.flatten(data.metas.map(_.proposals))
.filter(_.proposalType != MetaProposalType.DocDate)
.sortByWeights
.fillEmptyFrom(data.classifyProposals)
val proposals = data.finalProposals
ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all

View File

@ -43,6 +43,7 @@ object ProcessItem {
TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item)
.flatMap(FindProposal[F](cfg.textAnalysis))
.flatMap(EvalProposals[F])
.flatMap(CrossCheckProposals[F])
.flatMap(SaveProposals[F])
private def processAttachments0[F[_]: ConcurrentEffect: ContextShift](