Merge pull request #639 from eikek/person-suggestion-fix

Person suggestion fix
This commit is contained in:
mergify[bot] 2021-02-15 23:43:10 +00:00 committed by GitHub
commit 567bfb3e69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 134 additions and 11 deletions

View File

@ -1,5 +1,7 @@
package docspell.common
import cats.Order
import io.circe._
import io.circe.generic.semiauto._
@ -11,4 +13,7 @@ object IdRef {
deriveEncoder[IdRef]
implicit val jsonDecoder: Decoder[IdRef] =
deriveDecoder[IdRef]
/** Orders `IdRef` values by their `id` field only. */
implicit val order: Order[IdRef] =
  Order.by(idRef => idRef.id)
}

View File

@ -6,6 +6,7 @@ import java.util.UUID
import cats.Eq
import cats.effect.Sync
import cats.implicits._
import cats.Order
import io.circe.{Decoder, Encoder}
import scodec.bits.ByteVector
@ -66,4 +67,7 @@ object Ident {
implicit val decodeIdent: Decoder[Ident] =
Decoder.decodeString.emap(Ident.fromString)
/** Orders `Ident` values by their underlying `id` value. */
implicit val order: Order[Ident] =
  Order.by(ident => ident.id)
}

View File

@ -2,9 +2,9 @@ package docspell.common
import java.time.LocalDate
import cats.Order
import cats.data.NonEmptyList
import cats.implicits._
import cats.kernel.Order
import docspell.common.MetaProposal.Candidate
import docspell.common._
@ -74,6 +74,9 @@ object MetaProposal {
implicit val jsonDecoder: Decoder[Candidate] =
deriveDecoder[Candidate]
/** Orders candidates by the reference they point to; other fields of the
  * candidate do not take part in the comparison.
  */
implicit val order: Order[Candidate] =
  Order.by(candidate => candidate.ref)
/** This deviates from standard order to sort None at last.
*/
val weightOrder: Order[Option[Double]] = new Order[Option[Double]] {

View File

@ -40,6 +40,11 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
/** Returns a new list in which every proposal has been transformed by `f`. */
def change(f: MetaProposal => MetaProposal): MetaProposalList = {
  val changed = proposals.map(f)
  new MetaProposalList(changed)
}
/** Replaces every proposal that has the same proposal type as `mp` with
  * `mp` itself. The given proposal is placed at the front of the
  * remaining proposals.
  */
def replace(mp: MetaProposal): MetaProposalList = {
  val others = proposals.filterNot(_.proposalType == mp.proposalType)
  MetaProposalList(mp :: others)
}
/** Returns a new list containing only the proposals for which `f` holds. */
def filter(f: MetaProposal => Boolean): MetaProposalList = {
  val kept = proposals.filter(f)
  new MetaProposalList(kept)
}
@ -52,7 +57,8 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
(map, next) =>
map.get(next.proposalType) match {
case Some(MetaProposal(mt, values)) =>
val cand = NonEmptyList(values.head, next.values.toList ++ values.tail)
val cand =
NonEmptyList(values.head, next.values.toList ++ values.tail).distinct
map.updated(next.proposalType, MetaProposal(mt, MetaProposal.flatten(cand)))
case None =>
map.updated(next.proposalType, next)

View File

@ -99,4 +99,34 @@ object MetaProposalListTest extends SimpleTestSuite {
)
)
}
// Inserting a second list must not duplicate a candidate that is already
// present for the same proposal type (here: the "123" candidate of CorrOrg).
test("insert second, remove duplicates") {
  def candidate(id: String): Candidate =
    Candidate(IdRef(Ident.unsafe(id), "name"), Set.empty)

  val c123 = candidate("123")
  val c456 = candidate("456")
  val c789 = candidate("789")
  val cDef = candidate("def")

  val first = MetaProposalList.of(
    MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(c123, c456)),
    MetaProposal(MetaProposalType.ConcPerson, NonEmptyList.of(c789))
  )
  val second = MetaProposalList.of(
    MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(c123)),
    MetaProposal(MetaProposalType.ConcPerson, NonEmptyList.of(cDef))
  )

  val expected = MetaProposalList(
    List(
      MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(c123, c456)),
      MetaProposal(MetaProposalType.ConcPerson, NonEmptyList.of(c789, cDef))
    )
  )

  assertEquals(first.insertSecond(second), expected)
}
}

View File

@ -0,0 +1,68 @@
package docspell.joex.process
import cats.data.NonEmptyList
import cats.data.OptionT
import cats.effect.Sync
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.Task
/** Reduces the set of candidates by cross-checking them once they have
  * been determined. Currently one check is done: if an organization is
  * suggested as correspondent, a suggested correspondent person must be
  * linked to that organization. Therefore all person candidates that
  * are not linked to the first organization candidate (the one that
  * will be linked to the item) are removed.
  */
object CrossCheckProposals {

  /** Creates the task that applies the person/organization check to the
    * classifier proposals and to the proposals of every attachment meta
    * of `data`.
    *
    * If the final proposals contain no correspondent organization, the
    * item data is returned unchanged and the person relations are not
    * looked up at all.
    */
  def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
    Task { ctx =>
      val firstOrg = data.finalProposals.find(MetaProposalType.CorrOrg)

      val checked: OptionT[F, ItemData] =
        OptionT.fromOption[F](firstOrg).semiflatMap { orgRef =>
          for {
            persRefs <- EvalProposals.findOrganizationRelation(data, ctx)
            clProps <- personOrgCheck[F](ctx.logger, data.classifyProposals, persRefs)(
              orgRef
            )
            atProps <- data.metas.traverse { ra =>
              personOrgCheck[F](ctx.logger, ra.proposals, persRefs)(orgRef)
                .map(reduced => ra.copy(proposals = reduced))
            }
          } yield data.copy(classifyProposals = clProps, metas = atProps)
        }

      checked.getOrElse(data)
    }

  /** Removes those correspondent person candidates from `mpl` that are
    * not linked to the organization of the first candidate in `corrOrg`.
    *
    * A candidate is kept only if `persRefs` has an entry for its id and
    * that entry's organization equals the organization id; candidates
    * without an entry in `persRefs` are removed as well.
    *
    *  - no person proposal, or nothing to remove: `mpl` is returned as is
    *    and nothing is logged
    *  - some candidates remain: the person proposal is replaced by one
    *    holding only the remaining candidates
    *  - no candidate remains: the person proposal is dropped entirely
    *
    * @param logger   receives a debug message whenever candidates are removed
    * @param mpl      the proposals to check
    * @param persRefs person references by person id
    * @param corrOrg  the correspondent organization proposal; only its
    *                 first candidate is considered
    */
  def personOrgCheck[F[_]: Sync](
      logger: Logger[F],
      mpl: MetaProposalList,
      persRefs: Map[Ident, PersonRef]
  )(
      corrOrg: MetaProposal
  ): F[MetaProposalList] = {
    val orgId = corrOrg.values.head.ref.id

    mpl.find(MetaProposalType.CorrPerson).fold(mpl.pure[F]) { persons =>
      val linked = persons.values.filter { cand =>
        persRefs.get(cand.ref.id).exists(_.organization == Some(orgId))
      }

      if (linked == persons.values.toList) mpl.pure[F]
      else {
        val reduced = NonEmptyList.fromList(linked) match {
          case Some(nel) =>
            mpl.replace(MetaProposal(persons.proposalType, nel))
          case None =>
            mpl.filter(_.proposalType != persons.proposalType)
        }
        logger.debug(
          "Removing person suggestion, because it doesn't match organization"
        ) *> reduced.pure[F]
      }
    }
  }
}

View File

@ -28,7 +28,9 @@ object EvalProposals {
ctx: Context[F, _]
): F[Map[Ident, PersonRef]] = {
val corrPersIds = data.metas
.flatMap(_.proposals.find(MetaProposalType.CorrPerson))
.map(_.proposals)
.appended(data.classifyProposals)
.flatMap(_.find(MetaProposalType.CorrPerson))
.flatMap(_.values.toList.map(_.ref.id))
.toSet
ctx.store

View File

@ -33,6 +33,17 @@ case class ItemData(
classifyTags: List[String]
) {
/** The proposals that are finally considered for this item.
  *
  * The proposals of all attachment metas are combined and sorted by
  * weight; the order of equal weights is not important, one is simply
  * chosen and the others are then suggestions. Document date proposals
  * are excluded, because the doc-date is only set when given
  * explicitly, not from "guessing". At last, the result is filled up
  * from the classifier proposals.
  */
def finalProposals: MetaProposalList = {
  val fromAttachments = MetaProposalList.flatten(metas.map(_.proposals))
  val withoutDocDate =
    fromAttachments.filter(_.proposalType != MetaProposalType.DocDate)
  withoutDocDate.sortByWeights.fillEmptyFrom(classifyProposals)
}
/** Finds the first attachment meta whose id equals `attachId`, if any. */
def findMeta(attachId: Ident): Option[RAttachmentMeta] =
  metas.find(meta => meta.id == attachId)

View File

@ -17,14 +17,7 @@ object LinkProposal {
.map(_ => data)
else
Task { ctx =>
// sort by weight; order of equal weights is not important, just
// choose one others are then suggestions
// doc-date is only set when given explicitely, not from "guessing"
val proposals = MetaProposalList
.flatten(data.metas.map(_.proposals))
.filter(_.proposalType != MetaProposalType.DocDate)
.sortByWeights
.fillEmptyFrom(data.classifyProposals)
val proposals = data.finalProposals
ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all

View File

@ -43,6 +43,7 @@ object ProcessItem {
TextAnalysis[F](cfg.textAnalysis, analyser, regexNer)(item)
.flatMap(FindProposal[F](cfg.textAnalysis))
.flatMap(EvalProposals[F])
.flatMap(CrossCheckProposals[F])
.flatMap(SaveProposals[F])
private def processAttachments0[F[_]: ConcurrentEffect: ContextShift](