From 290989f67fe562f848015f61e30523f2b9572a81 Mon Sep 17 00:00:00 2001
From: Eike Kettner
Date: Tue, 1 Dec 2020 21:57:01 +0100
Subject: [PATCH] Reorder correspondent person suggestion based on org relationship

---
 .../scala/docspell/common/PersonRef.scala     | 22 +++++
 .../docspell/joex/process/EvalProposals.scala | 88 ++++++++++++++++---
 .../docspell/store/records/RPerson.scala      | 16 ++++
 3 files changed, 113 insertions(+), 13 deletions(-)
 create mode 100644 modules/common/src/main/scala/docspell/common/PersonRef.scala

diff --git a/modules/common/src/main/scala/docspell/common/PersonRef.scala b/modules/common/src/main/scala/docspell/common/PersonRef.scala
new file mode 100644
index 00000000..e5624548
--- /dev/null
+++ b/modules/common/src/main/scala/docspell/common/PersonRef.scala
@@ -0,0 +1,22 @@
+package docspell.common
+
+import io.circe._
+import io.circe.generic.semiauto._
+
+/** A reference to a person: its id and name, plus the id of the
+  * organization the person is associated with, if there is one.
+  */
+case class PersonRef(id: Ident, name: String, organization: Option[Ident]) {
+
+  /** Converts to an `IdRef` of id and name; the organization is dropped. */
+  def toIdRef: IdRef =
+    IdRef(id, name)
+}
+
+object PersonRef {
+
+  implicit val jsonEncoder: Encoder[PersonRef] =
+    deriveEncoder[PersonRef]
+  implicit val jsonDecoder: Decoder[PersonRef] =
+    deriveDecoder[PersonRef]
+}
diff --git a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala
index 170958c9..772e9c03 100644
--- a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala
+++ b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala
@@ -6,8 +6,8 @@ import cats.effect.Sync
 import cats.implicits._
 
 import docspell.common._
-import docspell.joex.scheduler.Task
-import docspell.store.records.RAttachmentMeta
+import docspell.joex.scheduler.{Context, Task}
+import docspell.store.records.{RAttachmentMeta, RPerson}
 
 /** Calculate weights for candidates that adds the most likely
   * candidate a lower number.
@@ -15,21 +15,54 @@ import docspell.store.records.RAttachmentMeta
 object EvalProposals {
 
   def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
-    Task { _ =>
-      Timestamp
-        .current[F]
-        .map { now =>
-          val metas = data.metas.map(calcCandidateWeight(now.toUtcDate))
-          data.copy(metas = metas)
-        }
+    Task { ctx =>
+      for {
+        now <- Timestamp.current[F]
+        personRefs <- findOrganizationRelation[F](data, ctx)
+        metas = data.metas.map(calcCandidateWeight(now.toUtcDate, personRefs))
+      } yield data.copy(metas = metas)
     }
 
-  def calcCandidateWeight(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
-    val list = rm.proposals.change(mp => mp.addWeights(weight(rm, mp, now)))
+  /** Looks up the persons behind all correspondent-person candidates
+    * of the item's attachments and returns them keyed by person id.
+    *
+    * The store is only queried if there is at least one candidate.
+    */
+  def findOrganizationRelation[F[_]: Sync](
+      data: ItemData,
+      ctx: Context[F, _]
+  ): F[Map[Ident, PersonRef]] = {
+    val corrPersIds = data.metas
+      .flatMap(_.proposals.find(MetaProposalType.CorrPerson))
+      .flatMap(_.values.toList.map(_.ref.id))
+      .toSet
+    if (corrPersIds.isEmpty) Map.empty[Ident, PersonRef].pure[F]
+    else
+      ctx.store
+        .transact(RPerson.findOrganization(corrPersIds))
+        .map(_.map(p => (p.id, p)).toMap)
+  }
+
+  /** Applies [[weight]] to the proposals of `rm` via `addWeights` and
+    * returns `rm` with the result of `sortByWeights`.
+    */
+  def calcCandidateWeight(now: LocalDate, personRefs: Map[Ident, PersonRef])(
+      rm: RAttachmentMeta
+  ): RAttachmentMeta = {
+    val list = rm.proposals.change(mp => mp.addWeights(weight(rm, mp, now, personRefs)))
     rm.copy(proposals = list.sortByWeights)
   }
 
-  def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(
+  /** Calculates the weight of a single candidate of proposal `mp`.
+    * `personRefs` maps person ids to the persons loaded from the
+    * store; it is passed on to [[corrOrgPersonFactor]].
+    */
+  def weight(
+      rm: RAttachmentMeta,
+      mp: MetaProposal,
+      ref: LocalDate,
+      personRefs: Map[Ident, PersonRef]
+  )(
       cand: MetaProposal.Candidate
   ): Double =
     mp.proposalType match {
@@ -51,7 +84,36 @@ object EvalProposals {
         val words = cand.origin.map(_.label.split(' ').length).max.toDouble
         val nerFac =
           cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min
-        (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac
+        val corrPerFac = corrOrgPersonFactor(rm, mp, personRefs, cand)
+        (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac * corrPerFac
+    }
+
+  /** Weight factor that prefers a correspondent person belonging to
+    * the suggested correspondent organization.
+    *
+    * For a `CorrPerson` proposal the factor is 0.5 when the
+    * organization of `cand` (looked up in `personRefs`) equals the
+    * first candidate of the `CorrOrg` proposal of `rm`. In every
+    * other case, including a missing organization on either side,
+    * the factor is 1.
+    */
+  def corrOrgPersonFactor(
+      rm: RAttachmentMeta,
+      mp: MetaProposal,
+      personRefs: Map[Ident, PersonRef],
+      cand: MetaProposal.Candidate
+  ): Double =
+    mp.proposalType match {
+      case MetaProposalType.CorrPerson =>
+        (for {
+          currentOrg <- rm.proposals
+            .find(MetaProposalType.CorrOrg)
+            .map(_.values.head.ref.id)
+          personOrg <- personRefs.get(cand.ref.id).flatMap(_.organization)
+          fac = if (currentOrg == personOrg) 0.5 else 1.0
+        } yield fac).getOrElse(1.0)
+      case _ =>
+        1.0
     }
 
   def positionWeight(pos: Int, total: Int): Double =
diff --git a/modules/store/src/main/scala/docspell/store/records/RPerson.scala b/modules/store/src/main/scala/docspell/store/records/RPerson.scala
index b59d4cf4..c7df6fed 100644
--- a/modules/store/src/main/scala/docspell/store/records/RPerson.scala
+++ b/modules/store/src/main/scala/docspell/store/records/RPerson.scala
@@ -1,6 +1,8 @@
 package docspell.store.records
 
 import cats.Eq
+import cats.data.NonEmptyList
+import cats.effect._
 import fs2.Stream
 
 import docspell.common.{IdRef, _}
@@ -167,4 +169,18 @@ object RPerson {
 
   def delete(personId: Ident, coll: Ident): ConnectionIO[Int] =
     deleteFrom(table, and(pid.is(personId), cid.is(coll))).update.run
+
+  /** Selects the `pid`, `name` and `oid` columns of the persons with
+    * the given ids, decoded as [[PersonRef]]. An empty `ids` set
+    * yields an empty vector without running a query.
+    */
+  def findOrganization(ids: Set[Ident]): ConnectionIO[Vector[PersonRef]] = {
+    val cols = Seq(pid, name, oid)
+    NonEmptyList.fromList(ids.toList) match {
+      case Some(nel) =>
+        selectSimple(cols, table, pid.isIn(nel)).query[PersonRef].to[Vector]
+      case None =>
+        Sync[ConnectionIO].pure(Vector.empty)
+    }
+  }
 }