From cba466ed47174681ae18f9a6327dc9eb3347fc3f Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Fri, 20 Mar 2020 22:38:03 +0100 Subject: [PATCH] Set item due date candidate After processing, set the due date of an item to the first candidate. The earliest due date is considered best match. --- .../scala/docspell/common/MetaProposal.scala | 9 +++ .../docspell/common/MetaProposalType.scala | 2 +- .../docspell/joex/process/EvalProposals.scala | 71 ++++++++++++------- .../docspell/joex/process/LinkProposal.scala | 28 ++++++-- 4 files changed, 79 insertions(+), 31 deletions(-) diff --git a/modules/common/src/main/scala/docspell/common/MetaProposal.scala b/modules/common/src/main/scala/docspell/common/MetaProposal.scala index 6c35cf9b..f7c34955 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposal.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposal.scala @@ -1,9 +1,12 @@ package docspell.common +import cats.implicits._ import cats.data.NonEmptyList +import docspell.common._ import docspell.common.MetaProposal.Candidate import io.circe._ import io.circe.generic.semiauto._ +import java.time.LocalDate case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) { @@ -22,6 +25,12 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can object MetaProposal { + def parseDate(cand: Candidate): Option[LocalDate] = + parseDate(cand.ref.id) + + def parseDate(date: Ident): Option[LocalDate] = + Either.catchNonFatal(LocalDate.parse(date.id)).toOption + case class Candidate(ref: IdRef, origin: Set[NerLabel]) object Candidate { implicit val jsonEncoder: Encoder[Candidate] = diff --git a/modules/common/src/main/scala/docspell/common/MetaProposalType.scala b/modules/common/src/main/scala/docspell/common/MetaProposalType.scala index 89504896..0799e330 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposalType.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposalType.scala @@ -18,7 +18,7 @@ object MetaProposalType { case object DueDate extends MetaProposalType val all: List[MetaProposalType] = - List(CorrOrg, CorrPerson, ConcPerson, ConcEquip) + List(CorrOrg, CorrPerson, ConcPerson, ConcEquip, DocDate, DueDate) def fromString(str: String): Either[String, MetaProposalType] = str.toLowerCase match { diff --git a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala index 021cb097..799b8b6c 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala @@ -1,5 +1,6 @@ package docspell.joex.process +import java.time.{LocalDate, Period} import cats.implicits._ import cats.effect.Sync import docspell.common._ @@ -12,54 +13,74 @@ object EvalProposals { def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = Task { _ => - val metas = data.metas.map(reorderCandidates) - data.copy(metas = metas).pure[F] + Timestamp + .current[F] + .map { now => + val metas = data.metas.map(reorderCandidates(now.toUtcDate)) + data.copy(metas = metas) + } } - def reorderCandidates(rm: RAttachmentMeta): RAttachmentMeta = { + def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = { val list = rm.proposals.getTypes.toList - .map(mpt => rm.proposals.find(mpt) match { - case Some(mp) => - val v = mp.values.sortBy(weight(rm, mp)) - Some(mp.copy(values = v)) - case None => - None - }) + .map(mpt => + rm.proposals.find(mpt) match { + case Some(mp) => + val v = mp.values.sortBy(weight(rm, mp, now)) + Some(mp.copy(values = v)) + case None => + None + } + ) rm.copy(proposals = MetaProposalList(list.flatMap(identity))) } - def weight(rm: RAttachmentMeta, mp: MetaProposal)(cand: MetaProposal.Candidate): Double = { - val textLen = rm.content.map(_.length).getOrElse(0) - val tagCount = cand.origin.size.toDouble - val pos = cand.origin.map(_.startPosition).min - val words = cand.origin.map(_.label.split(' ').length).max.toDouble - val nerFac = cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min - (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac - } + def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)( + cand: MetaProposal.Candidate + ): Double = + mp.proposalType match { + case MetaProposalType.DueDate => + //for due dates, sort earliest on top + MetaProposal + .parseDate(cand) + .map { ld => + val p = Period.between(ref, ld) + // conversion only for sorting + val d = p.getYears * 365 + p.getMonths * 31 + p.getDays + d.toDouble + } + .getOrElse(2000.0) + case _ => + val textLen = rm.content.map(_.length).getOrElse(0) + val tagCount = cand.origin.size.toDouble + val pos = cand.origin.map(_.startPosition).min + val words = cand.origin.map(_.label.split(' ').length).max.toDouble + val nerFac = + cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min + (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac + } - def positionWeight(pos: Int, total: Int): Double = { + def positionWeight(pos: Int, total: Int): Double = if (total <= 0) 1 else { val p = math.abs(pos.toDouble / total.toDouble) if (p < 0.7) p / 2 else p } - } - def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double = tag match { - case NerTag.Date => 1.0 - case NerTag.Email => 0.5 + case NerTag.Date => 1.0 + case NerTag.Email => 0.5 case NerTag.Location => 1.0 - case NerTag.Misc => 1.0 + case NerTag.Misc => 1.0 case NerTag.Organization => if (mt == MetaProposalType.CorrOrg) 0.8 else 1.0 case NerTag.Person => if (mt == MetaProposalType.CorrPerson || - mt == MetaProposalType.ConcPerson) 0.8 + mt == MetaProposalType.ConcPerson) 0.8 else 1.0 case NerTag.Website => 0.5 } diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index 64a693d2..9416b386 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -50,20 +50,38 @@ object LinkProposal { mpt match { case MetaProposalType.CorrOrg => ctx.logger.debug(s"Updating item organization with: ${value.id}") *> - ctx.store.transact(RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))) + ctx.store.transact( + RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value)) + ) case MetaProposalType.ConcPerson => ctx.logger.debug(s"Updating item concerning person with: $value") *> - ctx.store.transact(RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))) + ctx.store.transact( + RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value)) + ) case MetaProposalType.CorrPerson => ctx.logger.debug(s"Updating item correspondent person with: $value") *> - ctx.store.transact(RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))) + ctx.store.transact( + RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value)) + ) case MetaProposalType.ConcEquip => ctx.logger.debug(s"Updating item concerning equipment with: $value") *> - ctx.store.transact(RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))) + ctx.store.transact( + RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value)) + ) case MetaProposalType.DocDate => ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0) case MetaProposalType.DueDate => - ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0) + MetaProposal.parseDate(value) match { + case Some(ld) => + val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC)) + ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *> + ctx.store.transact( + RItem.updateDueDate(itemId, ctx.args.meta.collective, Some(ts)) + ) + case None => + ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *> + 0.pure[F] + } } sealed trait Result {