mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Set item due date candidate
After processing, set the due date of an item to the first candidate. The earliest due date is considered the best match.
This commit is contained in:
@ -1,5 +1,6 @@
|
||||
package docspell.joex.process
|
||||
|
||||
import java.time.{LocalDate, Period}
|
||||
import cats.implicits._
|
||||
import cats.effect.Sync
|
||||
import docspell.common._
|
||||
@ -12,54 +13,74 @@ object EvalProposals {
|
||||
|
||||
/** Creates a task that re-sorts the proposal candidates of all attachment
  * metadata in `data`.
  *
  * The current timestamp is obtained inside `F` and its date
  * (`now.toUtcDate`) is handed to [[reorderCandidates]] as the reference
  * date for ranking due-date candidates.
  *
  * @param data the item data whose `metas` are re-sorted
  * @return a task yielding a copy of `data` with re-sorted proposals
  */
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
  Task { _ =>
    Timestamp
      .current[F]
      .map { now =>
        val metas = data.metas.map(reorderCandidates(now.toUtcDate))
        data.copy(metas = metas)
      }
  }
|
||||
|
||||
/** Returns a copy of `rm` in which the candidates of every proposal are
  * sorted ascending by [[weight]].
  *
  * Proposal types for which `rm.proposals.find` yields nothing are dropped
  * from the resulting list.
  *
  * @param now reference date passed on to [[weight]]
  * @param rm  the attachment metadata holding the proposals
  */
def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
  val list = rm.proposals.getTypes.toList
    .flatMap(mpt =>
      rm.proposals
        .find(mpt)
        .map(mp => mp.copy(values = mp.values.sortBy(weight(rm, mp, now))))
    )

  rm.copy(proposals = MetaProposalList(list))
}
|
||||
|
||||
/** Computes the sort key of a candidate of proposal `mp`. Candidates are
  * sorted ascending by this value, so a smaller weight ranks first.
  *
  * Due-date candidates are ranked by their distance from `ref`. All other
  * proposal types are ranked from the longest label (in words), the number
  * of origin labels, the earliest start position relative to the text
  * length and the NER tag factor.
  *
  * @param rm   attachment metadata; only the length of `content` is used
  * @param mp   the proposal the candidate belongs to
  * @param ref  reference date for ranking due dates
  * @param cand the candidate to weigh
  */
def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(
    cand: MetaProposal.Candidate
): Double =
  mp.proposalType match {
    case MetaProposalType.DueDate =>
      // for due dates, sort earliest on top
      MetaProposal
        .parseDate(cand)
        .map { ld =>
          val p = Period.between(ref, ld)
          // Conversion only for sorting. Months are weighted with 31 since
          // the day part of a period between two dates never reaches 31,
          // which keeps the key increasing with the date. The former
          // `years * 365 + months * 31 + days` ranked "11 months 30 days"
          // (371) behind "1 year" (365).
          (p.toTotalMonths * 31 + p.getDays).toDouble
        }
        // candidates that cannot be parsed as a date get a fixed weight
        .getOrElse(2000.0)
    case _ =>
      val textLen  = rm.content.map(_.length).getOrElse(0)
      val tagCount = cand.origin.size.toDouble
      // NOTE(review): `min`/`max` assume `cand.origin` is non-empty — confirm
      val pos   = cand.origin.map(_.startPosition).min
      val words = cand.origin.map(_.label.split(' ').length).max.toDouble
      val nerFac =
        cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min
      (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac
  }
|
||||
|
||||
/** Weighs a position within a text of `total` characters.
  *
  * The relative position `|pos / total|` is halved when it is below 0.7 and
  * returned unchanged otherwise. When `total` is zero or negative the
  * result is 1.
  *
  * @param pos   the start position
  * @param total the text length
  */
def positionWeight(pos: Int, total: Int): Double =
  if (total <= 0) 1.0
  else {
    val p = math.abs(pos.toDouble / total.toDouble)
    if (p < 0.7) p / 2
    else p
  }
|
||||
|
||||
|
||||
/** Returns the factor that the NER tag of an origin label contributes to a
  * candidate's weight.
  *
  * Email and website tags yield 0.5. Organization tags yield 0.8 for
  * correspondent-organization proposals, and person tags yield 0.8 for
  * correspondent- or concerning-person proposals. Everything else yields 1.0.
  *
  * @param tag the NER tag of the label
  * @param mt  the type of the proposal being weighed
  */
def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double =
  tag match {
    case NerTag.Date     => 1.0
    case NerTag.Email    => 0.5
    case NerTag.Location => 1.0
    case NerTag.Misc     => 1.0
    case NerTag.Organization =>
      if (mt == MetaProposalType.CorrOrg) 0.8
      else 1.0
    case NerTag.Person =>
      if (
        mt == MetaProposalType.CorrPerson ||
        mt == MetaProposalType.ConcPerson
      ) 0.8
      else 1.0
    case NerTag.Website => 0.5
  }
|
||||
|
@ -50,20 +50,38 @@ object LinkProposal {
|
||||
mpt match {
|
||||
case MetaProposalType.CorrOrg =>
|
||||
ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
|
||||
ctx.store.transact(RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.ConcPerson =>
|
||||
ctx.logger.debug(s"Updating item concerning person with: $value") *>
|
||||
ctx.store.transact(RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.CorrPerson =>
|
||||
ctx.logger.debug(s"Updating item correspondent person with: $value") *>
|
||||
ctx.store.transact(RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.ConcEquip =>
|
||||
ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
|
||||
ctx.store.transact(RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.DocDate =>
|
||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
||||
case MetaProposalType.DueDate =>
|
||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
||||
MetaProposal.parseDate(value) match {
|
||||
case Some(ld) =>
|
||||
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
|
||||
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
|
||||
ctx.store.transact(
|
||||
RItem.updateDueDate(itemId, ctx.args.meta.collective, Some(ts))
|
||||
)
|
||||
case None =>
|
||||
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
|
||||
0.pure[F]
|
||||
}
|
||||
}
|
||||
|
||||
sealed trait Result {
|
||||
|
Reference in New Issue
Block a user