mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 10:29:34 +00:00
Set item due date candidate
After processing, set the due date of an item to the first candidate. The earliest due date is considered the best match.
This commit is contained in:
parent
fd48dace9d
commit
cba466ed47
@ -1,9 +1,12 @@
|
||||
package docspell.common
|
||||
|
||||
import cats.implicits._
|
||||
import cats.data.NonEmptyList
|
||||
import docspell.common._
|
||||
import docspell.common.MetaProposal.Candidate
|
||||
import io.circe._
|
||||
import io.circe.generic.semiauto._
|
||||
import java.time.LocalDate
|
||||
|
||||
case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) {
|
||||
|
||||
@ -22,6 +25,12 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can
|
||||
|
||||
object MetaProposal {
|
||||
|
||||
/** Reads the id of the candidate's reference as a date.
  *
  * Delegates to the `Ident` overload; yields `None` when the id is not a
  * parseable date.
  */
def parseDate(cand: Candidate): Option[LocalDate] = {
  val refId = cand.ref.id
  parseDate(refId)
}
|
||||
|
||||
/** Parses the identifier's string value with `LocalDate.parse`.
  *
  * Any non-fatal failure while parsing results in `None`.
  */
def parseDate(date: Ident): Option[LocalDate] =
  scala.util.Try(LocalDate.parse(date.id)).toOption
|
||||
|
||||
/** A single candidate value of a proposal.
  *
  * @param ref    reference to the proposed value; for date proposals its id is
  *               what `MetaProposal.parseDate` tries to read as a date
  * @param origin NER labels attached to this candidate (presumably the labels
  *               it was derived from -- confirm against the producer)
  */
case class Candidate(ref: IdRef, origin: Set[NerLabel])
|
||||
object Candidate {
|
||||
implicit val jsonEncoder: Encoder[Candidate] =
|
||||
|
@ -18,7 +18,7 @@ object MetaProposalType {
|
||||
case object DueDate extends MetaProposalType
|
||||
|
||||
/** Every proposal type, including the two date-based ones. */
val all: List[MetaProposalType] =
  List(CorrOrg, CorrPerson, ConcPerson, ConcEquip, DocDate, DueDate)
|
||||
|
||||
def fromString(str: String): Either[String, MetaProposalType] =
|
||||
str.toLowerCase match {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package docspell.joex.process
|
||||
|
||||
import java.time.{LocalDate, Period}
|
||||
import cats.implicits._
|
||||
import cats.effect.Sync
|
||||
import docspell.common._
|
||||
@ -12,54 +13,74 @@ object EvalProposals {
|
||||
|
||||
/** Task that re-sorts the proposal candidates of every attachment meta in `data`.
  *
  * The current timestamp is obtained inside `F` and its UTC date is handed to
  * `reorderCandidates` as the reference date. The task context is not used.
  *
  * @param data the item data whose `metas` get their candidates reordered
  * @return a task yielding a copy of `data` with the reordered metas
  */
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
  Task { _ =>
    Timestamp.current[F].map { now =>
      // Fix the reference date once so all metas are ranked against the same day.
      val reorder = reorderCandidates(now.toUtcDate) _
      data.copy(metas = data.metas.map(reorder))
    }
  }
|
||||
|
||||
/** Sorts the candidates of each proposal in `rm` by their `weight`, ascending.
  *
  * Proposal types for which `rm.proposals.find` returns nothing are dropped
  * from the result.
  *
  * @param now reference date passed on to `weight`
  * @param rm  the attachment meta whose proposals are reordered
  * @return a copy of `rm` with each proposal's candidates sorted
  */
def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
  val sorted = rm.proposals.getTypes.toList
    .flatMap { mpt =>
      rm.proposals
        .find(mpt)
        .map(mp => mp.copy(values = mp.values.sortBy(weight(rm, mp, now))))
    }

  rm.copy(proposals = MetaProposalList(sorted))
}
|
||||
|
||||
/** Sort key of a candidate within a proposal; smaller keys are sorted first
  * by `reorderCandidates`.
  *
  * For `DueDate` proposals the key is the signed number of days from `ref` to
  * the candidate's date (dates before `ref` are negative), so the earliest
  * date ends up on top. Candidates whose value cannot be parsed as a date get
  * the fixed key 2000.0.
  *
  * For every other proposal type the key is derived from the candidate's NER
  * labels: the longest label's word count, the number of labels, the smallest
  * start position relative to the text length, and the smallest tag factor.
  *
  * @param rm   attachment meta providing the text content (for its length)
  * @param mp   the proposal the candidate belongs to
  * @param ref  reference date for ranking due dates
  * @param cand the candidate to weigh
  */
def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(
    cand: MetaProposal.Candidate
): Double =
  mp.proposalType match {
    case MetaProposalType.DueDate =>
      // Exact day distance. An approximation built from Period fields
      // (years * 365 + months * 31 + days) is not monotonic: 11 months and
      // 30 days would yield 371, but one full year only 365.
      MetaProposal
        .parseDate(cand)
        .map(ld => (ld.toEpochDay - ref.toEpochDay).toDouble)
        .getOrElse(2000.0)

    case _ =>
      // NOTE(review): `min`/`max` below throw on an empty set; this assumes
      // `cand.origin` always holds at least one label -- confirm.
      val textLen  = rm.content.map(_.length).getOrElse(0)
      val tagCount = cand.origin.size.toDouble
      val pos      = cand.origin.map(_.startPosition).min
      val words    = cand.origin.map(_.label.split(' ').length).max.toDouble
      val nerFac =
        cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min
      (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac
  }
|
||||
|
||||
/** Weight factor for a position inside a text of `total` characters.
  *
  * Returns 1 when `total` is not positive. Otherwise the absolute relative
  * position `|pos / total|` is used; values below 0.7 are halved.
  *
  * @param pos   start position
  * @param total length of the text
  */
def positionWeight(pos: Int, total: Int): Double =
  if (total <= 0) 1.0
  else
    math.abs(pos.toDouble / total.toDouble) match {
      case rel if rel < 0.7 => rel / 2
      case rel              => rel
    }
|
||||
|
||||
|
||||
/** Factor applied to a candidate's weight depending on the NER tag of a label
  * and the proposal type being ranked.
  *
  * Because smaller weights sort first, a factor below 1.0 moves a candidate
  * up: organization tags for `CorrOrg`, person tags for `CorrPerson` and
  * `ConcPerson` (0.8 each), and e-mail and website tags for any type (0.5).
  *
  * @param tag the NER tag of a label
  * @param mt  the proposal type the candidate belongs to
  */
def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double =
  tag match {
    case NerTag.Date | NerTag.Location | NerTag.Misc =>
      1.0
    case NerTag.Email | NerTag.Website =>
      0.5
    case NerTag.Organization =>
      if (mt == MetaProposalType.CorrOrg) 0.8 else 1.0
    case NerTag.Person =>
      val isPersonType =
        mt == MetaProposalType.CorrPerson || mt == MetaProposalType.ConcPerson
      if (isPersonType) 0.8 else 1.0
  }
|
||||
|
@ -50,20 +50,38 @@ object LinkProposal {
|
||||
mpt match {
|
||||
case MetaProposalType.CorrOrg =>
|
||||
ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
|
||||
ctx.store.transact(RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.ConcPerson =>
|
||||
ctx.logger.debug(s"Updating item concerning person with: $value") *>
|
||||
ctx.store.transact(RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.CorrPerson =>
|
||||
ctx.logger.debug(s"Updating item correspondent person with: $value") *>
|
||||
ctx.store.transact(RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.ConcEquip =>
|
||||
ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
|
||||
ctx.store.transact(RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value)))
|
||||
ctx.store.transact(
|
||||
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.DocDate =>
|
||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
||||
case MetaProposalType.DueDate =>
|
||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
||||
MetaProposal.parseDate(value) match {
|
||||
case Some(ld) =>
|
||||
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
|
||||
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
|
||||
ctx.store.transact(
|
||||
RItem.updateDueDate(itemId, ctx.args.meta.collective, Some(ts))
|
||||
)
|
||||
case None =>
|
||||
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
|
||||
0.pure[F]
|
||||
}
|
||||
}
|
||||
|
||||
sealed trait Result {
|
||||
|
Loading…
x
Reference in New Issue
Block a user