Set item due date candidate

After processing, set the due date of an item to the first candidate.
The earliest due date is considered the best match.
This commit is contained in:
Eike Kettner 2020-03-20 22:38:03 +01:00
parent fd48dace9d
commit cba466ed47
4 changed files with 79 additions and 31 deletions

View File

@ -1,9 +1,12 @@
package docspell.common
import cats.implicits._
import cats.data.NonEmptyList
import docspell.common._
import docspell.common.MetaProposal.Candidate
import io.circe._
import io.circe.generic.semiauto._
import java.time.LocalDate
case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) {
@ -22,6 +25,12 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can
object MetaProposal {
/** Tries to read a candidate as a date by delegating to the `Ident`
  * overload with the id of the candidate's reference.
  *
  * @param cand the candidate whose reference id is parsed
  * @return the parsed date, or `None` if the id cannot be parsed
  */
def parseDate(cand: Candidate): Option[LocalDate] = {
  val dateId = cand.ref.id
  parseDate(dateId)
}
/** Parses the string value of the given ident with `LocalDate.parse`.
  *
  * @param date the ident whose `id` is expected to hold a date string
  * @return the parsed date, or `None` if parsing throws a non-fatal error
  */
def parseDate(date: Ident): Option[LocalDate] =
  scala.util.Try(LocalDate.parse(date.id)).toOption
case class Candidate(ref: IdRef, origin: Set[NerLabel])
object Candidate {
implicit val jsonEncoder: Encoder[Candidate] =

View File

@ -18,7 +18,7 @@ object MetaProposalType {
case object DueDate extends MetaProposalType
val all: List[MetaProposalType] =
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip)
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip, DocDate, DueDate)
def fromString(str: String): Either[String, MetaProposalType] =
str.toLowerCase match {

View File

@ -1,5 +1,6 @@
package docspell.joex.process
import java.time.{LocalDate, Period}
import cats.implicits._
import cats.effect.Sync
import docspell.common._
@ -12,54 +13,74 @@ object EvalProposals {
/** Creates a task that returns `data` with the proposals of every
  * attachment meta passed through `reorderCandidates`.
  *
  * NOTE(review): two variants of the body are visible below, presumably
  * the old and new side of a diff: one reorders without a reference date
  * and lifts the result with `pure`, the other obtains the current
  * timestamp first and hands its UTC date to `reorderCandidates`.
  *
  * @param data the item data whose `metas` are reordered
  */
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { _ =>
val metas = data.metas.map(reorderCandidates)
data.copy(metas = metas).pure[F]
Timestamp
.current[F]
.map { now =>
// the current UTC date is the reference for weighting candidates
val metas = data.metas.map(reorderCandidates(now.toUtcDate))
data.copy(metas = metas)
}
}
/** Returns `rm` with a new proposal list in which, for every proposal
  * type reported by `rm.proposals.getTypes`, the candidate values are
  * ordered by their `weight`. Types for which `find` yields nothing are
  * dropped from the result.
  *
  * NOTE(review): two headers and two copies of the `.map` step are
  * visible below, presumably the old and new side of a diff; the variant
  * taking `now` forwards it to `weight` as the reference date.
  */
def reorderCandidates(rm: RAttachmentMeta): RAttachmentMeta = {
def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
val list = rm.proposals.getTypes.toList
.map(mpt => rm.proposals.find(mpt) match {
case Some(mp) =>
val v = mp.values.sortBy(weight(rm, mp))
Some(mp.copy(values = v))
case None =>
None
})
.map(mpt =>
rm.proposals.find(mpt) match {
case Some(mp) =>
val v = mp.values.sortBy(weight(rm, mp, now))
Some(mp.copy(values = v))
case None =>
None
}
)
// flatMap(identity) removes the None entries and unwraps the rest
rm.copy(proposals = MetaProposalList(list.flatMap(identity)))
}
/** Computes the sort weight of a candidate from the labels in its
  * `origin`.
  *
  * The weight is the product of four factors: the inverse of the largest
  * word count among the labels, the inverse of the number of labels, the
  * position weight of the smallest start position relative to the length
  * of the attachment content (0 if there is no content), and the smallest
  * NER tag factor for the proposal type.
  *
  * @param rm   attachment meta providing the text content
  * @param mp   the proposal the candidate belongs to
  * @param cand the candidate to weigh
  */
def weight(rm: RAttachmentMeta, mp: MetaProposal)(cand: MetaProposal.Candidate): Double = {
  val labels        = cand.origin
  val contentLength = rm.content.map(_.length).getOrElse(0)
  val labelCount    = labels.size.toDouble
  val firstPos      = labels.map(_.startPosition).min
  val maxWords      = labels.map(_.label.split(' ').length).max.toDouble
  val tagFactor     = labels.map(l => nerTagFactor(l.tag, mp.proposalType)).min
  val wordFactor    = 1 / maxWords
  val countFactor   = 1 / labelCount
  wordFactor * countFactor * positionWeight(firstPos, contentLength) * tagFactor
}
/** Computes the sort weight of a candidate.
  *
  * For `DueDate` proposals the weight is a signed distance between `ref`
  * and the candidate's date, so that earlier dates get smaller weights.
  * Candidates that cannot be parsed as a date get the fixed weight
  * `2000.0`. For all other proposal types the weight is derived from the
  * labels in the candidate's `origin` (word count, label count, position
  * in the text and NER tag factor).
  *
  * @param rm   attachment meta providing the text content
  * @param mp   the proposal the candidate belongs to
  * @param ref  reference date the due-date distance is measured from
  * @param cand the candidate to weigh
  */
def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(
    cand: MetaProposal.Candidate
): Double =
  mp.proposalType match {
    case MetaProposalType.DueDate =>
      //for due dates, sort earliest on top
      MetaProposal
        .parseDate(cand)
        .map { ld =>
          val p = Period.between(ref, ld)
          // conversion only for sorting; it must stay monotonic.
          // Period.between normalises months to 0..11 and days to at most
          // 30, so a year has to weigh 12 * 31. With 365 per year,
          // "11 months 30 days" (371) would sort after "1 year" (365).
          val months = p.getYears * 12 + p.getMonths
          (months * 31 + p.getDays).toDouble
        }
        .getOrElse(2000.0)
    case _ =>
      val textLen  = rm.content.map(_.length).getOrElse(0)
      val tagCount = cand.origin.size.toDouble
      val pos      = cand.origin.map(_.startPosition).min
      val words    = cand.origin.map(_.label.split(' ').length).max.toDouble
      val nerFac =
        cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min
      (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac
  }
/** Weighs a position relative to a total length.
  *
  * Returns 1 when `total` is zero or negative. Otherwise the ratio
  * `|pos / total|` is returned, halved if it is below 0.7.
  *
  * NOTE(review): a braced and a brace-less header are both visible below,
  * presumably the old and new side of a diff.
  *
  * @param pos   the position to weigh
  * @param total the total length `pos` is related to
  */
def positionWeight(pos: Int, total: Int): Double = {
def positionWeight(pos: Int, total: Int): Double =
if (total <= 0) 1
else {
val p = math.abs(pos.toDouble / total.toDouble)
// ratios below 0.7 are halved, larger ratios are kept as they are
if (p < 0.7) p / 2
else p
}
}
/** Returns a factor for a NER tag in the context of a proposal type.
  *
  * `Email` and `Website` yield 0.5. `Organization` yields 0.8 for
  * `CorrOrg` proposals and `Person` yields 0.8 for `CorrPerson` or
  * `ConcPerson` proposals. Every other combination yields 1.0.
  *
  * NOTE(review): several case lines appear twice below, presumably the
  * old and new side of a formatting-only diff.
  *
  * @param tag the NER tag of a label
  * @param mt  the type of the proposal being weighed
  */
def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double =
tag match {
case NerTag.Date => 1.0
case NerTag.Email => 0.5
case NerTag.Date => 1.0
case NerTag.Email => 0.5
case NerTag.Location => 1.0
case NerTag.Misc => 1.0
case NerTag.Misc => 1.0
case NerTag.Organization =>
if (mt == MetaProposalType.CorrOrg) 0.8
else 1.0
case NerTag.Person =>
if (mt == MetaProposalType.CorrPerson ||
mt == MetaProposalType.ConcPerson) 0.8
mt == MetaProposalType.ConcPerson) 0.8
else 1.0
case NerTag.Website => 0.5
}

View File

@ -50,20 +50,38 @@ object LinkProposal {
mpt match {
case MetaProposalType.CorrOrg =>
ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
ctx.store.transact(RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value)))
ctx.store.transact(
RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.ConcPerson =>
ctx.logger.debug(s"Updating item concerning person with: $value") *>
ctx.store.transact(RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value)))
ctx.store.transact(
RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.CorrPerson =>
ctx.logger.debug(s"Updating item correspondent person with: $value") *>
ctx.store.transact(RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value)))
ctx.store.transact(
RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.ConcEquip =>
ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
ctx.store.transact(RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value)))
ctx.store.transact(
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.DocDate =>
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
case MetaProposalType.DueDate =>
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
MetaProposal.parseDate(value) match {
case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
ctx.store.transact(
RItem.updateDueDate(itemId, ctx.args.meta.collective, Some(ts))
)
case None =>
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
0.pure[F]
}
}
sealed trait Result {