Set item due date candidate

After processing, set the due date of an item to the first candidate.
The earliest due date is considered the best match.
This commit is contained in:
Eike Kettner 2020-03-20 22:38:03 +01:00
parent fd48dace9d
commit cba466ed47
4 changed files with 79 additions and 31 deletions

View File

@ -1,9 +1,12 @@
package docspell.common package docspell.common
import cats.implicits._
import cats.data.NonEmptyList import cats.data.NonEmptyList
import docspell.common._
import docspell.common.MetaProposal.Candidate import docspell.common.MetaProposal.Candidate
import io.circe._ import io.circe._
import io.circe.generic.semiauto._ import io.circe.generic.semiauto._
import java.time.LocalDate
case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) { case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) {
@ -22,6 +25,12 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can
object MetaProposal { object MetaProposal {
/** Reads the reference id of the given candidate as a date.
  *
  * Delegates to the other `parseDate` overload with `cand.ref.id`.
  *
  * @param cand the proposal candidate whose `ref.id` is expected to hold a date
  * @return the parsed date, or `None` if the id cannot be read as a date
  */
def parseDate(cand: Candidate): Option[LocalDate] = {
  val rawDate = cand.ref.id
  parseDate(rawDate)
}
/** Parses the textual value of an identifier into a `LocalDate`.
  *
  * `LocalDate.parse` accepts ISO local dates such as `2020-03-20`. Any
  * non-fatal failure while parsing is swallowed and reported as `None`.
  *
  * @param date the identifier whose `id` string holds the date text
  * @return the parsed date, or `None` if parsing failed
  */
def parseDate(date: Ident): Option[LocalDate] = {
  val attempt = Either.catchNonFatal(LocalDate.parse(date.id))
  attempt.toOption
}
case class Candidate(ref: IdRef, origin: Set[NerLabel]) case class Candidate(ref: IdRef, origin: Set[NerLabel])
object Candidate { object Candidate {
implicit val jsonEncoder: Encoder[Candidate] = implicit val jsonEncoder: Encoder[Candidate] =

View File

@ -18,7 +18,7 @@ object MetaProposalType {
case object DueDate extends MetaProposalType case object DueDate extends MetaProposalType
val all: List[MetaProposalType] = val all: List[MetaProposalType] =
List(CorrOrg, CorrPerson, ConcPerson, ConcEquip) List(CorrOrg, CorrPerson, ConcPerson, ConcEquip, DocDate, DueDate)
def fromString(str: String): Either[String, MetaProposalType] = def fromString(str: String): Either[String, MetaProposalType] =
str.toLowerCase match { str.toLowerCase match {

View File

@ -1,5 +1,6 @@
package docspell.joex.process package docspell.joex.process
import java.time.{LocalDate, Period}
import cats.implicits._ import cats.implicits._
import cats.effect.Sync import cats.effect.Sync
import docspell.common._ import docspell.common._
@ -12,41 +13,61 @@ object EvalProposals {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { _ => Task { _ =>
val metas = data.metas.map(reorderCandidates) Timestamp
data.copy(metas = metas).pure[F] .current[F]
.map { now =>
val metas = data.metas.map(reorderCandidates(now.toUtcDate))
data.copy(metas = metas)
}
} }
def reorderCandidates(rm: RAttachmentMeta): RAttachmentMeta = { def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
val list = rm.proposals.getTypes.toList val list = rm.proposals.getTypes.toList
.map(mpt => rm.proposals.find(mpt) match { .map(mpt =>
rm.proposals.find(mpt) match {
case Some(mp) => case Some(mp) =>
val v = mp.values.sortBy(weight(rm, mp)) val v = mp.values.sortBy(weight(rm, mp, now))
Some(mp.copy(values = v)) Some(mp.copy(values = v))
case None => case None =>
None None
}) }
)
rm.copy(proposals = MetaProposalList(list.flatMap(identity))) rm.copy(proposals = MetaProposalList(list.flatMap(identity)))
} }
def weight(rm: RAttachmentMeta, mp: MetaProposal)(cand: MetaProposal.Candidate): Double = { def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(
cand: MetaProposal.Candidate
): Double =
mp.proposalType match {
case MetaProposalType.DueDate =>
  // For due dates, sort earliest on top: a smaller weight ranks first, and
  // dates before `ref` yield negative weights.
  MetaProposal
    .parseDate(cand)
    .map { ld =>
      val p = Period.between(ref, ld)
      // Conversion only for sorting. Every month counts as 31 "days", so the
      // day component (at most 30 in magnitude for a period computed by
      // `Period.between`) can never outweigh a full month. The former
      // `years * 365 + months * 31 + days` was not monotonic: 11 months and
      // 30 days scored 371, while exactly one year scored only 365.
      val d = p.toTotalMonths * 31 + p.getDays
      d.toDouble
    }
    // Candidates that cannot be read as a date always sort last. The former
    // fallback of 2000.0 ranked them ahead of real dates far in the future.
    .getOrElse(Double.MaxValue)
case _ =>
val textLen = rm.content.map(_.length).getOrElse(0) val textLen = rm.content.map(_.length).getOrElse(0)
val tagCount = cand.origin.size.toDouble val tagCount = cand.origin.size.toDouble
val pos = cand.origin.map(_.startPosition).min val pos = cand.origin.map(_.startPosition).min
val words = cand.origin.map(_.label.split(' ').length).max.toDouble val words = cand.origin.map(_.label.split(' ').length).max.toDouble
val nerFac = cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min val nerFac =
cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min
(1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac
} }
def positionWeight(pos: Int, total: Int): Double = { def positionWeight(pos: Int, total: Int): Double =
if (total <= 0) 1 if (total <= 0) 1
else { else {
val p = math.abs(pos.toDouble / total.toDouble) val p = math.abs(pos.toDouble / total.toDouble)
if (p < 0.7) p / 2 if (p < 0.7) p / 2
else p else p
} }
}
def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double = def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double =
tag match { tag match {

View File

@ -50,20 +50,38 @@ object LinkProposal {
mpt match { mpt match {
case MetaProposalType.CorrOrg => case MetaProposalType.CorrOrg =>
ctx.logger.debug(s"Updating item organization with: ${value.id}") *> ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
ctx.store.transact(RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))) ctx.store.transact(
RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.ConcPerson => case MetaProposalType.ConcPerson =>
ctx.logger.debug(s"Updating item concerning person with: $value") *> ctx.logger.debug(s"Updating item concerning person with: $value") *>
ctx.store.transact(RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))) ctx.store.transact(
RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.CorrPerson => case MetaProposalType.CorrPerson =>
ctx.logger.debug(s"Updating item correspondent person with: $value") *> ctx.logger.debug(s"Updating item correspondent person with: $value") *>
ctx.store.transact(RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))) ctx.store.transact(
RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.ConcEquip => case MetaProposalType.ConcEquip =>
ctx.logger.debug(s"Updating item concerning equipment with: $value") *> ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
ctx.store.transact(RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))) ctx.store.transact(
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.DocDate => case MetaProposalType.DocDate =>
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0) ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
case MetaProposalType.DueDate => case MetaProposalType.DueDate =>
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0) MetaProposal.parseDate(value) match {
case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
ctx.store.transact(
RItem.updateDueDate(itemId, ctx.args.meta.collective, Some(ts))
)
case None =>
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
0.pure[F]
}
} }
sealed trait Result { sealed trait Result {