mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-06 15:15:58 +00:00
Set item due date candidate
After processing, set the due date of an item to the first candidate. The earliest due date is considered the best match.
This commit is contained in:
parent
fd48dace9d
commit
cba466ed47
@ -1,9 +1,12 @@
|
|||||||
package docspell.common
|
package docspell.common
|
||||||
|
|
||||||
|
import cats.implicits._
|
||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
|
import docspell.common._
|
||||||
import docspell.common.MetaProposal.Candidate
|
import docspell.common.MetaProposal.Candidate
|
||||||
import io.circe._
|
import io.circe._
|
||||||
import io.circe.generic.semiauto._
|
import io.circe.generic.semiauto._
|
||||||
|
import java.time.LocalDate
|
||||||
|
|
||||||
case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) {
|
case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) {
|
||||||
|
|
||||||
@ -22,6 +25,12 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can
|
|||||||
|
|
||||||
object MetaProposal {
|
object MetaProposal {
|
||||||
|
|
||||||
|
/** Tries to read the reference id of the given candidate as a date.
  *
  * Delegates to the `Ident` overload; the result is `None` if the id
  * cannot be parsed by `LocalDate.parse`.
  */
def parseDate(cand: Candidate): Option[LocalDate] = {
  val refId = cand.ref.id
  parseDate(refId)
}

/** Parses the string value of `date` with `LocalDate.parse`.
  *
  * Any non-fatal failure while parsing results in `None`.
  */
def parseDate(date: Ident): Option[LocalDate] =
  scala.util.Try(LocalDate.parse(date.id)).toOption
|
|
||||||
case class Candidate(ref: IdRef, origin: Set[NerLabel])
|
case class Candidate(ref: IdRef, origin: Set[NerLabel])
|
||||||
object Candidate {
|
object Candidate {
|
||||||
implicit val jsonEncoder: Encoder[Candidate] =
|
implicit val jsonEncoder: Encoder[Candidate] =
|
||||||
|
@ -18,7 +18,7 @@ object MetaProposalType {
|
|||||||
case object DueDate extends MetaProposalType
|
case object DueDate extends MetaProposalType
|
||||||
|
|
||||||
/** All proposal types, the two date types included. */
val all: List[MetaProposalType] =
  CorrOrg :: CorrPerson :: ConcPerson :: ConcEquip :: DocDate :: DueDate :: Nil
|
|
||||||
def fromString(str: String): Either[String, MetaProposalType] =
|
def fromString(str: String): Either[String, MetaProposalType] =
|
||||||
str.toLowerCase match {
|
str.toLowerCase match {
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package docspell.joex.process
|
package docspell.joex.process
|
||||||
|
|
||||||
|
import java.time.{LocalDate, Period}
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -12,54 +13,74 @@ object EvalProposals {
|
|||||||
|
|
||||||
/** Task that re-sorts the proposal candidates of every element in
  * `data.metas`.
  *
  * The current timestamp is fetched once and its `toUtcDate` value is
  * handed to `reorderCandidates` as the reference date.
  */
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
  Task { _ =>
    for (now <- Timestamp.current[F]) yield {
      val today = now.toUtcDate
      data.copy(metas = data.metas.map(reorderCandidates(today)))
    }
  }
|
|
||||||
/** Returns a copy of `rm` in which the candidates of each proposal are
  * sorted by `weight`, smallest weight first.
  *
  * Proposal types for which `rm.proposals.find` yields nothing are left out
  * of the resulting list.
  *
  * @param now reference date passed on to `weight`
  * @param rm  the attachment meta data whose proposals are re-sorted
  */
def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
  val sorted =
    for {
      tpe      <- rm.proposals.getTypes.toList
      proposal <- rm.proposals.find(tpe).toList
    } yield proposal.copy(values = proposal.values.sortBy(weight(rm, proposal, now)))

  rm.copy(proposals = MetaProposalList(sorted))
}
|
|
||||||
/** Computes the sort key of a candidate; `reorderCandidates` sorts ascending,
  * so the candidate with the smallest value ends up first.
  *
  * @param rm   attachment meta data; its `content` length is used to weigh
  *             the position of a label in the text
  * @param mp   the proposal the candidate belongs to
  * @param ref  reference date used for due-date proposals
  * @param cand the candidate to weigh
  * @return for `DueDate` proposals a value that grows with the distance of
  *         the candidate's date from `ref` (dates before `ref` are negative),
  *         or `Double.MaxValue` if the candidate is not a readable date; for
  *         all other types a product of word count, tag count, text position
  *         and NER tag factors
  */
def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(
    cand: MetaProposal.Candidate
): Double =
  mp.proposalType match {
    case MetaProposalType.DueDate =>
      // for due dates, sort earliest on top
      MetaProposal
        .parseDate(cand)
        .map { ld =>
          val p = Period.between(ref, ld)
          // Conversion only for sorting. Years are folded into months so the
          // key stays ordered: the former `years * 365 + months * 31 + days`
          // ranked "11 months, 25 days" (366) after "1 year" (365).
          (p.toTotalMonths * 31 + p.getDays).toDouble
        }
        // Candidates that are not a date must never win over a real date.
        .getOrElse(Double.MaxValue)

    case _ =>
      // NOTE(review): `min`/`max` below throw on an empty `origin` set;
      // presumably every candidate carries at least one label - confirm.
      val textLen  = rm.content.map(_.length).getOrElse(0)
      val tagCount = cand.origin.size.toDouble
      val pos      = cand.origin.map(_.startPosition).min
      val words    = cand.origin.map(_.label.split(' ').length).max.toDouble
      val nerFac =
        cand.origin.map(label => nerTagFactor(label.tag, mp.proposalType)).min
      (1 / words) * (1 / tagCount) * positionWeight(pos, textLen) * nerFac
  }
|
|
||||||
/** Weighs a text position relative to the total text length.
  *
  * Returns 1 when `total` is not positive. Otherwise the absolute ratio
  * `pos / total` is returned, halved if it is below 0.7.
  */
def positionWeight(pos: Int, total: Int): Double =
  if (total > 0) {
    val ratio = math.abs(pos.toDouble / total.toDouble)
    if (ratio >= 0.7) ratio else ratio / 2
  } else 1.0
|
|
||||||
|
|
||||||
/** Factor contributed by a NER tag for the given proposal type.
  *
  * E-mail and website tags yield 0.5. An organization tag yields 0.8 for
  * `CorrOrg` proposals, a person tag yields 0.8 for `CorrPerson` and
  * `ConcPerson` proposals. Every other combination yields 1.0.
  */
def nerTagFactor(tag: NerTag, mt: MetaProposalType): Double =
  tag match {
    case NerTag.Email | NerTag.Website =>
      0.5
    case NerTag.Organization if mt == MetaProposalType.CorrOrg =>
      0.8
    case NerTag.Person
        if mt == MetaProposalType.CorrPerson || mt == MetaProposalType.ConcPerson =>
      0.8
    case NerTag.Date | NerTag.Location | NerTag.Misc | NerTag.Organization |
        NerTag.Person =>
      1.0
  }
|
@ -50,20 +50,38 @@ object LinkProposal {
|
|||||||
mpt match {
|
mpt match {
|
||||||
case MetaProposalType.CorrOrg =>
|
case MetaProposalType.CorrOrg =>
|
||||||
ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
|
ctx.logger.debug(s"Updating item organization with: ${value.id}") *>
|
||||||
ctx.store.transact(RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value)))
|
ctx.store.transact(
|
||||||
|
RItem.updateCorrOrg(itemId, ctx.args.meta.collective, Some(value))
|
||||||
|
)
|
||||||
case MetaProposalType.ConcPerson =>
|
case MetaProposalType.ConcPerson =>
|
||||||
ctx.logger.debug(s"Updating item concerning person with: $value") *>
|
ctx.logger.debug(s"Updating item concerning person with: $value") *>
|
||||||
ctx.store.transact(RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value)))
|
ctx.store.transact(
|
||||||
|
RItem.updateConcPerson(itemId, ctx.args.meta.collective, Some(value))
|
||||||
|
)
|
||||||
case MetaProposalType.CorrPerson =>
|
case MetaProposalType.CorrPerson =>
|
||||||
ctx.logger.debug(s"Updating item correspondent person with: $value") *>
|
ctx.logger.debug(s"Updating item correspondent person with: $value") *>
|
||||||
ctx.store.transact(RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value)))
|
ctx.store.transact(
|
||||||
|
RItem.updateCorrPerson(itemId, ctx.args.meta.collective, Some(value))
|
||||||
|
)
|
||||||
case MetaProposalType.ConcEquip =>
|
case MetaProposalType.ConcEquip =>
|
||||||
ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
|
ctx.logger.debug(s"Updating item concerning equipment with: $value") *>
|
||||||
ctx.store.transact(RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value)))
|
ctx.store.transact(
|
||||||
|
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
|
||||||
|
)
|
||||||
case MetaProposalType.DocDate =>
|
case MetaProposalType.DocDate =>
|
||||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
||||||
case MetaProposalType.DueDate =>
|
case MetaProposalType.DueDate =>
|
||||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
MetaProposal.parseDate(value) match {
|
||||||
|
case Some(ld) =>
|
||||||
|
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
|
||||||
|
ctx.logger.debug(s"Updating item due-date suggestion ${value.id}") *>
|
||||||
|
ctx.store.transact(
|
||||||
|
RItem.updateDueDate(itemId, ctx.args.meta.collective, Some(ts))
|
||||||
|
)
|
||||||
|
case None =>
|
||||||
|
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
|
||||||
|
0.pure[F]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sealed trait Result {
|
sealed trait Result {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user