Sort by weights better and save them

This commit is contained in:
Eike Kettner
2020-05-12 01:13:22 +02:00
parent 5d6658770e
commit 3e10e2175a
7 changed files with 151 additions and 26 deletions

View File

@ -73,7 +73,8 @@ object CreateItem {
fm,
Vector.empty,
Vector.empty,
fm.map(a => a.id -> a.fileId).toMap
fm.map(a => a.id -> a.fileId).toMap,
MetaProposalList.empty
)
}
@ -110,7 +111,7 @@ object CreateItem {
.map(originFileTuple)
.toMap
} yield cand.headOption.map(ri =>
ItemData(ri, rms, Vector.empty, Vector.empty, origMap)
ItemData(ri, rms, Vector.empty, Vector.empty, origMap, MetaProposalList.empty)
)
}

View File

@ -7,7 +7,8 @@ import docspell.common._
import docspell.joex.scheduler.Task
import docspell.store.records.RAttachmentMeta
/** Reorders the proposals to put most probable fits first.
/** Calculates weights for candidates, assigning the most likely
* candidate a lower number.
*/
object EvalProposals {
@ -16,24 +17,14 @@ object EvalProposals {
Timestamp
.current[F]
.map { now =>
val metas = data.metas.map(reorderCandidates(now.toUtcDate))
val metas = data.metas.map(calcCandidateWeight(now.toUtcDate))
data.copy(metas = metas)
}
}
def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
val list = rm.proposals.getTypes.toList
.map(mpt =>
rm.proposals.find(mpt) match {
case Some(mp) =>
val v = mp.values.sortBy(weight(rm, mp, now))
Some(mp.copy(values = v))
case None =>
None
}
)
rm.copy(proposals = MetaProposalList(list.flatMap(identity)))
/** Attaches weights to every proposal of the given attachment meta
  * data and returns a copy whose proposal list is sorted by weight.
  *
  * The weight function is obtained from `weight(rm, proposal, now)`
  * for each proposal and handed to `addWeights`.
  *
  * @param now the reference date passed on to `weight`
  * @param rm  the attachment meta data whose proposals are weighted
  * @return a copy of `rm` with weighted proposals, ordered via `sortByWeights`
  */
def calcCandidateWeight(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = {
  val weighted = rm.proposals.change { proposal =>
    proposal.addWeights(weight(rm, proposal, now))
  }
  // NOTE(review): presumably a lower weight means a more likely candidate — confirm in MetaProposalList
  val ordered = weighted.sortByWeights
  rm.copy(proposals = ordered)
}
def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)(

View File

@ -1,15 +1,18 @@
package docspell.joex.process
import docspell.common.{Ident, NerDateLabel, NerLabel}
import docspell.common._
import docspell.joex.process.ItemData.AttachmentDates
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
/** Data that is carried across all processing tasks.
*/
case class ItemData(
item: RItem,
attachments: Vector[RAttachment],
metas: Vector[RAttachmentMeta],
dateLabels: Vector[AttachmentDates],
originFile: Map[Ident, Ident] //maps RAttachment.id -> FileMeta.id
originFile: Map[Ident, Ident], // maps RAttachment.id -> FileMeta.id
givenMeta: MetaProposalList // given meta data not associated to a specific attachment
) {
def findMeta(attachId: Ident): Option[RAttachmentMeta] =

View File

@ -10,7 +10,9 @@ object LinkProposal {
def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
val proposals = MetaProposalList.flatten(data.metas.map(_.proposals))
// sort by weight; the order of equal weights is not important, just
// choose one; the others are then suggestions
val proposals = MetaProposalList.flatten(data.metas.map(_.proposals)).sortByWeights
ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all
@ -24,7 +26,7 @@ object LinkProposal {
proposalList: MetaProposalList,
ctx: Context[F, ProcessItemArgs]
)(mpt: MetaProposalType): F[Result] =
proposalList.find(mpt) match {
data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match {
case None =>
Result.noneFound(mpt).pure[F]
case Some(a) if a.isSingleValue =>