From 3e10e2175a1f651711669ea05a7ca73f638ed0a7 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Tue, 12 May 2020 01:13:22 +0200 Subject: [PATCH 1/3] Sort by weights better and save them --- .../scala/docspell/common/MetaProposal.scala | 56 ++++++++++++++-- .../docspell/common/MetaProposalList.scala | 15 +++++ .../common/MetaProposalListTest.scala | 67 +++++++++++++++++++ .../docspell/joex/process/CreateItem.scala | 5 +- .../docspell/joex/process/EvalProposals.scala | 21 ++---- .../docspell/joex/process/ItemData.scala | 7 +- .../docspell/joex/process/LinkProposal.scala | 6 +- 7 files changed, 151 insertions(+), 26 deletions(-) create mode 100644 modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala diff --git a/modules/common/src/main/scala/docspell/common/MetaProposal.scala b/modules/common/src/main/scala/docspell/common/MetaProposal.scala index f7c34955..d3d40012 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposal.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposal.scala @@ -7,7 +7,19 @@ import docspell.common.MetaProposal.Candidate import io.circe._ import io.circe.generic.semiauto._ import java.time.LocalDate +import cats.kernel.Order +/** A proposed meta data to an item. + * + * There is only one value for each proposal type. The list of + * candidates is meant to be ordered from the best match to the + * lowest match. + * + * The candidate is already "resolved" against the database and + * contains a valid record (with its ID and a human readable name). + * Additionally it carries a set of "labels" (which may be empty) + * that are the source of this candidate. 
+ */ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Candidate]) { def addIdRef(refs: Seq[Candidate]): MetaProposal = @@ -21,6 +33,12 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can def size: Int = values.size + + def addWeights(wf: Candidate => Double): MetaProposal = + MetaProposal(proposalType, values.map(c => c.withWeight(wf(c)))) + + def sortByWeight: MetaProposal = + MetaProposal(proposalType, values.sortBy(_.weight)(Candidate.weightOrder)) } object MetaProposal { @@ -31,23 +49,51 @@ object MetaProposal { def parseDate(date: Ident): Option[LocalDate] = Either.catchNonFatal(LocalDate.parse(date.id)).toOption - case class Candidate(ref: IdRef, origin: Set[NerLabel]) + case class Candidate(ref: IdRef, origin: Set[NerLabel], weight: Option[Double] = None) { + def withWeight(w: Double): Candidate = + copy(weight = Some(w)) + } + object Candidate { implicit val jsonEncoder: Encoder[Candidate] = deriveEncoder[Candidate] implicit val jsonDecoder: Decoder[Candidate] = deriveDecoder[Candidate] + + /** This deviates from standard order to sort None at last. + */ + val weightOrder: Order[Option[Double]] = new Order[Option[Double]] { + def compare(x: Option[Double], y: Option[Double]) = + (x, y) match { + case (None, None) => 0 + case (None, _) => 1 + case (_, None) => -1 + case (Some(x), Some(y)) => Order[Double].compare(x, y) + } + } } + /** Merges candidates with same `IdRef' values and concatenates their + * respective labels. The candidate order is preserved. 
+ */ def flatten(s: NonEmptyList[Candidate]): NonEmptyList[Candidate] = { - def append(list: List[Candidate]): Candidate = - list.reduce((l0, l1) => l0.copy(origin = l0.origin ++ l1.origin)) - val grouped = s.toList.groupBy(_.ref.id) - NonEmptyList.fromListUnsafe(grouped.values.toList.map(append)) + def mergeInto( + res: NonEmptyList[Candidate], + el: Candidate + ): NonEmptyList[Candidate] = { + val l = res.map(c => + if (c.ref.id == el.ref.id) c.copy(origin = c.origin ++ el.origin) else c + ) + if (res.exists(_.ref.id == el.ref.id)) l /* id already present: labels were merged above, do not append a duplicate */ + else l :+ el + } + val init = NonEmptyList.of(s.head) + s.tail.foldLeft(init)(mergeInto) } implicit val jsonDecoder: Decoder[MetaProposal] = deriveDecoder[MetaProposal] implicit val jsonEncoder: Encoder[MetaProposal] = deriveEncoder[MetaProposal] + } diff --git a/modules/common/src/main/scala/docspell/common/MetaProposalList.scala b/modules/common/src/main/scala/docspell/common/MetaProposalList.scala index 79473e1b..d72f5f85 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposalList.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposalList.scala @@ -6,6 +6,10 @@ import docspell.common.MetaProposal.Candidate import io.circe._ import io.circe.generic.semiauto._ +/** A list of proposals for meta data to an item. + * + * The list usually keeps only one value for each `MetaProposalType'. 
+ */ case class MetaProposalList private (proposals: List[MetaProposal]) { def isEmpty: Boolean = proposals.isEmpty @@ -31,6 +35,11 @@ case class MetaProposalList private (proposals: List[MetaProposal]) { def find(mpt: MetaProposalType): Option[MetaProposal] = proposals.find(_.proposalType == mpt) + def change(f: MetaProposal => MetaProposal): MetaProposalList = + new MetaProposalList(proposals.map(f)) + + def sortByWeights: MetaProposalList = + change(_.sortByWeight) } object MetaProposalList { @@ -54,6 +63,12 @@ object MetaProposalList { def fromMap(m: Map[MetaProposalType, MetaProposal]): MetaProposalList = new MetaProposalList(m.toList.map({ case (k, v) => v.copy(proposalType = k) })) + /** Flattens the given list of meta-proposals into a single list, + * where each meta-proposal type exists at most once. Candidates to + * equal proposal-types are merged together. The candidate's order + * is preserved and candidates of proposals are appended as given + * by the order of the given `seq'. 
+ */ def flatten(ml: Seq[MetaProposalList]): MetaProposalList = { val init: Map[MetaProposalType, MetaProposal] = Map.empty diff --git a/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala b/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala new file mode 100644 index 00000000..a8ffde51 --- /dev/null +++ b/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala @@ -0,0 +1,67 @@ +package docspell.common + +import minitest._ +import cats.data.NonEmptyList +import docspell.common.MetaProposal.Candidate + +object MetaProposalListTest extends SimpleTestSuite { + + test("flatten retains order of candidates") { + val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty) + val mpl1 = MetaProposalList.of( + MetaProposal( + MetaProposalType.CorrOrg, + NonEmptyList.of(cand1) + ) + ) + val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty) + val mpl2 = MetaProposalList.of( + MetaProposal( + MetaProposalType.CorrOrg, + NonEmptyList.of(cand2) + ) + ) + + val candidates1 = MetaProposalList + .flatten(Seq(mpl1, mpl2)) + .find(MetaProposalType.CorrOrg) + .get + .values + assertEquals(candidates1.head, cand1) + assertEquals(candidates1.tail.head, cand2) + + val candidates2 = MetaProposalList + .flatten(Seq(mpl2, mpl1)) + .find(MetaProposalType.CorrOrg) + .get + .values + assertEquals(candidates2.head, cand2) + assertEquals(candidates2.tail.head, cand1) + } + + test("sort by weights") { + val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty, Some(0.1)) + val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty, Some(0.05)) + val mpl = MetaProposalList.of( + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)), + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2)) + ).sortByWeights + + val candidates = mpl.find(MetaProposalType.CorrOrg).get.values + assertEquals(candidates.head, cand2) + assertEquals(candidates.tail.head, cand1) + } + + test("sort by 
weights: unset is last") { + val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty, Some(0.1)) + val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty) + val mpl = MetaProposalList.of( + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)), + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2)) + ).sortByWeights + + val candidates = mpl.find(MetaProposalType.CorrOrg).get.values + assertEquals(candidates.head, cand1) + assertEquals(candidates.tail.head, cand2) + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 798fb6f5..595c0b1b 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -73,7 +73,8 @@ object CreateItem { fm, Vector.empty, Vector.empty, - fm.map(a => a.id -> a.fileId).toMap + fm.map(a => a.id -> a.fileId).toMap, + MetaProposalList.empty ) } @@ -110,7 +111,7 @@ object CreateItem { .map(originFileTuple) .toMap } yield cand.headOption.map(ri => - ItemData(ri, rms, Vector.empty, Vector.empty, origMap) + ItemData(ri, rms, Vector.empty, Vector.empty, origMap, MetaProposalList.empty) ) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala index 6e492a07..cf5ecb1c 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/EvalProposals.scala @@ -7,7 +7,8 @@ import docspell.common._ import docspell.joex.scheduler.Task import docspell.store.records.RAttachmentMeta -/** Reorders the proposals to put most probable fits first. +/** Calculate weights for candidates that adds the most likely + * candidate a lower number. 
*/ object EvalProposals { @@ -16,24 +17,14 @@ object EvalProposals { Timestamp .current[F] .map { now => - val metas = data.metas.map(reorderCandidates(now.toUtcDate)) + val metas = data.metas.map(calcCandidateWeight(now.toUtcDate)) data.copy(metas = metas) } } - def reorderCandidates(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = { - val list = rm.proposals.getTypes.toList - .map(mpt => - rm.proposals.find(mpt) match { - case Some(mp) => - val v = mp.values.sortBy(weight(rm, mp, now)) - Some(mp.copy(values = v)) - case None => - None - } - ) - - rm.copy(proposals = MetaProposalList(list.flatMap(identity))) + def calcCandidateWeight(now: LocalDate)(rm: RAttachmentMeta): RAttachmentMeta = { + val list = rm.proposals.change(mp => mp.addWeights(weight(rm, mp, now))) + rm.copy(proposals = list.sortByWeights) } def weight(rm: RAttachmentMeta, mp: MetaProposal, ref: LocalDate)( diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 48dada99..7d8e7729 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -1,15 +1,18 @@ package docspell.joex.process -import docspell.common.{Ident, NerDateLabel, NerLabel} +import docspell.common._ import docspell.joex.process.ItemData.AttachmentDates import docspell.store.records.{RAttachment, RAttachmentMeta, RItem} +/** Data that is carried across all processing tasks. 
+ */ case class ItemData( item: RItem, attachments: Vector[RAttachment], metas: Vector[RAttachmentMeta], dateLabels: Vector[AttachmentDates], - originFile: Map[Ident, Ident] //maps RAttachment.id -> FileMeta.id + originFile: Map[Ident, Ident], // maps RAttachment.id -> FileMeta.id + givenMeta: MetaProposalList // given meta data not associated to a specific attachment ) { def findMeta(attachId: Ident): Option[RAttachmentMeta] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index 9416b386..ca875215 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -10,7 +10,9 @@ object LinkProposal { def apply[F[_]: Sync](data: ItemData): Task[F, ProcessItemArgs, ItemData] = Task { ctx => - val proposals = MetaProposalList.flatten(data.metas.map(_.proposals)) + // sort by weight; order of equal weights is not important, just + // choose one others are then suggestions + val proposals = MetaProposalList.flatten(data.metas.map(_.proposals)).sortByWeights ctx.logger.info(s"Starting linking proposals") *> MetaProposalType.all @@ -24,7 +26,7 @@ object LinkProposal { proposalList: MetaProposalList, ctx: Context[F, ProcessItemArgs] )(mpt: MetaProposalType): F[Result] = - proposalList.find(mpt) match { + data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match { case None => Result.noneFound(mpt).pure[F] case Some(a) if a.isSingleValue => From 5e6ce1737c7dc90de2364688d142ac592ef8059e Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sat, 16 May 2020 03:00:29 +0200 Subject: [PATCH 2/3] Change recognizing dates with short years Short years are now added to the current century (2000) such that date strings like 12/26/11 result in 12/26/2011 and not 12/26/1911. 
--- .../src/main/scala/docspell/analysis/date/DateFind.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala b/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala index f894e265..8a97c3e1 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala @@ -34,7 +34,7 @@ object DateFind { private case class SimpleDate(year: Int, month: Int, day: Int) { def toLocalDate: LocalDate = - LocalDate.of(if (year < 100) 1900 + year else year, month, day) + LocalDate.of(if (year < 100) 2000 + year else year, month, day) } private object SimpleDate { From d65c1e0d36cd8fa9c8ae0a97f1ad65b79ef86b99 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sat, 16 May 2020 14:18:59 +0200 Subject: [PATCH 3/3] Use date from e-mails to set item date --- .../scala/docspell/common/MetaProposal.scala | 11 ++++++ .../docspell/common/MetaProposalList.scala | 3 ++ .../common/MetaProposalListTest.scala | 20 ++++++---- .../joex/process/ExtractArchive.scala | 38 +++++++++++++++---- .../docspell/joex/process/ItemData.scala | 10 +++++ .../docspell/joex/process/LinkProposal.scala | 21 ++++++++-- 6 files changed, 85 insertions(+), 18 deletions(-) diff --git a/modules/common/src/main/scala/docspell/common/MetaProposal.scala b/modules/common/src/main/scala/docspell/common/MetaProposal.scala index d3d40012..d1e236ec 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposal.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposal.scala @@ -43,6 +43,17 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can object MetaProposal { + def apply(pt: MetaProposalType, v0: Candidate, vm: Candidate*): MetaProposal = + MetaProposal(pt, NonEmptyList.of(v0, vm: _*)) + + def docDate(ts: Timestamp, origin: Option[NerLabel]): MetaProposal = { + val label = 
ts.toUtcDate.toString + MetaProposal( + MetaProposalType.DocDate, + Candidate(IdRef(Ident.unsafe(label), label), origin.toSet) + ) + } + def parseDate(cand: Candidate): Option[LocalDate] = parseDate(cand.ref.id) diff --git a/modules/common/src/main/scala/docspell/common/MetaProposalList.scala b/modules/common/src/main/scala/docspell/common/MetaProposalList.scala index d72f5f85..4b62b686 100644 --- a/modules/common/src/main/scala/docspell/common/MetaProposalList.scala +++ b/modules/common/src/main/scala/docspell/common/MetaProposalList.scala @@ -38,6 +38,9 @@ case class MetaProposalList private (proposals: List[MetaProposal]) { def change(f: MetaProposal => MetaProposal): MetaProposalList = new MetaProposalList(proposals.map(f)) + def filter(f: MetaProposal => Boolean): MetaProposalList = + new MetaProposalList(proposals.filter(f)) + def sortByWeights: MetaProposalList = change(_.sortByWeight) } diff --git a/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala b/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala index a8ffde51..4b652f62 100644 --- a/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala +++ b/modules/common/src/test/scala/docspell/common/MetaProposalListTest.scala @@ -42,10 +42,12 @@ object MetaProposalListTest extends SimpleTestSuite { test("sort by weights") { val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty, Some(0.1)) val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty, Some(0.05)) - val mpl = MetaProposalList.of( - MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)), - MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2)) - ).sortByWeights + val mpl = MetaProposalList + .of( + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)), + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2)) + ) + .sortByWeights val candidates = mpl.find(MetaProposalType.CorrOrg).get.values assertEquals(candidates.head, cand2) @@ -55,10 
+57,12 @@ object MetaProposalListTest extends SimpleTestSuite { test("sort by weights: unset is last") { val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty, Some(0.1)) val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty) - val mpl = MetaProposalList.of( - MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)), - MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2)) - ).sortByWeights + val mpl = MetaProposalList + .of( + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)), + MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2)) + ) + .sortByWeights val candidates = mpl.find(MetaProposalType.CorrOrg).get.values assertEquals(candidates.head, cand1) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 4429203b..ddb184ab 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -12,6 +12,7 @@ import docspell.joex.scheduler._ import docspell.store.records._ import docspell.files.Zip import cats.kernel.Monoid +import emil.Mail /** Goes through all attachments and extracts archive files, like zip * files. 
The process is recursive, until all archives have been @@ -56,7 +57,8 @@ object ExtractArchive { _ <- naa.traverse(storeArchive(ctx)) } yield naa.headOption -> item.copy( attachments = nra, - originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap + originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap, + givenMeta = item.givenMeta.fillEmptyFrom(Monoid[Extracted].combineAll(ras).meta) ) } @@ -139,15 +141,27 @@ object ExtractArchive { .through(ReadMail.bytesToMail[F](ctx.logger)) .flatMap { mail => val mId = mail.header.messageId + val givenMeta = + for { + _ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}") + s <- Sync[F].delay(extractMailMeta(mail)) + } yield s + ReadMail .mailToEntries(ctx.logger)(mail) - .flatMap(handleEntry(ctx, ra, archive, mId)) + .flatMap(handleEntry(ctx, ra, archive, mId)) ++ Stream.eval(givenMeta) } .foldMonoid .compile .lastOrError } + def extractMailMeta[F[_]](mail: Mail[F]): Extracted = + mail.header.date + .map(Timestamp.apply) + .map(ts => Extracted.empty.setMeta(MetaProposal.docDate(ts, None))) + .getOrElse(Extracted.empty) + def handleEntry[F[_]: Sync]( ctx: Context[F, _], ra: RAttachment, @@ -187,18 +201,28 @@ object ExtractArchive { def storeArchive[F[_]: Sync](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] = ctx.store.transact(RAttachmentArchive.insert(aa)) - case class Extracted(files: Vector[RAttachment], archives: Vector[RAttachmentArchive]) { + case class Extracted( + files: Vector[RAttachment], + archives: Vector[RAttachmentArchive], + meta: MetaProposalList + ) { def ++(e: Extracted) = - Extracted(files ++ e.files, archives ++ e.archives) + Extracted(files ++ e.files, archives ++ e.archives, meta.fillEmptyFrom(e.meta)) + + def setMeta(m: MetaProposal): Extracted = + setMeta(MetaProposalList.of(m)) + + def setMeta(ml: MetaProposalList): Extracted = + Extracted(files, archives, meta.fillEmptyFrom(ml)) } object Extracted { - val empty = Extracted(Vector.empty, 
Vector.empty) + val empty = Extracted(Vector.empty, Vector.empty, MetaProposalList.empty) def noArchive(ra: RAttachment): Extracted = - Extracted(Vector(ra), Vector.empty) + Extracted(Vector(ra), Vector.empty, MetaProposalList.empty) def of(ra: RAttachment, aa: RAttachmentArchive): Extracted = - Extracted(Vector(ra), Vector(aa)) + Extracted(Vector(ra), Vector(aa), MetaProposalList.empty) implicit val extractedMonoid: Monoid[Extracted] = Monoid.instance(empty, _ ++ _) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 7d8e7729..46ef9f8c 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -5,6 +5,16 @@ import docspell.joex.process.ItemData.AttachmentDates import docspell.store.records.{RAttachment, RAttachmentMeta, RItem} /** Data that is carried across all processing tasks. + * + * @param item the stored item record + * @param attachments the attachments belonging to the item + * @param metas the meta data to each attachment; depending on the + * state of processing, this may be empty + * @param dateLabels a separate list of found dates + * @param originFile a mapping from an attachment id to a filemeta-id + * containing the source or origin file + * @param givenMeta meta data to this item that was not "guessed" + * from an attachment but given and thus is always correct */ case class ItemData( item: RItem, diff --git a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala index ca875215..7552b8db 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/LinkProposal.scala @@ -12,7 +12,11 @@ object LinkProposal { Task { ctx => // sort by weight; order of equal weights is not important, just // choose one 
others are then suggestions - val proposals = MetaProposalList.flatten(data.metas.map(_.proposals)).sortByWeights + // doc-date is only set when given explicitly, not from "guessing" + val proposals = MetaProposalList + .flatten(data.metas.map(_.proposals)) + .filter(_.proposalType != MetaProposalType.DocDate) + .sortByWeights ctx.logger.info(s"Starting linking proposals") *> MetaProposalType.all @@ -28,7 +32,8 @@ object LinkProposal { )(mpt: MetaProposalType): F[Result] = data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match { case None => - Result.noneFound(mpt).pure[F] + ctx.logger.debug(s"No value for $mpt") *> + Result.noneFound(mpt).pure[F] case Some(a) if a.isSingleValue => ctx.logger.info(s"Found one candidate for ${a.proposalType}") *> setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ => @@ -71,7 +76,17 @@ object LinkProposal { RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value)) ) case MetaProposalType.DocDate => - ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0) + MetaProposal.parseDate(value) match { + case Some(ld) => + val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC)) + ctx.logger.debug(s"Updating item date ${value.id}") *> + ctx.store.transact( + RItem.updateDate(itemId, ctx.args.meta.collective, Some(ts)) + ) + case None => + ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *> + 0.pure[F] + } case MetaProposalType.DueDate => MetaProposal.parseDate(value) match { case Some(ld) =>