mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 10:59:33 +00:00
Use date from e-mails to set item date
This commit is contained in:
parent
5e6ce1737c
commit
d65c1e0d36
@ -43,6 +43,17 @@ case class MetaProposal(proposalType: MetaProposalType, values: NonEmptyList[Can
|
||||
|
||||
object MetaProposal {
|
||||
|
||||
def apply(pt: MetaProposalType, v0: Candidate, vm: Candidate*): MetaProposal =
|
||||
MetaProposal(pt, NonEmptyList.of(v0, vm: _*))
|
||||
|
||||
def docDate(ts: Timestamp, origin: Option[NerLabel]): MetaProposal = {
|
||||
val label = ts.toUtcDate.toString
|
||||
MetaProposal(
|
||||
MetaProposalType.DocDate,
|
||||
Candidate(IdRef(Ident.unsafe(label), label), origin.toSet)
|
||||
)
|
||||
}
|
||||
|
||||
def parseDate(cand: Candidate): Option[LocalDate] =
|
||||
parseDate(cand.ref.id)
|
||||
|
||||
|
@ -38,6 +38,9 @@ case class MetaProposalList private (proposals: List[MetaProposal]) {
|
||||
def change(f: MetaProposal => MetaProposal): MetaProposalList =
|
||||
new MetaProposalList(proposals.map(f))
|
||||
|
||||
def filter(f: MetaProposal => Boolean): MetaProposalList =
|
||||
new MetaProposalList(proposals.filter(f))
|
||||
|
||||
def sortByWeights: MetaProposalList =
|
||||
change(_.sortByWeight)
|
||||
}
|
||||
|
@ -42,10 +42,12 @@ object MetaProposalListTest extends SimpleTestSuite {
|
||||
test("sort by weights") {
|
||||
val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty, Some(0.1))
|
||||
val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty, Some(0.05))
|
||||
val mpl = MetaProposalList.of(
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)),
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2))
|
||||
).sortByWeights
|
||||
val mpl = MetaProposalList
|
||||
.of(
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)),
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2))
|
||||
)
|
||||
.sortByWeights
|
||||
|
||||
val candidates = mpl.find(MetaProposalType.CorrOrg).get.values
|
||||
assertEquals(candidates.head, cand2)
|
||||
@ -55,10 +57,12 @@ object MetaProposalListTest extends SimpleTestSuite {
|
||||
test("sort by weights: unset is last") {
|
||||
val cand1 = Candidate(IdRef(Ident.unsafe("123"), "name"), Set.empty, Some(0.1))
|
||||
val cand2 = Candidate(IdRef(Ident.unsafe("456"), "name"), Set.empty)
|
||||
val mpl = MetaProposalList.of(
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)),
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2))
|
||||
).sortByWeights
|
||||
val mpl = MetaProposalList
|
||||
.of(
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand1)),
|
||||
MetaProposal(MetaProposalType.CorrOrg, NonEmptyList.of(cand2))
|
||||
)
|
||||
.sortByWeights
|
||||
|
||||
val candidates = mpl.find(MetaProposalType.CorrOrg).get.values
|
||||
assertEquals(candidates.head, cand1)
|
||||
|
@ -12,6 +12,7 @@ import docspell.joex.scheduler._
|
||||
import docspell.store.records._
|
||||
import docspell.files.Zip
|
||||
import cats.kernel.Monoid
|
||||
import emil.Mail
|
||||
|
||||
/** Goes through all attachments and extracts archive files, like zip
|
||||
* files. The process is recursive, until all archives have been
|
||||
@ -56,7 +57,8 @@ object ExtractArchive {
|
||||
_ <- naa.traverse(storeArchive(ctx))
|
||||
} yield naa.headOption -> item.copy(
|
||||
attachments = nra,
|
||||
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap
|
||||
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap,
|
||||
givenMeta = item.givenMeta.fillEmptyFrom(Monoid[Extracted].combineAll(ras).meta)
|
||||
)
|
||||
}
|
||||
|
||||
@ -139,15 +141,27 @@ object ExtractArchive {
|
||||
.through(ReadMail.bytesToMail[F](ctx.logger))
|
||||
.flatMap { mail =>
|
||||
val mId = mail.header.messageId
|
||||
val givenMeta =
|
||||
for {
|
||||
_ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}")
|
||||
s <- Sync[F].delay(extractMailMeta(mail))
|
||||
} yield s
|
||||
|
||||
ReadMail
|
||||
.mailToEntries(ctx.logger)(mail)
|
||||
.flatMap(handleEntry(ctx, ra, archive, mId))
|
||||
.flatMap(handleEntry(ctx, ra, archive, mId)) ++ Stream.eval(givenMeta)
|
||||
}
|
||||
.foldMonoid
|
||||
.compile
|
||||
.lastOrError
|
||||
}
|
||||
|
||||
def extractMailMeta[F[_]](mail: Mail[F]): Extracted =
|
||||
mail.header.date
|
||||
.map(Timestamp.apply)
|
||||
.map(ts => Extracted.empty.setMeta(MetaProposal.docDate(ts, None)))
|
||||
.getOrElse(Extracted.empty)
|
||||
|
||||
def handleEntry[F[_]: Sync](
|
||||
ctx: Context[F, _],
|
||||
ra: RAttachment,
|
||||
@ -187,18 +201,28 @@ object ExtractArchive {
|
||||
def storeArchive[F[_]: Sync](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] =
|
||||
ctx.store.transact(RAttachmentArchive.insert(aa))
|
||||
|
||||
case class Extracted(files: Vector[RAttachment], archives: Vector[RAttachmentArchive]) {
|
||||
case class Extracted(
|
||||
files: Vector[RAttachment],
|
||||
archives: Vector[RAttachmentArchive],
|
||||
meta: MetaProposalList
|
||||
) {
|
||||
def ++(e: Extracted) =
|
||||
Extracted(files ++ e.files, archives ++ e.archives)
|
||||
Extracted(files ++ e.files, archives ++ e.archives, meta.fillEmptyFrom(e.meta))
|
||||
|
||||
def setMeta(m: MetaProposal): Extracted =
|
||||
setMeta(MetaProposalList.of(m))
|
||||
|
||||
def setMeta(ml: MetaProposalList): Extracted =
|
||||
Extracted(files, archives, meta.fillEmptyFrom(ml))
|
||||
}
|
||||
object Extracted {
|
||||
val empty = Extracted(Vector.empty, Vector.empty)
|
||||
val empty = Extracted(Vector.empty, Vector.empty, MetaProposalList.empty)
|
||||
|
||||
def noArchive(ra: RAttachment): Extracted =
|
||||
Extracted(Vector(ra), Vector.empty)
|
||||
Extracted(Vector(ra), Vector.empty, MetaProposalList.empty)
|
||||
|
||||
def of(ra: RAttachment, aa: RAttachmentArchive): Extracted =
|
||||
Extracted(Vector(ra), Vector(aa))
|
||||
Extracted(Vector(ra), Vector(aa), MetaProposalList.empty)
|
||||
|
||||
implicit val extractedMonoid: Monoid[Extracted] =
|
||||
Monoid.instance(empty, _ ++ _)
|
||||
|
@ -5,6 +5,16 @@ import docspell.joex.process.ItemData.AttachmentDates
|
||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
|
||||
|
||||
/** Data that is carried across all processing tasks.
|
||||
*
|
||||
* @param item the stored item record
|
||||
* @param attachments the attachments belonging to the item
|
||||
* @param metas the meta data to each attachment; depending on the
|
||||
* state of processing, this may be empty
|
||||
* @param dateLabels a separate list of found dates
|
||||
* @param originFile a mapping from an attachment id to a filemeta-id
|
||||
* containing the source or origin file
|
||||
* @param givenMeta meta data to this item that was not "guessed"
|
||||
* from an attachment but given and thus is always correct
|
||||
*/
|
||||
case class ItemData(
|
||||
item: RItem,
|
||||
|
@ -12,7 +12,11 @@ object LinkProposal {
|
||||
Task { ctx =>
|
||||
// sort by weight; order of equal weights is not important, just
|
||||
// choose one; the others are then suggestions
|
||||
val proposals = MetaProposalList.flatten(data.metas.map(_.proposals)).sortByWeights
|
||||
// doc-date is only set when given explicitly, not from "guessing"
|
||||
val proposals = MetaProposalList
|
||||
.flatten(data.metas.map(_.proposals))
|
||||
.filter(_.proposalType != MetaProposalType.DocDate)
|
||||
.sortByWeights
|
||||
|
||||
ctx.logger.info(s"Starting linking proposals") *>
|
||||
MetaProposalType.all
|
||||
@ -28,7 +32,8 @@ object LinkProposal {
|
||||
)(mpt: MetaProposalType): F[Result] =
|
||||
data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match {
|
||||
case None =>
|
||||
Result.noneFound(mpt).pure[F]
|
||||
ctx.logger.debug(s"No value for $mpt") *>
|
||||
Result.noneFound(mpt).pure[F]
|
||||
case Some(a) if a.isSingleValue =>
|
||||
ctx.logger.info(s"Found one candidate for ${a.proposalType}") *>
|
||||
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ =>
|
||||
@ -71,7 +76,17 @@ object LinkProposal {
|
||||
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
|
||||
)
|
||||
case MetaProposalType.DocDate =>
|
||||
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
|
||||
MetaProposal.parseDate(value) match {
|
||||
case Some(ld) =>
|
||||
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
|
||||
ctx.logger.debug(s"Updating item date ${value.id}") *>
|
||||
ctx.store.transact(
|
||||
RItem.updateDate(itemId, ctx.args.meta.collective, Some(ts))
|
||||
)
|
||||
case None =>
|
||||
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
|
||||
0.pure[F]
|
||||
}
|
||||
case MetaProposalType.DueDate =>
|
||||
MetaProposal.parseDate(value) match {
|
||||
case Some(ld) =>
|
||||
|
Loading…
x
Reference in New Issue
Block a user