Use date from e-mails to set item date

This commit is contained in:
Eike Kettner
2020-05-16 14:18:59 +02:00
parent 5e6ce1737c
commit d65c1e0d36
6 changed files with 85 additions and 18 deletions

View File

@ -12,6 +12,7 @@ import docspell.joex.scheduler._
import docspell.store.records._
import docspell.files.Zip
import cats.kernel.Monoid
import emil.Mail
/** Goes through all attachments and extracts archive files, like zip
* files. The process is recursive, until all archives have been
@ -56,7 +57,8 @@ object ExtractArchive {
_ <- naa.traverse(storeArchive(ctx))
} yield naa.headOption -> item.copy(
attachments = nra,
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap
originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap,
givenMeta = item.givenMeta.fillEmptyFrom(Monoid[Extracted].combineAll(ras).meta)
)
}
@ -139,15 +141,27 @@ object ExtractArchive {
.through(ReadMail.bytesToMail[F](ctx.logger))
.flatMap { mail =>
val mId = mail.header.messageId
val givenMeta =
for {
_ <- ctx.logger.debug(s"Use mail date for item date: ${mail.header.date}")
s <- Sync[F].delay(extractMailMeta(mail))
} yield s
ReadMail
.mailToEntries(ctx.logger)(mail)
.flatMap(handleEntry(ctx, ra, archive, mId))
.flatMap(handleEntry(ctx, ra, archive, mId)) ++ Stream.eval(givenMeta)
}
.foldMonoid
.compile
.lastOrError
}
/** Derives the meta data that a mail itself provides.
  *
  * If `mail.header.date` is defined, it is converted to a `Timestamp`
  * and wrapped into a doc-date proposal that is set on an otherwise
  * empty [[Extracted]]. Without a date the empty value is returned.
  */
def extractMailMeta[F[_]](mail: Mail[F]): Extracted =
  mail.header.date.fold(Extracted.empty) { sentAt =>
    val docDate = MetaProposal.docDate(Timestamp(sentAt), None)
    Extracted.empty.setMeta(docDate)
  }
def handleEntry[F[_]: Sync](
ctx: Context[F, _],
ra: RAttachment,
@ -187,18 +201,28 @@ object ExtractArchive {
/** Inserts the given archive record via the store of the task context.
  *
  * The insert is run through `ctx.store.transact`; the resulting `Int`
  * is whatever that insert yields (presumably the number of affected
  * rows -- confirm against `RAttachmentArchive.insert`).
  */
def storeArchive[F[_]: Sync](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] = {
  val insertArchive = RAttachmentArchive.insert(aa)
  ctx.store.transact(insertArchive)
}
case class Extracted(files: Vector[RAttachment], archives: Vector[RAttachmentArchive]) {
case class Extracted(
files: Vector[RAttachment],
archives: Vector[RAttachmentArchive],
meta: MetaProposalList
) {
def ++(e: Extracted) =
Extracted(files ++ e.files, archives ++ e.archives)
Extracted(files ++ e.files, archives ++ e.archives, meta.fillEmptyFrom(e.meta))
def setMeta(m: MetaProposal): Extracted =
setMeta(MetaProposalList.of(m))
def setMeta(ml: MetaProposalList): Extracted =
Extracted(files, archives, meta.fillEmptyFrom(ml))
}
object Extracted {
val empty = Extracted(Vector.empty, Vector.empty)
val empty = Extracted(Vector.empty, Vector.empty, MetaProposalList.empty)
def noArchive(ra: RAttachment): Extracted =
Extracted(Vector(ra), Vector.empty)
Extracted(Vector(ra), Vector.empty, MetaProposalList.empty)
def of(ra: RAttachment, aa: RAttachmentArchive): Extracted =
Extracted(Vector(ra), Vector(aa))
Extracted(Vector(ra), Vector(aa), MetaProposalList.empty)
implicit val extractedMonoid: Monoid[Extracted] =
Monoid.instance(empty, _ ++ _)

View File

@ -5,6 +5,16 @@ import docspell.joex.process.ItemData.AttachmentDates
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
/** Data that is carried across all processing tasks.
*
* @param item the stored item record
* @param attachments the attachments belonging to the item
* @param metas the meta data to each attachment; depending on the
* state of processing, this may be empty
* @param dateLabels a separate list of found dates
* @param originFile a mapping from an attachment id to a filemeta-id
* containing the source or origin file
* @param givenMeta meta data to this item that was not "guessed"
* from an attachment but given and thus is always correct
*/
case class ItemData(
item: RItem,

View File

@ -12,7 +12,11 @@ object LinkProposal {
Task { ctx =>
// sort by weight; order of equal weights is not important, just
// choose one; the others are then suggestions
val proposals = MetaProposalList.flatten(data.metas.map(_.proposals)).sortByWeights
// doc-date is only set when given explicitly, not from "guessing"
val proposals = MetaProposalList
.flatten(data.metas.map(_.proposals))
.filter(_.proposalType != MetaProposalType.DocDate)
.sortByWeights
ctx.logger.info(s"Starting linking proposals") *>
MetaProposalType.all
@ -28,7 +32,8 @@ object LinkProposal {
)(mpt: MetaProposalType): F[Result] =
data.givenMeta.find(mpt).orElse(proposalList.find(mpt)) match {
case None =>
Result.noneFound(mpt).pure[F]
ctx.logger.debug(s"No value for $mpt") *>
Result.noneFound(mpt).pure[F]
case Some(a) if a.isSingleValue =>
ctx.logger.info(s"Found one candidate for ${a.proposalType}") *>
setItemMeta(data.item.id, ctx, a.proposalType, a.values.head.ref.id).map(_ =>
@ -71,7 +76,17 @@ object LinkProposal {
RItem.updateConcEquip(itemId, ctx.args.meta.collective, Some(value))
)
case MetaProposalType.DocDate =>
ctx.logger.debug(s"Not linking document date suggestion ${value.id}").map(_ => 0)
MetaProposal.parseDate(value) match {
case Some(ld) =>
val ts = Timestamp.from(ld.atStartOfDay(Timestamp.UTC))
ctx.logger.debug(s"Updating item date ${value.id}") *>
ctx.store.transact(
RItem.updateDate(itemId, ctx.args.meta.collective, Some(ts))
)
case None =>
ctx.logger.info(s"Cannot read value '${value.id}' into a date.") *>
0.pure[F]
}
case MetaProposalType.DueDate =>
MetaProposal.parseDate(value) match {
case Some(ld) =>