Merge pull request #2380 from eikek/custom-processing-data

Custom processing data
This commit is contained in:
mergify[bot]
2023-11-15 19:28:03 +00:00
committed by GitHub
16 changed files with 62 additions and 25 deletions

View File

@ -15,10 +15,10 @@ import docspell.common.ProcessItemArgs.ProcessMeta
import docspell.common.{CollectiveId, Ident, Language} import docspell.common.{CollectiveId, Ident, Language}
import docspell.logging.Logger import docspell.logging.Logger
import io.circe.Codec
import io.circe.generic.extras.Configuration import io.circe.generic.extras.Configuration
import io.circe.generic.extras.semiauto.deriveConfiguredCodec import io.circe.generic.extras.semiauto.deriveConfiguredCodec
import io.circe.generic.semiauto.deriveCodec import io.circe.generic.semiauto.deriveCodec
import io.circe.{Codec, Json}
case class NewFile(metadata: Meta = Meta.empty, file: String) { case class NewFile(metadata: Meta = Meta.empty, file: String) {
@ -41,7 +41,8 @@ object NewFile {
case class Meta( case class Meta(
language: Option[Language], language: Option[Language],
skipDuplicate: Option[Boolean], skipDuplicate: Option[Boolean],
attachmentsOnly: Option[Boolean] attachmentsOnly: Option[Boolean],
customData: Option[Json]
) { ) {
def toProcessMeta( def toProcessMeta(
@ -62,12 +63,13 @@ object NewFile {
fileFilter = None, fileFilter = None,
tags = None, tags = None,
reprocess = false, reprocess = false,
attachmentsOnly = attachmentsOnly attachmentsOnly = attachmentsOnly,
customData = customData
) )
} }
object Meta { object Meta {
val empty = Meta(None, None, None) val empty = Meta(None, None, None, None)
implicit val jsonCodec: Codec[Meta] = deriveCodec implicit val jsonCodec: Codec[Meta] = deriveCodec
} }

View File

@ -15,7 +15,7 @@ import docspell.common._
import docspell.logging.Logger import docspell.logging.Logger
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder} import io.circe.{Decoder, Encoder, Json}
case class NewItem(metadata: Option[Meta], files: List[String]) { case class NewItem(metadata: Option[Meta], files: List[String]) {
@ -25,7 +25,7 @@ case class NewItem(metadata: Option[Meta], files: List[String]) {
sourceAbbrev: String sourceAbbrev: String
): ProcessItemArgs.ProcessMeta = ): ProcessItemArgs.ProcessMeta =
metadata metadata
.getOrElse(Meta(None, None, None, None, None, None, None)) .getOrElse(Meta.empty)
.toProcessArgs(cid, collLang, sourceAbbrev) .toProcessArgs(cid, collLang, sourceAbbrev)
def resolveFiles[F[_]: Files: Monad]( def resolveFiles[F[_]: Files: Monad](
@ -58,7 +58,8 @@ object NewItem {
source: Option[String], source: Option[String],
skipDuplicate: Option[Boolean], skipDuplicate: Option[Boolean],
tags: Option[List[String]], tags: Option[List[String]],
attachmentsOnly: Option[Boolean] attachmentsOnly: Option[Boolean],
customData: Option[Json]
) { ) {
def toProcessArgs( def toProcessArgs(
@ -78,11 +79,14 @@ object NewItem {
fileFilter = None, fileFilter = None,
tags = tags, tags = tags,
reprocess = false, reprocess = false,
attachmentsOnly = attachmentsOnly attachmentsOnly = attachmentsOnly,
customData = customData
) )
} }
object Meta { object Meta {
val empty: Meta = Meta(None, None, None, None, None, None, None, None)
implicit val jsonEncoder: Encoder[Meta] = deriveEncoder implicit val jsonEncoder: Encoder[Meta] = deriveEncoder
implicit val jsonDecoder: Decoder[Meta] = deriveDecoder implicit val jsonDecoder: Decoder[Meta] = deriveDecoder
} }

View File

@ -19,6 +19,8 @@ import docspell.scheduler.{Job, JobStore}
import docspell.store.Store import docspell.store.Store
import docspell.store.records._ import docspell.store.records._
import io.circe.Json
trait OUpload[F[_]] { trait OUpload[F[_]] {
def submit( def submit(
@ -69,7 +71,8 @@ object OUpload {
tags: List[String], tags: List[String],
language: Option[Language], language: Option[Language],
attachmentsOnly: Option[Boolean], attachmentsOnly: Option[Boolean],
flattenArchives: Option[Boolean] flattenArchives: Option[Boolean],
customData: Option[Json]
) )
case class UploadData[F[_]]( case class UploadData[F[_]](
@ -157,7 +160,8 @@ object OUpload {
data.meta.fileFilter.some, data.meta.fileFilter.some,
data.meta.tags.some, data.meta.tags.some,
false, false,
data.meta.attachmentsOnly data.meta.attachmentsOnly,
data.meta.customData
) )
args = ProcessItemArgs(meta, files.toList) args = ProcessItemArgs(meta, files.toList)
jobs <- right( jobs <- right(

View File

@ -54,7 +54,8 @@ object ProcessItemArgs {
fileFilter: Option[Glob], fileFilter: Option[Glob],
tags: Option[List[String]], tags: Option[List[String]],
reprocess: Boolean, reprocess: Boolean,
attachmentsOnly: Option[Boolean] attachmentsOnly: Option[Boolean],
customData: Option[Json]
) )
object ProcessMeta { object ProcessMeta {

View File

@ -75,6 +75,7 @@ object ItemAddonTask extends AddonTaskExtension {
givenMeta = proposals, givenMeta = proposals,
tags = tags.map(_.name).toList, tags = tags.map(_.name).toList,
classifyProposals = MetaProposalList.empty, classifyProposals = MetaProposalList.empty,
classifyTags = Nil classifyTags = Nil,
customData = None // can't retain this information from a final item. TODO
) )
} }

View File

@ -112,7 +112,8 @@ object CreateItem {
MetaProposalList.empty, MetaProposalList.empty,
Nil, Nil,
MetaProposalList.empty, MetaProposalList.empty,
Nil Nil,
ctx.args.meta.customData
) )
} }
@ -175,7 +176,8 @@ object CreateItem {
MetaProposalList.empty, MetaProposalList.empty,
Nil, Nil,
MetaProposalList.empty, MetaProposalList.empty,
Nil Nil,
ctx.args.meta.customData
) )
) )
} }

View File

@ -46,7 +46,8 @@ case class ItemData(
tags: List[String], tags: List[String],
// proposals obtained from the classifier // proposals obtained from the classifier
classifyProposals: MetaProposalList, classifyProposals: MetaProposalList,
classifyTags: List[String] classifyTags: List[String],
customData: Option[Json]
) { ) {
/** sort by weight; order of equal weights is not important, just choose one others are /** sort by weight; order of equal weights is not important, just choose one others are
@ -121,6 +122,7 @@ object ItemData {
) )
) )
.asJson, .asJson,
"customData" -> data.customData.asJson,
"tags" -> data.tags.asJson, "tags" -> data.tags.asJson,
"assumedTags" -> data.classifyTags.asJson, "assumedTags" -> data.classifyTags.asJson,
"assumedCorrOrg" -> data.finalProposals "assumedCorrOrg" -> data.finalProposals

View File

@ -101,7 +101,8 @@ object ReProcessItem {
MetaProposalList.empty, MetaProposalList.empty,
Nil, Nil,
MetaProposalList.empty, MetaProposalList.empty,
Nil Nil,
None // cannot retain customData from an already existing item
)).getOrElseF( )).getOrElseF(
Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}")) Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}"))
) )
@ -134,7 +135,8 @@ object ReProcessItem {
None, None,
None, None,
true, true,
None // attachOnly (not used when reprocessing attachments) None, // attachOnly (not used when reprocessing attachments)
None // cannot retain customData from an already existing item
), ),
Nil Nil
).pure[F] ).pure[F]

View File

@ -328,6 +328,7 @@ object ScanMailboxTask {
args.tags.getOrElse(Nil), args.tags.getOrElse(Nil),
args.language, args.language,
args.attachmentsOnly, args.attachmentsOnly,
None,
None None
) )
data = OUpload.UploadData( data = OUpload.UploadData(

View File

@ -8250,6 +8250,13 @@ components:
attachments of the e-mail are imported and the e-mail body attachments of the e-mail are imported and the e-mail body
is discarded. E-mails that don't have any attachments are is discarded. E-mails that don't have any attachments are
skipped. skipped.
customData:
type: object
format: json
description: |
Custom user data that is threaded through processing. Docspell
ignores it completely, but passes it on to the processing result
so that addons or other consumers can react to it.
Collective: Collective:
description: | description: |

View File

@ -315,7 +315,8 @@ trait Conversions {
m.tags.map(_.items).getOrElse(Nil), m.tags.map(_.items).getOrElse(Nil),
m.language, m.language,
m.attachmentsOnly, m.attachmentsOnly,
m.flattenArchives m.flattenArchives,
m.customData
) )
) )
) )
@ -333,6 +334,7 @@ trait Conversions {
Nil, Nil,
None, None,
None, None,
None,
None None
) )
) )

View File

@ -348,7 +348,8 @@ object MigrateCollectiveIdTaskArgs extends TransactorSupport {
fileFilter = oldArgs.meta.fileFilter, fileFilter = oldArgs.meta.fileFilter,
tags = oldArgs.meta.tags, tags = oldArgs.meta.tags,
reprocess = oldArgs.meta.reprocess, reprocess = oldArgs.meta.reprocess,
attachmentsOnly = oldArgs.meta.attachmentsOnly attachmentsOnly = oldArgs.meta.attachmentsOnly,
customData = None
), ),
oldArgs.files.map(f => oldArgs.files.map(f =>
ProcessItemArgs ProcessItemArgs

View File

@ -16,7 +16,7 @@ Since Docspell is free software, the tools must also be free.
# Considered Options # Considered Options
* [Apache POI](https://poi.apache.org) together with * [Apache POI](https://poi.apache.org) together with
[this](https://search.maven.org/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6/jar) [this](https://central.sonatype.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6)
library library
* [pandoc](https://pandoc.org/) external command * [pandoc](https://pandoc.org/) external command
* [abiword](https://www.abisource.com/) external command * [abiword](https://www.abisource.com/) external command

View File

@ -7,6 +7,7 @@ import docspell.addons.out.NewItem.{Meta => ItemMeta}
import docspell.common._ import docspell.common._
import docspell.common.Timestamp import docspell.common.Timestamp
import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction} import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction}
import io.circe.Json
import io.circe.syntax._ import io.circe.syntax._
object AddonOutputExample extends Helper { object AddonOutputExample extends Helper {
@ -53,7 +54,8 @@ object AddonOutputExample extends Helper {
metadata = FileMeta( metadata = FileMeta(
language = Some(Language.English), language = Some(Language.English),
skipDuplicate = Some(true), skipDuplicate = Some(true),
attachmentsOnly = Some(false) attachmentsOnly = Some(false),
customData = None
), ),
file = "new-file1.docx" file = "new-file1.docx"
), ),
@ -61,7 +63,8 @@ object AddonOutputExample extends Helper {
metadata = FileMeta( metadata = FileMeta(
language = Some(Language.German), language = Some(Language.German),
skipDuplicate = Some(true), skipDuplicate = Some(true),
attachmentsOnly = Some(false) attachmentsOnly = Some(false),
customData = None
), ),
file = "new-file2.pdf" file = "new-file2.pdf"
) )
@ -77,7 +80,8 @@ object AddonOutputExample extends Helper {
source = "the-addon-x".some, source = "the-addon-x".some,
skipDuplicate = true.some, skipDuplicate = true.some,
tags = List("tag1", "tag2").some, tags = List("tag1", "tag2").some,
attachmentsOnly = None attachmentsOnly = None,
customData = Some(Json.obj("my-id" -> Json.fromInt(42)))
).some, ).some,
files = List("a-file.pdf", "another.jpg") files = List("a-file.pdf", "another.jpg")
) )

View File

@ -2,6 +2,7 @@ package docspell.website
import cats.syntax.option._ import cats.syntax.option._
import docspell.common.{Language, ProcessItemArgs} import docspell.common.{Language, ProcessItemArgs}
import io.circe.Json
import io.circe.syntax._ import io.circe.syntax._
object ItemArgsExample extends Helper { object ItemArgsExample extends Helper {
@ -18,7 +19,8 @@ object ItemArgsExample extends Helper {
fileFilter = None, fileFilter = None,
tags = List("given-tag-1").some, tags = List("given-tag-1").some,
reprocess = false, reprocess = false,
attachmentsOnly = None attachmentsOnly = None,
customData = Some(Json.obj("my-id" -> Json.fromInt(42)))
) )
val exampleJson = example.asJson.spaces2 val exampleJson = example.asJson.spaces2

View File

@ -5,6 +5,7 @@ import docspell.common.MetaProposal.Candidate
import docspell.common._ import docspell.common._
import docspell.joex.process.ItemData import docspell.joex.process.ItemData
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem} import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
import io.circe.Json
import io.circe.syntax._ import io.circe.syntax._
object ItemDataExample extends Helper { object ItemDataExample extends Helper {
@ -67,7 +68,8 @@ object ItemDataExample extends Helper {
givenMeta = givenProposals, givenMeta = givenProposals,
tags = List("tag-1"), tags = List("tag-1"),
classifyProposals = MetaProposalList.empty, classifyProposals = MetaProposalList.empty,
classifyTags = List("invoice") classifyTags = List("invoice"),
customData = Some(Json.obj("my-id" -> Json.fromInt(42)))
) )
val exampleJson = example.asJson.spaces2 val exampleJson = example.asJson.spaces2