Merge pull request #2380 from eikek/custom-processing-data

Custom processing data
This commit is contained in:
mergify[bot]
2023-11-15 19:28:03 +00:00
committed by GitHub
16 changed files with 62 additions and 25 deletions

View File

@ -15,10 +15,10 @@ import docspell.common.ProcessItemArgs.ProcessMeta
import docspell.common.{CollectiveId, Ident, Language}
import docspell.logging.Logger
import io.circe.Codec
import io.circe.generic.extras.Configuration
import io.circe.generic.extras.semiauto.deriveConfiguredCodec
import io.circe.generic.semiauto.deriveCodec
import io.circe.{Codec, Json}
case class NewFile(metadata: Meta = Meta.empty, file: String) {
@ -41,7 +41,8 @@ object NewFile {
case class Meta(
language: Option[Language],
skipDuplicate: Option[Boolean],
attachmentsOnly: Option[Boolean]
attachmentsOnly: Option[Boolean],
customData: Option[Json]
) {
def toProcessMeta(
@ -62,12 +63,13 @@ object NewFile {
fileFilter = None,
tags = None,
reprocess = false,
attachmentsOnly = attachmentsOnly
attachmentsOnly = attachmentsOnly,
customData = customData
)
}
object Meta {
val empty = Meta(None, None, None)
val empty = Meta(None, None, None, None)
implicit val jsonCodec: Codec[Meta] = deriveCodec
}

View File

@ -15,7 +15,7 @@ import docspell.common._
import docspell.logging.Logger
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
import io.circe.{Decoder, Encoder, Json}
case class NewItem(metadata: Option[Meta], files: List[String]) {
@ -25,7 +25,7 @@ case class NewItem(metadata: Option[Meta], files: List[String]) {
sourceAbbrev: String
): ProcessItemArgs.ProcessMeta =
metadata
.getOrElse(Meta(None, None, None, None, None, None, None))
.getOrElse(Meta.empty)
.toProcessArgs(cid, collLang, sourceAbbrev)
def resolveFiles[F[_]: Files: Monad](
@ -58,7 +58,8 @@ object NewItem {
source: Option[String],
skipDuplicate: Option[Boolean],
tags: Option[List[String]],
attachmentsOnly: Option[Boolean]
attachmentsOnly: Option[Boolean],
customData: Option[Json]
) {
def toProcessArgs(
@ -78,11 +79,14 @@ object NewItem {
fileFilter = None,
tags = tags,
reprocess = false,
attachmentsOnly = attachmentsOnly
attachmentsOnly = attachmentsOnly,
customData = customData
)
}
/** Companion of `Meta`: provides an all-`None` default value and the circe JSON
  * encoder/decoder instances for it.
  */
object Meta {
// Default metadata with every one of the eight optional fields left unset.
val empty: Meta = Meta(None, None, None, None, None, None, None, None)
// Encoder and decoder are derived separately through circe's semiauto derivation.
implicit val jsonEncoder: Encoder[Meta] = deriveEncoder
implicit val jsonDecoder: Decoder[Meta] = deriveDecoder
}

View File

@ -19,6 +19,8 @@ import docspell.scheduler.{Job, JobStore}
import docspell.store.Store
import docspell.store.records._
import io.circe.Json
trait OUpload[F[_]] {
def submit(
@ -69,7 +71,8 @@ object OUpload {
tags: List[String],
language: Option[Language],
attachmentsOnly: Option[Boolean],
flattenArchives: Option[Boolean]
flattenArchives: Option[Boolean],
customData: Option[Json]
)
case class UploadData[F[_]](
@ -157,7 +160,8 @@ object OUpload {
data.meta.fileFilter.some,
data.meta.tags.some,
false,
data.meta.attachmentsOnly
data.meta.attachmentsOnly,
data.meta.customData
)
args = ProcessItemArgs(meta, files.toList)
jobs <- right(

View File

@ -54,7 +54,8 @@ object ProcessItemArgs {
fileFilter: Option[Glob],
tags: Option[List[String]],
reprocess: Boolean,
attachmentsOnly: Option[Boolean]
attachmentsOnly: Option[Boolean],
customData: Option[Json]
)
object ProcessMeta {

View File

@ -75,6 +75,7 @@ object ItemAddonTask extends AddonTaskExtension {
givenMeta = proposals,
tags = tags.map(_.name).toList,
classifyProposals = MetaProposalList.empty,
classifyTags = Nil
classifyTags = Nil,
customData = None // can't retain this information from a final item. TODO
)
}

View File

@ -112,7 +112,8 @@ object CreateItem {
MetaProposalList.empty,
Nil,
MetaProposalList.empty,
Nil
Nil,
ctx.args.meta.customData
)
}
@ -175,7 +176,8 @@ object CreateItem {
MetaProposalList.empty,
Nil,
MetaProposalList.empty,
Nil
Nil,
ctx.args.meta.customData
)
)
}

View File

@ -46,7 +46,8 @@ case class ItemData(
tags: List[String],
// proposals obtained from the classifier
classifyProposals: MetaProposalList,
classifyTags: List[String]
classifyTags: List[String],
customData: Option[Json]
) {
/** sort by weight; order of equal weights is not important, just choose one others are
@ -121,6 +122,7 @@ object ItemData {
)
)
.asJson,
"customData" -> data.customData.asJson,
"tags" -> data.tags.asJson,
"assumedTags" -> data.classifyTags.asJson,
"assumedCorrOrg" -> data.finalProposals

View File

@ -101,7 +101,8 @@ object ReProcessItem {
MetaProposalList.empty,
Nil,
MetaProposalList.empty,
Nil
Nil,
None // cannot retain customData from an already existing item
)).getOrElseF(
Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}"))
)
@ -134,7 +135,8 @@ object ReProcessItem {
None,
None,
true,
None // attachOnly (not used when reprocessing attachments)
None, // attachOnly (not used when reprocessing attachments)
None // cannot retain customData from an already existing item
),
Nil
).pure[F]

View File

@ -328,6 +328,7 @@ object ScanMailboxTask {
args.tags.getOrElse(Nil),
args.language,
args.attachmentsOnly,
None,
None
)
data = OUpload.UploadData(

View File

@ -8250,6 +8250,13 @@ components:
attachments of the e-mail are imported and the e-mail body
is discarded. E-mails that don't have any attachments are
skipped.
customData:
type: object
format: json
description: |
Custom user data that is threaded through the processing. Docspell
ignores it completely, but passes it on to the outcome of processing
so that addons or other consumers can react to it.
Collective:
description: |

View File

@ -315,7 +315,8 @@ trait Conversions {
m.tags.map(_.items).getOrElse(Nil),
m.language,
m.attachmentsOnly,
m.flattenArchives
m.flattenArchives,
m.customData
)
)
)
@ -333,6 +334,7 @@ trait Conversions {
Nil,
None,
None,
None,
None
)
)

View File

@ -348,7 +348,8 @@ object MigrateCollectiveIdTaskArgs extends TransactorSupport {
fileFilter = oldArgs.meta.fileFilter,
tags = oldArgs.meta.tags,
reprocess = oldArgs.meta.reprocess,
attachmentsOnly = oldArgs.meta.attachmentsOnly
attachmentsOnly = oldArgs.meta.attachmentsOnly,
customData = None
),
oldArgs.files.map(f =>
ProcessItemArgs

View File

@ -16,7 +16,7 @@ Since Docspell is free software, the tools must also be free.
# Considered Options
* [Apache POI](https://poi.apache.org) together with
[this](https://search.maven.org/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6/jar)
[this](https://central.sonatype.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6)
library
* [pandoc](https://pandoc.org/) external command
* [abiword](https://www.abisource.com/) external command

View File

@ -7,6 +7,7 @@ import docspell.addons.out.NewItem.{Meta => ItemMeta}
import docspell.common._
import docspell.common.Timestamp
import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction}
import io.circe.Json
import io.circe.syntax._
object AddonOutputExample extends Helper {
@ -53,7 +54,8 @@ object AddonOutputExample extends Helper {
metadata = FileMeta(
language = Some(Language.English),
skipDuplicate = Some(true),
attachmentsOnly = Some(false)
attachmentsOnly = Some(false),
customData = None
),
file = "new-file1.docx"
),
@ -61,7 +63,8 @@ object AddonOutputExample extends Helper {
metadata = FileMeta(
language = Some(Language.German),
skipDuplicate = Some(true),
attachmentsOnly = Some(false)
attachmentsOnly = Some(false),
customData = None
),
file = "new-file2.pdf"
)
@ -77,7 +80,8 @@ object AddonOutputExample extends Helper {
source = "the-addon-x".some,
skipDuplicate = true.some,
tags = List("tag1", "tag2").some,
attachmentsOnly = None
attachmentsOnly = None,
customData = Some(Json.obj("my-id" -> Json.fromInt(42)))
).some,
files = List("a-file.pdf", "another.jpg")
)

View File

@ -2,6 +2,7 @@ package docspell.website
import cats.syntax.option._
import docspell.common.{Language, ProcessItemArgs}
import io.circe.Json
import io.circe.syntax._
object ItemArgsExample extends Helper {
@ -18,7 +19,8 @@ object ItemArgsExample extends Helper {
fileFilter = None,
tags = List("given-tag-1").some,
reprocess = false,
attachmentsOnly = None
attachmentsOnly = None,
customData = Some(Json.obj("my-id" -> Json.fromInt(42)))
)
val exampleJson = example.asJson.spaces2

View File

@ -5,6 +5,7 @@ import docspell.common.MetaProposal.Candidate
import docspell.common._
import docspell.joex.process.ItemData
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem}
import io.circe.Json
import io.circe.syntax._
object ItemDataExample extends Helper {
@ -67,7 +68,8 @@ object ItemDataExample extends Helper {
givenMeta = givenProposals,
tags = List("tag-1"),
classifyProposals = MetaProposalList.empty,
classifyTags = List("invoice")
classifyTags = List("invoice"),
customData = Some(Json.obj("my-id" -> Json.fromInt(42)))
)
val exampleJson = example.asJson.spaces2