From 83ad2c504426f5c5208e9d790892500d8d550cec Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 12 Nov 2023 15:48:10 +0100 Subject: [PATCH 1/4] First sketch for custom data threaded through item processing Refs: #2334 --- .../src/main/scala/docspell/addons/out/NewFile.scala | 10 ++++++---- .../src/main/scala/docspell/addons/out/NewItem.scala | 12 ++++++++---- .../main/scala/docspell/backend/ops/OUpload.scala | 8 ++++++-- .../main/scala/docspell/common/ProcessItemArgs.scala | 3 ++- .../scala/docspell/joex/addon/ItemAddonTask.scala | 3 ++- .../scala/docspell/joex/process/CreateItem.scala | 6 ++++-- .../main/scala/docspell/joex/process/ItemData.scala | 4 +++- .../scala/docspell/joex/process/ReProcessItem.scala | 6 ++++-- .../docspell/joex/scanmailbox/ScanMailboxTask.scala | 1 + .../restapi/src/main/resources/docspell-openapi.yml | 8 ++++++++ .../scala/docspell/restserver/conv/Conversions.scala | 5 ++++- .../common/MigrateCollectiveIdTaskArgs.scala | 3 ++- 12 files changed, 50 insertions(+), 19 deletions(-) diff --git a/modules/addonlib/src/main/scala/docspell/addons/out/NewFile.scala b/modules/addonlib/src/main/scala/docspell/addons/out/NewFile.scala index 395cf4b1..b378d109 100644 --- a/modules/addonlib/src/main/scala/docspell/addons/out/NewFile.scala +++ b/modules/addonlib/src/main/scala/docspell/addons/out/NewFile.scala @@ -15,10 +15,10 @@ import docspell.common.ProcessItemArgs.ProcessMeta import docspell.common.{CollectiveId, Ident, Language} import docspell.logging.Logger -import io.circe.Codec import io.circe.generic.extras.Configuration import io.circe.generic.extras.semiauto.deriveConfiguredCodec import io.circe.generic.semiauto.deriveCodec +import io.circe.{Codec, Json} case class NewFile(metadata: Meta = Meta.empty, file: String) { @@ -41,7 +41,8 @@ object NewFile { case class Meta( language: Option[Language], skipDuplicate: Option[Boolean], - attachmentsOnly: Option[Boolean] + attachmentsOnly: Option[Boolean], + customData: Option[Json] ) { def toProcessMeta( 
@@ -62,12 +63,13 @@ object NewFile { fileFilter = None, tags = None, reprocess = false, - attachmentsOnly = attachmentsOnly + attachmentsOnly = attachmentsOnly, + customData = customData ) } object Meta { - val empty = Meta(None, None, None) + val empty = Meta(None, None, None, None) implicit val jsonCodec: Codec[Meta] = deriveCodec } diff --git a/modules/addonlib/src/main/scala/docspell/addons/out/NewItem.scala b/modules/addonlib/src/main/scala/docspell/addons/out/NewItem.scala index 2ee79161..6cd90b21 100644 --- a/modules/addonlib/src/main/scala/docspell/addons/out/NewItem.scala +++ b/modules/addonlib/src/main/scala/docspell/addons/out/NewItem.scala @@ -15,7 +15,7 @@ import docspell.common._ import docspell.logging.Logger import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} -import io.circe.{Decoder, Encoder} +import io.circe.{Decoder, Encoder, Json} case class NewItem(metadata: Option[Meta], files: List[String]) { @@ -25,7 +25,7 @@ case class NewItem(metadata: Option[Meta], files: List[String]) { sourceAbbrev: String ): ProcessItemArgs.ProcessMeta = metadata - .getOrElse(Meta(None, None, None, None, None, None, None)) + .getOrElse(Meta.empty) .toProcessArgs(cid, collLang, sourceAbbrev) def resolveFiles[F[_]: Files: Monad]( @@ -58,7 +58,8 @@ object NewItem { source: Option[String], skipDuplicate: Option[Boolean], tags: Option[List[String]], - attachmentsOnly: Option[Boolean] + attachmentsOnly: Option[Boolean], + customData: Option[Json] ) { def toProcessArgs( @@ -78,11 +79,14 @@ object NewItem { fileFilter = None, tags = tags, reprocess = false, - attachmentsOnly = attachmentsOnly + attachmentsOnly = attachmentsOnly, + customData = customData ) } object Meta { + val empty: Meta = Meta(None, None, None, None, None, None, None, None) + implicit val jsonEncoder: Encoder[Meta] = deriveEncoder implicit val jsonDecoder: Decoder[Meta] = deriveDecoder } diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala 
b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index 5c0d7a2a..398543d4 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -19,6 +19,8 @@ import docspell.scheduler.{Job, JobStore} import docspell.store.Store import docspell.store.records._ +import io.circe.Json + trait OUpload[F[_]] { def submit( @@ -69,7 +71,8 @@ object OUpload { tags: List[String], language: Option[Language], attachmentsOnly: Option[Boolean], - flattenArchives: Option[Boolean] + flattenArchives: Option[Boolean], + customData: Option[Json] ) case class UploadData[F[_]]( @@ -157,7 +160,8 @@ object OUpload { data.meta.fileFilter.some, data.meta.tags.some, false, - data.meta.attachmentsOnly + data.meta.attachmentsOnly, + data.meta.customData ) args = ProcessItemArgs(meta, files.toList) jobs <- right( diff --git a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala index edbee9a7..d1426595 100644 --- a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala @@ -54,7 +54,8 @@ object ProcessItemArgs { fileFilter: Option[Glob], tags: Option[List[String]], reprocess: Boolean, - attachmentsOnly: Option[Boolean] + attachmentsOnly: Option[Boolean], + customData: Option[Json] ) object ProcessMeta { diff --git a/modules/joex/src/main/scala/docspell/joex/addon/ItemAddonTask.scala b/modules/joex/src/main/scala/docspell/joex/addon/ItemAddonTask.scala index 8dfb0a3a..477a4d27 100644 --- a/modules/joex/src/main/scala/docspell/joex/addon/ItemAddonTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/addon/ItemAddonTask.scala @@ -75,6 +75,7 @@ object ItemAddonTask extends AddonTaskExtension { givenMeta = proposals, tags = tags.map(_.name).toList, classifyProposals = MetaProposalList.empty, - classifyTags = Nil + 
classifyTags = Nil, + customData = None // can't retain this information from a final item. TODO ) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index b5ae352b..06631d12 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -112,7 +112,8 @@ object CreateItem { MetaProposalList.empty, Nil, MetaProposalList.empty, - Nil + Nil, + ctx.args.meta.customData ) } @@ -175,7 +176,8 @@ object CreateItem { MetaProposalList.empty, Nil, MetaProposalList.empty, - Nil + Nil, + ctx.args.meta.customData ) ) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 94a6c07f..cea79a79 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -46,7 +46,8 @@ case class ItemData( tags: List[String], // proposals obtained from the classifier classifyProposals: MetaProposalList, - classifyTags: List[String] + classifyTags: List[String], + customData: Option[Json] ) { /** sort by weight; order of equal weights is not important, just choose one others are @@ -121,6 +122,7 @@ object ItemData { ) ) .asJson, + "customData" -> data.customData.asJson, "tags" -> data.tags.asJson, "assumedTags" -> data.classifyTags.asJson, "assumedCorrOrg" -> data.finalProposals diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index 58ade825..19a5193d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -101,7 +101,8 @@ object ReProcessItem { MetaProposalList.empty, Nil, MetaProposalList.empty, - Nil + Nil, + None 
// cannot retain customData from an already existing item )).getOrElseF( Sync[F].raiseError(new Exception(s"Item not found: ${ctx.args.itemId.id}")) ) @@ -134,7 +135,8 @@ object ReProcessItem { None, None, true, - None // attachOnly (not used when reprocessing attachments) + None, // attachOnly (not used when reprocessing attachments) + None // cannot retain customData from an already existing item ), Nil ).pure[F] diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala index 7add6baf..e0219121 100644 --- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala @@ -328,6 +328,7 @@ object ScanMailboxTask { args.tags.getOrElse(Nil), args.language, args.attachmentsOnly, + None, None ) data = OUpload.UploadData( diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 6a15940c..41f9be90 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -8250,6 +8250,14 @@ components: attachments of the e-mail are imported and the e-mail body is discarded. E-mails that don't have any attachments are skipped. + customData: + type: string + format: json + default: null + description: | + Custom user data that gets threaded through the processing. Docspell + ignores it completely, but will pass it to the outcome of processing + to be able to react to it in addons or other ways. 
Collective: description: | diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index 0a14fb88..2c209f25 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -32,6 +32,7 @@ import docspell.store.queries.{ import docspell.store.records._ import docspell.store.{AddResult, UpdateResult} +import io.circe.Json import org.http4s.headers.`Content-Type` import org.http4s.multipart.Multipart import org.log4s.Logger @@ -315,7 +316,8 @@ trait Conversions { m.tags.map(_.items).getOrElse(Nil), m.language, m.attachmentsOnly, - m.flattenArchives + m.flattenArchives, + m.customData.map(Json.fromString) // TODO fix openapi spec ) ) ) @@ -333,6 +335,7 @@ trait Conversions { Nil, None, None, + None, None ) ) diff --git a/modules/store/src/main/scala/db/migration/common/MigrateCollectiveIdTaskArgs.scala b/modules/store/src/main/scala/db/migration/common/MigrateCollectiveIdTaskArgs.scala index 8871111d..852f24f0 100644 --- a/modules/store/src/main/scala/db/migration/common/MigrateCollectiveIdTaskArgs.scala +++ b/modules/store/src/main/scala/db/migration/common/MigrateCollectiveIdTaskArgs.scala @@ -348,7 +348,8 @@ object MigrateCollectiveIdTaskArgs extends TransactorSupport { fileFilter = oldArgs.meta.fileFilter, tags = oldArgs.meta.tags, reprocess = oldArgs.meta.reprocess, - attachmentsOnly = oldArgs.meta.attachmentsOnly + attachmentsOnly = oldArgs.meta.attachmentsOnly, + customData = None ), oldArgs.files.map(f => ProcessItemArgs From 212c10e20a6737873666e48e00d702d8428dd880 Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 12 Nov 2023 18:48:43 +0100 Subject: [PATCH 2/4] Allow custom json Refs: #2334 --- modules/restapi/src/main/resources/docspell-openapi.yml | 3 +-- .../src/main/scala/docspell/restserver/conv/Conversions.scala | 3 +-- 
project/plugins.sbt | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 41f9be90..d506c723 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -8251,9 +8251,8 @@ components: is discarded. E-mails that don't have any attachments are skipped. customData: - type: string + type: object format: json - default: null description: | Custom user data that gets threaded through the processing. Docspell ignores it completely, but will pass it to the outcome of processing diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index 2c209f25..cc745186 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -32,7 +32,6 @@ import docspell.store.queries.{ import docspell.store.records._ import docspell.store.{AddResult, UpdateResult} -import io.circe.Json import org.http4s.headers.`Content-Type` import org.http4s.multipart.Multipart import org.log4s.Logger @@ -317,7 +316,7 @@ trait Conversions { m.language, m.attachmentsOnly, m.flattenArchives, - m.customData.map(Json.fromString) // TODO fix openapi spec + m.customData ) ) ) diff --git a/project/plugins.sbt b/project/plugins.sbt index 923b44bf..beed6745 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,6 +1,6 @@ addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.11.1") addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0") -addSbtPlugin("com.github.eikek" % "sbt-openapi-schema" % "0.10.0") +addSbtPlugin("com.github.eikek" % "sbt-openapi-schema" % "0.11.0") addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1") addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0") 
addSbtPlugin("com.github.sbt" % "sbt-git" % "2.0.1") From 11aa8e539270ee744edb91858a047047244ede37 Mon Sep 17 00:00:00 2001 From: eikek Date: Mon, 13 Nov 2023 16:51:39 +0100 Subject: [PATCH 3/4] Adopt website examples --- .../scala/docspell/website/AddonOutputExample.scala | 10 +++++++--- .../main/scala/docspell/website/ItemArgsExample.scala | 4 +++- .../main/scala/docspell/website/ItemDataExample.scala | 4 +++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/website/src/main/scala/docspell/website/AddonOutputExample.scala b/website/src/main/scala/docspell/website/AddonOutputExample.scala index 7cdf6e67..7a3bcf56 100644 --- a/website/src/main/scala/docspell/website/AddonOutputExample.scala +++ b/website/src/main/scala/docspell/website/AddonOutputExample.scala @@ -7,6 +7,7 @@ import docspell.addons.out.NewItem.{Meta => ItemMeta} import docspell.common._ import docspell.common.Timestamp import docspell.common.bc.{AttachmentAction, BackendCommand, ItemAction} +import io.circe.Json import io.circe.syntax._ object AddonOutputExample extends Helper { @@ -52,7 +53,8 @@ object AddonOutputExample extends Helper { metadata = FileMeta( language = Some(Language.English), skipDuplicate = Some(true), - attachmentsOnly = Some(false) + attachmentsOnly = Some(false), + customData = None ), file = "new-file1.docx" ), @@ -60,7 +62,8 @@ object AddonOutputExample extends Helper { metadata = FileMeta( language = Some(Language.German), skipDuplicate = Some(true), - attachmentsOnly = Some(false) + attachmentsOnly = Some(false), + customData = None ), file = "new-file2.pdf" ) @@ -76,7 +79,8 @@ object AddonOutputExample extends Helper { source = "the-addon-x".some, skipDuplicate = true.some, tags = List("tag1", "tag2").some, - attachmentsOnly = None + attachmentsOnly = None, + customData = Some(Json.obj("my-id" -> Json.fromInt(42))) ).some, files = List("a-file.pdf", "another.jpg") ) diff --git a/website/src/main/scala/docspell/website/ItemArgsExample.scala 
b/website/src/main/scala/docspell/website/ItemArgsExample.scala index 5ca5580b..86664ac1 100644 --- a/website/src/main/scala/docspell/website/ItemArgsExample.scala +++ b/website/src/main/scala/docspell/website/ItemArgsExample.scala @@ -2,6 +2,7 @@ package docspell.website import cats.syntax.option._ import docspell.common.{Language, ProcessItemArgs} +import io.circe.Json import io.circe.syntax._ object ItemArgsExample extends Helper { @@ -18,7 +19,8 @@ object ItemArgsExample extends Helper { fileFilter = None, tags = List("given-tag-1").some, reprocess = false, - attachmentsOnly = None + attachmentsOnly = None, + customData = Some(Json.obj("my-id" -> Json.fromInt(42))) ) val exampleJson = example.asJson.spaces2 diff --git a/website/src/main/scala/docspell/website/ItemDataExample.scala b/website/src/main/scala/docspell/website/ItemDataExample.scala index ce5214fc..8dbcb387 100644 --- a/website/src/main/scala/docspell/website/ItemDataExample.scala +++ b/website/src/main/scala/docspell/website/ItemDataExample.scala @@ -5,6 +5,7 @@ import docspell.common.MetaProposal.Candidate import docspell.common._ import docspell.joex.process.ItemData import docspell.store.records.{RAttachment, RAttachmentMeta, RItem} +import io.circe.Json import io.circe.syntax._ object ItemDataExample extends Helper { @@ -67,7 +68,8 @@ object ItemDataExample extends Helper { givenMeta = givenProposals, tags = List("tag-1"), classifyProposals = MetaProposalList.empty, - classifyTags = List("invoice") + classifyTags = List("invoice"), + customData = Some(Json.obj("my-id" -> Json.fromInt(42))) ) val exampleJson = example.asJson.spaces2 From 175ae0299667c84774ef482917ac958a9adc9700 Mon Sep 17 00:00:00 2001 From: eikek Date: Mon, 13 Nov 2023 17:06:39 +0100 Subject: [PATCH 4/4] Fix external link in docs --- website/site/content/docs/dev/adr/0009_convert_office_docs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/site/content/docs/dev/adr/0009_convert_office_docs.md 
b/website/site/content/docs/dev/adr/0009_convert_office_docs.md index 40f74c11..b4ab4403 100644 --- a/website/site/content/docs/dev/adr/0009_convert_office_docs.md +++ b/website/site/content/docs/dev/adr/0009_convert_office_docs.md @@ -16,7 +16,7 @@ Since Docspell is free software, the tools must also be free. # Considered Options * [Apache POI](https://poi.apache.org) together with - [this](https://search.maven.org/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6/jar) + [this](https://central.sonatype.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf/1.0.6) library * [pandoc](https://pandoc.org/) external command * [abiword](https://www.abisource.com/) external command