From 751fa3da5a41c0707810c746fac9c163ae99e2a4 Mon Sep 17 00:00:00 2001 From: eikek Date: Sat, 21 Aug 2021 13:37:17 +0200 Subject: [PATCH 1/2] Add attachments-only filter to uploads When uploading a file which is an e-mail, this option allows skipping the mail body when the file is being processed. --- .../scala/docspell/backend/ops/OUpload.scala | 6 ++- .../docspell/common/ProcessItemArgs.scala | 3 +- .../docspell/common/ScanMailboxArgs.scala | 4 +- .../scala/docspell/joex/mail/ReadMail.scala | 53 ++++++++++++------- .../joex/process/ExtractArchive.scala | 5 +- .../docspell/joex/process/ReProcessItem.scala | 3 +- .../joex/scanmailbox/ScanMailboxTask.scala | 3 +- .../src/main/resources/docspell-openapi.yml | 12 +++++ .../restserver/conv/Conversions.scala | 15 +++++- .../restserver/routes/ScanMailboxRoutes.scala | 6 ++- website/site/content/docs/api/upload.md | 5 ++ 11 files changed, 84 insertions(+), 31 deletions(-) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index 2e508a55..73cde778 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -73,7 +73,8 @@ object OUpload { skipDuplicates: Boolean, fileFilter: Glob, tags: List[String], - language: Option[Language] + language: Option[Language], + attachmentsOnly: Option[Boolean] ) case class UploadData[F[_]]( @@ -150,7 +151,8 @@ object OUpload { data.meta.skipDuplicates, data.meta.fileFilter.some, data.meta.tags.some, - false + false, + data.meta.attachmentsOnly ) args = if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f))) diff --git a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala index def1cfd6..3491f502 100644 --- a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala +++ 
b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala @@ -51,7 +51,8 @@ object ProcessItemArgs { skipDuplicate: Boolean, fileFilter: Option[Glob], tags: Option[List[String]], - reprocess: Boolean + reprocess: Boolean, + attachmentsOnly: Option[Boolean] ) object ProcessMeta { diff --git a/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala b/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala index 4b00f74b..7f9e644f 100644 --- a/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala @@ -44,7 +44,9 @@ case class ScanMailboxArgs( // the language for extraction and analysis language: Option[Language], // apply additional filter to all mails or only imported - postHandleAll: Option[Boolean] + postHandleAll: Option[Boolean], + // Exclude the mail body when importing + attachmentsOnly: Option[Boolean] ) object ScanMailboxArgs { diff --git a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala index 84566343..f81be825 100644 --- a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala +++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala @@ -23,9 +23,12 @@ object ReadMail { def readBytesP[F[_]: Async]( logger: Logger[F], - glob: Glob + glob: Glob, + attachmentsOnly: Boolean ): Pipe[F, Byte, Binary[F]] = - _.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger, glob)) + _.through(bytesToMail(logger)).flatMap( + mailToEntries[F](logger, glob, attachmentsOnly) + ) def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] = s => @@ -34,10 +37,30 @@ object ReadMail { def mailToEntries[F[_]: Async]( logger: Logger[F], - glob: Glob + glob: Glob, + attachmentsOnly: Boolean + )(mail: Mail[F]): Stream[F, Binary[F]] = + Stream.eval( + logger.debug( + s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}" + ) + ) >> 
+ (makeBodyEntry(logger, glob, attachmentsOnly)(mail) ++ + Stream + .eval(TnefExtract.replace(mail)) + .flatMap(m => Stream.emits(m.attachments.all)) + .filter(a => a.filename.exists(glob.matches(caseSensitive = false))) + .map(a => + Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content) + )) + + private def makeBodyEntry[F[_]: Async]( + logger: Logger[F], + glob: Glob, + attachmentsOnly: Boolean )(mail: Mail[F]): Stream[F, Binary[F]] = { val bodyEntry: F[Option[Binary[F]]] = - if (mail.body.isEmpty) (None: Option[Binary[F]]).pure[F] + if (mail.body.isEmpty || attachmentsOnly) (None: Option[Binary[F]]).pure[F] else { val markdownCfg = MarkdownConfig.defaultConfig HtmlBodyView( @@ -49,22 +72,14 @@ object ReadMail { ).map(makeHtmlBinary[F] _).map(b => Some(b)) } - Stream.eval( - logger.debug( - s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}" - ) - ) >> - (Stream - .eval(bodyEntry) - .flatMap(e => Stream.emits(e.toSeq)) - .filter(a => glob.matches(caseSensitive = false)(a.name)) ++ + for { + _ <- Stream.eval(logger.debug(s"Import attachments only: $attachmentsOnly")) + bin <- Stream - .eval(TnefExtract.replace(mail)) - .flatMap(m => Stream.emits(m.attachments.all)) - .filter(a => a.filename.exists(glob.matches(caseSensitive = false))) - .map(a => - Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content) - )) + .eval(bodyEntry) + .flatMap(e => Stream.emits(e.toSeq)) + .filter(a => glob.matches(caseSensitive = false)(a.name)) + } yield bin } private def makeHtmlBinary[F[_]](cnt: BodyContent): Binary[F] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 931140c9..e07064fe 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -161,7 +161,8 @@ object ExtractArchive { .unNoneTerminate 
.through(ctx.store.bitpeace.fetchData2(RangeDef.all)) - val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) + val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) + val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) ctx.logger.debug(s"Filtering email attachments with '${glob.asString}'") *> email .through(ReadMail.bytesToMail[F](ctx.logger)) @@ -174,7 +175,7 @@ object ExtractArchive { } yield s ReadMail - .mailToEntries(ctx.logger, glob)(mail) + .mailToEntries(ctx.logger, glob, attachOnly)(mail) .zipWithIndex .flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index 26c84b12..c2e34e4d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -114,7 +114,8 @@ object ReProcessItem { false, None, None, - true + true, + None // attachOnly (not used when reprocessing attachments) ), Nil ).pure[F] diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala index 2c8125e4..c8e183a9 100644 --- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala @@ -300,7 +300,8 @@ object ScanMailboxTask { true, args.fileFilter.getOrElse(Glob.all), args.tags.getOrElse(Nil), - args.language + args.language, + args.attachmentsOnly ) data = OUpload.UploadData( multiple = false, diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index c686f217..f8761e55 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -4336,6 +4336,10 @@ 
components: format: language postHandleAll: type: boolean + attachmentsOnly: + type: boolean + description: | + Import only the attachments of e-mails and discard the body ImapSettingsList: description: | @@ -5282,6 +5286,14 @@ components: description: | The `language` of the document may be specified, otherwise the one from settings is used. + attachmentsOnly: + type: boolean + default: false + description: | + Only applies to e-mail files. If `true` then only + attachments of the e-mail are imported and the e-mail body + is discarded. E-mails that don't have any attachments are + skipped. Collective: description: | diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index c4f5fd64..d7cf8f51 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -337,7 +337,8 @@ trait Conversions { m.skipDuplicates.getOrElse(false), m.fileFilter.getOrElse(Glob.all), m.tags.map(_.items).getOrElse(Nil), - m.language + m.language, + m.attachmentsOnly ) ) ) @@ -345,7 +346,17 @@ trait Conversions { .getOrElse( ( true, - UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil, None) + UploadMeta( + None, + sourceName, + None, + validFileTypes, + false, + Glob.all, + Nil, + None, + None + ) ) .pure[F] ) diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala index c01503a4..623bb695 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala @@ -125,7 +125,8 @@ object ScanMailboxRoutes { settings.tags.map(_.items), settings.subjectFilter, settings.language, - settings.postHandleAll + 
settings.postHandleAll, + settings.attachmentsOnly ) ) ) @@ -159,6 +160,7 @@ object ScanMailboxRoutes { task.args.fileFilter, task.args.subjectFilter, task.args.language, - task.args.postHandleAll + task.args.postHandleAll, + task.args.attachmentsOnly ) } diff --git a/website/site/content/docs/api/upload.md b/website/site/content/docs/api/upload.md index 34eda0e9..133d2423 100644 --- a/website/site/content/docs/api/upload.md +++ b/website/site/content/docs/api/upload.md @@ -52,6 +52,7 @@ specified via a JSON structure in a part with name `meta`: , tags: Maybe StringList , fileFilter: Maybe String , language: Maybe String +, attachmentsOnly: Maybe Bool } ``` @@ -90,6 +91,10 @@ specified via a JSON structure in a part with name `meta`: - The `language` is used for processing the document(s) contained in the request. If not specified the collective's default language is used. +- The `attachmentsOnly` property only applies to e-mail files (usually + `*.eml`). If this is `true`, then the e-mail body is discarded and + only the attachments are imported. An e-mail without any attachments + is therefore skipped. 
# Endpoints From 0a0dd80bdccd89d19b500d27d6391421897799c2 Mon Sep 17 00:00:00 2001 From: eikek Date: Sat, 21 Aug 2021 13:48:57 +0200 Subject: [PATCH 2/2] Add new checkbox to scan mailbox form --- .../src/main/elm/Comp/ScanMailboxForm.elm | 23 +++++++++++++++++++ .../elm/Messages/Comp/ScanMailboxForm.elm | 6 +++++ 2 files changed, 29 insertions(+) diff --git a/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm b/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm index d9a5c3bf..d6e749e5 100644 --- a/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm +++ b/modules/webapp/src/main/elm/Comp/ScanMailboxForm.elm @@ -83,6 +83,7 @@ type alias Model = , language : Maybe Language , postHandleAll : Bool , summary : Maybe String + , attachmentsOnly : Bool , openTabs : Set String } @@ -166,6 +167,7 @@ type Msg | TogglePostHandleAll | ToggleAkkordionTab String | SetSummary String + | ToggleAttachmentsOnly initWith : Flags -> ScanMailboxSettings -> ( Model, Cmd Msg ) @@ -212,6 +214,7 @@ initWith flags s = Comp.FixedDropdown.init Data.Language.all , language = Maybe.andThen Data.Language.fromString s.language , postHandleAll = Maybe.withDefault False s.postHandleAll + , attachmentsOnly = Maybe.withDefault False s.attachmentsOnly , summary = s.summary } , Cmd.batch @@ -260,6 +263,7 @@ init flags = , language = Nothing , postHandleAll = False , summary = Nothing + , attachmentsOnly = False , openTabs = Set.singleton (tabName TabGeneral) } , Cmd.batch @@ -327,6 +331,7 @@ makeSettings model = , language = Maybe.map Data.Language.toIso3 model.language , postHandleAll = Just model.postHandleAll , summary = model.summary + , attachmentsOnly = Just model.attachmentsOnly } in Result.map3 make conn schedule_ infolders @@ -697,6 +702,12 @@ update flags msg model = , Cmd.none ) + ToggleAttachmentsOnly -> + ( { model | attachmentsOnly = not model.attachmentsOnly } + , NoAction + , Cmd.none + ) + ToggleAkkordionTab name -> let tabs = @@ -994,6 +1005,18 @@ viewAdditionalFilter2 texts model 
= [ Markdown.toHtml [] texts.fileFilterInfo ] ] + , div [ class "mb-4" ] + [ MB.viewItem <| + MB.Checkbox + { id = "scanmail-attachments-only" + , value = model.attachmentsOnly + , label = texts.attachmentsOnlyLabel + , tagger = \_ -> ToggleAttachmentsOnly + } + , span [ class "opacity-50 text-sm mt-1" ] + [ Markdown.toHtml [] texts.attachmentsOnlyInfo + ] + ] , div [ class "mb-4" ] diff --git a/modules/webapp/src/main/elm/Messages/Comp/ScanMailboxForm.elm b/modules/webapp/src/main/elm/Messages/Comp/ScanMailboxForm.elm index 85eac9b1..ea0d2860 100644 --- a/modules/webapp/src/main/elm/Messages/Comp/ScanMailboxForm.elm +++ b/modules/webapp/src/main/elm/Messages/Comp/ScanMailboxForm.elm @@ -70,6 +70,8 @@ type alias Texts = , connectionMissing : String , noProcessingFolders : String , invalidCalEvent : String + , attachmentsOnlyLabel : String + , attachmentsOnlyInfo : String } @@ -149,6 +151,8 @@ gb = , connectionMissing = "No E-Mail connections configured. Goto E-Mail Settings to add one." , noProcessingFolders = "No processing folders given." , invalidCalEvent = "The calendar event is not valid." + , attachmentsOnlyLabel = "Only import e-mail attachments" + , attachmentsOnlyInfo = "Discards the e-mail body and only imports the attachments." } @@ -223,4 +227,6 @@ kann hier ein Wert für alle festgelegt werden. Bei 'Automatisch' wird auf den S , connectionMissing = "Keine E-Mail-Verbindung definiert. Gehe zu den E-Mail-Einstellungen und füge eine hinzu." , noProcessingFolders = "Keine Postfachordner ausgewählt." , invalidCalEvent = "Das Kalenderereignis ist ungültig." + , attachmentsOnlyLabel = "Nur Anhänge importieren" + , attachmentsOnlyInfo = "Verwirft den E-Mail Text und importiert nur die Anhänge." }