diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index 2e508a55..73cde778 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -73,7 +73,8 @@ object OUpload { skipDuplicates: Boolean, fileFilter: Glob, tags: List[String], - language: Option[Language] + language: Option[Language], + attachmentsOnly: Option[Boolean] ) case class UploadData[F[_]]( @@ -150,7 +151,8 @@ object OUpload { data.meta.skipDuplicates, data.meta.fileFilter.some, data.meta.tags.some, - false + false, + data.meta.attachmentsOnly ) args = if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f))) diff --git a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala index def1cfd6..3491f502 100644 --- a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala @@ -51,7 +51,8 @@ object ProcessItemArgs { skipDuplicate: Boolean, fileFilter: Option[Glob], tags: Option[List[String]], - reprocess: Boolean + reprocess: Boolean, + attachmentsOnly: Option[Boolean] ) object ProcessMeta { diff --git a/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala b/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala index 4b00f74b..7f9e644f 100644 --- a/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ScanMailboxArgs.scala @@ -44,7 +44,9 @@ case class ScanMailboxArgs( // the language for extraction and analysis language: Option[Language], // apply additional filter to all mails or only imported - postHandleAll: Option[Boolean] + postHandleAll: Option[Boolean], + // Exclude the mail body when importing + attachmentsOnly: Option[Boolean] ) object 
ScanMailboxArgs { diff --git a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala index 84566343..f81be825 100644 --- a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala +++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala @@ -23,9 +23,12 @@ object ReadMail { def readBytesP[F[_]: Async]( logger: Logger[F], - glob: Glob + glob: Glob, + attachmentsOnly: Boolean ): Pipe[F, Byte, Binary[F]] = - _.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger, glob)) + _.through(bytesToMail(logger)).flatMap( + mailToEntries[F](logger, glob, attachmentsOnly) + ) def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] = s => @@ -34,10 +37,30 @@ object ReadMail { def mailToEntries[F[_]: Async]( logger: Logger[F], - glob: Glob + glob: Glob, + attachmentsOnly: Boolean + )(mail: Mail[F]): Stream[F, Binary[F]] = + Stream.eval( + logger.debug( + s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}" + ) + ) >> + (makeBodyEntry(logger, glob, attachmentsOnly)(mail) ++ + Stream + .eval(TnefExtract.replace(mail)) + .flatMap(m => Stream.emits(m.attachments.all)) + .filter(a => a.filename.exists(glob.matches(caseSensitive = false))) + .map(a => + Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content) + )) + + private def makeBodyEntry[F[_]: Async]( + logger: Logger[F], + glob: Glob, + attachmentsOnly: Boolean )(mail: Mail[F]): Stream[F, Binary[F]] = { val bodyEntry: F[Option[Binary[F]]] = - if (mail.body.isEmpty) (None: Option[Binary[F]]).pure[F] + if (mail.body.isEmpty || attachmentsOnly) (None: Option[Binary[F]]).pure[F] else { val markdownCfg = MarkdownConfig.defaultConfig HtmlBodyView( @@ -49,22 +72,14 @@ object ReadMail { ).map(makeHtmlBinary[F] _).map(b => Some(b)) } - Stream.eval( - logger.debug( - s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}" - ) - ) >> - (Stream - .eval(bodyEntry) - .flatMap(e 
=> Stream.emits(e.toSeq)) - .filter(a => glob.matches(caseSensitive = false)(a.name)) ++ + for { + _ <- Stream.eval(logger.debug(s"Import attachments only: $attachmentsOnly")) + bin <- Stream - .eval(TnefExtract.replace(mail)) - .flatMap(m => Stream.emits(m.attachments.all)) - .filter(a => a.filename.exists(glob.matches(caseSensitive = false))) - .map(a => - Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content) - )) + .eval(bodyEntry) + .flatMap(e => Stream.emits(e.toSeq)) + .filter(a => glob.matches(caseSensitive = false)(a.name)) + } yield bin } private def makeHtmlBinary[F[_]](cnt: BodyContent): Binary[F] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 931140c9..e07064fe 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -161,7 +161,8 @@ object ExtractArchive { .unNoneTerminate .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) - val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) + val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) + val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) ctx.logger.debug(s"Filtering email attachments with '${glob.asString}'") *> email .through(ReadMail.bytesToMail[F](ctx.logger)) @@ -174,7 +175,7 @@ object ExtractArchive { } yield s ReadMail - .mailToEntries(ctx.logger, glob)(mail) + .mailToEntries(ctx.logger, glob, attachOnly)(mail) .zipWithIndex .flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala index 26c84b12..c2e34e4d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ReProcessItem.scala @@ -114,7 
+114,8 @@ object ReProcessItem { false, None, None, - true + true, + None // attachOnly (not used when reprocessing attachments) ), Nil ).pure[F] diff --git a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala index 2c8125e4..c8e183a9 100644 --- a/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/scanmailbox/ScanMailboxTask.scala @@ -300,7 +300,8 @@ object ScanMailboxTask { true, args.fileFilter.getOrElse(Glob.all), args.tags.getOrElse(Nil), - args.language + args.language, + args.attachmentsOnly ) data = OUpload.UploadData( multiple = false, diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index c686f217..f8761e55 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -4336,6 +4336,10 @@ components: format: language postHandleAll: type: boolean + attachmentsOnly: + type: boolean + description: | + Import only the attachments of e-mails and discard the body ImapSettingsList: description: | @@ -5282,6 +5286,14 @@ components: description: | The `language` of the document may be specified, otherwise the one from settings is used. + attachmentsOnly: + type: boolean + default: false + description: | + Only applies to e-mail files. If `true` then only + attachments of the e-mail are imported and the e-mail body + is discarded. E-mails that don't have any attachments are + skipped. 
Collective: description: | diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index c4f5fd64..d7cf8f51 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -337,7 +337,8 @@ trait Conversions { m.skipDuplicates.getOrElse(false), m.fileFilter.getOrElse(Glob.all), m.tags.map(_.items).getOrElse(Nil), - m.language + m.language, + m.attachmentsOnly ) ) ) @@ -345,7 +346,17 @@ trait Conversions { .getOrElse( ( true, - UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil, None) + UploadMeta( + None, + sourceName, + None, + validFileTypes, + false, + Glob.all, + Nil, + None, + None + ) ) .pure[F] ) diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala index c01503a4..623bb695 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/ScanMailboxRoutes.scala @@ -125,7 +125,8 @@ object ScanMailboxRoutes { settings.tags.map(_.items), settings.subjectFilter, settings.language, - settings.postHandleAll + settings.postHandleAll, + settings.attachmentsOnly ) ) ) @@ -159,6 +160,7 @@ object ScanMailboxRoutes { task.args.fileFilter, task.args.subjectFilter, task.args.language, - task.args.postHandleAll + task.args.postHandleAll, + task.args.attachmentsOnly ) } diff --git a/website/site/content/docs/api/upload.md b/website/site/content/docs/api/upload.md index 34eda0e9..133d2423 100644 --- a/website/site/content/docs/api/upload.md +++ b/website/site/content/docs/api/upload.md @@ -52,6 +52,7 @@ specified via a JSON structure in a part with name `meta`: , tags: Maybe StringList , fileFilter: Maybe 
String , language: Maybe String +, attachmentsOnly: Maybe Bool } ``` @@ -90,6 +91,10 @@ specified via a JSON structure in a part with name `meta`: - The `language` is used for processing the document(s) contained in the request. If not specified the collective's default language is used. +- The `attachmentsOnly` property only applies to e-mail files (usually + `*.eml`). If this is `true`, then the e-mail body is discarded and + only the attachments are imported. An e-mail without any attachments + is therefore skipped. # Endpoints