mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 10:29:34 +00:00
Add attachments-only filter to uploads
When uploading a file that is an e-mail, this option allows skipping the mail body while the file is being processed.
This commit is contained in:
parent
bb8a6c054b
commit
751fa3da5a
@ -73,7 +73,8 @@ object OUpload {
|
||||
skipDuplicates: Boolean,
|
||||
fileFilter: Glob,
|
||||
tags: List[String],
|
||||
language: Option[Language]
|
||||
language: Option[Language],
|
||||
attachmentsOnly: Option[Boolean]
|
||||
)
|
||||
|
||||
case class UploadData[F[_]](
|
||||
@ -150,7 +151,8 @@ object OUpload {
|
||||
data.meta.skipDuplicates,
|
||||
data.meta.fileFilter.some,
|
||||
data.meta.tags.some,
|
||||
false
|
||||
false,
|
||||
data.meta.attachmentsOnly
|
||||
)
|
||||
args =
|
||||
if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f)))
|
||||
|
@ -51,7 +51,8 @@ object ProcessItemArgs {
|
||||
skipDuplicate: Boolean,
|
||||
fileFilter: Option[Glob],
|
||||
tags: Option[List[String]],
|
||||
reprocess: Boolean
|
||||
reprocess: Boolean,
|
||||
attachmentsOnly: Option[Boolean]
|
||||
)
|
||||
|
||||
object ProcessMeta {
|
||||
|
@ -44,7 +44,9 @@ case class ScanMailboxArgs(
|
||||
// the language for extraction and analysis
|
||||
language: Option[Language],
|
||||
// apply additional filter to all mails or only imported
|
||||
postHandleAll: Option[Boolean]
|
||||
postHandleAll: Option[Boolean],
|
||||
// Exclude the mail body when importing
|
||||
attachmentsOnly: Option[Boolean]
|
||||
)
|
||||
|
||||
object ScanMailboxArgs {
|
||||
|
@ -23,9 +23,12 @@ object ReadMail {
|
||||
|
||||
def readBytesP[F[_]: Async](
|
||||
logger: Logger[F],
|
||||
glob: Glob
|
||||
glob: Glob,
|
||||
attachmentsOnly: Boolean
|
||||
): Pipe[F, Byte, Binary[F]] =
|
||||
_.through(bytesToMail(logger)).flatMap(mailToEntries[F](logger, glob))
|
||||
_.through(bytesToMail(logger)).flatMap(
|
||||
mailToEntries[F](logger, glob, attachmentsOnly)
|
||||
)
|
||||
|
||||
def bytesToMail[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Mail[F]] =
|
||||
s =>
|
||||
@ -34,10 +37,30 @@ object ReadMail {
|
||||
|
||||
def mailToEntries[F[_]: Async](
|
||||
logger: Logger[F],
|
||||
glob: Glob
|
||||
glob: Glob,
|
||||
attachmentsOnly: Boolean
|
||||
)(mail: Mail[F]): Stream[F, Binary[F]] =
|
||||
Stream.eval(
|
||||
logger.debug(
|
||||
s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}"
|
||||
)
|
||||
) >>
|
||||
(makeBodyEntry(logger, glob, attachmentsOnly)(mail) ++
|
||||
Stream
|
||||
.eval(TnefExtract.replace(mail))
|
||||
.flatMap(m => Stream.emits(m.attachments.all))
|
||||
.filter(a => a.filename.exists(glob.matches(caseSensitive = false)))
|
||||
.map(a =>
|
||||
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
|
||||
))
|
||||
|
||||
private def makeBodyEntry[F[_]: Async](
|
||||
logger: Logger[F],
|
||||
glob: Glob,
|
||||
attachmentsOnly: Boolean
|
||||
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
||||
val bodyEntry: F[Option[Binary[F]]] =
|
||||
if (mail.body.isEmpty) (None: Option[Binary[F]]).pure[F]
|
||||
if (mail.body.isEmpty || attachmentsOnly) (None: Option[Binary[F]]).pure[F]
|
||||
else {
|
||||
val markdownCfg = MarkdownConfig.defaultConfig
|
||||
HtmlBodyView(
|
||||
@ -49,22 +72,14 @@ object ReadMail {
|
||||
).map(makeHtmlBinary[F] _).map(b => Some(b))
|
||||
}
|
||||
|
||||
Stream.eval(
|
||||
logger.debug(
|
||||
s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}"
|
||||
)
|
||||
) >>
|
||||
(Stream
|
||||
.eval(bodyEntry)
|
||||
.flatMap(e => Stream.emits(e.toSeq))
|
||||
.filter(a => glob.matches(caseSensitive = false)(a.name)) ++
|
||||
for {
|
||||
_ <- Stream.eval(logger.debug(s"Import attachments only: $attachmentsOnly"))
|
||||
bin <-
|
||||
Stream
|
||||
.eval(TnefExtract.replace(mail))
|
||||
.flatMap(m => Stream.emits(m.attachments.all))
|
||||
.filter(a => a.filename.exists(glob.matches(caseSensitive = false)))
|
||||
.map(a =>
|
||||
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
|
||||
))
|
||||
.eval(bodyEntry)
|
||||
.flatMap(e => Stream.emits(e.toSeq))
|
||||
.filter(a => glob.matches(caseSensitive = false)(a.name))
|
||||
} yield bin
|
||||
}
|
||||
|
||||
private def makeHtmlBinary[F[_]](cnt: BodyContent): Binary[F] =
|
||||
|
@ -161,7 +161,8 @@ object ExtractArchive {
|
||||
.unNoneTerminate
|
||||
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
|
||||
|
||||
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
|
||||
val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all)
|
||||
val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false)
|
||||
ctx.logger.debug(s"Filtering email attachments with '${glob.asString}'") *>
|
||||
email
|
||||
.through(ReadMail.bytesToMail[F](ctx.logger))
|
||||
@ -174,7 +175,7 @@ object ExtractArchive {
|
||||
} yield s
|
||||
|
||||
ReadMail
|
||||
.mailToEntries(ctx.logger, glob)(mail)
|
||||
.mailToEntries(ctx.logger, glob, attachOnly)(mail)
|
||||
.zipWithIndex
|
||||
.flatMap(handleEntry(ctx, ra, pos, archive, mId)) ++ Stream.eval(givenMeta)
|
||||
}
|
||||
|
@ -114,7 +114,8 @@ object ReProcessItem {
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
true
|
||||
true,
|
||||
None // attachOnly (not used when reprocessing attachments)
|
||||
),
|
||||
Nil
|
||||
).pure[F]
|
||||
|
@ -300,7 +300,8 @@ object ScanMailboxTask {
|
||||
true,
|
||||
args.fileFilter.getOrElse(Glob.all),
|
||||
args.tags.getOrElse(Nil),
|
||||
args.language
|
||||
args.language,
|
||||
args.attachmentsOnly
|
||||
)
|
||||
data = OUpload.UploadData(
|
||||
multiple = false,
|
||||
|
@ -4336,6 +4336,10 @@ components:
|
||||
format: language
|
||||
postHandleAll:
|
||||
type: boolean
|
||||
attachmentsOnly:
|
||||
type: boolean
|
||||
description: |
|
||||
Import only the attachments of e-mails and discard the body
|
||||
|
||||
ImapSettingsList:
|
||||
description: |
|
||||
@ -5282,6 +5286,14 @@ components:
|
||||
description: |
|
||||
The `language` of the document may be specified, otherwise
|
||||
the one from settings is used.
|
||||
attachmentsOnly:
|
||||
type: boolean
|
||||
default: false
|
||||
description: |
|
||||
Only applies to e-mail files. If `true` then only
|
||||
attachments of the e-mail are imported and the e-mail body
|
||||
is discarded. E-mails that don't have any attachments are
|
||||
skipped.
|
||||
|
||||
Collective:
|
||||
description: |
|
||||
|
@ -337,7 +337,8 @@ trait Conversions {
|
||||
m.skipDuplicates.getOrElse(false),
|
||||
m.fileFilter.getOrElse(Glob.all),
|
||||
m.tags.map(_.items).getOrElse(Nil),
|
||||
m.language
|
||||
m.language,
|
||||
m.attachmentsOnly
|
||||
)
|
||||
)
|
||||
)
|
||||
@ -345,7 +346,17 @@ trait Conversions {
|
||||
.getOrElse(
|
||||
(
|
||||
true,
|
||||
UploadMeta(None, sourceName, None, validFileTypes, false, Glob.all, Nil, None)
|
||||
UploadMeta(
|
||||
None,
|
||||
sourceName,
|
||||
None,
|
||||
validFileTypes,
|
||||
false,
|
||||
Glob.all,
|
||||
Nil,
|
||||
None,
|
||||
None
|
||||
)
|
||||
)
|
||||
.pure[F]
|
||||
)
|
||||
|
@ -125,7 +125,8 @@ object ScanMailboxRoutes {
|
||||
settings.tags.map(_.items),
|
||||
settings.subjectFilter,
|
||||
settings.language,
|
||||
settings.postHandleAll
|
||||
settings.postHandleAll,
|
||||
settings.attachmentsOnly
|
||||
)
|
||||
)
|
||||
)
|
||||
@ -159,6 +160,7 @@ object ScanMailboxRoutes {
|
||||
task.args.fileFilter,
|
||||
task.args.subjectFilter,
|
||||
task.args.language,
|
||||
task.args.postHandleAll
|
||||
task.args.postHandleAll,
|
||||
task.args.attachmentsOnly
|
||||
)
|
||||
}
|
||||
|
@ -52,6 +52,7 @@ specified via a JSON structure in a part with name `meta`:
|
||||
, tags: Maybe StringList
|
||||
, fileFilter: Maybe String
|
||||
, language: Maybe String
|
||||
, attachmentsOnly: Maybe Bool
|
||||
}
|
||||
```
|
||||
|
||||
@ -90,6 +91,10 @@ specified via a JSON structure in a part with name `meta`:
|
||||
- The `language` is used for processing the document(s) contained in
|
||||
the request. If not specified the collective's default language is
|
||||
used.
|
||||
- The `attachmentsOnly` property only applies to e-mail files (usually
|
||||
`*.eml`). If this is `true`, then the e-mail body is discarded and
|
||||
only the attachments are imported. An e-mail without any attachments
|
||||
is therefore skipped.
|
||||
|
||||
|
||||
# Endpoints
|
||||
|
Loading…
x
Reference in New Issue
Block a user