Add support for eml (rfc822 email) files

This commit is contained in:
Eike Kettner
2020-03-19 22:42:40 +01:00
parent 4ed7a137f7
commit 6b1156182c
4 changed files with 142 additions and 25 deletions

View File

@ -0,0 +1,62 @@
package docspell.joex.mail
import cats.effect._
import cats.implicits._
import fs2.{Pipe, Stream}
import emil.{MimeType => _, _}
import emil.javamail.syntax._
import cats.Applicative
import docspell.common._
/** Helpers for turning a serialized e-mail into a stream of [[Binary]]
  * entries: one for the mail body (if it is not empty) followed by one
  * per attachment.
  */
object ReadMail {

  /** Deserializes the given string into a `Mail`. */
  def read[F[_]: Sync](str: String): F[Mail[F]] =
    Mail.deserialize(str)

  /** A pipe that consumes the bytes of an e-mail and emits its parts.
    *
    * A debug message is logged first, then the bytes are converted via
    * [[bytesToMail]] and every resulting mail is expanded with
    * [[mailToEntries]].
    */
  def readBytesP[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Binary[F]] = { bytes =>
    val announce = Stream.eval(logger.debug("Converting e-mail into its parts"))
    val parts    = bytesToMail(bytes).flatMap(mail => mailToEntries[F](logger)(mail))
    announce.flatMap(_ => parts)
  }

  /** Decodes the bytes as UTF-8, concatenates all decoded chunks into a
    * single string and deserializes that string with [[read]].
    */
  def bytesToMail[F[_]: Sync](data: Stream[F, Byte]): Stream[F, Mail[F]] = {
    val wholeText: Stream[F, String] =
      data.through(fs2.text.utf8Decode).foldMonoid
    wholeText.evalMap(content => read[F](content))
  }

  /** Emits the parts of `mail` as binaries.
    *
    * After logging the number of attachments and the kind of body, the
    * body entry (if any) is emitted first, followed by all attachments.
    * An attachment without a filename is named "noname".
    */
  def mailToEntries[F[_]: Applicative](
      logger: Logger[F]
  )(mail: Mail[F]): Stream[F, Binary[F]] = {
    val summary =
      s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}"

    val bodyPart: Stream[F, Binary[F]] =
      Stream.eval(bodyEntry(mail.body)).unNone

    val attachmentParts: Stream[F, Binary[F]] =
      Stream
        .emits(mail.attachments.all)
        .map { att =>
          val name = att.filename.getOrElse("noname")
          Binary(name, att.mimeType.toDocspell, att.content)
        }

    Stream.eval(logger.debug(summary)).flatMap(_ => bodyPart ++ attachmentParts)
  }

  /** Converts emil's mime type into the docspell `MimeType`. */
  implicit class MimeTypeConv(m: emil.MimeType) {
    def toDocspell: MimeType =
      MimeType(m.primary, m.sub)
  }

  /** Converts the mail body into an optional binary.
    *
    * An empty body yields `None`, a text body yields "mail.txt" and an
    * html body yields "mail.html". If both html and text are present,
    * only the html part is used.
    */
  private def bodyEntry[F[_]: Applicative](body: MailBody[F]): F[Option[Binary[F]]] =
    body.fold(
      _ => Option.empty[Binary[F]].pure[F],
      plain => plain.text.map(content => Binary.text[F]("mail.txt", content).some),
      markup => markup.html.map(content => Binary.html[F]("mail.html", content).some),
      mixed => mixed.html.map(content => Binary.html[F]("mail.html", content).some)
    )

  /** A short label describing the kind of body, used for logging only. */
  private def bodyType[F[_]](body: MailBody[F]): String = {
    val empty       = "empty-body"
    val text        = "text-body"
    val html        = "html-body"
    val textAndHtml = "text-and-html-body"
    body.fold(_ => empty, _ => text, _ => html, _ => textAndHtml)
  }
}

View File

@ -7,6 +7,7 @@ import cats.effect._
import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.joex.mail._
import docspell.joex.scheduler._
import docspell.store.records._
import docspell.files.Zip
@ -74,6 +75,11 @@ object ExtractArchive {
extractZip(ctx, archive)(ra)
.flatTap(_ => cleanupParents(ctx, ra, archive))
case Mimetype("message", "rfc822", _) =>
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
extractMail(ctx, archive)(ra)
.flatTap(_ => cleanupParents(ctx, ra, archive))
case _ =>
ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
Extracted.noArchive(ra).pure[F]
@ -114,30 +120,56 @@ object ExtractArchive {
zipData
.through(Zip.unzipP[F](8192, ctx.blocker))
.flatMap { entry =>
val mimeHint = MimetypeHint.filename(entry.name)
val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint)
Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. Storing as attachment.")) >>
fileMeta.evalMap { fm =>
Ident.randomId.map { id =>
val nra = RAttachment(
id,
ra.itemId,
Ident.unsafe(fm.id),
0, //position is updated afterwards
ra.created,
Option(entry.name).map(_.trim).filter(_.nonEmpty)
)
val aa = archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = id)
Extracted.of(nra, aa)
}
}
}
.flatMap(handleEntry(ctx, ra, archive))
.foldMonoid
.compile
.lastOrError
}
/** Splits the e-mail stored behind `ra` into its parts.
  *
  * The file data referenced by `ra.fileId` is loaded from the store and
  * piped through `ReadMail.readBytesP`. Every resulting entry is passed
  * to `handleEntry`, and all results are combined into one `Extracted`.
  */
def extractMail[F[_]: Sync](
    ctx: Context[F, _],
    archive: Option[RAttachmentArchive]
)(ra: RAttachment): F[Extracted] = {
  val files = ctx.store.bitpeace

  // the raw bytes of the stored e-mail file
  val mailBytes =
    files
      .get(ra.fileId.id)
      .unNoneTerminate
      .through(files.fetchData2(RangeDef.all))

  val mailParts = mailBytes.through(ReadMail.readBytesP[F](ctx.logger))

  mailParts
    .flatMap(part => handleEntry(ctx, ra, archive)(part))
    .foldMonoid
    .compile
    .lastOrError
}
/** Stores a single extracted entry as a new file and describes it as a
  * new attachment of the item that `ra` belongs to.
  *
  * The mime type hint is built from the entry's file name and its
  * advertised mime type. The new attachment gets a random id, reuses
  * `ra.itemId` and `ra.created`, and is named after the entry unless
  * that name is empty after trimming. The archive record is `archive`
  * (or one derived from `ra` if absent) with its id set to the new
  * attachment id.
  */
def handleEntry[F[_]: Sync](
    ctx: Context[F, _],
    ra: RAttachment,
    archive: Option[RAttachmentArchive]
)(
    entry: Binary[F]
): Stream[F, Extracted] = {
  val hint =
    MimetypeHint.filename(entry.name).withAdvertised(entry.mime.asString)
  val stored = ctx.store.bitpeace.saveNew(entry.data, 8192, hint)

  val announce =
    Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. Storing as attachment."))

  val results = stored.evalMap { meta =>
    for {
      newId <- Ident.randomId
    } yield {
      val attachment = RAttachment(
        newId,
        ra.itemId,
        Ident.unsafe(meta.id),
        0, // position is updated afterwards
        ra.created,
        Option(entry.name).map(_.trim).filter(_.nonEmpty)
      )
      val archiveRecord =
        archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = newId)
      Extracted.of(attachment, archiveRecord)
    }
  }

  announce.flatMap(_ => results)
}
def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = {
val insert = CreateItem.insertAttachment(ctx)(ra)
for {