mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Extract tnef attachments and fix incomplete html
wkhtmltopdf requires the content encoding to be set correctly in the document.
This commit is contained in:
parent
0b80572664
commit
e305b46708
@ -204,6 +204,7 @@ val store = project.in(file("modules/store")).
|
|||||||
Dependencies.flyway ++
|
Dependencies.flyway ++
|
||||||
Dependencies.loggingApi ++
|
Dependencies.loggingApi ++
|
||||||
Dependencies.emil ++
|
Dependencies.emil ++
|
||||||
|
Dependencies.emilDoobie ++
|
||||||
Dependencies.calev
|
Dependencies.calev
|
||||||
).dependsOn(common)
|
).dependsOn(common)
|
||||||
|
|
||||||
@ -293,6 +294,7 @@ val joex = project.in(file("modules/joex")).
|
|||||||
Dependencies.http4s ++
|
Dependencies.http4s ++
|
||||||
Dependencies.circe ++
|
Dependencies.circe ++
|
||||||
Dependencies.pureconfig ++
|
Dependencies.pureconfig ++
|
||||||
|
Dependencies.emilTnef ++
|
||||||
Dependencies.loggingApi ++
|
Dependencies.loggingApi ++
|
||||||
Dependencies.logging.map(_ % Runtime),
|
Dependencies.logging.map(_ % Runtime),
|
||||||
addCompilerPlugin(Dependencies.kindProjectorPlugin),
|
addCompilerPlugin(Dependencies.kindProjectorPlugin),
|
||||||
|
@ -5,17 +5,21 @@ import cats.implicits._
|
|||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
import emil.{MimeType => _, _}
|
import emil.{MimeType => _, _}
|
||||||
import emil.javamail.syntax._
|
import emil.javamail.syntax._
|
||||||
import cats.Applicative
|
import emil.tnef.TnefExtract
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import java.nio.charset.StandardCharsets
|
import java.nio.charset.StandardCharsets
|
||||||
|
import java.nio.charset.Charset
|
||||||
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
object ReadMail {
|
object ReadMail {
|
||||||
|
|
||||||
def read[F[_]: Sync](str: String): F[Mail[F]] =
|
def read[F[_]: Sync](str: String): F[Mail[F]] =
|
||||||
Mail.deserialize(str)
|
Mail.deserialize(str)
|
||||||
|
|
||||||
def readBytesP[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Binary[F]] =
|
def readBytesP[F[_]: ConcurrentEffect: ContextShift](
|
||||||
|
logger: Logger[F]
|
||||||
|
): Pipe[F, Byte, Binary[F]] =
|
||||||
s =>
|
s =>
|
||||||
Stream.eval(logger.debug(s"Converting e-mail into its parts")) >>
|
Stream.eval(logger.debug(s"Converting e-mail into its parts")) >>
|
||||||
bytesToMail(s).flatMap(mailToEntries[F](logger))
|
bytesToMail(s).flatMap(mailToEntries[F](logger))
|
||||||
@ -23,14 +27,14 @@ object ReadMail {
|
|||||||
def bytesToMail[F[_]: Sync](data: Stream[F, Byte]): Stream[F, Mail[F]] =
|
def bytesToMail[F[_]: Sync](data: Stream[F, Byte]): Stream[F, Mail[F]] =
|
||||||
data.through(Binary.decode(StandardCharsets.US_ASCII)).foldMonoid.evalMap(read[F])
|
data.through(Binary.decode(StandardCharsets.US_ASCII)).foldMonoid.evalMap(read[F])
|
||||||
|
|
||||||
def mailToEntries[F[_]: Applicative](
|
def mailToEntries[F[_]: ConcurrentEffect: ContextShift](
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
)(mail: Mail[F]): Stream[F, Binary[F]] = {
|
||||||
val bodyEntry: F[Option[Binary[F]]] = mail.body.fold(
|
val bodyEntry: F[Option[Binary[F]]] = mail.body.fold(
|
||||||
_ => (None: Option[Binary[F]]).pure[F],
|
_ => (None: Option[Binary[F]]).pure[F],
|
||||||
txt => txt.text.map(c => Binary.text[F]("mail.txt", c.bytes, c.charsetOrUtf8).some),
|
txt => txt.text.map(c => Binary.text[F]("mail.txt", c.bytes, c.charsetOrUtf8).some),
|
||||||
html => html.html.map(c => Binary.html[F]("mail.html", c.bytes, c.charsetOrUtf8).some),
|
html => html.html.map(c => makeHtmlBinary(c).some),
|
||||||
both => both.html.map(c => Binary.html[F]("mail.html", c.bytes, c.charsetOrUtf8).some)
|
both => both.html.map(c => makeHtmlBinary(c).some)
|
||||||
)
|
)
|
||||||
|
|
||||||
Stream.eval(
|
Stream.eval(
|
||||||
@ -42,12 +46,33 @@ object ReadMail {
|
|||||||
.eval(bodyEntry)
|
.eval(bodyEntry)
|
||||||
.flatMap(e => Stream.emits(e.toSeq)) ++
|
.flatMap(e => Stream.emits(e.toSeq)) ++
|
||||||
Stream
|
Stream
|
||||||
.emits(mail.attachments.all)
|
.eval(TnefExtract.replace(mail))
|
||||||
|
.flatMap(m => Stream.emits(m.attachments.all))
|
||||||
.map(a =>
|
.map(a =>
|
||||||
Binary(a.filename.getOrElse("noname"), a.mimeType.toDocspell, a.content)
|
Binary(a.filename.getOrElse("noname"), a.mimeType.toDocspell, a.content)
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Builds the `mail.html` binary for an HTML mail body.
  *
  * The body is first passed through `fixHtml`; the resulting bytes and
  * charset (falling back to UTF-8) are handed to `Binary.html`.
  */
private def makeHtmlBinary[F[_]](cnt: BodyContent): Binary[F] = {
  val fixed = fixHtml(cnt)
  val charset = fixed.charsetOrUtf8
  Binary.html[F]("mail.html", fixed.bytes, charset)
}
|
||||||
|
|
||||||
|
/** Turns an HTML fragment into a complete document that declares its charset.
  *
  * If the trimmed, lower-cased body already starts with `<html` or with a
  * `<!doctype` declaration it is treated as a complete document and returned
  * unchanged. Otherwise the body is wrapped between `htmlHeader` (which
  * carries a `<meta charset>` for the body's charset, UTF-8 if unknown) and
  * `htmlHeaderEnd`.
  *
  * @param cnt the HTML body of a mail, either as string or as bytes
  * @return the unchanged body if it looks complete, else the wrapped body
  */
private def fixHtml(cnt: BodyContent): BodyContent = {
  val str = cnt.asString.trim.toLowerCase
  // A leading doctype marks a complete document just like a leading <html>
  // tag; wrapping such a body again would nest a second html/head/body shell.
  val isComplete = str.startsWith("<html") || str.startsWith("<!doctype")
  if (isComplete) cnt
  else {
    // Only build the header when the body really needs to be wrapped.
    val head = htmlHeader(cnt.charsetOrUtf8)
    cnt match {
      case BodyContent.StringContent(s) =>
        BodyContent(head + s + htmlHeaderEnd)
      case BodyContent.ByteContent(bv, cs) =>
        // Encode the wrapper with the same charset as the body bytes so the
        // concatenated byte sequence stays consistently encoded.
        val begin = ByteVector.view(head.getBytes(cnt.charsetOrUtf8))
        val end = ByteVector.view(htmlHeaderEnd.getBytes(cnt.charsetOrUtf8))
        BodyContent(begin ++ bv ++ end, cs)
    }
  }
}
|
||||||
|
|
||||||
|
|
||||||
implicit class MimeTypeConv(m: emil.MimeType) {
|
implicit class MimeTypeConv(m: emil.MimeType) {
|
||||||
def toDocspell: MimeType =
|
def toDocspell: MimeType =
|
||||||
MimeType(m.primary, m.sub, m.params)
|
MimeType(m.primary, m.sub, m.params)
|
||||||
@ -60,4 +85,16 @@ object ReadMail {
|
|||||||
_ => "html-body",
|
_ => "html-body",
|
||||||
_ => "text-and-html-body"
|
_ => "text-and-html-body"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/** Opening markup of a minimal HTML document declaring the given charset.
  *
  * The result ends right after the opening `<body>` tag (plus a newline), so
  * the actual content and `htmlHeaderEnd` can be appended to it.
  *
  * @param cs the charset to announce in the `<meta charset>` element
  */
private def htmlHeader(cs: Charset): String =
  // stripMargin removes the leading `|` markers and indentation; without it
  // they would be emitted as literal text in front of the tags.
  s"""<!DOCTYPE html>
     |<html>
     |<head>
     |<meta charset="${cs.name}"/>
     |</head>
     |<body>
     |""".stripMargin
|
||||||
|
|
||||||
|
/** Closing markup matching the document opened by `htmlHeader`. */
private def htmlHeaderEnd: String = {
  val closeBody = "</body>"
  val closeHtml = "</html>"
  closeBody + closeHtml
}
|
||||||
}
|
}
|
||||||
|
@ -126,7 +126,7 @@ object ExtractArchive {
|
|||||||
.lastOrError
|
.lastOrError
|
||||||
}
|
}
|
||||||
|
|
||||||
def extractMail[F[_]: Sync](
|
def extractMail[F[_]: ConcurrentEffect: ContextShift](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
archive: Option[RAttachmentArchive]
|
archive: Option[RAttachmentArchive]
|
||||||
)(ra: RAttachment): F[Extracted] = {
|
)(ra: RAttachment): F[Extracted] = {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user