Fix encodings for mails with non-utf8 html parts

This commit is contained in:
Eike Kettner 2020-03-24 23:03:18 +01:00
parent 012f86994a
commit 0b80572664
3 changed files with 18 additions and 5 deletions

View File

@ -1,8 +1,9 @@
package docspell.common
import fs2.{Pipe, Stream}
import fs2.{Chunk, Pipe, Stream}
import java.nio.charset.Charset
import java.nio.charset.StandardCharsets
import scodec.bits.ByteVector
final case class Binary[F[_]](name: String, mime: MimeType, data: Stream[F, Byte]) {
@ -25,9 +26,15 @@ object Binary {
/** Creates a plain-text [[Binary]] from a string.
  *
  * Delegates to the `utf8` helper (defined outside this view; presumably it
  * encodes `content` as UTF-8 bytes -- confirm) and then sets the mime type
  * to `MimeType.plain.withUtf8Charset`.
  *
  * @param name    name given to the resulting binary
  * @param content the text content
  */
def text[F[_]](name: String, content: String): Binary[F] =
utf8(name, content).withMime(MimeType.plain.withUtf8Charset)
/** Creates a plain-text [[Binary]] from already-encoded bytes.
  *
  * The bytes are emitted unchanged as a single chunk; no decoding or
  * re-encoding happens here. The charset is only recorded on the mime type
  * via `MimeType.plain.withCharset(cs)`.
  *
  * @param name    name given to the resulting binary
  * @param content the raw bytes of the text
  * @param cs      charset attached to the mime type; assumed to be the
  *                encoding of `content` -- the caller must ensure this
  */
def text[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] =
Binary(name, MimeType.plain.withCharset(cs), Stream.chunk(Chunk.byteVector(content)))
/** Creates an HTML [[Binary]] from a string.
  *
  * Delegates to the `utf8` helper (defined outside this view; presumably it
  * encodes `content` as UTF-8 bytes -- confirm) and then sets the mime type
  * to `MimeType.html.withUtf8Charset`.
  *
  * @param name    name given to the resulting binary
  * @param content the HTML content
  */
def html[F[_]](name: String, content: String): Binary[F] =
utf8(name, content).withMime(MimeType.html.withUtf8Charset)
/** Creates an HTML [[Binary]] from already-encoded bytes.
  *
  * The bytes are emitted unchanged as a single chunk; no decoding or
  * re-encoding happens here. The charset is only recorded on the mime type
  * via `MimeType.html.withCharset(cs)`.
  *
  * @param name    name given to the resulting binary
  * @param content the raw bytes of the HTML document
  * @param cs      charset attached to the mime type; assumed to be the
  *                encoding of `content` -- the caller must ensure this
  */
def html[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] =
Binary(name, MimeType.html.withCharset(cs), Stream.chunk(Chunk.byteVector(content)))
def decode[F[_]](cs: Charset): Pipe[F, Byte, String] =
if (cs == StandardCharsets.UTF_8) {
fs2.text.utf8Decode

View File

@ -28,9 +28,9 @@ object ReadMail {
)(mail: Mail[F]): Stream[F, Binary[F]] = {
val bodyEntry: F[Option[Binary[F]]] = mail.body.fold(
_ => (None: Option[Binary[F]]).pure[F],
txt => txt.text.map(c => Binary.text[F]("mail.txt", c).some),
html => html.html.map(c => Binary.html[F]("mail.html", c).some),
both => both.html.map(c => Binary.html[F]("mail.html", c).some)
txt => txt.text.map(c => Binary.text[F]("mail.txt", c.bytes, c.charsetOrUtf8).some),
html => html.html.map(c => Binary.html[F]("mail.html", c.bytes, c.charsetOrUtf8).some),
both => both.html.map(c => Binary.html[F]("mail.html", c.bytes, c.charsetOrUtf8).some)
)
Stream.eval(

View File

@ -10,7 +10,7 @@ object Dependencies {
val CalevVersion = "0.1.0"
val CirceVersion = "0.13.0"
val DoobieVersion = "0.8.8"
val EmilVersion = "0.3.0"
val EmilVersion = "0.4.0"
val FastparseVersion = "2.1.3"
val FlexmarkVersion = "0.60.2"
val FlywayVersion = "6.3.2"
@ -89,6 +89,12 @@ object Dependencies {
"com.github.eikek" %% "emil-common" % EmilVersion,
"com.github.eikek" %% "emil-javamail" % EmilVersion
)
// Dependency list containing the `emil-doobie` artifact of the
// com.github.eikek organization, pinned to `EmilVersion`.
val emilDoobie = Seq(
"com.github.eikek" %% "emil-doobie" % EmilVersion,
)
// Dependency list containing the `emil-tnef` artifact of the
// com.github.eikek organization, pinned to `EmilVersion`.
val emilTnef = Seq(
"com.github.eikek" %% "emil-tnef" % EmilVersion,
)
val stanfordNlpCore = Seq(
"edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion excludeAll(