From 0b805726641f91b6305660a749f1aa817e626c44 Mon Sep 17 00:00:00 2001
From: Eike Kettner <eike.kettner@posteo.de>
Date: Tue, 24 Mar 2020 23:03:18 +0100
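Subject: [PATCH] Fix encodings for mails with non-utf8 html parts

Mail bodies were previously handed on as strings and always labelled as
UTF-8. They are now passed on as raw bytes together with the charset
taken from the mail body (`charsetOrUtf8`), and that charset is recorded
in the mime type of the resulting binary.

Also bumps emil from 0.3.0 to 0.4.0 and declares the emil-doobie and
emil-tnef modules in Dependencies.
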
---
 .../common/src/main/scala/docspell/common/Binary.scala   | 9 ++++++++-
 .../src/main/scala/docspell/joex/mail/ReadMail.scala     | 6 +++---
 project/Dependencies.scala                               | 8 +++++++-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/modules/common/src/main/scala/docspell/common/Binary.scala b/modules/common/src/main/scala/docspell/common/Binary.scala
index ec128b66..88bcd99a 100644
--- a/modules/common/src/main/scala/docspell/common/Binary.scala
+++ b/modules/common/src/main/scala/docspell/common/Binary.scala
@@ -1,8 +1,9 @@
 package docspell.common
 
-import fs2.{Pipe, Stream}
+import fs2.{Chunk, Pipe, Stream}
 import java.nio.charset.Charset
 import java.nio.charset.StandardCharsets
+import scodec.bits.ByteVector
 
 final case class Binary[F[_]](name: String, mime: MimeType, data: Stream[F, Byte]) {
 
@@ -25,9 +26,15 @@ object Binary {
   def text[F[_]](name: String, content: String): Binary[F] =
     utf8(name, content).withMime(MimeType.plain.withUtf8Charset)
 
+  def text[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] = // no re-encoding
+    Binary[F](name, MimeType.plain.withCharset(cs), Stream.chunk(Chunk.byteVector(content)))
+
   def html[F[_]](name: String, content: String): Binary[F] =
     utf8(name, content).withMime(MimeType.html.withUtf8Charset)
 
+  def html[F[_]](name: String, content: ByteVector, cs: Charset): Binary[F] = // no re-encoding
+    Binary[F](name, MimeType.html.withCharset(cs), Stream.chunk(Chunk.byteVector(content)))
+
   def decode[F[_]](cs: Charset): Pipe[F, Byte, String] =
     if (cs == StandardCharsets.UTF_8) {
       fs2.text.utf8Decode
diff --git a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
index 3525b9f5..85694176 100644
--- a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
+++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala
@@ -28,9 +28,9 @@ object ReadMail {
   )(mail: Mail[F]): Stream[F, Binary[F]] = {
     val bodyEntry: F[Option[Binary[F]]] = mail.body.fold(
       _ => (None: Option[Binary[F]]).pure[F],
-      txt => txt.text.map(c => Binary.text[F]("mail.txt", c).some),
-      html => html.html.map(c => Binary.html[F]("mail.html", c).some),
-      both => both.html.map(c => Binary.html[F]("mail.html", c).some)
+      txt => txt.text.map(c => Binary.text[F]("mail.txt", c.bytes, c.charsetOrUtf8).some),
+      html => html.html.map(c => Binary.html[F]("mail.html", c.bytes, c.charsetOrUtf8).some),
+      both => both.html.map(c => Binary.html[F]("mail.html", c.bytes, c.charsetOrUtf8).some)
     )
 
     Stream.eval(
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 2a524df7..f1939a58 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -10,7 +10,7 @@ object Dependencies {
   val CalevVersion = "0.1.0"
   val CirceVersion = "0.13.0"
   val DoobieVersion = "0.8.8"
-  val EmilVersion = "0.3.0"
+  val EmilVersion = "0.4.0"
   val FastparseVersion = "2.1.3"
   val FlexmarkVersion = "0.60.2"
   val FlywayVersion = "6.3.2"
@@ -89,6 +89,12 @@ object Dependencies {
     "com.github.eikek" %% "emil-common" % EmilVersion,
     "com.github.eikek" %% "emil-javamail" % EmilVersion    
   )
+  val emilDoobie = Seq( // shares EmilVersion with emil-common / emil-javamail
+    "com.github.eikek" %% "emil-doobie" % EmilVersion
+  )
+  val emilTnef = Seq( // shares EmilVersion with emil-common / emil-javamail
+    "com.github.eikek" %% "emil-tnef" % EmilVersion
+  )
 
   val stanfordNlpCore = Seq(
     "edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion excludeAll(