Try to streamline the different impls for MimeType

This commit is contained in:
Eike Kettner
2020-05-25 09:23:44 +02:00
parent 7bbc41467c
commit ee394eae86
11 changed files with 85 additions and 43 deletions

View File

@ -10,6 +10,7 @@ import emil.markdown._
import emil.jsoup._
import docspell.common._
import docspell.store.syntax.MimeTypes._
object ReadMail {
@ -51,18 +52,13 @@ object ReadMail {
.eval(TnefExtract.replace(mail))
.flatMap(m => Stream.emits(m.attachments.all))
.map(a =>
Binary(a.filename.getOrElse("noname"), a.mimeType.toDocspell, a.content)
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
))
}
/** Builds a [[Binary]] for a mail body via `Binary.html`, using the fixed
  * file name "mail.html" together with the content's bytes and
  * `cnt.charsetOrUtf8`.
  *
  * NOTE(review): presumably `charsetOrUtf8` falls back to UTF-8 when the
  * body declares no charset -- confirm against `BodyContent`.
  *
  * @param cnt the mail body content to expose as an HTML binary
  */
private def makeHtmlBinary[F[_]](cnt: BodyContent): Binary[F] =
Binary.html[F]("mail.html", cnt.bytes, cnt.charsetOrUtf8)
/** Extension syntax for `emil.MimeType`: adds `toDocspell`, which rebuilds
  * the value as a `MimeType` from its `primary`, `sub` and `params` fields.
  *
  * NOTE(review): the surrounding change also calls `a.mimeType.toLocal` with
  * `docspell.store.syntax.MimeTypes._` imported, so this local conversion
  * looks like the one being superseded -- confirm before relying on it.
  */
implicit class MimeTypeConv(m: emil.MimeType) {
def toDocspell: MimeType =
MimeType(m.primary, m.sub, m.params)
}
private def bodyType[F[_]](body: MailBody[F]): String =
body.fold(
_ => "empty-body",

View File

@ -10,6 +10,7 @@ import docspell.common._
import docspell.convert._
import docspell.joex.scheduler._
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import docspell.convert.ConversionResult.Handler
import docspell.convert.SanitizeHtml
import docspell.joex.extract.JsoupSanitizer
@ -60,17 +61,16 @@ object ConvertPdf {
item: ItemData
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
Conversion.create[F](cfg, sanitizeHtml, ctx.blocker, ctx.logger).use { conv =>
mime match {
case mt if mt.baseEqual(Mimetype.`application/pdf`) =>
mime.toLocal match {
case MimeType.PdfMatch(_) =>
ctx.logger.debug(s"Not going to convert a PDF file ${ra.name} into a PDF.") *>
(ra, None: Option[RAttachmentMeta]).pure[F]
case _ =>
case mt =>
val data = ctx.store.bitpeace
.get(ra.fileId.id)
.unNoneTerminate
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
val mt = MimeType(mime.primary, mime.sub, mime.params)
val handler = conversionHandler[F](ctx, cfg, ra, item)
ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(

View File

@ -10,6 +10,7 @@ import docspell.common._
import docspell.joex.mail._
import docspell.joex.scheduler._
import docspell.store.records._
import docspell.store.syntax.MimeTypes._
import docspell.files.Zip
import cats.kernel.Monoid
import emil.Mail
@ -88,13 +89,13 @@ object ExtractArchive {
ctx: Context[F, ProcessItemArgs],
archive: Option[RAttachmentArchive]
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
mime match {
case Mimetype("application", "zip", _) if ra.name.exists(_.endsWith(".zip")) =>
mime.toLocal match {
case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) =>
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
extractZip(ctx, archive)(ra, pos)
.flatTap(_ => cleanupParents(ctx, ra, archive))
case Mimetype("message", "rfc822", _) =>
case MimeType.EmailMatch(_) =>
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
extractMail(ctx, archive)(ra, pos)
.flatTap(_ => cleanupParents(ctx, ra, archive))

View File

@ -8,6 +8,7 @@ import docspell.common._
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
import docspell.store.syntax.MimeTypes._
object TextExtraction {
@ -82,7 +83,7 @@ object TextExtraction {
findMime
.flatMap(mt =>
extr.extractText(data, DataType(MimeType(mt.primary, mt.sub, mt.params)), lang)
extr.extractText(data, DataType(mt.toLocal), lang)
)
}

View File

@ -241,7 +241,7 @@ object ScanMailboxTask {
def submitMail(upload: OUpload[F])(mail: Mail[F]): F[OUpload.UploadResult] = {
val file = OUpload.File(
Some(mail.header.subject + ".eml"),
Some(MimeType.eml),
Some(MimeType.emls.head),
mail.toByteStream
)
for {