mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-03 05:52:51 +00:00
Try to streamline the different impls for MimeType
This commit is contained in:
parent
7bbc41467c
commit
ee394eae86
@ -5,11 +5,11 @@ import cats.effect._
|
||||
import cats.implicits._
|
||||
import cats.data.OptionT
|
||||
import emil._
|
||||
import emil.javamail.syntax._
|
||||
import bitpeace.{FileMeta, RangeDef}
|
||||
|
||||
import docspell.common._
|
||||
import docspell.store._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
import docspell.store.records._
|
||||
import docspell.store.queries.QMails
|
||||
import OMail.{ImapSettings, ItemMail, Sent, SmtpSettings}
|
||||
@ -224,7 +224,7 @@ object OMail {
|
||||
Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all))
|
||||
).withFilename(a._1.name)
|
||||
.withLength(a._2.length)
|
||||
.withMimeType(_root_.emil.MimeType.parse(a._2.mimetype.asString).toOption)
|
||||
.withMimeType(a._2.mimetype.toLocal.toEmil)
|
||||
}
|
||||
val fields: Seq[Trans[F]] = Seq(
|
||||
From(sett.mailFrom),
|
||||
|
@ -4,6 +4,7 @@ import docspell.common.syntax.all._
|
||||
import io.circe.{Decoder, Encoder}
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.nio.charset.Charset
|
||||
import cats.data.NonEmptyList
|
||||
|
||||
/** A MIME Type impl with just enough features for the use here.
|
||||
*/
|
||||
@ -96,18 +97,51 @@ object MimeType {
|
||||
val tiff = image("tiff")
|
||||
val html = text("html")
|
||||
val plain = text("plain")
|
||||
val eml = MimeType("message", "rfc822", Map.empty)
|
||||
val emls = NonEmptyList.of(
|
||||
MimeType("message", "rfc822", Map.empty),
|
||||
application("mbox")
|
||||
)
|
||||
|
||||
/** Pattern-match extractor for PDF mime types.
  *
  * `unapply` yields the given mime type unchanged when `mt.matches(pdf)`
  * holds and `None` otherwise, so it can be used as
  * `case MimeType.PdfMatch(mt) => ...`.
  *
  * NOTE(review): `matches` and `pdf` are defined outside this excerpt;
  * presumably the comparison ignores parameters such as the charset --
  * confirm against the `MimeType` class.
  */
object PdfMatch {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
Some(mt).filter(_.matches(pdf))
|
||||
}
|
||||
|
||||
/** Pattern-match extractor for every mime type whose primary part is
  * exactly `"text"`.
  *
  * No check is made on the sub type, so `text/html` matches as well as
  * `text/plain`. The matched mime type is returned unchanged.
  */
object TextAllMatch {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
Some(mt).filter(_.primary == "text")
|
||||
}
|
||||
|
||||
/** Pattern-match extractor for HTML mime types.
  *
  * `unapply` yields the given mime type unchanged when `mt.matches(html)`
  * holds (where `html` is `text("html")`) and `None` otherwise.
  *
  * NOTE(review): the exact semantics of `matches` are not visible in this
  * excerpt -- confirm whether parameters take part in the comparison.
  */
object HtmlMatch {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
Some(mt).filter(_.matches(html))
|
||||
}
|
||||
|
||||
/** Pattern-match extractor for textual mime types that are not HTML.
  *
  * Matches when the primary part equals `"text"` and the sub part does not
  * contain `"html"`; the matched mime type is returned unchanged.
  *
  * NOTE(review): the test on `sub` is a `contains` check, not an equality
  * check, so any sub type that merely includes "html" is rejected too.
  */
object NonHtmlText {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
if (mt.primary == "text" && !mt.sub.contains("html")) Some(mt)
|
||||
else None
|
||||
}
|
||||
|
||||
/** Pattern-match extractor for zip archives.
  *
  * `unapply` yields the given mime type unchanged when `mt.matches(zip)`
  * holds and `None` otherwise.
  *
  * NOTE(review): `zip` and `matches` are defined outside this excerpt;
  * presumably `zip` denotes `application/zip` -- confirm.
  */
object ZipMatch {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
Some(mt).filter(_.matches(zip))
|
||||
}
|
||||
|
||||
/** Pattern-match extractor for the supported image types: only jpeg, png
  * and tiff.
  *
  * Note that the extracted value is `m.baseType`, not the original `m`:
  * the lookup in `all` is performed on the base type and that same value
  * is what a successful match binds.
  *
  * NOTE(review): `baseType` is defined outside this excerpt; presumably it
  * is the mime type without its parameters -- confirm.
  */
|
||||
object ImageMatch {
|
||||
val all = Set(MimeType.jpeg, MimeType.png, MimeType.tiff)
|
||||
|
||||
def unapply(m: MimeType): Option[MimeType] =
|
||||
Some(m).map(_.baseType).filter(all.contains)
|
||||
}
|
||||
|
||||
/** Pattern-match extractor for e-mail mime types.
  *
  * Matches when `mt.matches(_)` holds for at least one entry of `emls`
  * (`message/rfc822` and `application/mbox`); the matched mime type is
  * returned unchanged.
  */
object EmailMatch {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
if (emls.exists(mt.matches(_))) Some(mt)
|
||||
else None
|
||||
}
|
||||
|
||||
implicit val jsonEncoder: Encoder[MimeType] =
|
||||
Encoder.encodeString.contramap(_.asString)
|
||||
|
||||
|
@ -16,7 +16,7 @@ case class ProcessItemArgs(meta: ProcessMeta, files: List[File]) {
|
||||
|
||||
def makeSubject: String =
|
||||
files.flatMap(_.name) match {
|
||||
case Nil => s"${meta.sourceAbbrev}: No files"
|
||||
case Nil => s"${meta.sourceAbbrev}: No files supplied"
|
||||
case n :: Nil => n
|
||||
case n1 :: n2 :: Nil => s"$n1, $n2"
|
||||
case _ => s"${files.size} files from ${meta.sourceAbbrev}"
|
||||
|
@ -33,10 +33,10 @@ object Conversion {
|
||||
in: Stream[F, Byte]
|
||||
): F[A] =
|
||||
TikaMimetype.resolve(dataType, in).flatMap {
|
||||
case Pdfs(_) =>
|
||||
case MimeType.PdfMatch(_) =>
|
||||
handler.run(ConversionResult.successPdf(in))
|
||||
|
||||
case mt @ MimeType(_, "html", _) =>
|
||||
case MimeType.HtmlMatch(mt) =>
|
||||
val cs = mt.charsetOrUtf8
|
||||
WkHtmlPdf
|
||||
.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, cs, sanitizeHtml, blocker, logger)(
|
||||
@ -44,7 +44,7 @@ object Conversion {
|
||||
handler
|
||||
)
|
||||
|
||||
case mt @ Texts(_) =>
|
||||
case MimeType.TextAllMatch(mt) =>
|
||||
val cs = mt.charsetOrUtf8
|
||||
Markdown.toHtml(in, cfg.markdown, cs).flatMap { html =>
|
||||
val bytes = Stream
|
||||
@ -60,7 +60,7 @@ object Conversion {
|
||||
)(bytes, handler)
|
||||
}
|
||||
|
||||
case Images(mt) =>
|
||||
case MimeType.ImageMatch(mt) =>
|
||||
ImageSize.get(in).flatMap {
|
||||
case Some(dim) =>
|
||||
if (dim.product > cfg.maxImageSize)
|
||||
@ -98,23 +98,6 @@ object Conversion {
|
||||
}
|
||||
})
|
||||
|
||||
object Images {
|
||||
|
||||
val all = Set(MimeType.jpeg, MimeType.png, MimeType.tiff)
|
||||
|
||||
def unapply(m: MimeType): Option[MimeType] =
|
||||
Some(m).map(_.baseType).filter(all.contains)
|
||||
}
|
||||
|
||||
object Texts {
|
||||
def unapply(m: MimeType): Option[MimeType] =
|
||||
Some(m).filter(_.primary == "text")
|
||||
}
|
||||
|
||||
object Pdfs {
|
||||
def unapply(m: MimeType): Option[MimeType] =
|
||||
Some(m).filter(_.matches(MimeType.pdf))
|
||||
}
|
||||
|
||||
object Office {
|
||||
val odt = MimeType.application("vnd.oasis.opendocument.text")
|
||||
@ -158,8 +141,8 @@ object Conversion {
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
mt match {
|
||||
case Office(_) => Some(mt)
|
||||
case Texts(_) => Some(mt)
|
||||
case Images(_) => Some(mt)
|
||||
case MimeType.TextAllMatch(_) => Some(mt)
|
||||
case MimeType.ImageMatch(_) => Some(mt)
|
||||
case _ => None
|
||||
}
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ object Extraction {
|
||||
) *>
|
||||
OdfExtract.get(data).map(ExtractResult.fromEither)
|
||||
|
||||
case mt @ MimeType("text", sub, _) if !sub.contains("html") =>
|
||||
case MimeType.NonHtmlText(mt) =>
|
||||
val cs = mt.charsetOrUtf8
|
||||
logger.info(s"File detected as ${mt.asString}. Returning itself as text.") *>
|
||||
data.through(Binary.decode(cs)).foldMonoid.compile.last.map { txt =>
|
||||
|
@ -10,6 +10,7 @@ import emil.markdown._
|
||||
import emil.jsoup._
|
||||
|
||||
import docspell.common._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
|
||||
object ReadMail {
|
||||
|
||||
@ -51,18 +52,13 @@ object ReadMail {
|
||||
.eval(TnefExtract.replace(mail))
|
||||
.flatMap(m => Stream.emits(m.attachments.all))
|
||||
.map(a =>
|
||||
Binary(a.filename.getOrElse("noname"), a.mimeType.toDocspell, a.content)
|
||||
Binary(a.filename.getOrElse("noname"), a.mimeType.toLocal, a.content)
|
||||
))
|
||||
}
|
||||
|
||||
/** Wraps a mail body as an HTML [[Binary]] named `"mail.html"`.
  *
  * The bytes are taken from `cnt.bytes` and the charset from
  * `cnt.charsetOrUtf8`.
  */
private def makeHtmlBinary[F[_]](cnt: BodyContent): Binary[F] =
|
||||
Binary.html[F]("mail.html", cnt.bytes, cnt.charsetOrUtf8)
|
||||
|
||||
implicit class MimeTypeConv(m: emil.MimeType) {
|
||||
def toDocspell: MimeType =
|
||||
MimeType(m.primary, m.sub, m.params)
|
||||
}
|
||||
|
||||
private def bodyType[F[_]](body: MailBody[F]): String =
|
||||
body.fold(
|
||||
_ => "empty-body",
|
||||
|
@ -10,6 +10,7 @@ import docspell.common._
|
||||
import docspell.convert._
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.records._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
import docspell.convert.SanitizeHtml
|
||||
import docspell.joex.extract.JsoupSanitizer
|
||||
@ -60,17 +61,16 @@ object ConvertPdf {
|
||||
item: ItemData
|
||||
)(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] =
|
||||
Conversion.create[F](cfg, sanitizeHtml, ctx.blocker, ctx.logger).use { conv =>
|
||||
mime match {
|
||||
case mt if mt.baseEqual(Mimetype.`application/pdf`) =>
|
||||
mime.toLocal match {
|
||||
case MimeType.PdfMatch(_) =>
|
||||
ctx.logger.debug(s"Not going to convert a PDF file ${ra.name} into a PDF.") *>
|
||||
(ra, None: Option[RAttachmentMeta]).pure[F]
|
||||
|
||||
case _ =>
|
||||
case mt =>
|
||||
val data = ctx.store.bitpeace
|
||||
.get(ra.fileId.id)
|
||||
.unNoneTerminate
|
||||
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
|
||||
val mt = MimeType(mime.primary, mime.sub, mime.params)
|
||||
val handler = conversionHandler[F](ctx, cfg, ra, item)
|
||||
ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
|
||||
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
|
||||
|
@ -10,6 +10,7 @@ import docspell.common._
|
||||
import docspell.joex.mail._
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.records._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
import docspell.files.Zip
|
||||
import cats.kernel.Monoid
|
||||
import emil.Mail
|
||||
@ -88,13 +89,13 @@ object ExtractArchive {
|
||||
ctx: Context[F, ProcessItemArgs],
|
||||
archive: Option[RAttachmentArchive]
|
||||
)(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] =
|
||||
mime match {
|
||||
case Mimetype("application", "zip", _) if ra.name.exists(_.endsWith(".zip")) =>
|
||||
mime.toLocal match {
|
||||
case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) =>
|
||||
ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
|
||||
extractZip(ctx, archive)(ra, pos)
|
||||
.flatTap(_ => cleanupParents(ctx, ra, archive))
|
||||
|
||||
case Mimetype("message", "rfc822", _) =>
|
||||
case MimeType.EmailMatch(_) =>
|
||||
ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("<noname>")}") *>
|
||||
extractMail(ctx, archive)(ra, pos)
|
||||
.flatTap(_ => cleanupParents(ctx, ra, archive))
|
||||
|
@ -8,6 +8,7 @@ import docspell.common._
|
||||
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
|
||||
import docspell.joex.scheduler.{Context, Task}
|
||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
|
||||
object TextExtraction {
|
||||
|
||||
@ -82,7 +83,7 @@ object TextExtraction {
|
||||
|
||||
findMime
|
||||
.flatMap(mt =>
|
||||
extr.extractText(data, DataType(MimeType(mt.primary, mt.sub, mt.params)), lang)
|
||||
extr.extractText(data, DataType(mt.toLocal), lang)
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -241,7 +241,7 @@ object ScanMailboxTask {
|
||||
def submitMail(upload: OUpload[F])(mail: Mail[F]): F[OUpload.UploadResult] = {
|
||||
val file = OUpload.File(
|
||||
Some(mail.header.subject + ".eml"),
|
||||
Some(MimeType.eml),
|
||||
Some(MimeType.emls.head),
|
||||
mail.toByteStream
|
||||
)
|
||||
for {
|
||||
|
@ -0,0 +1,27 @@
|
||||
package docspell.store.syntax
|
||||
|
||||
import bitpeace.Mimetype
|
||||
import docspell.common._
|
||||
|
||||
/** Syntax extensions for converting between the three mime type
  * representations used here: bitpeace's `Mimetype`, `emil.MimeType` and
  * the local `MimeType` (presumably the one brought in by
  * `import docspell.common._` -- confirm).
  *
  * Every conversion copies the `primary`, `sub` and `params` fields
  * one-to-one; no validation or normalisation is performed in this object.
  * Bring the methods into scope with
  * `import docspell.store.syntax.MimeTypes._`.
  */
object MimeTypes {
|
||||
|
||||
|
||||
// Adds `toLocal` to bitpeace's `Mimetype`.
implicit final class BitpeaceMimeTypeOps(bmt: Mimetype) {
|
||||
|
||||
// Builds the local `MimeType` from the same primary/sub/params.
def toLocal: MimeType =
|
||||
MimeType(bmt.primary, bmt.sub, bmt.params)
|
||||
}
|
||||
|
||||
// Adds `toLocal` to `emil.MimeType`.
implicit final class EmilMimeTypeOps(emt: emil.MimeType) {
|
||||
// Builds the local `MimeType` from the same primary/sub/params.
def toLocal: MimeType =
|
||||
MimeType(emt.primary, emt.sub, emt.params)
|
||||
}
|
||||
|
||||
// Adds the reverse conversions to the local `MimeType`.
implicit final class DocspellMimeTypeOps(mt: MimeType) {
|
||||
// Builds an `emil.MimeType` from the same primary/sub/params.
def toEmil: emil.MimeType =
|
||||
emil.MimeType(mt.primary, mt.sub, mt.params)
|
||||
|
||||
// Builds a bitpeace `Mimetype` from the same primary/sub/params.
def toBitpeace: Mimetype =
|
||||
Mimetype(mt.primary, mt.sub, mt.params)
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user