mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Simplify MimeType class and parse mimetypes in a more lenient way
This commit is contained in:
@ -9,6 +9,8 @@ package docspell.common
|
||||
import java.nio.charset.Charset
|
||||
import java.nio.charset.StandardCharsets
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
|
||||
import docspell.common.syntax.all._
|
||||
@ -16,33 +18,31 @@ import docspell.common.syntax.all._
|
||||
import io.circe.{Decoder, Encoder}
|
||||
|
||||
/** A MIME Type impl with just enough features for the use here. */
|
||||
case class MimeType(primary: String, sub: String, params: Map[String, String]) {
|
||||
def withParam(name: String, value: String): MimeType =
|
||||
copy(params = params.updated(name, value))
|
||||
case class MimeType(primary: String, sub: String, charset: Option[Charset]) {
|
||||
|
||||
def withCharset(cs: Charset): MimeType =
|
||||
withParam("charset", cs.name())
|
||||
copy(charset = Some(cs))
|
||||
|
||||
def withUtf8Charset: MimeType =
|
||||
withCharset(StandardCharsets.UTF_8)
|
||||
|
||||
def resolveCharset: Option[Charset] =
|
||||
params.get("charset").flatMap { cs =>
|
||||
if (Charset.isSupported(cs)) Some(Charset.forName(cs))
|
||||
else None
|
||||
}
|
||||
def withCharsetName(csName: String): MimeType =
|
||||
if (Try(Charset.isSupported(csName)).getOrElse(false))
|
||||
withCharset(Charset.forName(csName))
|
||||
else this
|
||||
|
||||
def charsetOrUtf8: Charset =
|
||||
resolveCharset.getOrElse(StandardCharsets.UTF_8)
|
||||
charset.getOrElse(StandardCharsets.UTF_8)
|
||||
|
||||
def baseType: MimeType =
|
||||
if (params.isEmpty) this else copy(params = Map.empty)
|
||||
if (charset.isEmpty) this else copy(charset = None)
|
||||
|
||||
def asString: String =
|
||||
if (params.isEmpty) s"$primary/$sub"
|
||||
else {
|
||||
val parameters = params.toList.map(t => s"""${t._1}="${t._2}"""").mkString(";")
|
||||
s"$primary/$sub; $parameters"
|
||||
charset match {
|
||||
case Some(cs) =>
|
||||
s"$primary/$sub; charset=\"${cs.name()}\""
|
||||
case None =>
|
||||
s"$primary/$sub"
|
||||
}
|
||||
|
||||
def matches(other: MimeType): Boolean =
|
||||
@ -53,46 +53,16 @@ case class MimeType(primary: String, sub: String, params: Map[String, String]) {
|
||||
object MimeType {
|
||||
|
||||
def application(sub: String): MimeType =
|
||||
MimeType("application", sub, Map.empty)
|
||||
MimeType("application", sub, None)
|
||||
|
||||
def text(sub: String): MimeType =
|
||||
MimeType("text", sub, Map.empty)
|
||||
MimeType("text", sub, None)
|
||||
|
||||
def image(sub: String): MimeType =
|
||||
MimeType("image", sub, Map.empty)
|
||||
MimeType("image", sub, None)
|
||||
|
||||
def parse(str: String): Either[String, MimeType] = {
|
||||
def parsePrimary: Either[String, (String, String)] =
|
||||
str.indexOf('/') match {
|
||||
case -1 => Left(s"Invalid mediatype: $str")
|
||||
case n => Right(str.take(n) -> str.drop(n + 1))
|
||||
}
|
||||
|
||||
def parseSub(s: String): Either[String, (String, String)] =
|
||||
s.indexOf(';') match {
|
||||
case -1 => Right((s, ""))
|
||||
case n => Right((s.take(n), s.drop(n)))
|
||||
}
|
||||
|
||||
def parseParams(s: String): Map[String, String] =
|
||||
s.split(';')
|
||||
.map(_.trim)
|
||||
.filter(_.nonEmpty)
|
||||
.toList
|
||||
.flatMap(p =>
|
||||
p.split("=", 2).toList match {
|
||||
case a :: b :: Nil => Some((a, b))
|
||||
case _ => None
|
||||
}
|
||||
)
|
||||
.toMap
|
||||
|
||||
for {
|
||||
pt <- parsePrimary
|
||||
st <- parseSub(pt._2)
|
||||
pa = parseParams(st._2)
|
||||
} yield MimeType(pt._1, st._1, pa)
|
||||
}
|
||||
/** Parses a MIME type string by delegating to `Parser.parse`.
  *
  * @param str the textual MIME type to parse
  * @return `Right` with the parsed type, or `Left` with an error message
  */
def parse(str: String): Either[String, MimeType] =
|
||||
Parser.parse(str)
|
||||
|
||||
/** Like `parse`, but unwraps the result with `throwLeft` instead of returning an
  * `Either`. Intended for inputs that are known to be valid.
  *
  * NOTE(review): `throwLeft` raises the parse error through `sys.error`, i.e. as a
  * `RuntimeException` carrying the error message - confirm against `EitherSyntax`.
  */
def unsafe(str: String): MimeType =
|
||||
parse(str).throwLeft
|
||||
@ -105,8 +75,9 @@ object MimeType {
|
||||
val tiff = image("tiff")
|
||||
val html = text("html")
|
||||
val plain = text("plain")
|
||||
val json = application("json")
|
||||
val emls = NonEmptyList.of(
|
||||
MimeType("message", "rfc822", Map.empty),
|
||||
MimeType("message", "rfc822", None),
|
||||
application("mbox")
|
||||
)
|
||||
|
||||
@ -158,4 +129,88 @@ object MimeType {
|
||||
|
||||
/** Circe decoder: reads a JSON string and runs it through `parse`; a `Left` from
  * `parse` is used as the decoding error message.
  */
implicit val jsonDecoder: Decoder[MimeType] =
|
||||
Decoder.decodeString.emap(parse)
|
||||
|
||||
private object Parser {
|
||||
/** Runs the complete `mimeType` parser on `s` and keeps only the parsed value,
  * discarding whatever input the parser reports as remaining.
  */
def parse(s: String): Either[String, MimeType] =
  mimeType(s).map { case (parsed, _) => parsed }
|
||||
|
||||
/** Outcome of a parser: an error message, or the parsed value paired with the
  * input that is left for the next parser.
  */
type Result[A] = Either[String, (A, String)]
|
||||
/** A parser is a function from the input string to a `Result`. */
type P[A] = String => Result[A]
|
||||
|
||||
/** Characters that `isToken` accepts in addition to letters and digits. */
private[this] val tokenExtraChars = "+-$%*._~".toSet
|
||||
|
||||
/** Sequences two parsers: runs `pa` on the input, then `pb` on the input that
  * `pa` left over, and combines both values with `f`.
  *
  * The first failing parser short-circuits; its error message is returned
  * unchanged. Uses the right-biased `Either` combinators, so no casts are needed
  * to re-type the failure branch.
  *
  * @param pa first parser
  * @param pb second parser, applied to the remainder of `pa`
  * @param f  combines the two parsed values
  * @return a parser yielding `f(a, b)` together with the remainder left by `pb`
  */
private def seq[A, B, C](pa: P[A], pb: P[B])(f: (A, B) => C): P[C] =
  in =>
    pa(in).flatMap { case (a, restA) =>
      pb(restA).map { case (b, restB) =>
        (f(a, b), restB)
      }
    }
|
||||
|
||||
/** Parser that consumes the longest prefix whose characters all satisfy `p`.
  *
  * The first character that does not satisfy `p` (the delimiter) is dropped.
  * Both the consumed prefix and the remaining input are trimmed. This parser
  * never fails: if every character satisfies `p`, the remainder is empty.
  */
private def takeWhile(p: Char => Boolean): P[String] =
  input => {
    // index of the delimiter, i.e. the first character rejected by `p`
    val cut = input.indexWhere(c => !p(c))
    if (cut < 0) Right((input.trim, ""))
    else Right((input.substring(0, cut).trim, input.substring(cut + 1).trim))
  }
|
||||
|
||||
/** Wraps parser `p` with a validation step.
  *
  * If `p` succeeds and `test` accepts the parsed value, the result of `p` is
  * passed through; if `test` rejects it, the parser fails with `err`. A failure
  * of `p` itself is returned as is. `err` is only evaluated when needed.
  */
private def check[A](p: P[A], test: A => Boolean, err: => String): P[A] =
  in =>
    p(in).flatMap { case parsed @ (value, _) =>
      Either.cond(test(value), parsed, err)
    }
|
||||
|
||||
/** Tests whether `s` is acceptable as a type or subtype name: it must be
  * non-empty and consist only of letters, digits or one of `tokenExtraChars`.
  *
  * Modelled after the `token` rule in
  * https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6
  * NOTE(review): this is not an exact implementation of that rule -
  * `isLetterOrDigit` also accepts non-ASCII letters and digits.
  */
private def isToken(s: String): Boolean = {
  def allowed(c: Char): Boolean =
    c.isLetterOrDigit || tokenExtraChars.contains(c)

  s.nonEmpty && s.forall(allowed)
}
|
||||
|
||||
/** Parses the `primary/sub` part of a MIME type.
  *
  * The primary type is everything up to the first `/`, the subtype everything
  * up to the first `;` (or the end of the input). Both must satisfy `isToken`,
  * otherwise the parser fails with a descriptive message. Both parts are
  * lower-cased; the resulting `MimeType` has no charset. Whatever follows the
  * `;` is left as remaining input.
  */
private val baseType: P[MimeType] = {
  // Lower-case with Locale.ROOT so the result does not depend on the JVM's
  // default locale (with a Turkish default locale, "IMAGE" would otherwise
  // become "ımage" with a dotless i).
  def normalize(token: String): String =
    token.toLowerCase(java.util.Locale.ROOT)

  val primary = check(
    takeWhile(_ != '/'),
    isToken,
    "Primary type must be non-empty and contain valid characters"
  )
  val sub = check(
    takeWhile(_ != ';'),
    isToken,
    "Subtype must be non-empty and contain valid characters"
  )
  seq(primary, sub)((p, s) => MimeType(normalize(p), normalize(s), None))
}
|
||||
|
||||
/** Extracts an optional `charset` parameter from the parameter section of a
  * MIME type (see https://datatracker.ietf.org/doc/html/rfc2046#section-4.1.2).
  *
  * The key `charset=` is searched case-insensitively anywhere in the input; its
  * value runs up to the next `;` (or the end of the input) and may be wrapped in
  * double quotes. All other parameters are ignored.
  *
  * This parser never fails and never throws:
  *  - no `charset=` present: yields `None` and leaves the input untouched
  *  - charset name unknown to the JVM, empty, or syntactically illegal: yields
  *    `None` and consumes the whole input
  *  - otherwise: yields the resolved `Charset` and consumes the whole input
  */
private val charset: P[Option[Charset]] = in => {
  val key = "charset="
  val params = in.trim

  // Search on the very string that is sliced below. Comparing in place with
  // `regionMatches` avoids building a lower-cased copy whose length (and hence
  // indices) can differ from the original.
  val keyAt = (0 to params.length - key.length)
    .find(i => params.regionMatches(true, i, key, 0, key.length))

  keyAt match {
    case None =>
      Right((None, in))

    case Some(i) =>
      val valueStart = params.substring(i + key.length).trim
      val rawValue = valueStart.indexOf(';') match {
        case -1  => valueStart
        case end => valueStart.substring(0, end)
      }
      val csName = unquote(rawValue).trim
      // `Charset.forName` throws for unsupported *and* for illegal names (e.g.
      // an empty value); both simply mean "no usable charset" here.
      Right((Try(Charset.forName(csName)).toOption, ""))
  }
}
|
||||
|
||||
/** The complete parser: the base type followed by the optional charset
  * parameter, merged into a single `MimeType`.
  */
private val mimeType: P[MimeType] =
  seq(baseType, charset) { (base, detected) =>
    base.copy(charset = detected)
  }
|
||||
|
||||
/** Removes all layers of surrounding double quotes from `s`.
  *
  * A layer is removed only when both the first and the last character are `"`;
  * strings shorter than two characters are returned unchanged. Whitespace is
  * not touched, so quotes hidden behind leading or trailing blanks stay.
  */
private def unquote(s: String): String = {
  // Narrow the window [from, until) instead of allocating a substring per layer.
  @scala.annotation.tailrec
  def strip(from: Int, until: Int): String =
    if (until - from >= 2 && s.charAt(from) == '"' && s.charAt(until - 1) == '"')
      strip(from + 1, until - 1)
    else
      s.substring(from, until)

  strip(0, s.length)
}
|
||||
}
|
||||
}
|
||||
|
@ -10,10 +10,7 @@ trait EitherSyntax {
|
||||
|
||||
implicit final class LeftStringEitherOps[A](e: Either[String, A]) {
|
||||
def throwLeft: A =
|
||||
e match {
|
||||
case Right(a) => a
|
||||
case Left(err) => sys.error(err)
|
||||
}
|
||||
e.fold(sys.error, identity)
|
||||
}
|
||||
|
||||
implicit final class ThrowableLeftEitherOps[A](e: Either[Throwable, A]) {
|
||||
|
128
modules/common/src/test/scala/docspell/common/MimeTypeTest.scala
Normal file
128
modules/common/src/test/scala/docspell/common/MimeTypeTest.scala
Normal file
@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright 2020 Eike K. & Contributors
|
||||
*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package docspell.common
|
||||
|
||||
import java.nio.charset.{Charset, StandardCharsets}
|
||||
|
||||
import scala.jdk.CollectionConverters._
|
||||
|
||||
import munit.ScalaCheckSuite
|
||||
import org.scalacheck.Gen
|
||||
import org.scalacheck.Prop.forAll
|
||||
|
||||
/** Tests for rendering (`asString`) and lenient parsing of [[MimeType]]. */
class MimeTypeTest extends ScalaCheckSuite {

  /** Asserts that `input`, parsed with `MimeType.unsafe`, equals `expected`.
    * The implicit location keeps failure reports pointing at the calling line.
    */
  private def assertParsed(input: String, expected: MimeType)(implicit
      loc: munit.Location
  ): Unit =
    assertEquals(MimeType.unsafe(input), expected)

  test("asString") {
    val html = MimeType.html
    assertEquals(html.asString, "text/html")
    assertEquals(
      html.withCharset(StandardCharsets.ISO_8859_1).asString,
      "text/html; charset=\"ISO-8859-1\""
    )
    assertEquals(
      html.withUtf8Charset.asString,
      "text/html; charset=\"UTF-8\""
    )
  }

  test("parse without params") {
    assertParsed("application/pdf", MimeType.pdf)
    assertParsed("image/jpeg", MimeType.jpeg)

    // surrounding and inner whitespace is tolerated
    assertParsed("image/jpeg ", MimeType.jpeg)
    assertParsed(" image/jpeg ", MimeType.jpeg)
    assertParsed(" image / jpeg ", MimeType.jpeg)

    assertParsed("application/xml+html", MimeType.application("xml+html"))
    // the subtype is lower-cased by the parser
    assertParsed(
      "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml",
      MimeType.application(
        "vnd.openxmlformats-officedocument.presentationml.viewprops+xml"
      )
    )
    assertParsed(
      "application/vnd.powerbuilder75-s",
      MimeType.application("vnd.powerbuilder75-s")
    )
  }

  test("parse with charset") {
    val plainUtf8 = MimeType.plain.withUtf8Charset

    // the parameter name is matched case-insensitively
    assertParsed("text/plain; charset=UTF-8", plainUtf8)
    assertParsed("text/plain; CHARSET=UTF-8", plainUtf8)
    assertParsed("text/plain; CharSet=UTF-8", plainUtf8)
    assertParsed(
      "text/html; charset=\"ISO-8859-1\"",
      MimeType.html.withCharset(StandardCharsets.ISO_8859_1)
    )
  }

  test("parse with charset and more params") {
    val plainUtf8 = MimeType.plain.withUtf8Charset

    assertParsed("text/plain; charset=UTF-8; action=test", plainUtf8)
    assertParsed("text/plain; run=\"2\"; charset=UTF-8; action=test", plainUtf8)
  }

  test("parse without charset but params") {
    assertParsed("image/jpeg; action=urn:2", MimeType.jpeg)
  }

  test("parse some stranger values") {
    val plainLatin1 = MimeType.plain.withCharset(StandardCharsets.ISO_8859_1)

    assertParsed("text/plain; charset=\"\"ISO-8859-1\"\"", plainLatin1)
    assertParsed("text/plain; charset=\"\" ISO-8859-1 \"\"", plainLatin1)
  }

  test("parse invalid mime types") {
    assert(MimeType.parse("").isLeft)
    assert(MimeType.parse("_ _/plain").isLeft)
    assert(MimeType.parse("/").isLeft)
    assert(MimeType.parse("()").isLeft)
  }

  // round trip: whatever `asString` renders must parse back to the same value
  property("read own asString") {
    forAll(MimeTypeTest.mimeType) { (mt: MimeType) =>
      assertEquals(MimeType.unsafe(mt.asString), mt)
    }
  }
}
|
||||
|
||||
/** Generators shared by the [[MimeType]] tests. */
object MimeTypeTest {

  /** The base types (without charset) the generator chooses from. */
  val someTypes: List[MimeType] =
    List(
      MimeType.plain,
      MimeType.html
    ) ++ MimeType.emls.toList

  /** Generates one of `someTypes`, either without a charset or with one of the
    * charsets installed on the running JVM.
    *
    * Picks a single element directly instead of generating a random
    * sub-collection and taking its head; `Gen.option` also makes sure the
    * charset-less case is generated regularly.
    */
  val mimeType: Gen[MimeType] = {
    // materialized once; never empty since every JVM ships the standard charsets
    val charsets = Charset.availableCharsets().values().asScala.toVector

    for {
      base <- Gen.oneOf(someTypes)
      cs <- Gen.option(Gen.oneOf(charsets))
    } yield base.copy(charset = cs)
  }
}
|
Reference in New Issue
Block a user