mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Simplify MimeType class and parse mimetypes in a more lenient way
This commit is contained in:
@ -31,10 +31,10 @@ object TikaMimetype {
|
||||
/** Converts a Tika `MediaType` into a docspell `MimeType`.
  *
  * A `null` media type yields `MimeType.octetStream`. Otherwise the primary
  * type and subtype are taken over, the `charset` parameter (if any) is
  * applied via `withCharsetName`, and the result is passed through
  * `normalize`.
  *
  * @param mt
  *   the media type reported by Tika; may be `null`
  * @return
  *   the corresponding, normalized `MimeType`
  */
private def convert(mt: MediaType): MimeType =
  Option(mt) match {
    case Some(mediaType) =>
      // "unknown" is the placeholder used when Tika reports no charset.
      // NOTE(review): presumably `withCharsetName` leaves the charset unset
      // for names it cannot resolve -- confirm against MimeType.
      val cs = mediaType.getParameters.asScala.getOrElse("charset", "unknown")
      normalize(
        MimeType(mediaType.getType, mediaType.getSubtype, None).withCharsetName(cs)
      )
    case None =>
      MimeType.octetStream
  }
|
||||
@ -48,8 +48,8 @@ object TikaMimetype {
|
||||
|
||||
/** Normalizes mime types that should be treated like a more common one.
  *
  * Any type whose subtype contains `xhtml` is mapped to `MimeType.html`,
  * keeping the charset of the input. All other types are returned unchanged.
  *
  * @param in
  *   the mime type to normalize
  * @return
  *   `MimeType.html` (with the input's charset) for xhtml subtypes, otherwise `in`
  */
private def normalize(in: MimeType): MimeType =
  in match {
    case MimeType(_, sub, cs) if sub.contains("xhtml") =>
      MimeType.html.copy(charset = cs)
    case _ =>
      in
  }
|
||||
|
||||
@ -86,7 +86,7 @@ object TikaMimetype {
|
||||
def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
|
||||
dt match {
|
||||
case DataType.Exact(mt) =>
|
||||
mt.resolveCharset match {
|
||||
mt.charset match {
|
||||
case None if mt.primary == "text" =>
|
||||
detectCharset[F](data, MimeTypeHint.advertised(mt))
|
||||
.map {
|
||||
|
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright 2020 Eike K. & Contributors
|
||||
*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
package docspell.files
|
||||
|
||||
import docspell.common.{MimeType, MimeTypeHint}
|
||||
|
||||
import munit.FunSuite
|
||||
import scodec.bits.ByteVector
|
||||
|
||||
/** Tests for `TikaMimetype.detect` using small in-memory samples together
  * with the different kinds of `MimeTypeHint` (none, filename, advertised).
  */
class TikaMimetypeTest extends FunSuite {

  // Shorthand so that each test reads as "bytes + hint => mime type".
  private def detect(bv: ByteVector, hint: MimeTypeHint): MimeType =
    TikaMimetype.detect(bv, hint)

  test("detect text/plain") {
    val mt = detect(ByteVector.view("hello world".getBytes), MimeTypeHint.none)
    // Compare the base type only; the detected value may carry a charset.
    assertEquals(mt.baseType, MimeType.plain)
  }

  test("detect image/jpeg") {
    val mt = detect(
      ByteVector.fromValidBase64("/9j/4AAQSkZJRgABAgAAZABkAAA="),
      MimeTypeHint.none
    )
    assertEquals(mt, MimeType.jpeg)
  }

  test("detect image/png") {
    val mt = detect(
      ByteVector.fromValidBase64("iVBORw0KGgoAAAANSUhEUgAAA2I="),
      MimeTypeHint.none
    )
    assertEquals(mt, MimeType.png)
  }

  // The two JSON tests previously shared one name; each name now states
  // which hint is exercised so that reports and filters are unambiguous.
  test("detect application/json from filename hint") {
    val mt = detect(
      ByteVector.view("""{"name":"me"}""".getBytes),
      MimeTypeHint.filename("me.json")
    )
    assertEquals(mt, MimeType.json)
  }

  test("detect application/json from advertised hint") {
    val mt = detect(
      ByteVector.view("""{"name":"me"}""".getBytes),
      MimeTypeHint.advertised("application/json")
    )
    assertEquals(mt, MimeType.json)
  }

  test("detect image/jpeg despite wrong advertised type") {
    val mt = detect(
      ByteVector.fromValidBase64("/9j/4AAQSkZJRgABAgAAZABkAAA="),
      MimeTypeHint.advertised("image/png")
    )
    assertEquals(mt, MimeType.jpeg)
  }

  test("detect from filename only") {
    // No content at all: the result is expected to come from the filename hint.
    assertEquals(
      detect(ByteVector.empty, MimeTypeHint.filename("doc.pdf")),
      MimeType.pdf
    )
  }
}
|
Reference in New Issue
Block a user