Use existing mimetype detection when storing files

This commit is contained in:
eikek 2021-09-23 09:27:04 +02:00
parent 1761526e20
commit 071f4067bf
4 changed files with 28 additions and 26 deletions

View File

@ -381,7 +381,7 @@ val store = project
libraryDependencies ++=
Dependencies.testContainer.map(_ % Test)
)
.dependsOn(common, query.jvm, totp)
.dependsOn(common, query.jvm, totp, files)
val extract = project
.in(file("modules/extract"))

View File

@ -24,6 +24,7 @@ import org.apache.tika.config.TikaConfig
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.txt.Icu4jEncodingDetector
import scodec.bits.ByteVector
object TikaMimetype {
private val tika = new TikaConfig().getDetector
@ -83,6 +84,9 @@ object TikaMimetype {
/** Detects the mime type from the beginning of a byte stream.
  *
  * At most the first 64 bytes are pulled from `data`; they are collected and passed to
  * `fromBytes` together with the given `hint`.
  */
def detect[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[MimeType] = {
  val prefix = data.take(64).compile.toVector
  prefix.map(chunk => fromBytes(chunk.toArray, hint))
}
/** Detects the mime type of bytes that are already held in memory.
  *
  * The complete `data` vector is copied into an array and passed to `fromBytes` together with
  * the given `hint`.
  */
def detect(data: ByteVector, hint: MimeTypeHint): MimeType = {
  val raw = data.toArray
  fromBytes(raw, hint)
}
def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
dt match {
case DataType.Exact(mt) =>

View File

@ -13,12 +13,13 @@ import cats.effect._
import fs2.{Pipe, Stream}
import docspell.common._
import docspell.files.TikaMimetype
import docspell.store.records.RFileMeta
import binny._
import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig}
import binny.tika.TikaContentTypeDetect
import doobie._
import scodec.bits.ByteVector
trait FileStore[F[_]] {
@ -42,8 +43,9 @@ object FileStore {
chunkSize: Int
): FileStore[F] = {
val attrStore = new AttributeStore[F](xa)
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect.default)
val binStore = GenericJdbcStore[F](ds, Log4sLogger[F](logger), cfg, attrStore)
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect)
val log = Logger.log4s[F](logger)
val binStore = GenericJdbcStore[F](ds, LoggerAdapter(log), cfg, attrStore)
new Impl[F](binStore, attrStore)
}
@ -66,27 +68,24 @@ object FileStore {
.andThen(_.map(bid => Ident.unsafe(bid.id)))
}
// NOTE(review): this span shows two adapter variants interleaved without +/- markers; the
// `Log4sLogger` lines appear to be the pre-change version and the `LoggerAdapter` lines the
// replacement -- confirm against the original diff.
// Log4sLogger: builds a binny.util.Logger[F] from an org.log4s.Logger.
private object Log4sLogger {
def apply[F[_]: Sync](log: org.log4s.Logger): binny.util.Logger[F] =
// LoggerAdapter: builds a binny.util.Logger[F] from a Logger[F]; no Sync constraint is required.
private object LoggerAdapter {
def apply[F[_]](log: Logger[F]): binny.util.Logger[F] =
new binny.util.Logger[F] {
// Log4sLogger variant: each log4s call is wrapped in Sync[F].delay.
override def trace(msg: => String): F[Unit] =
Sync[F].delay(log.trace(msg))
override def debug(msg: => String): F[Unit] =
Sync[F].delay(log.debug(msg))
override def info(msg: => String): F[Unit] =
Sync[F].delay(log.info(msg))
override def warn(msg: => String): F[Unit] =
Sync[F].delay(log.warn(msg))
override def error(msg: => String): F[Unit] =
Sync[F].delay(log.error(msg))
override def error(ex: Throwable)(msg: => String): F[Unit] =
Sync[F].delay(log.error(ex)(msg))
// LoggerAdapter variant: each method forwards directly to the same-named method of `log`.
override def trace(msg: => String): F[Unit] = log.trace(msg)
override def debug(msg: => String): F[Unit] = log.debug(msg)
override def info(msg: => String): F[Unit] = log.info(msg)
override def warn(msg: => String): F[Unit] = log.warn(msg)
override def error(msg: => String): F[Unit] = log.error(msg)
override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg)
}
}
/** A `ContentTypeDetect` implementation that delegates to `TikaMimetype.detect`. */
private object TikaContentTypeDetect extends ContentTypeDetect {

  /** Converts the given `hint` into a `MimeTypeHint` (filename and advertised type), runs the
    * detection on `data` and wraps the resulting mime type string in a `SimpleContentType`.
    */
  override def detect(data: ByteVector, hint: Hint): SimpleContentType = {
    val mimeHint = MimeTypeHint(hint.filename, hint.advertisedType)
    val detected = TikaMimetype.detect(data, mimeHint)
    SimpleContentType(detected.asString)
  }
}
}

View File

@ -275,8 +275,7 @@ object Dependencies {
// Binny artifacts (group "com.github.eikek"), all pinned to BinnyVersion.
// NOTE(review): "binny-jdbc" is listed twice (with and without a trailing comma) next to
// "binny-tika-detect"; this looks like the before/after lines of a diff shown without +/-
// markers -- confirm which entries belong to the final file.
val binny = Seq(
"com.github.eikek" %% "binny-core" % BinnyVersion,
"com.github.eikek" %% "binny-jdbc" % BinnyVersion,
"com.github.eikek" %% "binny-tika-detect" % BinnyVersion
"com.github.eikek" %% "binny-jdbc" % BinnyVersion
)
// https://github.com/flyway/flyway