mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-02-15 20:33:26 +00:00
Use existing mimetype detection when storing files
This commit is contained in:
parent
1761526e20
commit
071f4067bf
@ -381,7 +381,7 @@ val store = project
|
||||
libraryDependencies ++=
|
||||
Dependencies.testContainer.map(_ % Test)
|
||||
)
|
||||
.dependsOn(common, query.jvm, totp)
|
||||
.dependsOn(common, query.jvm, totp, files)
|
||||
|
||||
val extract = project
|
||||
.in(file("modules/extract"))
|
||||
|
@ -24,6 +24,7 @@ import org.apache.tika.config.TikaConfig
|
||||
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
|
||||
import org.apache.tika.mime.MediaType
|
||||
import org.apache.tika.parser.txt.Icu4jEncodingDetector
|
||||
import scodec.bits.ByteVector
|
||||
|
||||
object TikaMimetype {
|
||||
private val tika = new TikaConfig().getDetector
|
||||
@ -83,6 +84,9 @@ object TikaMimetype {
|
||||
/** Detects the mime type of a byte stream.
  *
  * Only the first 64 bytes of `data` are collected; they are converted to an
  * array and handed to `fromBytes` together with `hint`.
  *
  * @param data the bytes to inspect
  * @param hint additional detection hint that is passed through to `fromBytes`
  * @return the detected mime type, produced inside `F`
  */
def detect[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[MimeType] =
|
||||
data.take(64).compile.toVector.map(bytes => fromBytes(bytes.toArray, hint))
|
||||
|
||||
/** Detects the mime type of bytes that are already in memory.
  *
  * The complete `ByteVector` is converted to an array and passed to
  * `fromBytes` with `hint`; unlike the stream-based overload, no 64-byte
  * limit is applied here.
  *
  * @param data the bytes to inspect
  * @param hint additional detection hint that is passed through to `fromBytes`
  * @return the detected mime type
  */
def detect(data: ByteVector, hint: MimeTypeHint): MimeType =
|
||||
fromBytes(data.toArray, hint)
|
||||
|
||||
def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
|
||||
dt match {
|
||||
case DataType.Exact(mt) =>
|
||||
|
@ -13,12 +13,13 @@ import cats.effect._
|
||||
import fs2.{Pipe, Stream}
|
||||
|
||||
import docspell.common._
|
||||
import docspell.files.TikaMimetype
|
||||
import docspell.store.records.RFileMeta
|
||||
|
||||
import binny._
|
||||
import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig}
|
||||
import binny.tika.TikaContentTypeDetect
|
||||
import doobie._
|
||||
import scodec.bits.ByteVector
|
||||
|
||||
trait FileStore[F[_]] {
|
||||
|
||||
@ -42,8 +43,9 @@ object FileStore {
|
||||
chunkSize: Int
|
||||
): FileStore[F] = {
|
||||
val attrStore = new AttributeStore[F](xa)
|
||||
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect.default)
|
||||
val binStore = GenericJdbcStore[F](ds, Log4sLogger[F](logger), cfg, attrStore)
|
||||
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect)
|
||||
val log = Logger.log4s[F](logger)
|
||||
val binStore = GenericJdbcStore[F](ds, LoggerAdapter(log), cfg, attrStore)
|
||||
new Impl[F](binStore, attrStore)
|
||||
}
|
||||
|
||||
@ -66,27 +68,24 @@ object FileStore {
|
||||
.andThen(_.map(bid => Ident.unsafe(bid.id)))
|
||||
}
|
||||
|
||||
// NOTE(review): this span appears to show two variants of the same adapter
// interleaved (a diff rendered without +/- markers) — confirm which lines are current.
// Variant wrapping an org.log4s.Logger: every log call is suspended via Sync[F].delay.
private object Log4sLogger {
|
||||
|
||||
def apply[F[_]: Sync](log: org.log4s.Logger): binny.util.Logger[F] =
|
||||
/** Adapts a `Logger[F]` to the `binny.util.Logger[F]` interface. */
private object LoggerAdapter {
|
||||
def apply[F[_]](log: Logger[F]): binny.util.Logger[F] =
|
||||
new binny.util.Logger[F] {
|
||||
override def trace(msg: => String): F[Unit] =
|
||||
Sync[F].delay(log.trace(msg))
|
||||
|
||||
override def debug(msg: => String): F[Unit] =
|
||||
Sync[F].delay(log.debug(msg))
|
||||
|
||||
override def info(msg: => String): F[Unit] =
|
||||
Sync[F].delay(log.info(msg))
|
||||
|
||||
override def warn(msg: => String): F[Unit] =
|
||||
Sync[F].delay(log.warn(msg))
|
||||
|
||||
override def error(msg: => String): F[Unit] =
|
||||
Sync[F].delay(log.error(msg))
|
||||
|
||||
override def error(ex: Throwable)(msg: => String): F[Unit] =
|
||||
Sync[F].delay(log.error(ex)(msg))
|
||||
// Variant taking Logger[F]: each method forwards directly to the same-named
// method on `log`, without wrapping the call in Sync[F].delay.
override def trace(msg: => String): F[Unit] = log.trace(msg)
|
||||
override def debug(msg: => String): F[Unit] = log.debug(msg)
|
||||
override def info(msg: => String): F[Unit] = log.info(msg)
|
||||
override def warn(msg: => String): F[Unit] = log.warn(msg)
|
||||
override def error(msg: => String): F[Unit] = log.error(msg)
|
||||
override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg)
|
||||
}
|
||||
}
|
||||
|
||||
/** `ContentTypeDetect` implementation that delegates to `TikaMimetype.detect`.
  *
  * The incoming hint's `filename` and `advertisedType` are repackaged as a
  * `MimeTypeHint`; the detected mime type's `asString` value is wrapped in a
  * `SimpleContentType`.
  */
private object TikaContentTypeDetect extends ContentTypeDetect {
|
||||
override def detect(data: ByteVector, hint: Hint): SimpleContentType =
|
||||
SimpleContentType(
|
||||
TikaMimetype
|
||||
.detect(data, MimeTypeHint(hint.filename, hint.advertisedType))
|
||||
.asString
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -275,8 +275,7 @@ object Dependencies {
|
||||
|
||||
// Module IDs for the binny library, all pinned to BinnyVersion.
// NOTE(review): "binny-jdbc" is listed twice (with and without a trailing comma)
// next to "binny-tika-detect" — this looks like both sides of a diff rendered
// together; confirm which entries are current.
val binny = Seq(
|
||||
"com.github.eikek" %% "binny-core" % BinnyVersion,
|
||||
"com.github.eikek" %% "binny-jdbc" % BinnyVersion,
|
||||
"com.github.eikek" %% "binny-tika-detect" % BinnyVersion
|
||||
"com.github.eikek" %% "binny-jdbc" % BinnyVersion
|
||||
)
|
||||
|
||||
// https://github.com/flyway/flyway
|
||||
|
Loading…
Reference in New Issue
Block a user