mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Use existing mimetype detection when storing files
This commit is contained in:
parent
1761526e20
commit
071f4067bf
@ -381,7 +381,7 @@ val store = project
|
|||||||
libraryDependencies ++=
|
libraryDependencies ++=
|
||||||
Dependencies.testContainer.map(_ % Test)
|
Dependencies.testContainer.map(_ % Test)
|
||||||
)
|
)
|
||||||
.dependsOn(common, query.jvm, totp)
|
.dependsOn(common, query.jvm, totp, files)
|
||||||
|
|
||||||
val extract = project
|
val extract = project
|
||||||
.in(file("modules/extract"))
|
.in(file("modules/extract"))
|
||||||
|
@ -24,6 +24,7 @@ import org.apache.tika.config.TikaConfig
|
|||||||
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
|
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
|
||||||
import org.apache.tika.mime.MediaType
|
import org.apache.tika.mime.MediaType
|
||||||
import org.apache.tika.parser.txt.Icu4jEncodingDetector
|
import org.apache.tika.parser.txt.Icu4jEncodingDetector
|
||||||
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
object TikaMimetype {
|
object TikaMimetype {
|
||||||
private val tika = new TikaConfig().getDetector
|
private val tika = new TikaConfig().getDetector
|
||||||
@ -83,6 +84,9 @@ object TikaMimetype {
|
|||||||
def detect[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[MimeType] =
|
def detect[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[MimeType] =
|
||||||
data.take(64).compile.toVector.map(bytes => fromBytes(bytes.toArray, hint))
|
data.take(64).compile.toVector.map(bytes => fromBytes(bytes.toArray, hint))
|
||||||
|
|
||||||
|
def detect(data: ByteVector, hint: MimeTypeHint): MimeType =
|
||||||
|
fromBytes(data.toArray, hint)
|
||||||
|
|
||||||
def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
|
def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
|
||||||
dt match {
|
dt match {
|
||||||
case DataType.Exact(mt) =>
|
case DataType.Exact(mt) =>
|
||||||
|
@ -13,12 +13,13 @@ import cats.effect._
|
|||||||
import fs2.{Pipe, Stream}
|
import fs2.{Pipe, Stream}
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
import docspell.files.TikaMimetype
|
||||||
import docspell.store.records.RFileMeta
|
import docspell.store.records.RFileMeta
|
||||||
|
|
||||||
import binny._
|
import binny._
|
||||||
import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig}
|
import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig}
|
||||||
import binny.tika.TikaContentTypeDetect
|
|
||||||
import doobie._
|
import doobie._
|
||||||
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
trait FileStore[F[_]] {
|
trait FileStore[F[_]] {
|
||||||
|
|
||||||
@ -42,8 +43,9 @@ object FileStore {
|
|||||||
chunkSize: Int
|
chunkSize: Int
|
||||||
): FileStore[F] = {
|
): FileStore[F] = {
|
||||||
val attrStore = new AttributeStore[F](xa)
|
val attrStore = new AttributeStore[F](xa)
|
||||||
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect.default)
|
val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect)
|
||||||
val binStore = GenericJdbcStore[F](ds, Log4sLogger[F](logger), cfg, attrStore)
|
val log = Logger.log4s[F](logger)
|
||||||
|
val binStore = GenericJdbcStore[F](ds, LoggerAdapter(log), cfg, attrStore)
|
||||||
new Impl[F](binStore, attrStore)
|
new Impl[F](binStore, attrStore)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,27 +68,24 @@ object FileStore {
|
|||||||
.andThen(_.map(bid => Ident.unsafe(bid.id)))
|
.andThen(_.map(bid => Ident.unsafe(bid.id)))
|
||||||
}
|
}
|
||||||
|
|
||||||
private object Log4sLogger {
|
private object LoggerAdapter {
|
||||||
|
def apply[F[_]](log: Logger[F]): binny.util.Logger[F] =
|
||||||
def apply[F[_]: Sync](log: org.log4s.Logger): binny.util.Logger[F] =
|
|
||||||
new binny.util.Logger[F] {
|
new binny.util.Logger[F] {
|
||||||
override def trace(msg: => String): F[Unit] =
|
override def trace(msg: => String): F[Unit] = log.trace(msg)
|
||||||
Sync[F].delay(log.trace(msg))
|
override def debug(msg: => String): F[Unit] = log.debug(msg)
|
||||||
|
override def info(msg: => String): F[Unit] = log.info(msg)
|
||||||
override def debug(msg: => String): F[Unit] =
|
override def warn(msg: => String): F[Unit] = log.warn(msg)
|
||||||
Sync[F].delay(log.debug(msg))
|
override def error(msg: => String): F[Unit] = log.error(msg)
|
||||||
|
override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg)
|
||||||
override def info(msg: => String): F[Unit] =
|
|
||||||
Sync[F].delay(log.info(msg))
|
|
||||||
|
|
||||||
override def warn(msg: => String): F[Unit] =
|
|
||||||
Sync[F].delay(log.warn(msg))
|
|
||||||
|
|
||||||
override def error(msg: => String): F[Unit] =
|
|
||||||
Sync[F].delay(log.error(msg))
|
|
||||||
|
|
||||||
override def error(ex: Throwable)(msg: => String): F[Unit] =
|
|
||||||
Sync[F].delay(log.error(ex)(msg))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private object TikaContentTypeDetect extends ContentTypeDetect {
|
||||||
|
override def detect(data: ByteVector, hint: Hint): SimpleContentType =
|
||||||
|
SimpleContentType(
|
||||||
|
TikaMimetype
|
||||||
|
.detect(data, MimeTypeHint(hint.filename, hint.advertisedType))
|
||||||
|
.asString
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -275,8 +275,7 @@ object Dependencies {
|
|||||||
|
|
||||||
val binny = Seq(
|
val binny = Seq(
|
||||||
"com.github.eikek" %% "binny-core" % BinnyVersion,
|
"com.github.eikek" %% "binny-core" % BinnyVersion,
|
||||||
"com.github.eikek" %% "binny-jdbc" % BinnyVersion,
|
"com.github.eikek" %% "binny-jdbc" % BinnyVersion
|
||||||
"com.github.eikek" %% "binny-tika-detect" % BinnyVersion
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// https://github.com/flyway/flyway
|
// https://github.com/flyway/flyway
|
||||||
|
Loading…
x
Reference in New Issue
Block a user