// Patch overview: the store module stops using the detector shipped in the
// binny-tika-detect artifact and instead detects content types through
// docspell.files.TikaMimetype. It also replaces the log4s-specific logger
// bridge handed to binny with one that forwards to a Logger[F].
//
// build.sbt: the store project additionally depends on the files project,
// which FileStore.scala needs for its new docspell.files.TikaMimetype import.
diff --git a/build.sbt b/build.sbt
index 09001f92..bd15be3f 100644
--- a/build.sbt
+++ b/build.sbt
@@ -381,7 +381,7 @@ val store = project
       libraryDependencies ++=
         Dependencies.testContainer.map(_ % Test)
   )
-  .dependsOn(common, query.jvm, totp)
+  .dependsOn(common, query.jvm, totp, files)
 
 val extract = project
   .in(file("modules/extract"))
// TikaMimetype.scala: adds a detect overload that takes a ByteVector and
// returns a plain MimeType (no effect type F) by calling fromBytes directly.
// NOTE(review): the existing Stream overload only inspects the first 64 bytes
// (data.take(64)), whereas the new overload passes the whole ByteVector to
// fromBytes via data.toArray; confirm how much data callers hand in.
diff --git a/modules/files/src/main/scala/docspell/files/TikaMimetype.scala b/modules/files/src/main/scala/docspell/files/TikaMimetype.scala
index 24c5bd54..74eedb60 100644
--- a/modules/files/src/main/scala/docspell/files/TikaMimetype.scala
+++ b/modules/files/src/main/scala/docspell/files/TikaMimetype.scala
@@ -24,6 +24,7 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaCoreProperties}
 import org.apache.tika.mime.MediaType
 import org.apache.tika.parser.txt.Icu4jEncodingDetector
+import scodec.bits.ByteVector
 
 object TikaMimetype {
   private val tika = new TikaConfig().getDetector
@@ -83,6 +84,9 @@ object TikaMimetype {
   def detect[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[MimeType] =
     data.take(64).compile.toVector.map(bytes => fromBytes(bytes.toArray, hint))
 
+  def detect(data: ByteVector, hint: MimeTypeHint): MimeType =
+    fromBytes(data.toArray, hint)
+
   def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
     dt match {
       case DataType.Exact(mt) =>
// FileStore.scala, three related changes:
//  1. JdbcStoreConfig now receives the private TikaContentTypeDetect object
//     added at the end of this file, replacing the removed import
//     binny.tika.TikaContentTypeDetect and its `.default` instance.
//  2. Log4sLogger, which wrapped every org.log4s.Logger call in
//     Sync[F].delay, is replaced by LoggerAdapter. The adapter forwards each
//     call unchanged to a Logger[F] that the factory builds with
//     Logger.log4s[F](logger), so it no longer needs a Sync constraint.
//     (Logger[F] is presumably docspell.common's logger; verify.)
//  3. The new TikaContentTypeDetect extends ContentTypeDetect (presumably
//     from the binny._ import; verify). It builds a MimeTypeHint from
//     hint.filename and hint.advertisedType, calls TikaMimetype.detect, and
//     wraps the result's asString in a SimpleContentType.
diff --git a/modules/store/src/main/scala/docspell/store/file/FileStore.scala b/modules/store/src/main/scala/docspell/store/file/FileStore.scala
index 295d4bdf..3afd4216 100644
--- a/modules/store/src/main/scala/docspell/store/file/FileStore.scala
+++ b/modules/store/src/main/scala/docspell/store/file/FileStore.scala
@@ -13,12 +13,13 @@ import cats.effect._
 import fs2.{Pipe, Stream}
 
 import docspell.common._
+import docspell.files.TikaMimetype
 import docspell.store.records.RFileMeta
 
 import binny._
 import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig}
-import binny.tika.TikaContentTypeDetect
 import doobie._
+import scodec.bits.ByteVector
 
 trait FileStore[F[_]] {
 
@@ -42,8 +43,9 @@ object FileStore {
       chunkSize: Int
   ): FileStore[F] = {
     val attrStore = new AttributeStore[F](xa)
-    val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect.default)
-    val binStore = GenericJdbcStore[F](ds, Log4sLogger[F](logger), cfg, attrStore)
+    val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect)
+    val log = Logger.log4s[F](logger)
+    val binStore = GenericJdbcStore[F](ds, LoggerAdapter(log), cfg, attrStore)
     new Impl[F](binStore, attrStore)
   }
 
@@ -66,27 +68,24 @@ object FileStore {
       .andThen(_.map(bid => Ident.unsafe(bid.id)))
   }
 
-  private object Log4sLogger {
-
-    def apply[F[_]: Sync](log: org.log4s.Logger): binny.util.Logger[F] =
+  private object LoggerAdapter {
+    def apply[F[_]](log: Logger[F]): binny.util.Logger[F] =
       new binny.util.Logger[F] {
-        override def trace(msg: => String): F[Unit] =
-          Sync[F].delay(log.trace(msg))
-
-        override def debug(msg: => String): F[Unit] =
-          Sync[F].delay(log.debug(msg))
-
-        override def info(msg: => String): F[Unit] =
-          Sync[F].delay(log.info(msg))
-
-        override def warn(msg: => String): F[Unit] =
-          Sync[F].delay(log.warn(msg))
-
-        override def error(msg: => String): F[Unit] =
-          Sync[F].delay(log.error(msg))
-
-        override def error(ex: Throwable)(msg: => String): F[Unit] =
-          Sync[F].delay(log.error(ex)(msg))
+        override def trace(msg: => String): F[Unit] = log.trace(msg)
+        override def debug(msg: => String): F[Unit] = log.debug(msg)
+        override def info(msg: => String): F[Unit] = log.info(msg)
+        override def warn(msg: => String): F[Unit] = log.warn(msg)
+        override def error(msg: => String): F[Unit] = log.error(msg)
+        override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg)
       }
   }
+
+  private object TikaContentTypeDetect extends ContentTypeDetect {
+    override def detect(data: ByteVector, hint: Hint): SimpleContentType =
+      SimpleContentType(
+        TikaMimetype
+          .detect(data, MimeTypeHint(hint.filename, hint.advertisedType))
+          .asString
+      )
+  }
 }
// Dependencies.scala: removes the binny-tika-detect artifact from the binny
// dependency list; binny-core and binny-jdbc remain.
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 3494f845..a9748e69 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -275,8 +275,7 @@ object Dependencies {
 
   val binny = Seq(
     "com.github.eikek" %% "binny-core" % BinnyVersion,
-    "com.github.eikek" %% "binny-jdbc" % BinnyVersion,
-    "com.github.eikek" %% "binny-tika-detect" % BinnyVersion
+    "com.github.eikek" %% "binny-jdbc" % BinnyVersion
   )
 
   // https://github.com/flyway/flyway