diff --git a/build.sbt b/build.sbt index 373b4a77..66a7f90a 100644 --- a/build.sbt +++ b/build.sbt @@ -368,7 +368,7 @@ val store = project name := "docspell-store", libraryDependencies ++= Dependencies.doobie ++ - Dependencies.bitpeace ++ + Dependencies.binny ++ Dependencies.tika ++ Dependencies.fs2 ++ Dependencies.databases ++ diff --git a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala index 6fd185a5..cda6bd81 100644 --- a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala +++ b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala @@ -70,7 +70,7 @@ object BackendApp { tagImpl <- OTag[F](store) equipImpl <- OEquipment[F](store) orgImpl <- OOrganization(store) - uploadImpl <- OUpload(store, queue, cfg.files, joexImpl) + uploadImpl <- OUpload(store, queue, joexImpl) nodeImpl <- ONode(store) jobImpl <- OJob(store, joexImpl) createIndex <- CreateIndex.resource(ftsClient, store) @@ -115,7 +115,7 @@ object BackendApp { httpClientEc: ExecutionContext )(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] = for { - store <- Store.create(cfg.jdbc, connectEC) + store <- Store.create(cfg.jdbc, cfg.files.chunkSize, connectEC) httpClient <- BlazeClientBuilder[F](httpClientEc).resource ftsClient <- ftsFactory(httpClient) backend <- create(cfg, store, httpClient, ftsClient) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala index cc1024c8..24342072 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala @@ -17,7 +17,6 @@ import docspell.store._ import docspell.store.queries.{QAttachment, QItem} import docspell.store.records._ -import bitpeace.{FileMeta, RangeDef} import doobie.implicits._ trait OItemSearch[F[_]] { @@ -90,10 +89,10 @@ object OItemSearch { trait BinaryData[F[_]] { def data: Stream[F, Byte] def name: Option[String] - def meta: FileMeta + def meta: RFileMeta def fileId: Ident } - case class AttachmentData[F[_]](ra: RAttachment, meta: FileMeta, data: Stream[F, Byte]) + case class AttachmentData[F[_]](ra: RAttachment, meta: RFileMeta, data: Stream[F, Byte]) extends BinaryData[F] { val name = ra.name val fileId = ra.fileId @@ -101,7 +100,7 @@ object OItemSearch { case class AttachmentSourceData[F[_]]( rs: RAttachmentSource, - meta: FileMeta, + meta: RFileMeta, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -110,7 +109,7 @@ object OItemSearch { case class AttachmentPreviewData[F[_]]( rs: RAttachmentPreview, - meta: FileMeta, + meta: RFileMeta, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -119,7 +118,7 @@ object OItemSearch { case class AttachmentArchiveData[F[_]]( rs: RAttachmentArchive, - meta: FileMeta, + meta: RFileMeta, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -189,7 +188,7 @@ object OItemSearch { AttachmentData[F]( ra, m, - store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + store.fileStore.getBytes(m.id) ) } @@ -209,7 +208,7 @@ object OItemSearch { AttachmentSourceData[F]( ra, m, - store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + store.fileStore.getBytes(m.id) ) } @@ -229,7 +228,7 @@ object OItemSearch { AttachmentPreviewData[F]( ra, m, - store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + store.fileStore.getBytes(m.id) ) } @@ -249,7 +248,7 @@ object OItemSearch { AttachmentPreviewData[F]( ra, m, - store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + store.fileStore.getBytes(m.id) ) } @@ -269,7 +268,7 @@ object OItemSearch { AttachmentArchiveData[F]( ra, m, - store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + store.fileStore.getBytes(m.id) ) } @@ -277,15 +276,11 @@ object OItemSearch { (None: Option[AttachmentArchiveData[F]]).pure[F] } - private def makeBinaryData[A](fileId: Ident)(f: FileMeta => A): F[Option[A]] = - store.bitpeace - .get(fileId.id) - .unNoneTerminate - .compile - .last - .map( - _.map(m => f(m)) - ) + private def makeBinaryData[A](fileId: Ident)(f: RFileMeta => A): F[Option[A]] = + store.fileStore + .findMeta(fileId) + .map(fm => f(fm)) + .value def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]] = store.transact(QAttachment.getAttachmentMeta(id, collective)) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala b/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala index 6bfa06de..c4426469 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala @@ -9,7 +9,6 @@ package docspell.backend.ops import cats.data.OptionT import cats.effect._ import cats.implicits._ -import fs2.Stream import docspell.backend.ops.OMail._ import docspell.common._ @@ -18,7 +17,6 @@ import docspell.store.queries.QMails import docspell.store.records._ import docspell.store.syntax.MimeTypes._ -import bitpeace.{FileMeta, RangeDef} import emil._ trait OMail[F[_]] { @@ -81,14 +79,17 @@ object OMail { ) sealed trait AttachSelection { - def filter(v: Vector[(RAttachment, FileMeta)]): Vector[(RAttachment, FileMeta)] + def filter(v: Vector[(RAttachment, RFileMeta)]): Vector[(RAttachment, RFileMeta)] } object AttachSelection { case object All extends AttachSelection { - def filter(v: Vector[(RAttachment, FileMeta)]): Vector[(RAttachment, FileMeta)] = v + def filter(v: Vector[(RAttachment, RFileMeta)]): Vector[(RAttachment, RFileMeta)] = + v } case class Selected(ids: List[Ident]) extends AttachSelection { - def filter(v: Vector[(RAttachment, FileMeta)]): Vector[(RAttachment, FileMeta)] = { + def filter( + v: Vector[(RAttachment, RFileMeta)] + ): Vector[(RAttachment, RFileMeta)] = { val set = ids.toSet v.filter(set contains _._1.id) } @@ -232,10 +233,10 @@ object OMail { } yield { val addAttach = m.attach.filter(ras).map { a => Attach[F]( - Stream.emit(a._2).through(store.bitpeace.fetchData2(RangeDef.all)) + store.fileStore.getBytes(a._2.id) ).withFilename(a._1.name) - .withLength(a._2.length) - .withMimeType(a._2.mimetype.toLocal.toEmil) + .withLength(a._2.length.bytes) + .withMimeType(a._2.mimetype.toEmil) } val fields: Seq[Trans[F]] = Seq( From(sett.mailFrom), diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index 04d57ce3..d78adc73 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -12,14 +12,13 @@ import cats.effect._ import cats.implicits._ import fs2.Stream -import docspell.backend.{Config, JobFactory} +import docspell.backend.JobFactory import docspell.common._ import docspell.common.syntax.all._ import docspell.store.Store import docspell.store.queue.JobQueue import docspell.store.records._ -import bitpeace.MimetypeHint import org.log4s._ trait OUpload[F[_]] { @@ -116,7 +115,6 @@ object OUpload { def apply[F[_]: Sync]( store: Store[F], queue: JobQueue[F], - cfg: Config.Files, joex: OJoex[F] ): Resource[F, OUpload[F]] = Resource.pure[F, OUpload[F]](new OUpload[F] { @@ -205,11 +203,10 @@ object OUpload { /** Saves the file into the database. */ private def saveFile(file: File[F]): F[Option[ProcessItemArgs.File]] = logger.finfo(s"Receiving file $file") *> - store.bitpeace - .saveNew(file.data, cfg.chunkSize, MimetypeHint(file.name, None), None) + file.data + .through(store.fileStore.save(MimeTypeHint(file.name, None))) .compile .lastOrError - .map(fm => Ident.unsafe(fm.id)) .attempt .map( _.fold( diff --git a/modules/common/src/main/scala/docspell/common/MimeType.scala b/modules/common/src/main/scala/docspell/common/MimeType.scala index eaf37e37..c693b35f 100644 --- a/modules/common/src/main/scala/docspell/common/MimeType.scala +++ b/modules/common/src/main/scala/docspell/common/MimeType.scala @@ -123,7 +123,10 @@ object MimeType { object HtmlMatch { def unapply(mt: MimeType): Option[MimeType] = - Some(mt).filter(_.matches(html)) + if ( + (mt.primary == "text" || mt.primary == "application") && mt.sub.contains("html") + ) Some(mt) + else None } object NonHtmlText { diff --git a/modules/common/src/main/scala/docspell/common/MimeTypeHint.scala b/modules/common/src/main/scala/docspell/common/MimeTypeHint.scala index 0f734471..55fbbda1 100644 --- a/modules/common/src/main/scala/docspell/common/MimeTypeHint.scala +++ b/modules/common/src/main/scala/docspell/common/MimeTypeHint.scala @@ -10,6 +10,9 @@ case class MimeTypeHint(filename: Option[String], advertised: Option[String]) { def withName(name: String): MimeTypeHint = copy(filename = Some(name)) + + def withAdvertised(advertised: String): MimeTypeHint = + copy(advertised = Some(advertised)) } object MimeTypeHint { diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index 38c001c6..c6ad2cdd 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -468,7 +468,7 @@ Docpell Update Check # The chunk size used when storing files. This should be the same # as used with the rest server. - chunk-size = 524288 + chunk-size = ${docspell.joex.files.chunk-size} # A string used to change the filename of the converted pdf file. # If empty, the original file name is used for the pdf file ( the diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index 626ba0fb..8f241245 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -122,12 +122,12 @@ object JoexAppImpl { for { httpClient <- BlazeClientBuilder[F](clientEC).resource client = JoexClient(httpClient) - store <- Store.create(cfg.jdbc, connectEC) + store <- Store.create(cfg.jdbc, cfg.files.chunkSize, connectEC) queue <- JobQueue(store) pstore <- PeriodicTaskStore.create(store) nodeOps <- ONode(store) joex <- OJoex(client, store) - upload <- OUpload(store, queue, cfg.files, joex) + upload <- OUpload(store, queue, joex) fts <- createFtsClient(cfg)(httpClient) createIndex <- CreateIndex.resource(fts, store) itemOps <- OItem(store, fts, createIndex, queue, joex) @@ -212,7 +212,7 @@ object JoexAppImpl { .withTask( JobTask.json( MakePreviewArgs.taskName, - MakePreviewTask[F](cfg.convert, cfg.extraction.preview), + MakePreviewTask[F](cfg.extraction.preview), MakePreviewTask.onCancel[F] ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala index 13800fc7..db33b251 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala @@ -17,8 +17,6 @@ import docspell.common._ import docspell.store.Store import docspell.store.records.RClassifierModel -import bitpeace.RangeDef - object Classify { def apply[F[_]: Async]( @@ -33,11 +31,7 @@ object Classify { _ <- OptionT.liftF(logger.info(s"Guessing label for ${cname.name} …")) model <- OptionT(store.transact(RClassifierModel.findByName(coll, cname.name))) .flatTapNone(logger.debug("No classifier model found.")) - modelData = - store.bitpeace - .get(model.fileId.id) - .unNoneTerminate - .through(store.bitpeace.fetchData2(RangeDef.all)) + modelData = store.fileStore.getBytes(model.fileId) cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir => val modelFile = dir.resolve("model.ser.gz") modelData diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala index 69b35a26..317a155a 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala @@ -90,8 +90,8 @@ object LearnClassifierTask { ) n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id))) _ <- list - .map(_.fileId.id) - .traverse(id => ctx.store.bitpeace.delete(id).compile.drain) + .map(_.fileId) + .traverse(id => ctx.store.fileStore.delete(id)) _ <- ctx.logger.debug(s"Deleted $n model files.") } yield () diff --git a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala index 7107b981..ac04f60a 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala @@ -16,8 +16,6 @@ import docspell.joex.scheduler._ import docspell.store.Store import docspell.store.records.RClassifierModel -import bitpeace.MimetypeHint - object StoreClassifierModel { def handleModel[F[_]: Async]( @@ -43,16 +41,16 @@ object StoreClassifierModel { ) _ <- logger.debug(s"Storing new trained model for: ${modelName.name}") fileData = Files[F].readAll(trainedModel.model) - newFile <- - store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError + newFileId <- + fileData.through(store.fileStore.save(MimeTypeHint.none)).compile.lastOrError _ <- store.transact( - RClassifierModel.updateFile(collective, modelName.name, Ident.unsafe(newFile.id)) + RClassifierModel.updateFile(collective, modelName.name, newFileId) ) - _ <- logger.debug(s"New model stored at file ${newFile.id}") + _ <- logger.debug(s"New model stored at file ${newFileId.id}") _ <- oldFile match { case Some(fid) => logger.debug(s"Deleting old model file ${fid.id}") *> - store.bitpeace.delete(fid.id).compile.drain + store.fileStore.delete(fid) case None => ().pure[F] } } yield () diff --git a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala index 0c9fe1b1..c1f9752a 100644 --- a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala @@ -19,10 +19,6 @@ import docspell.joex.Config import docspell.joex.scheduler.{Context, Task} import docspell.store.records._ -import bitpeace.FileMeta -import bitpeace.Mimetype -import bitpeace.MimetypeHint -import bitpeace.RangeDef import io.circe.generic.semiauto._ import io.circe.{Decoder, Encoder} @@ -55,8 +51,11 @@ object PdfConvTask { // --- Helper // check if file exists and if it is pdf and if source id is the same and if ocrmypdf is enabled - def checkInputs[F[_]: Sync](cfg: Config, ctx: Context[F, Args]): F[Option[FileMeta]] = { - val none: Option[FileMeta] = None + def checkInputs[F[_]: Sync]( + cfg: Config, + ctx: Context[F, Args] + ): F[Option[RFileMeta]] = { + val none: Option[RFileMeta] = None val checkSameFiles = (for { ra <- OptionT(ctx.store.transact(RAttachment.findById(ctx.args.attachId))) @@ -67,7 +66,7 @@ object PdfConvTask { val existsPdf = for { meta <- ctx.store.transact(RAttachment.findMeta(ctx.args.attachId)) - res = meta.filter(_.mimetype.matches(Mimetype.applicationPdf)) + res = meta.filter(_.mimetype.matches(MimeType.pdf)) _ <- if (res.isEmpty) ctx.logger.info( @@ -91,12 +90,10 @@ object PdfConvTask { def convert[F[_]: Async]( cfg: Config, ctx: Context[F, Args], - in: FileMeta + in: RFileMeta ): F[Unit] = { - val bp = ctx.store.bitpeace - val data = Stream - .emit(in) - .through(bp.fetchData2(RangeDef.all)) + val fs = ctx.store.fileStore + val data = fs.getBytes(in.id) val storeResult: ConversionResult.Handler[F, Unit] = Kleisli { @@ -122,7 +119,7 @@ object PdfConvTask { OcrMyPdf.toPDF[F, Unit]( cfg.convert.ocrmypdf, lang, - in.chunksize, + cfg.files.chunkSize, ctx.logger )(data, storeResult) @@ -140,18 +137,13 @@ object PdfConvTask { def storeToAttachment[F[_]: Sync]( ctx: Context[F, Args], - meta: FileMeta, + meta: RFileMeta, newFile: Stream[F, Byte] ): F[Unit] = { - val mimeHint = MimetypeHint.advertised(meta.mimetype.asString) + val mimeHint = MimeTypeHint.advertised(meta.mimetype) for { - time <- Timestamp.current[F] - fid <- Ident.randomId[F] - _ <- - ctx.store.bitpeace - .saveNew(newFile, meta.chunksize, mimeHint, Some(fid.id), time.value) - .compile - .lastOrError + fid <- + newFile.through(ctx.store.fileStore.save(mimeHint)).compile.lastOrError _ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid)) } yield () } diff --git a/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala b/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala index fcd733f8..55b801e7 100644 --- a/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala @@ -10,7 +10,6 @@ import cats.effect._ import cats.implicits._ import docspell.common._ -import docspell.convert.ConvertConfig import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PreviewConfig import docspell.joex.process.AttachmentPreview @@ -23,7 +22,7 @@ object MakePreviewTask { type Args = MakePreviewArgs - def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig): Task[F, Args, Unit] = + def apply[F[_]: Sync](pcfg: PreviewConfig): Task[F, Args, Unit] = Task { ctx => for { exists <- previewExists(ctx) @@ -36,7 +35,7 @@ object MakePreviewTask { else ctx.logger.info( s"Generating preview image for attachment ${ctx.args.attachment}" - ) *> generatePreview(ctx, preview, cfg) + ) *> generatePreview(ctx, preview) } yield () } @@ -45,13 +44,12 @@ object MakePreviewTask { private def generatePreview[F[_]: Sync]( ctx: Context[F, Args], - preview: PdfboxPreview[F], - cfg: ConvertConfig + preview: PdfboxPreview[F] ): F[Unit] = for { ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment)) _ <- ra - .map(AttachmentPreview.createPreview(ctx, preview, cfg.chunkSize)) + .map(AttachmentPreview.createPreview(ctx, preview)) .getOrElse( ctx.logger.error(s"No attachment found with id: ${ctx.args.attachment}") ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala index be1f6e60..d8c08aeb 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala @@ -18,9 +18,6 @@ import docspell.extract.pdfbox.PdfboxExtract import docspell.joex.scheduler._ import docspell.store.records.RAttachment import docspell.store.records._ -import docspell.store.syntax.MimeTypes._ - -import bitpeace.{Mimetype, RangeDef} /** Goes through all attachments that must be already converted into a pdf. If it is a * pdf, the number of pages are retrieved and stored in the attachment metadata. @@ -100,13 +97,8 @@ object AttachmentPageCount { def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) - .getOrElse(Mimetype.applicationOctetStream) - .map(_.toLocal) + .getOrElse(MimeType.octetStream) def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.bitpeace - .get(ra.fileId.id) - .unNoneTerminate - .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) - + ctx.store.fileStore.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala index ebac0086..60bb5da4 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala @@ -13,16 +13,12 @@ import cats.implicits._ import fs2.Stream import docspell.common._ -import docspell.convert._ import docspell.extract.pdfbox.PdfboxPreview import docspell.extract.pdfbox.PreviewConfig import docspell.joex.scheduler._ import docspell.store.queries.QAttachment import docspell.store.records.RAttachment import docspell.store.records._ -import docspell.store.syntax.MimeTypes._ - -import bitpeace.{Mimetype, MimetypeHint, RangeDef} /** Goes through all attachments that must be already converted into a pdf. If it is a * pdf, the first page is converted into a small preview png image and linked to the @@ -30,7 +26,7 @@ import bitpeace.{Mimetype, MimetypeHint, RangeDef} */ object AttachmentPreview { - def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig)( + def apply[F[_]: Sync](pcfg: PreviewConfig)( item: ItemData ): Task[F, ProcessItemArgs, ItemData] = Task { ctx => @@ -40,7 +36,7 @@ object AttachmentPreview { ) preview <- PdfboxPreview(pcfg) _ <- item.attachments - .traverse(createPreview(ctx, preview, cfg.chunkSize)) + .traverse(createPreview(ctx, preview)) .attempt .flatMap { case Right(_) => ().pure[F] @@ -54,8 +50,7 @@ object AttachmentPreview { def createPreview[F[_]: Sync]( ctx: Context[F, _], - preview: PdfboxPreview[F], - chunkSize: Int + preview: PdfboxPreview[F] )( ra: RAttachment ): F[Option[RAttachmentPreview]] = @@ -64,7 +59,7 @@ object AttachmentPreview { preview.previewPNG(loadFile(ctx)(ra)).flatMap { case Some(out) => ctx.logger.debug("Preview generated, saving to database…") *> - createRecord(ctx, out, ra, chunkSize).map(_.some) + createRecord(ctx, out, ra).map(_.some) case None => ctx.logger .info(s"Preview could not be generated. Maybe the pdf has no pages?") *> @@ -79,23 +74,20 @@ object AttachmentPreview { private def createRecord[F[_]: Sync]( ctx: Context[F, _], png: Stream[F, Byte], - ra: RAttachment, - chunkSize: Int + ra: RAttachment ): F[RAttachmentPreview] = { val name = ra.name .map(FileName.apply) .map(_.withPart("preview", '_').withExtension("png")) for { - fileMeta <- ctx.store.bitpeace - .saveNew( - png, - chunkSize, - MimetypeHint(name.map(_.fullName), Some("image/png")) + fileId <- png + .through( + ctx.store.fileStore.save(MimeTypeHint(name.map(_.fullName), Some("image/png"))) ) .compile .lastOrError now <- Timestamp.current[F] - rp = RAttachmentPreview(ra.id, Ident.unsafe(fileMeta.id), name.map(_.fullName), now) + rp = RAttachmentPreview(ra.id, fileId, name.map(_.fullName), now) _ <- QAttachment.deletePreview(ctx.store)(ra.id) _ <- ctx.store.transact(RAttachmentPreview.insert(rp)) } yield rp @@ -104,13 +96,8 @@ object AttachmentPreview { def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) - .getOrElse(Mimetype.applicationOctetStream) - .map(_.toLocal) + .getOrElse(MimeType.octetStream) def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.bitpeace - .get(ra.fileId.id) - .unNoneTerminate - .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) - + ctx.store.fileStore.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 50a93150..35a14c7f 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -19,9 +19,6 @@ import docspell.convert._ import docspell.joex.extract.JsoupSanitizer import docspell.joex.scheduler._ import docspell.store.records._ -import docspell.store.syntax.MimeTypes._ - -import bitpeace.{Mimetype, MimetypeHint, RangeDef} /** Goes through all attachments and creates a PDF version of it where supported. * @@ -69,24 +66,21 @@ object ConvertPdf { ): F[Boolean] = ctx.store.transact(RAttachmentSource.isConverted(ra.id)) - def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = + def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) - .getOrElse(Mimetype.applicationOctetStream) + .getOrElse(MimeType.octetStream) def convertSafe[F[_]: Async]( cfg: ConvertConfig, sanitizeHtml: SanitizeHtml, ctx: Context[F, ProcessItemArgs], item: ItemData - )(ra: RAttachment, mime: Mimetype): F[(RAttachment, Option[RAttachmentMeta])] = + )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv => - mime.toLocal match { + mime match { case mt => - val data = ctx.store.bitpeace - .get(ra.fileId.id) - .unNoneTerminate - .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) + val data = ctx.store.fileStore.getBytes(ra.fileId) val handler = conversionHandler[F](ctx, cfg, ra, item) ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> conv.toPDF(DataType(mt), ctx.args.meta.language, handler)( @@ -154,11 +148,11 @@ object ConvertPdf { .map(FileName.apply) .map(_.withExtension("pdf").withPart(cfg.convertedFilenamePart, '.')) .map(_.fullName) - ctx.store.bitpeace - .saveNew(pdf, cfg.chunkSize, MimetypeHint(hint.filename, hint.advertised)) + + pdf + .through(ctx.store.fileStore.save(MimeTypeHint(hint.filename, hint.advertised))) .compile .lastOrError - .map(fm => Ident.unsafe(fm.id)) .flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId)) .map(fmId => ra.copy(fileId = fmId, name = newName)) } @@ -184,10 +178,8 @@ object ConvertPdf { if (sameFile) ().pure[F] else ctx.logger.info("Deleting previous attachment file") *> - ctx.store.bitpeace - .delete(raPrev.fileId.id) - .compile - .drain + ctx.store.fileStore + .delete(raPrev.fileId) .attempt .flatMap { case Right(_) => ().pure[F] diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 2938e1c0..df18aef0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -15,9 +15,7 @@ import fs2.Stream import docspell.common._ import docspell.joex.scheduler.{Context, Task} import docspell.store.queries.QItem -import docspell.store.records.{RAttachment, RAttachmentSource, RItem} - -import bitpeace.FileMeta +import docspell.store.records._ /** Task that creates the item. */ @@ -31,12 +29,10 @@ object CreateItem { def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = Task { ctx => - def isValidFile(fm: FileMeta) = + def isValidFile(fm: RFileMeta) = ctx.args.meta.validFileTypes.isEmpty || - ctx.args.meta.validFileTypes - .map(_.asString) - .toSet - .contains(fm.mimetype.baseType) + ctx.args.meta.validFileTypes.toSet + .contains(fm.mimetype) def fileMetas(itemId: Ident, now: Timestamp) = Stream @@ -44,7 +40,9 @@ object CreateItem { .flatMap { offset => Stream .emits(ctx.args.files) - .flatMap(f => ctx.store.bitpeace.get(f.fileMetaId.id).map(fm => (f, fm))) + .evalMap(f => + ctx.store.fileStore.findMeta(f.fileMetaId).value.map(fm => (f, fm)) + ) .collect { case (f, Some(fm)) if isValidFile(fm) => f } .zipWithIndex .evalMap { case (f, index) => diff --git a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala index 48a30518..fd617c28 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala @@ -15,7 +15,6 @@ import docspell.store.queries.QItem import docspell.store.records.RFileMeta import docspell.store.records.RJob -import bitpeace.FileMeta import doobie._ object DuplicateCheck { @@ -40,7 +39,7 @@ object DuplicateCheck { _ <- fileMetas.traverse(deleteDuplicate(ctx)) ids = fileMetas.filter(_.exists).map(_.fm.id).toSet } yield ctx.args.copy(files = - ctx.args.files.filterNot(f => ids.contains(f.fileMetaId.id)) + ctx.args.files.filterNot(f => ids.contains(f.fileMetaId)) ) private def getRetryCount[F[_]: Sync](ctx: Context[F, Args]): F[Int] = @@ -49,13 +48,11 @@ object DuplicateCheck { private def deleteDuplicate[F[_]: Sync]( ctx: Context[F, Args] )(fd: FileMetaDupes): F[Unit] = { - val fname = ctx.args.files.find(_.fileMetaId.id == fd.fm.id).flatMap(_.name) + val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name) if (fd.exists) ctx.logger - .info(s"Deleting duplicate file $fname!") *> ctx.store.bitpeace + .info(s"Deleting duplicate file $fname!") *> ctx.store.fileStore .delete(fd.fm.id) - .compile - .drain else ().pure[F] } @@ -69,12 +66,12 @@ object DuplicateCheck { private def checkDuplicate[F[_]]( ctx: Context[F, Args] - )(fm: FileMeta): ConnectionIO[FileMetaDupes] = { + )(fm: RFileMeta): ConnectionIO[FileMetaDupes] = { val excludes = ctx.args.files.map(_.fileMetaId).toSet QItem - .findByChecksum(fm.checksum, ctx.args.meta.collective, excludes) + .findByChecksum(fm.checksum.toHex, ctx.args.meta.collective, excludes) .map(v => FileMetaDupes(fm, v.nonEmpty)) } - case class FileMetaDupes(fm: FileMeta, exists: Boolean) + case class FileMetaDupes(fm: RFileMeta, exists: Boolean) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index f8f31a69..5844a3fa 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -20,9 +20,7 @@ import docspell.files.Zip import docspell.joex.mail._ import docspell.joex.scheduler._ import docspell.store.records._ -import docspell.store.syntax.MimeTypes._ -import bitpeace.{Mimetype, MimetypeHint, RangeDef} import emil.Mail /** Goes through all attachments and extracts archive files, like zip files. The process @@ -84,16 +82,16 @@ object ExtractArchive { if (extract.archives.isEmpty) extract else extract.updatePositions - def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = + def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) - .getOrElse(Mimetype.applicationOctetStream) + .getOrElse(MimeType.octetStream) def extractSafe[F[_]: Async]( ctx: Context[F, ProcessItemArgs], archive: Option[RAttachmentArchive] - )(ra: RAttachment, pos: Int, mime: Mimetype): F[Extracted] = - mime.toLocal match { + )(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] = + mime match { case MimeType.ZipMatch(_) if ra.name.exists(_.endsWith(".zip")) => ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("")}.") *> extractZip(ctx, archive)(ra, pos) @@ -122,7 +120,7 @@ object ExtractArchive { ) _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) _ <- ctx.store.transact(RAttachment.delete(ra.id)) - _ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain + _ <- ctx.store.fileStore.delete(ra.fileId) } yield extracted case None => for { @@ -137,11 +135,8 @@ object ExtractArchive { ctx: Context[F, ProcessItemArgs], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val zipData = ctx.store.bitpeace - .get(ra.fileId.id) - .unNoneTerminate - .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) - val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) + val zipData = ctx.store.fileStore.getBytes(ra.fileId) + val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> zipData .through(Zip.unzipP[F](8192, glob)) @@ -156,10 +151,7 @@ object ExtractArchive { ctx: Context[F, ProcessItemArgs], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val email: Stream[F, Byte] = ctx.store.bitpeace - .get(ra.fileId.id) - .unNoneTerminate - .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) + val email: Stream[F, Byte] = ctx.store.fileStore.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) @@ -200,15 +192,16 @@ object ExtractArchive { tentry: (Binary[F], Long) ): Stream[F, Extracted] = { val (entry, subPos) = tentry - val mimeHint = MimetypeHint.filename(entry.name).withAdvertised(entry.mime.asString) - val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint) + val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString) + val fileId = entry.data.through(ctx.store.fileStore.save(mimeHint)) + Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. Storing as attachment.")) >> - fileMeta.evalMap { fm => + fileId.evalMap { fid => Ident.randomId.map { id => val nra = RAttachment( id, ra.itemId, - Ident.unsafe(fm.id), + fid, pos, ra.created, Option(entry.name).map(_.trim).filter(_.nonEmpty) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index ab32b627..58a696f9 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -132,8 +132,8 @@ object ItemHandler { Task(ctx => ctx.logger.info("Deleting input files …") *> Stream - .emits(ctx.args.files.map(_.fileMetaId.id)) - .flatMap(id => ctx.store.bitpeace.delete(id).attempt.drain) + .emits(ctx.args.files.map(_.fileMetaId)) + .evalMap(id => ctx.store.fileStore.delete(id).attempt) .compile .drain ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index 55ba812e..18cb7a31 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -62,7 +62,7 @@ object ProcessItem { ConvertPdf(cfg.convert, item) .flatMap(Task.setProgress(progress._1)) .flatMap(TextExtraction(cfg.extraction, fts)) - .flatMap(AttachmentPreview(cfg.convert, cfg.extraction.preview)) + .flatMap(AttachmentPreview(cfg.extraction.preview)) .flatMap(AttachmentPageCount()) .flatMap(Task.setProgress(progress._2)) .flatMap(analysisOnly[F](cfg, analyser, regexNer)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index e0e9fd67..217b2f08 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -15,9 +15,6 @@ import docspell.extract.{ExtractConfig, ExtractResult, Extraction} import docspell.ftsclient.{FtsClient, TextData} import docspell.joex.scheduler.{Context, Task} import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta} -import docspell.store.syntax.MimeTypes._ - -import bitpeace.{Mimetype, RangeDef} object TextExtraction { @@ -130,18 +127,15 @@ object TextExtraction { extr: Extraction[F], lang: Language )(fileId: Ident): F[ExtractResult] = { - val data = ctx.store.bitpeace - .get(fileId.id) - .unNoneTerminate - .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) + val data = ctx.store.fileStore.getBytes(fileId) - def findMime: F[Mimetype] = + def findMime: F[MimeType] = OptionT(ctx.store.transact(RFileMeta.findById(fileId))) .map(_.mimetype) - .getOrElse(Mimetype.applicationOctetStream) + .getOrElse(MimeType.octetStream) findMime - .flatMap(mt => extr.extractText(data, DataType(mt.toLocal), lang)) + .flatMap(mt => extr.extractText(data, DataType(mt), lang)) } private def extractTextFallback[F[_]: Async]( diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index 8ad6424d..9d6231c1 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -26,7 +26,6 @@ import docspell.store.queries.{AttachmentLight => QAttachmentLight} import docspell.store.records._ import docspell.store.{AddResult, UpdateResult} -import bitpeace.FileMeta import org.http4s.headers.`Content-Type` import org.http4s.multipart.Multipart import org.log4s.Logger @@ -140,17 +139,23 @@ trait Conversions { def mkAttachment( item: OItemSearch.ItemData - )(ra: RAttachment, m: FileMeta): Attachment = { + )(ra: RAttachment, m: RFileMeta): Attachment = { val converted = item.sources.find(_._1.id == ra.id).exists(_._2.checksum != m.checksum) - Attachment(ra.id, ra.name, m.length, MimeType.unsafe(m.mimetype.asString), converted) + Attachment( + ra.id, + ra.name, + m.length.bytes, + MimeType.unsafe(m.mimetype.asString), + converted + ) } - def mkAttachmentSource(ra: RAttachmentSource, m: FileMeta): AttachmentSource = - AttachmentSource(ra.id, ra.name, m.length, MimeType.unsafe(m.mimetype.asString)) + def mkAttachmentSource(ra: RAttachmentSource, m: RFileMeta): AttachmentSource = + AttachmentSource(ra.id, ra.name, m.length.bytes, MimeType.unsafe(m.mimetype.asString)) - def mkAttachmentArchive(ra: RAttachmentArchive, m: FileMeta): AttachmentSource = - AttachmentSource(ra.id, ra.name, m.length, MimeType.unsafe(m.mimetype.asString)) + def mkAttachmentArchive(ra: RAttachmentArchive, m: RFileMeta): AttachmentSource = + AttachmentSource(ra.id, ra.name, m.length.bytes, MimeType.unsafe(m.mimetype.asString)) // item list diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala index c9d652e5..156d57f3 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala @@ -12,8 +12,8 @@ import cats.effect._ import cats.implicits._ import docspell.backend.ops._ +import docspell.store.records.RFileMeta -import bitpeace.FileMeta import org.http4s._ import org.http4s.circe.CirceEntityEncoder._ import org.http4s.dsl.Http4sDsl @@ -30,8 +30,8 @@ object BinaryUtil { val mt = MediaType.unsafeParse(data.meta.mimetype.asString) val ctype = `Content-Type`(mt) - val cntLen = `Content-Length`.unsafeFromLong(data.meta.length) - val eTag = ETag(data.meta.checksum) + val cntLen = `Content-Length`.unsafeFromLong(data.meta.length.bytes) + val eTag = ETag(data.meta.checksum.toHex) val disp = `Content-Disposition`( "inline", @@ -48,16 +48,16 @@ object BinaryUtil { dsl: Http4sDsl[F] )(data: OItemSearch.BinaryData[F]): F[Response[F]] = { import dsl._ - withResponseHeaders(dsl, Ok(data.data.take(data.meta.length)))(data) + withResponseHeaders(dsl, Ok(data.data.take(data.meta.length.bytes)))(data) } def matchETag[F[_]]( - fileData: Option[FileMeta], + fileData: Option[RFileMeta], noneMatch: Option[NonEmptyList[EntityTag]] ): Boolean = (fileData, noneMatch) match { case (Some(meta), Some(nm)) => - meta.checksum == nm.head.tag + meta.checksum.toHex == nm.head.tag case _ => false } diff --git a/modules/store/src/main/resources/db/migration/h2/V1.26.3__filestore.sql b/modules/store/src/main/resources/db/migration/h2/V1.26.3__filestore.sql new file mode 100644 index 00000000..08567f67 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/h2/V1.26.3__filestore.sql @@ -0,0 +1,29 @@ +ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunksize"; +ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunks"; + +ALTER TABLE "filemeta" +RENAME COLUMN "id" TO "file_id"; + +ALTER TABLE "filechunk" +RENAME COLUMN "fileid" TO "file_id"; + +ALTER TABLE "filechunk" +RENAME COLUMN "chunknr" TO "chunk_nr"; + +ALTER TABLE "filechunk" +RENAME COLUMN "chunklength" TO "chunk_len"; + +ALTER TABLE "filechunk" +RENAME COLUMN "chunkdata" TO "chunk_data"; + +-- change timestamp format, bitpeace used a string +ALTER TABLE "filemeta" +ADD COLUMN "created" timestamp; + +UPDATE "filemeta" SET "created" = TO_TIMESTAMP("timestamp", 'YYYY-MM-DD"T"HH24:MI:SS.MS'); + +ALTER TABLE "filemeta" +ALTER COLUMN "created" SET NOT NULL; + +ALTER TABLE "filemeta" +DROP COLUMN "timestamp"; diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.26.3__filestore.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.26.3__filestore.sql new file mode 100644 index 00000000..8fca54c6 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.26.3__filestore.sql @@ -0,0 +1,29 @@ +ALTER TABLE `filemeta` DROP COLUMN IF EXISTS `chunksize`; +ALTER TABLE `filemeta` DROP COLUMN IF EXISTS `chunks`; + +ALTER TABLE `filemeta` +RENAME COLUMN `id` TO `file_id`; + +ALTER TABLE `filechunk` +RENAME COLUMN `fileid` TO `file_id`; + +ALTER TABLE `filechunk` +RENAME COLUMN `chunknr` TO `chunk_nr`; + +ALTER TABLE `filechunk` +RENAME COLUMN `chunklength` TO `chunk_len`; + +ALTER TABLE `filechunk` +RENAME COLUMN `chunkdata` TO `chunk_data`; + +-- change timestamp format, bitpeace used a string +ALTER TABLE `filemeta` +ADD COLUMN `created` timestamp; + +UPDATE `filemeta` SET `created` = STR_TO_DATE(`timestamp`, '%Y-%m-%dT%H:%i:%s.%fZ'); + +ALTER TABLE `filemeta` +MODIFY `created` timestamp NOT NULL; + +ALTER TABLE `filemeta` +DROP COLUMN `timestamp`; diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.26.3__filestore.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.26.3__filestore.sql new file mode 100644 index 00000000..f3bd542d --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.26.3__filestore.sql @@ -0,0 +1,29 @@ +ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunksize"; +ALTER TABLE "filemeta" DROP COLUMN IF EXISTS "chunks"; + +ALTER TABLE "filemeta" +RENAME COLUMN "id" TO "file_id"; + +ALTER TABLE "filechunk" +RENAME COLUMN "fileid" TO "file_id"; + +ALTER TABLE "filechunk" +RENAME COLUMN "chunknr" TO "chunk_nr"; + +ALTER TABLE "filechunk" +RENAME COLUMN "chunklength" TO "chunk_len"; + +ALTER TABLE "filechunk" +RENAME COLUMN "chunkdata" TO "chunk_data"; + +-- change timestamp format, bitpeace used a string +ALTER TABLE "filemeta" +ADD COLUMN "created" timestamp; + +UPDATE "filemeta" SET "created" = CAST("timestamp" as timestamp); + +ALTER TABLE "filemeta" +ALTER COLUMN "created" SET NOT NULL; + +ALTER TABLE "filemeta" +DROP COLUMN "timestamp"; diff --git a/modules/store/src/main/scala/docspell/store/Store.scala b/modules/store/src/main/scala/docspell/store/Store.scala index 59a1404a..6c351b74 100644 --- a/modules/store/src/main/scala/docspell/store/Store.scala +++ b/modules/store/src/main/scala/docspell/store/Store.scala @@ -11,9 +11,10 @@ import scala.concurrent.ExecutionContext import cats.effect._ import fs2._ +import docspell.store.file.FileStore import docspell.store.impl.StoreImpl -import bitpeace.Bitpeace +import com.zaxxer.hikari.HikariDataSource import doobie._ import doobie.hikari.HikariTransactor @@ -23,7 +24,7 @@ trait Store[F[_]] { def transact[A](prg: Stream[ConnectionIO, A]): Stream[F, A] - def bitpeace: Bitpeace[F] + def fileStore: FileStore[F] def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] } @@ -32,20 +33,23 @@ object Store { def create[F[_]: Async]( jdbc: JdbcConfig, + chunkSize: Int, connectEC: ExecutionContext ): Resource[F, Store[F]] = { - - val hxa = HikariTransactor.newHikariTransactor[F]( - jdbc.driverClass, - jdbc.url.asString, - jdbc.user, - jdbc.password, - connectEC - ) + val acquire = Sync[F].delay(new HikariDataSource()) + val free: HikariDataSource => F[Unit] = ds => Sync[F].delay(ds.close()) for { - xa <- hxa - st = new StoreImpl[F](jdbc, xa) + ds <- Resource.make(acquire)(free) + _ = Resource.pure { + ds.setJdbcUrl(jdbc.url.asString) + ds.setUsername(jdbc.user) + ds.setPassword(jdbc.password) + ds.setDriverClassName(jdbc.driverClass) + } + xa = HikariTransactor(ds, connectEC) + fs = FileStore[F](xa, ds, chunkSize) + st = new StoreImpl[F](fs, jdbc, xa) _ <- Resource.eval(st.migrate) } yield st } diff --git a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala new file mode 100644 index 00000000..89f5d9fa --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala @@ -0,0 +1,50 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import cats.data.OptionT +import cats.effect._ +import cats.implicits._ + +import docspell.common._ +import docspell.store.records.RFileMeta + +import binny._ +import doobie._ +import doobie.implicits._ + +final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) + extends BinaryAttributeStore[F] { + def saveAttr(id: BinaryId, attrs: F[BinaryAttributes]): F[Unit] = + for { + now <- Timestamp.current[F] + a <- attrs + fm = RFileMeta( + Ident.unsafe(id.id), + now, + MimeType.parse(a.contentType.contentType).getOrElse(MimeType.octetStream), + ByteSize(a.length), + a.sha256 + ) + _ <- RFileMeta.insert(fm).transact(xa) + } yield () + + def deleteAttr(id: BinaryId): F[Boolean] = + RFileMeta.delete(Ident.unsafe(id.id)).transact(xa).map(_ > 0) + + def findAttr(id: BinaryId): OptionT[F, BinaryAttributes] = + findMeta(id).map(fm => + BinaryAttributes( + fm.checksum, + SimpleContentType(fm.mimetype.asString), + fm.length.bytes + ) + ) + + def findMeta(id: BinaryId): OptionT[F, RFileMeta] = + OptionT(RFileMeta.findById(Ident.unsafe(id.id)).transact(xa)) +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileStore.scala b/modules/store/src/main/scala/docspell/store/file/FileStore.scala new file mode 100644 index 00000000..b95b8fcc --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileStore.scala @@ -0,0 +1,92 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import javax.sql.DataSource + +import cats.data.OptionT +import cats.effect._ +import fs2.{Pipe, Stream} + +import docspell.common._ +import docspell.store.records.RFileMeta + +import binny._ +import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} +import binny.tika.TikaContentTypeDetect +import doobie._ + +trait FileStore[F[_]] { + + def find(id: Ident): OptionT[F, Stream[F, Byte]] + + def getBytes(id: Ident): Stream[F, Byte] + + def findMeta(id: Ident): OptionT[F, RFileMeta] + + def delete(id: Ident): F[Unit] + + def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] +} + +object FileStore { + private[this] val logger = org.log4s.getLogger + + def apply[F[_]: Sync]( + xa: Transactor[F], + ds: DataSource, + chunkSize: Int + ): FileStore[F] = { + val attrStore = new AttributeStore[F](xa) + val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect.default) + val binStore = GenericJdbcStore[F](ds, Log4sLogger[F](logger), cfg, attrStore) + new Impl[F](binStore, attrStore) + } + + final private class Impl[F[_]](bs: BinaryStore[F], attrStore: AttributeStore[F]) + extends FileStore[F] { + def find(id: Ident): OptionT[F, Stream[F, Byte]] = + bs.findBinary(BinaryId(id.id), ByteRange.All) + + def getBytes(id: Ident): Stream[F, Byte] = + Stream.eval(find(id).value).unNoneTerminate.flatMap(identity) + + def findMeta(id: Ident): OptionT[F, RFileMeta] = + attrStore.findMeta(BinaryId(id.id)) + + def delete(id: Ident): F[Unit] = + bs.delete(BinaryId(id.id)) + + def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] = + bs.insert(Hint(hint.filename, hint.advertised)) + .andThen(_.map(bid => Ident.unsafe(bid.id))) + } + + private object Log4sLogger { + + def apply[F[_]: Sync](log: org.log4s.Logger): binny.util.Logger[F] = + new binny.util.Logger[F] { + override def trace(msg: => String): F[Unit] = + Sync[F].delay(log.trace(msg)) + + override def debug(msg: => String): F[Unit] = + Sync[F].delay(log.debug(msg)) + + override def info(msg: => String): F[Unit] = + Sync[F].delay(log.info(msg)) + + override def warn(msg: => String): F[Unit] = + Sync[F].delay(log.warn(msg)) + + override def error(msg: => String): F[Unit] = + Sync[F].delay(log.error(msg)) + + override def error(ex: Throwable)(msg: => String): F[Unit] = + Sync[F].delay(log.error(ex)(msg)) + } + } +} diff --git a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala index df12d190..1d69da70 100644 --- a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala +++ b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala @@ -20,6 +20,7 @@ import doobie.util.log.Success import emil.doobie.EmilDoobieMeta import io.circe.Json import io.circe.{Decoder, Encoder} +import scodec.bits.ByteVector trait DoobieMeta extends EmilDoobieMeta { @@ -132,6 +133,15 @@ trait DoobieMeta extends EmilDoobieMeta { implicit val metaKey: Meta[Key] = Meta[String].timap(Key.unsafeFromString)(_.asString) + + implicit val metaMimeType: Meta[MimeType] = + Meta[String].timap(MimeType.unsafe)(_.asString) + + implicit val metaByteVectorHex: Meta[ByteVector] = + Meta[String].timap(s => ByteVector.fromValidHex(s))(_.toHex) + + implicit val metaByteSize: Meta[ByteSize] = + Meta[Long].timap(ByteSize.apply)(_.bytes) } object DoobieMeta extends DoobieMeta { diff --git a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala index f9f81a91..1fc6ca87 100644 --- a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala +++ b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala @@ -9,22 +9,18 @@ package docspell.store.impl import cats.effect.Async import cats.implicits._ -import docspell.common.Ident +import docspell.store.file.FileStore import docspell.store.migrate.FlywayMigrate import docspell.store.{AddResult, JdbcConfig, Store} -import bitpeace.{Bitpeace, BitpeaceConfig, TikaMimetypeDetect} import doobie._ import doobie.implicits._ -final class StoreImpl[F[_]: Async](jdbc: JdbcConfig, xa: Transactor[F]) extends Store[F] { - val bitpeaceCfg = - BitpeaceConfig( - "filemeta", - "filechunk", - TikaMimetypeDetect, - Ident.randomId[F].map(_.id) - ) +final class StoreImpl[F[_]: Async]( + val fileStore: FileStore[F], + jdbc: JdbcConfig, + xa: Transactor[F] +) extends Store[F] { def migrate: F[Int] = FlywayMigrate.run[F](jdbc).map(_.migrationsExecuted) @@ -35,9 +31,6 @@ final class StoreImpl[F[_]: Async](jdbc: JdbcConfig, xa: Transactor[F]) extends def transact[A](prg: fs2.Stream[doobie.ConnectionIO, A]): fs2.Stream[F, A] = prg.transact(xa) - def bitpeace: Bitpeace[F] = - Bitpeace(bitpeaceCfg, xa) - def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] = for { save <- transact(insert).attempt diff --git a/modules/store/src/main/scala/docspell/store/queries/ItemData.scala b/modules/store/src/main/scala/docspell/store/queries/ItemData.scala index e1839ff5..8ac23b03 100644 --- a/modules/store/src/main/scala/docspell/store/queries/ItemData.scala +++ b/modules/store/src/main/scala/docspell/store/queries/ItemData.scala @@ -9,8 +9,6 @@ package docspell.store.queries import docspell.common._ import docspell.store.records._ -import bitpeace.FileMeta - case class ItemData( item: RItem, corrOrg: Option[ROrganization], @@ -20,9 +18,9 @@ case class ItemData( inReplyTo: Option[IdRef], folder: Option[IdRef], tags: Vector[RTag], - attachments: Vector[(RAttachment, FileMeta)], - sources: Vector[(RAttachmentSource, FileMeta)], - archives: Vector[(RAttachmentArchive, FileMeta)], + attachments: Vector[(RAttachment, RFileMeta)], + sources: Vector[(RAttachmentSource, RFileMeta)], + archives: Vector[(RAttachmentArchive, RFileMeta)], customFields: Vector[ItemFieldValue] ) { diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index f99c88f3..008da1bb 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -38,10 +38,10 @@ object QAttachment { Stream .evalSeq(store.transact(findPreview)) - .map(_.fileId.id) + .map(_.fileId) .evalTap(_ => store.transact(RAttachmentPreview.delete(attachId))) - .flatMap(store.bitpeace.delete) - .map(flag => if (flag) 1 else 0) + .evalMap(store.fileStore.delete) + .map(_ => 1) .compile .foldMonoid } @@ -68,9 +68,8 @@ object QAttachment { f <- Stream .emits(files._1) - .map(_.id) - .flatMap(store.bitpeace.delete) - .map(flag => if (flag) 1 else 0) + .evalMap(store.fileStore.delete) + .map(_ => 1) .compile .foldMonoid } yield n + k + f @@ -91,9 +90,9 @@ object QAttachment { ) f <- Stream - .emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq ++ p.map(_.fileId.id).toSeq)) - .flatMap(store.bitpeace.delete) - .map(flag => if (flag) 1 else 0) + .emits(ra.fileId +: (s.map(_.fileId).toSeq ++ p.map(_.fileId).toSeq)) + .evalMap(store.fileStore.delete) + .map(_ => 1) .compile .foldMonoid } yield n + f @@ -104,8 +103,8 @@ object QAttachment { n <- OptionT.liftF(store.transact(RAttachmentArchive.deleteAll(aa.fileId))) _ <- OptionT.liftF( Stream - .emit(aa.fileId.id) - .flatMap(store.bitpeace.delete) + .emit(aa.fileId) + .evalMap(store.fileStore.delete) .compile .drain ) diff --git a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala index e572050b..91ed43d6 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala @@ -99,16 +99,16 @@ object QCollective { inner join item i on a.itemid = i.itemid where i.cid = $coll) select a.fid,m.length from attachs a - inner join filemeta m on m.id = a.fid + inner join filemeta m on m.file_id = a.fid union distinct select a.file_id,m.length from attachment_source a - inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs) + inner join filemeta m on m.file_id = a.file_id where a.id in (select aid from attachs) union distinct select p.file_id,m.length from attachment_preview p - inner join filemeta m on m.id = p.file_id where p.id in (select aid from attachs) + inner join filemeta m on m.file_id = p.file_id where p.id in (select aid from attachs) union distinct select a.file_id,m.length from attachment_archive a - inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs) + inner join filemeta m on m.file_id = a.file_id where a.id in (select aid from attachs) ) as t""".query[Option[Long]].unique for { diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index 7dd54cce..7c9bb408 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -496,7 +496,7 @@ object QItem { where( i.cid === collective && i.state.in(ItemState.validStates) && - Condition.Or(fms.map(m => m.checksum === checksum)) &&? + Condition.Or(fms.map(m => m.checksum ==== checksum)) &&? Nel .fromList(excludeFileMeta.toList) .map(excl => Condition.And(fms.map(m => m.id.isNull || m.id.notIn(excl)))) diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala index 9c4abbb7..b864a537 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala @@ -14,7 +14,6 @@ import docspell.common._ import docspell.store.qb.DSL._ import docspell.store.qb._ -import bitpeace.FileMeta import doobie._ import doobie.implicits._ @@ -113,9 +112,7 @@ object RAttachment { def findById(attachId: Ident): ConnectionIO[Option[RAttachment]] = run(select(T.all), from(T), T.id === attachId).query[RAttachment].option - def findMeta(attachId: Ident): ConnectionIO[Option[FileMeta]] = { - import bitpeace.sql._ - + def findMeta(attachId: Ident): ConnectionIO[Option[RFileMeta]] = { val m = RFileMeta.as("m") val a = RAttachment.as("a") Select( @@ -123,7 +120,7 @@ object RAttachment { from(a) .innerJoin(m, a.fileId === m.id), a.id === attachId - ).build.query[FileMeta].option + ).build.query[RFileMeta].option } def updateName( @@ -206,9 +203,7 @@ object RAttachment { def findByItemAndCollectiveWithMeta( id: Ident, coll: Ident - ): ConnectionIO[Vector[(RAttachment, FileMeta)]] = { - import bitpeace.sql._ - + ): ConnectionIO[Vector[(RAttachment, RFileMeta)]] = { val a = RAttachment.as("a") val m = RFileMeta.as("m") val i = RItem.as("i") @@ -218,12 +213,10 @@ object RAttachment { .innerJoin(m, a.fileId === m.id) .innerJoin(i, a.itemId === i.id), a.itemId === id && i.cid === coll - ).build.query[(RAttachment, FileMeta)].to[Vector] + ).build.query[(RAttachment, RFileMeta)].to[Vector] } - def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachment, FileMeta)]] = { - import bitpeace.sql._ - + def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachment, RFileMeta)]] = { val a = RAttachment.as("a") val m = RFileMeta.as("m") Select( @@ -231,7 +224,7 @@ object RAttachment { from(a) .innerJoin(m, a.fileId === m.id), a.itemId === id - ).orderBy(a.position.asc).build.query[(RAttachment, FileMeta)].to[Vector] + ).orderBy(a.position.asc).build.query[(RAttachment, RFileMeta)].to[Vector] } /** Deletes the attachment and its related source and meta records. diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala index f8f1dfb8..e8f71479 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala @@ -13,7 +13,6 @@ import docspell.store.qb.DSL._ import docspell.store.qb.TableDef import docspell.store.qb._ -import bitpeace.FileMeta import doobie._ import doobie.implicits._ @@ -98,9 +97,7 @@ object RAttachmentArchive { def findByItemWithMeta( id: Ident - ): ConnectionIO[Vector[(RAttachmentArchive, FileMeta)]] = { - import bitpeace.sql._ - + ): ConnectionIO[Vector[(RAttachmentArchive, RFileMeta)]] = { val a = RAttachmentArchive.as("a") val b = RAttachment.as("b") val m = RFileMeta.as("m") @@ -110,7 +107,7 @@ object RAttachmentArchive { .innerJoin(m, a.fileId === m.id) .innerJoin(b, a.id === b.id), b.itemId === id - ).orderBy(b.position.asc).build.query[(RAttachmentArchive, FileMeta)].to[Vector] + ).orderBy(b.position.asc).build.query[(RAttachmentArchive, RFileMeta)].to[Vector] } /** If the given attachment id has an associated archive, this returns the number of all diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala index a4b8015c..e850014b 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala @@ -12,7 +12,6 @@ import docspell.common._ import docspell.store.qb.DSL._ import docspell.store.qb._ -import bitpeace.FileMeta import doobie._ import doobie.implicits._ @@ -101,9 +100,7 @@ object RAttachmentPreview { def findByItemWithMeta( id: Ident - ): ConnectionIO[Vector[(RAttachmentPreview, FileMeta)]] = { - import bitpeace.sql._ - + ): ConnectionIO[Vector[(RAttachmentPreview, RFileMeta)]] = { val a = RAttachmentPreview.as("a") val b = RAttachment.as("b") val m = RFileMeta.as("m") @@ -114,6 +111,6 @@ object RAttachmentPreview { .innerJoin(m, a.fileId === m.id) .innerJoin(b, b.id === a.id), b.itemId === id - ).orderBy(b.position.asc).build.query[(RAttachmentPreview, FileMeta)].to[Vector] + ).orderBy(b.position.asc).build.query[(RAttachmentPreview, RFileMeta)].to[Vector] } } diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala index 56ee733f..aa76b61e 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala @@ -12,7 +12,6 @@ import docspell.common._ import docspell.store.qb.DSL._ import docspell.store.qb._ -import bitpeace.FileMeta import doobie._ import doobie.implicits._ @@ -97,9 +96,7 @@ object RAttachmentSource { def findByItemWithMeta( id: Ident - ): ConnectionIO[Vector[(RAttachmentSource, FileMeta)]] = { - import bitpeace.sql._ - + ): ConnectionIO[Vector[(RAttachmentSource, RFileMeta)]] = { val a = RAttachmentSource.as("a") val b = RAttachment.as("b") val m = RFileMeta.as("m") @@ -110,7 +107,7 @@ object RAttachmentSource { .innerJoin(m, a.fileId === m.id) .innerJoin(b, b.id === a.id), b.itemId === id - ).orderBy(b.position.asc).build.query[(RAttachmentSource, FileMeta)].to[Vector] + ).orderBy(b.position.asc).build.query[(RAttachmentSource, RFileMeta)].to[Vector] } } diff --git a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala index 052e29a1..6b142510 100644 --- a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala +++ b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala @@ -6,35 +6,37 @@ package docspell.store.records -import java.time.Instant - import cats.data.NonEmptyList import cats.implicits._ import docspell.common._ import docspell.store.qb.DSL._ import docspell.store.qb._ -import docspell.store.syntax.MimeTypes._ -import bitpeace.FileMeta -import bitpeace.Mimetype import doobie._ import doobie.implicits._ +import scodec.bits.ByteVector + +final case class RFileMeta( + id: Ident, + created: Timestamp, + mimetype: MimeType, + length: ByteSize, + checksum: ByteVector +) object RFileMeta { final case class Table(alias: Option[String]) extends TableDef { val tableName = "filemeta" - val id = Column[Ident]("id", this) - val timestamp = Column[Instant]("timestamp", this) - val mimetype = Column[Mimetype]("mimetype", this) - val length = Column[Long]("length", this) - val checksum = Column[String]("checksum", this) - val chunks = Column[Int]("chunks", this) - val chunksize = Column[Int]("chunksize", this) + val id = Column[Ident]("file_id", this) + val timestamp = Column[Timestamp]("created", this) + val mimetype = Column[MimeType]("mimetype", this) + val length = Column[ByteSize]("length", this) + val checksum = Column[ByteVector]("checksum", this) val all = NonEmptyList - .of[Column[_]](id, timestamp, mimetype, length, checksum, chunks, chunksize) + .of[Column[_]](id, timestamp, mimetype, length, checksum) } @@ -42,29 +44,25 @@ object RFileMeta { def as(alias: String): Table = Table(Some(alias)) - def findById(fid: Ident): ConnectionIO[Option[FileMeta]] = { - import bitpeace.sql._ + def insert(r: RFileMeta): ConnectionIO[Int] = + DML.insert(T, T.all, fr"${r.id},${r.created},${r.mimetype},${r.length},${r.checksum}") - run(select(T.all), from(T), T.id === fid).query[FileMeta].option - } - - def findByIds(ids: List[Ident]): ConnectionIO[Vector[FileMeta]] = { - import bitpeace.sql._ + def findById(fid: Ident): ConnectionIO[Option[RFileMeta]] = + run(select(T.all), from(T), T.id === fid).query[RFileMeta].option + def findByIds(ids: List[Ident]): ConnectionIO[Vector[RFileMeta]] = NonEmptyList.fromList(ids) match { case Some(nel) => - run(select(T.all), from(T), T.id.in(nel)).query[FileMeta].to[Vector] + run(select(T.all), from(T), T.id.in(nel)).query[RFileMeta].to[Vector] case None => - Vector.empty[FileMeta].pure[ConnectionIO] + Vector.empty[RFileMeta].pure[ConnectionIO] } - } - - def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = { - import bitpeace.sql._ + def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = run(select(T.mimetype), from(T), T.id === fid) - .query[Mimetype] + .query[MimeType] .option - .map(_.map(_.toLocal)) - } + + def delete(id: Ident): ConnectionIO[Int] = + DML.delete(T, T.id === id) } diff --git a/modules/store/src/main/scala/docspell/store/syntax/MimeTypes.scala b/modules/store/src/main/scala/docspell/store/syntax/MimeTypes.scala index 8b5440bb..40efd836 100644 --- a/modules/store/src/main/scala/docspell/store/syntax/MimeTypes.scala +++ b/modules/store/src/main/scala/docspell/store/syntax/MimeTypes.scala @@ -8,16 +8,8 @@ package docspell.store.syntax import docspell.common._ -import bitpeace.Mimetype - object MimeTypes { - implicit final class BitpeaceMimeTypeOps(bmt: Mimetype) { - - def toLocal: MimeType = - MimeType(bmt.primary, bmt.sub, bmt.params) - } - implicit final class EmilMimeTypeOps(emt: emil.MimeType) { def toLocal: MimeType = MimeType(emt.primary, emt.sub, emt.params) @@ -26,8 +18,5 @@ object MimeTypes { implicit final class DocspellMimeTypeOps(mt: MimeType) { def toEmil: emil.MimeType = emil.MimeType(mt.primary, mt.sub, mt.params) - - def toBitpeace: Mimetype = - Mimetype(mt.primary, mt.sub, mt.params) } } diff --git a/modules/store/src/test/scala/docspell/store/StoreFixture.scala b/modules/store/src/test/scala/docspell/store/StoreFixture.scala index 3388dcc1..37ed4f0d 100644 --- a/modules/store/src/test/scala/docspell/store/StoreFixture.scala +++ b/modules/store/src/test/scala/docspell/store/StoreFixture.scala @@ -6,10 +6,14 @@ package docspell.store +import javax.sql.DataSource + import cats.effect._ import docspell.common.LenientUri +import docspell.store.file.FileStore import docspell.store.impl.StoreImpl +import docspell.store.migrate.FlywayMigrate import doobie._ import munit._ @@ -20,18 +24,17 @@ trait StoreFixture extends CatsEffectFunFixtures { self: CatsEffectSuite => val xa = ResourceFixture { val cfg = StoreFixture.memoryDB("test") for { - xa <- StoreFixture.makeXA(cfg) - store = new StoreImpl[IO](cfg, xa) - _ <- Resource.eval(store.migrate) + ds <- StoreFixture.dataSource(cfg) + xa <- StoreFixture.makeXA(ds) + _ <- Resource.eval(FlywayMigrate.run[IO](cfg)) } yield xa } val store = ResourceFixture { val cfg = StoreFixture.memoryDB("test") for { - xa <- StoreFixture.makeXA(cfg) - store = new StoreImpl[IO](cfg, xa) - _ <- Resource.eval(store.migrate) + store <- StoreFixture.store(cfg) + _ <- Resource.eval(store.migrate) } yield store } } @@ -47,31 +50,24 @@ object StoreFixture { "" ) - def globalXA(jdbc: JdbcConfig): Transactor[IO] = - Transactor.fromDriverManager( - "org.h2.Driver", - jdbc.url.asString, - jdbc.user, - jdbc.password - ) - - def makeXA(jdbc: JdbcConfig): Resource[IO, Transactor[IO]] = { + def dataSource(jdbc: JdbcConfig): Resource[IO, JdbcConnectionPool] = { def jdbcConnPool = JdbcConnectionPool.create(jdbc.url.asString, jdbc.user, jdbc.password) - val makePool = Resource.make(IO(jdbcConnPool))(cp => IO(cp.dispose())) - - for { - ec <- ExecutionContexts.cachedThreadPool[IO] - pool <- makePool - xa = Transactor.fromDataSource[IO].apply(pool, ec) - } yield xa + Resource.make(IO(jdbcConnPool))(cp => IO(cp.dispose())) } - def store(jdbc: JdbcConfig): Resource[IO, Store[IO]] = + def makeXA(ds: DataSource): Resource[IO, Transactor[IO]] = for { - xa <- makeXA(jdbc) - store = new StoreImpl[IO](jdbc, xa) + ec <- ExecutionContexts.cachedThreadPool[IO] + xa = Transactor.fromDataSource[IO](ds, ec) + } yield xa + + def store(jdbc: JdbcConfig): Resource[IO, StoreImpl[IO]] = + for { + ds <- dataSource(jdbc) + xa <- makeXA(ds) + store = new StoreImpl[IO](FileStore[IO](xa, ds, 64 * 1024), jdbc, xa) _ <- Resource.eval(store.migrate) } yield store } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 5aca0ce8..1c3598ec 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -7,7 +7,7 @@ object Dependencies { val BcryptVersion = "0.4" val BetterMonadicForVersion = "0.3.1" - val BitpeaceVersion = "0.9.0-M3" + val BinnyVersion = "0.1.0" val CalevVersion = "0.6.0" val CatsParseVersion = "0.3.4" val CirceVersion = "0.14.1" @@ -273,8 +273,10 @@ object Dependencies { "org.tpolecat" %% "doobie-hikari" % DoobieVersion ) - val bitpeace = Seq( - "com.github.eikek" %% "bitpeace-core" % BitpeaceVersion + val binny = Seq( + "com.github.eikek" %% "binny-core" % BinnyVersion, + "com.github.eikek" %% "binny-jdbc" % BinnyVersion, + "com.github.eikek" %% "binny-tika-detect" % BinnyVersion ) // https://github.com/flyway/flyway