From 553b1fa249eef304dd7d96b2fd303d67fb43d76f Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 13 Feb 2022 12:08:01 +0100 Subject: [PATCH 1/2] Add a file-repository for better organizing files Docspell now must use a new api for accessing files. Issue: #1379 --- .../docspell/backend/ops/OItemSearch.scala | 36 +++++--- .../scala/docspell/backend/ops/OMail.scala | 2 +- .../scala/docspell/backend/ops/OUpload.scala | 14 ++- .../scala/docspell/common/FileCategory.scala | 46 ++++++++++ .../main/scala/docspell/common/FileKey.scala | 21 +++++ .../main/scala/docspell/common/Ident.scala | 5 +- .../docspell/common/ProcessItemArgs.scala | 2 +- .../scala/docspell/joex/learn/Classify.scala | 2 +- .../joex/learn/LearnClassifierTask.scala | 2 +- .../joex/learn/StoreClassifierModel.scala | 9 +- .../docspell/joex/pdfconv/PdfConvTask.scala | 10 +- .../joex/process/AttachmentPageCount.scala | 2 +- .../joex/process/AttachmentPreview.scala | 11 ++- .../docspell/joex/process/ConvertPdf.scala | 27 ++++-- .../docspell/joex/process/CreateItem.scala | 9 +- .../joex/process/DuplicateCheck.scala | 2 +- .../joex/process/ExtractArchive.scala | 26 +++--- .../docspell/joex/process/ItemData.scala | 2 +- .../docspell/joex/process/ItemHandler.scala | 2 +- .../joex/process/TextExtraction.scala | 10 +- .../restserver/http4s/BinaryUtil.scala | 4 +- .../src/main/scala/docspell/store/Store.scala | 8 +- .../docspell/store/file/AttributeStore.scala | 12 ++- .../docspell/store/file/BinnyUtils.scala | 59 ++++++++++++ .../docspell/store/file/FileMetadata.scala | 19 ++++ .../docspell/store/file/FileRepository.scala | 50 ++++++++++ .../store/file/FileRepositoryImpl.scala | 60 ++++++++++++ .../scala/docspell/store/file/FileStore.scala | 91 ------------------- .../docspell/store/impl/DoobieMeta.scala | 46 ++++++---- .../scala/docspell/store/impl/StoreImpl.scala | 8 +- .../docspell/store/queries/QAttachment.scala | 8 +- .../scala/docspell/store/queries/QItem.scala | 12 +-- .../docspell/store/records/RAttachment.scala | 14 +-- .../store/records/RAttachmentArchive.scala | 8 +- .../store/records/RAttachmentPreview.scala | 6 +- .../store/records/RAttachmentSource.scala | 8 +- .../store/records/RClassifierModel.scala | 8 +- .../docspell/store/records/RFileMeta.scala | 14 +-- .../scala/docspell/store/StoreFixture.scala | 5 +- project/Dependencies.scala | 3 +- 40 files changed, 451 insertions(+), 232 deletions(-) create mode 100644 modules/common/src/main/scala/docspell/common/FileCategory.scala create mode 100644 modules/common/src/main/scala/docspell/common/FileKey.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/FileMetadata.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/FileRepository.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala delete mode 100644 modules/store/src/main/scala/docspell/store/file/FileStore.scala diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala index 56e417e5..6bf03816 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala @@ -14,6 +14,7 @@ import fs2.Stream import docspell.backend.ops.OItemSearch._ import docspell.common._ import docspell.store._ +import docspell.store.file.FileMetadata import docspell.store.queries.{QAttachment, 
QItem} import docspell.store.records._ @@ -89,18 +90,21 @@ object OItemSearch { trait BinaryData[F[_]] { def data: Stream[F, Byte] def name: Option[String] - def meta: RFileMeta - def fileId: Ident + def meta: FileMetadata + def fileId: FileKey } - case class AttachmentData[F[_]](ra: RAttachment, meta: RFileMeta, data: Stream[F, Byte]) - extends BinaryData[F] { + case class AttachmentData[F[_]]( + ra: RAttachment, + meta: FileMetadata, + data: Stream[F, Byte] + ) extends BinaryData[F] { val name = ra.name val fileId = ra.fileId } case class AttachmentSourceData[F[_]]( rs: RAttachmentSource, - meta: RFileMeta, + meta: FileMetadata, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -109,7 +113,7 @@ object OItemSearch { case class AttachmentPreviewData[F[_]]( rs: RAttachmentPreview, - meta: RFileMeta, + meta: FileMetadata, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -118,7 +122,7 @@ object OItemSearch { case class AttachmentArchiveData[F[_]]( rs: RAttachmentArchive, - meta: RFileMeta, + meta: FileMetadata, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -188,7 +192,7 @@ object OItemSearch { AttachmentData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -208,7 +212,7 @@ object OItemSearch { AttachmentSourceData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -228,7 +232,7 @@ object OItemSearch { AttachmentPreviewData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -248,7 +252,7 @@ object OItemSearch { AttachmentPreviewData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -268,7 +272,7 @@ object OItemSearch { AttachmentArchiveData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -276,9 +280,11 @@ object OItemSearch { (None: Option[AttachmentArchiveData[F]]).pure[F] } - private def makeBinaryData[A](fileId: Ident)(f: RFileMeta => A): F[Option[A]] = - store.fileStore - .findMeta(fileId) + private def makeBinaryData[A](fileId: FileKey)(f: FileMetadata => A): F[Option[A]] = + OptionT( + store.fileRepo + .findMeta(fileId) + ) .map(fm => f(fm)) .value diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala b/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala index 368477d0..ca3e0fc6 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala @@ -249,7 +249,7 @@ object OMail { } yield { val addAttach = m.attach.filter(ras).map { a => Attach[F]( - store.fileStore.getBytes(a._2.id) + store.fileRepo.getBytes(a._2.id) ).withFilename(a._1.name) .withLength(a._2.length.bytes) .withMimeType(a._2.mimetype.toEmil) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index ddc64bb9..ec959f6c 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -126,7 +126,7 @@ object OUpload { ): F[OUpload.UploadResult] = (for { _ <- checkExistingItem(itemId, account.collective) - files <- right(data.files.traverse(saveFile).map(_.flatten)) + files <- right(data.files.traverse(saveFile(account)).map(_.flatten)) _ <- checkFileList(files) lang <- data.meta.language match { case Some(lang) => right(lang.pure[F]) @@ -200,10 +200,18 @@ object OUpload { } yield UploadResult.Success /** 
Saves the file into the database. */ - private def saveFile(file: File[F]): F[Option[ProcessItemArgs.File]] = + private def saveFile( + accountId: AccountId + )(file: File[F]): F[Option[ProcessItemArgs.File]] = logger.finfo(s"Receiving file $file") *> file.data - .through(store.fileStore.save(MimeTypeHint(file.name, None))) + .through( + store.fileRepo.save( + accountId.collective, + FileCategory.AttachmentSource, + MimeTypeHint(file.name, None) + ) + ) .compile .lastOrError .attempt diff --git a/modules/common/src/main/scala/docspell/common/FileCategory.scala b/modules/common/src/main/scala/docspell/common/FileCategory.scala new file mode 100644 index 00000000..52c29b26 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileCategory.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import cats.data.NonEmptyList + +import docspell.common + +import io.circe.{Decoder, Encoder} + +/** This is used to have a rough idea for what a file is used in the system. It is part of + * the file-key to identify a file, backends could ignore it, since the file-id (the last + * part of the file-key) should be globally unique anyways. + */ +sealed trait FileCategory { self: Product => + final def id: Ident = + Ident.unsafe(self.productPrefix.toLowerCase) + + def toFileKey(collective: Ident, fileId: Ident): FileKey = + common.FileKey(collective, this, fileId) +} + +object FileCategory { + // Impl note: Changing constants here requires a database migration! + + case object AttachmentSource extends FileCategory + case object AttachmentConvert extends FileCategory + case object PreviewImage extends FileCategory + case object Classifier extends FileCategory + + val all: NonEmptyList[FileCategory] = + NonEmptyList.of(AttachmentSource, AttachmentConvert, PreviewImage, Classifier) + + def fromString(str: String): Either[String, FileCategory] = + all.find(_.id.id == str).toRight(s"Unknown category: $str") + + implicit val jsonDecoder: Decoder[FileCategory] = + Decoder[String].emap(fromString) + + implicit val jsonEncoder: Encoder[FileCategory] = + Encoder[String].contramap(_.id.id) +} diff --git a/modules/common/src/main/scala/docspell/common/FileKey.scala b/modules/common/src/main/scala/docspell/common/FileKey.scala new file mode 100644 index 00000000..17dc009e --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileKey.scala @@ -0,0 +1,21 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} + +case class FileKey(collective: Ident, category: FileCategory, id: Ident) + +object FileKey { + + implicit val jsonDecoder: Decoder[FileKey] = + deriveDecoder[FileKey] + + implicit val jsonEncoder: Encoder[FileKey] = + deriveEncoder[FileKey] +} diff --git a/modules/common/src/main/scala/docspell/common/Ident.scala b/modules/common/src/main/scala/docspell/common/Ident.scala index 5251b16c..2f630e33 100644 --- a/modules/common/src/main/scala/docspell/common/Ident.scala +++ b/modules/common/src/main/scala/docspell/common/Ident.scala @@ -25,13 +25,15 @@ case class Ident(id: String) { !isEmpty def /(next: Ident): Ident = - new Ident(id + "." + next.id) + new Ident(id + Ident.concatChar + next.id) } object Ident { + private val concatChar = '.' 
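  // Illustration only (not part of this patch; names and values are hypothetical):
  // the new FileKey introduced above ties a file id to its collective and a
  // FileCategory. A minimal sketch of constructing one:
  //
  //   val key: FileKey =
  //     FileCategory.AttachmentSource.toFileKey(
  //       Ident.unsafe("acme"),   // collective
  //       Ident.unsafe("abc-123") // file id
  //     )
  //
  // Assuming Ident keeps its plain string codec, the derived circe codecs
  // would encode this roughly as:
  //   {"collective":"acme","category":"attachmentsource","id":"abc-123"}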
implicit val identEq: Eq[Ident] = Eq.by(_.id) + // Note, the slash *must not* be part of valid characters val chars: Set[Char] = (('A' to 'Z') ++ ('a' to 'z') ++ ('0' to '9') ++ "-_.@").toSet def randomUUID[F[_]: Sync]: F[Ident] = @@ -75,5 +77,4 @@ object Ident { implicit val order: Order[Ident] = Order.by(_.id) - } diff --git a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala index dab18e3f..046b2255 100644 --- a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala @@ -60,7 +60,7 @@ object ProcessItemArgs { implicit val jsonDecoder: Decoder[ProcessMeta] = deriveDecoder[ProcessMeta] } - case class File(name: Option[String], fileMetaId: Ident) + case class File(name: Option[String], fileMetaId: FileKey) object File { implicit val jsonEncoder: Encoder[File] = deriveEncoder[File] implicit val jsonDecoder: Decoder[File] = deriveDecoder[File] diff --git a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala index db33b251..e208c79d 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala @@ -31,7 +31,7 @@ object Classify { _ <- OptionT.liftF(logger.info(s"Guessing label for ${cname.name} …")) model <- OptionT(store.transact(RClassifierModel.findByName(coll, cname.name))) .flatTapNone(logger.debug("No classifier model found.")) - modelData = store.fileStore.getBytes(model.fileId) + modelData = store.fileRepo.getBytes(model.fileId) cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir => val modelFile = dir.resolve("model.ser.gz") modelData diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala index 317a155a..92fbf401 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala @@ -91,7 +91,7 @@ object LearnClassifierTask { n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id))) _ <- list .map(_.fileId) - .traverse(id => ctx.store.fileStore.delete(id)) + .traverse(id => ctx.store.fileRepo.delete(id)) _ <- ctx.logger.debug(s"Deleted $n model files.") } yield () diff --git a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala index ac04f60a..af614e8b 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala @@ -42,7 +42,12 @@ object StoreClassifierModel { _ <- logger.debug(s"Storing new trained model for: ${modelName.name}") fileData = Files[F].readAll(trainedModel.model) newFileId <- - fileData.through(store.fileStore.save(MimeTypeHint.none)).compile.lastOrError + fileData + .through( + store.fileRepo.save(collective, FileCategory.Classifier, MimeTypeHint.none) + ) + .compile + .lastOrError _ <- store.transact( RClassifierModel.updateFile(collective, modelName.name, newFileId) ) @@ -50,7 +55,7 @@ object StoreClassifierModel { _ <- oldFile match { case Some(fid) => logger.debug(s"Deleting old model file ${fid.id}") *> - store.fileStore.delete(fid) + store.fileRepo.delete(fid) case None => ().pure[F] } } yield () diff --git 
a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala index e3c43c7e..ca82615a 100644 --- a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala @@ -92,7 +92,7 @@ object PdfConvTask { ctx: Context[F, Args], in: RFileMeta ): F[Unit] = { - val fs = ctx.store.fileStore + val fs = ctx.store.fileRepo val data = fs.getBytes(in.id) val storeResult: ConversionResult.Handler[F, Unit] = @@ -141,11 +141,15 @@ object PdfConvTask { newFile: Stream[F, Byte] ): F[Unit] = { val mimeHint = MimeTypeHint.advertised(meta.mimetype) + val collective = meta.id.collective + val cat = FileCategory.AttachmentConvert for { fid <- - newFile.through(ctx.store.fileStore.save(mimeHint)).compile.lastOrError + newFile + .through(ctx.store.fileRepo.save(collective, cat, mimeHint)) + .compile + .lastOrError _ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid)) } yield () } - } diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala index d8c08aeb..cce9400d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala @@ -100,5 +100,5 @@ object AttachmentPageCount { .getOrElse(MimeType.octetStream) def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.fileStore.getBytes(ra.fileId) + ctx.store.fileRepo.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala index 60bb5da4..2a55775d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala @@ -59,7 +59,7 @@ object AttachmentPreview { preview.previewPNG(loadFile(ctx)(ra)).flatMap { case Some(out) => ctx.logger.debug("Preview generated, saving to database…") *> - createRecord(ctx, out, ra).map(_.some) + createRecord(ctx, ra.fileId.collective, out, ra).map(_.some) case None => ctx.logger .info(s"Preview could not be generated. 
Maybe the pdf has no pages?") *> @@ -73,6 +73,7 @@ object AttachmentPreview { private def createRecord[F[_]: Sync]( ctx: Context[F, _], + collective: Ident, png: Stream[F, Byte], ra: RAttachment ): F[RAttachmentPreview] = { @@ -82,7 +83,11 @@ object AttachmentPreview { for { fileId <- png .through( - ctx.store.fileStore.save(MimeTypeHint(name.map(_.fullName), Some("image/png"))) + ctx.store.fileRepo.save( + collective, + FileCategory.PreviewImage, + MimeTypeHint(name.map(_.fullName), Some("image/png")) + ) ) .compile .lastOrError @@ -99,5 +104,5 @@ object AttachmentPreview { .getOrElse(MimeType.octetStream) def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.fileStore.getBytes(ra.fileId) + ctx.store.fileRepo.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 0108ef98..73754127 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -32,11 +32,12 @@ import docspell.store.records._ * This step assumes an existing premature item, it traverses its attachments. */ object ConvertPdf { + type Args = ProcessItemArgs def apply[F[_]: Async]( cfg: ConvertConfig, item: ItemData - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = Task { ctx => def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = isConverted(ctx)(ra).flatMap { @@ -61,7 +62,7 @@ object ConvertPdf { } - def isConverted[F[_]](ctx: Context[F, ProcessItemArgs])( + def isConverted[F[_]](ctx: Context[F, Args])( ra: RAttachment ): F[Boolean] = ctx.store.transact(RAttachmentSource.isConverted(ra.id)) @@ -74,14 +75,14 @@ object ConvertPdf { def convertSafe[F[_]: Async]( cfg: ConvertConfig, sanitizeHtml: SanitizeHtml, - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], item: ItemData )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = loadCollectivePasswords(ctx).flatMap(collPass => Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv => mime match { case mt => - val data = ctx.store.fileStore.getBytes(ra.fileId) + val data = ctx.store.fileRepo.getBytes(ra.fileId) val handler = conversionHandler[F](ctx, cfg, ra, item) ctx.logger .info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> @@ -93,14 +94,14 @@ object ConvertPdf { ) private def loadCollectivePasswords[F[_]: Async]( - ctx: Context[F, ProcessItemArgs] + ctx: Context[F, Args] ): F[List[Password]] = ctx.store .transact(RCollectivePassword.findAll(ctx.args.meta.collective)) .map(_.map(_.password).distinct) private def conversionHandler[F[_]: Sync]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], cfg: ConvertConfig, ra: RAttachment, item: ItemData @@ -146,7 +147,7 @@ object ConvertPdf { } private def storePDF[F[_]: Sync]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], cfg: ConvertConfig, ra: RAttachment, pdf: Stream[F, Byte] @@ -160,7 +161,13 @@ object ConvertPdf { .map(_.fullName) pdf - .through(ctx.store.fileStore.save(MimeTypeHint(hint.filename, hint.advertised))) + .through( + ctx.store.fileRepo.save( + ctx.args.meta.collective, + FileCategory.AttachmentConvert, + MimeTypeHint(hint.filename, hint.advertised) + ) + ) .compile .lastOrError .flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId)) @@ -170,7 +177,7 @@ object ConvertPdf { private def updateAttachment[F[_]: Sync]( 
ctx: Context[F, _], ra: RAttachment, - fmId: Ident, + fmId: FileKey, newName: Option[String] ): F[Unit] = for { @@ -188,7 +195,7 @@ object ConvertPdf { if (sameFile) ().pure[F] else ctx.logger.info("Deleting previous attachment file") *> - ctx.store.fileStore + ctx.store.fileRepo .delete(raPrev.fileId) .attempt .flatMap { diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 031815c2..092c8495 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -14,6 +14,7 @@ import fs2.Stream import docspell.common._ import docspell.joex.scheduler.{Context, Task} +import docspell.store.file.FileMetadata import docspell.store.queries.QItem import docspell.store.records._ @@ -28,7 +29,7 @@ object CreateItem { def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = Task { ctx => - def isValidFile(fm: RFileMeta) = + def isValidFile(fm: FileMetadata) = ctx.args.meta.validFileTypes.isEmpty || ctx.args.meta.validFileTypes.toSet .contains(fm.mimetype) @@ -39,9 +40,7 @@ object CreateItem { .flatMap { offset => Stream .emits(ctx.args.files) - .evalMap(f => - ctx.store.fileStore.findMeta(f.fileMetaId).value.map(fm => (f, fm)) - ) + .evalMap(f => ctx.store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm))) .collect { case (f, Some(fm)) if isValidFile(fm) => f } .zipWithIndex .evalMap { case (f, index) => @@ -198,6 +197,6 @@ object CreateItem { // TODO if no source is present, it must be saved! private def originFileTuple( t: (RAttachment, Option[RAttachmentSource]) - ): (Ident, Ident) = + ): (Ident, FileKey) = t._2.map(s => s.id -> s.fileId).getOrElse(t._1.id -> t._1.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala index 31b2dfac..41fcd0e3 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala @@ -51,7 +51,7 @@ object DuplicateCheck { val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name) if (fd.exists) ctx.logger - .info(s"Deleting duplicate file $fname!") *> ctx.store.fileStore + .info(s"Deleting duplicate file $fname!") *> ctx.store.fileRepo .delete(fd.fm.id) else ().pure[F] } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 81e2df4c..ef98b43d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -32,16 +32,17 @@ import emil.Mail * This step assumes an existing premature item, it traverses its attachments. 
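 * Extraction runs in passes: `multiPass` applies `singlePass` until no
 * further archive attachment is found, so archives nested inside other
 * archives are unpacked as well.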
*/ object ExtractArchive { + type Args = ProcessItemArgs def apply[F[_]: Async]( item: ItemData - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = multiPass(item, None).map(_._2) def multiPass[F[_]: Async]( item: ItemData, archive: Option[RAttachmentArchive] - ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = + ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = singlePass(item, archive).flatMap { t => if (t._1.isEmpty) Task.pure(t) else multiPass(t._2, t._1) @@ -50,7 +51,7 @@ object ExtractArchive { def singlePass[F[_]: Async]( item: ItemData, archive: Option[RAttachmentArchive] - ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = + ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = Task { ctx => def extract(ra: RAttachment, pos: Int): F[Extracted] = findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, pos, m)) @@ -88,7 +89,7 @@ object ExtractArchive { .getOrElse(MimeType.octetStream) def extractSafe[F[_]: Async]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] = mime match { @@ -120,7 +121,7 @@ object ExtractArchive { ) _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) _ <- ctx.store.transact(RAttachment.delete(ra.id)) - _ <- ctx.store.fileStore.delete(ra.fileId) + _ <- ctx.store.fileRepo.delete(ra.fileId) } yield extracted case None => for { @@ -132,10 +133,10 @@ object ExtractArchive { } def extractZip[F[_]: Async]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val zipData = ctx.store.fileStore.getBytes(ra.fileId) + val zipData = ctx.store.fileRepo.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> zipData @@ -148,10 +149,10 @@ object ExtractArchive { } def extractMail[F[_]: Async]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val email: Stream[F, Byte] = ctx.store.fileStore.getBytes(ra.fileId) + val email: Stream[F, Byte] = ctx.store.fileRepo.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) @@ -183,7 +184,7 @@ object ExtractArchive { .getOrElse(Extracted.empty) def handleEntry[F[_]: Sync]( - ctx: Context[F, _], + ctx: Context[F, Args], ra: RAttachment, pos: Int, archive: Option[RAttachmentArchive], @@ -193,7 +194,10 @@ object ExtractArchive { ): Stream[F, Extracted] = { val (entry, subPos) = tentry val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString) - val fileId = entry.data.through(ctx.store.fileStore.save(mimeHint)) + val fileId = entry.data.through( + ctx.store.fileRepo + .save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint) + ) Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. 
Storing as attachment.")) >> fileId.evalMap { fid => diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 7e8bd697..4d1c03b5 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -38,7 +38,7 @@ case class ItemData( attachments: Vector[RAttachment], metas: Vector[RAttachmentMeta], dateLabels: Vector[AttachmentDates], - originFile: Map[Ident, Ident], // maps RAttachment.id -> FileMeta.id + originFile: Map[Ident, FileKey], // maps RAttachment.id -> FileMeta.id givenMeta: MetaProposalList, // given meta data not associated to a specific attachment // a list of tags (names or ids) attached to the item if they exist tags: List[String], diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index 0afd3784..fa1774e0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -133,7 +133,7 @@ object ItemHandler { ctx.logger.info("Deleting input files …") *> Stream .emits(ctx.args.files.map(_.fileMetaId)) - .evalMap(id => ctx.store.fileStore.delete(id).attempt) + .evalMap(id => ctx.store.fileRepo.delete(id).attempt) .compile .drain ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index 2836638a..3f6be877 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -126,11 +126,11 @@ object TextExtraction { ctx: Context[F, _], extr: Extraction[F], lang: Language - )(fileId: Ident): F[ExtractResult] = { - val data = ctx.store.fileStore.getBytes(fileId) + )(fileId: FileKey): F[ExtractResult] = { + val data = ctx.store.fileRepo.getBytes(fileId) def findMime: F[MimeType] = - OptionT(ctx.store.transact(RFileMeta.findById(fileId))) + OptionT(ctx.store.fileRepo.findMeta(fileId)) .map(_.mimetype) .getOrElse(MimeType.octetStream) @@ -143,7 +143,7 @@ object TextExtraction { cfg: ExtractConfig, ra: RAttachment, lang: Language - )(fileIds: List[Ident]): F[Option[ExtractResult.Success]] = + )(fileIds: List[FileKey]): F[Option[ExtractResult.Success]] = fileIds match { case Nil => ctx.logger.error(s"Cannot extract text").map(_ => None) @@ -179,7 +179,7 @@ object TextExtraction { private def filesToExtract[F[_]: Sync](ctx: Context[F, _])( item: ItemData, ra: RAttachment - ): F[List[Ident]] = + ): F[List[FileKey]] = item.originFile.get(ra.id) match { case Some(sid) => ctx.store.transact(RFileMeta.findMime(sid)).map { diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala index 7ebdb9b3..d3412089 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala @@ -15,7 +15,7 @@ import docspell.backend.ops.OItemSearch.{AttachmentData, AttachmentPreviewData} import docspell.backend.ops._ import docspell.restapi.model.BasicResult import docspell.restserver.http4s.{QueryParam => QP} -import docspell.store.records.RFileMeta +import docspell.store.file.FileMetadata import org.http4s._ import 
org.http4s.circe.CirceEntityEncoder._ @@ -117,7 +117,7 @@ object BinaryUtil { } def matchETag[F[_]]( - fileData: Option[RFileMeta], + fileData: Option[FileMetadata], noneMatch: Option[NonEmptyList[EntityTag]] ): Boolean = (fileData, noneMatch) match { diff --git a/modules/store/src/main/scala/docspell/store/Store.scala b/modules/store/src/main/scala/docspell/store/Store.scala index 338b177a..3e54a7a2 100644 --- a/modules/store/src/main/scala/docspell/store/Store.scala +++ b/modules/store/src/main/scala/docspell/store/Store.scala @@ -12,7 +12,7 @@ import cats.effect._ import cats.~> import fs2._ -import docspell.store.file.FileStore +import docspell.store.file.FileRepository import docspell.store.impl.StoreImpl import com.zaxxer.hikari.HikariDataSource @@ -26,7 +26,7 @@ trait Store[F[_]] { def transact[A](prg: Stream[ConnectionIO, A]): Stream[F, A] - def fileStore: FileStore[F] + def fileRepo: FileRepository[F] def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] } @@ -50,8 +50,8 @@ object Store { ds.setDriverClassName(jdbc.driverClass) } xa = HikariTransactor(ds, connectEC) - fs = FileStore[F](xa, ds, chunkSize) - st = new StoreImpl[F](fs, jdbc, xa) + fr = FileRepository.genericJDBC(xa, ds, chunkSize) + st = new StoreImpl[F](fr, jdbc, xa) _ <- Resource.eval(st.migrate) } yield st } diff --git a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala index ab9c7a8c..4814a9dc 100644 --- a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala +++ b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala @@ -23,8 +23,9 @@ final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) for { now <- Timestamp.current[F] a <- attrs + fileKey <- makeFileKey(id) fm = RFileMeta( - Ident.unsafe(id.id), + fileKey, now, MimeType.parse(a.contentType.contentType).getOrElse(MimeType.octetStream), ByteSize(a.length), @@ -34,7 +35,7 @@ final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) } yield () def deleteAttr(id: BinaryId): F[Boolean] = - RFileMeta.delete(Ident.unsafe(id.id)).transact(xa).map(_ > 0) + makeFileKey(id).flatMap(fileKey => RFileMeta.delete(fileKey).transact(xa).map(_ > 0)) def findAttr(id: BinaryId): OptionT[F, BinaryAttributes] = findMeta(id).map(fm => @@ -46,5 +47,10 @@ final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) ) def findMeta(id: BinaryId): OptionT[F, RFileMeta] = - OptionT(RFileMeta.findById(Ident.unsafe(id.id)).transact(xa)) + OptionT(makeFileKey(id).flatMap(fileKey => RFileMeta.findById(fileKey).transact(xa))) + + private def makeFileKey(binaryId: BinaryId): F[FileKey] = + Sync[F] + .pure(BinnyUtils.binaryIdToFileKey(binaryId).left.map(new IllegalStateException(_))) + .rethrow } diff --git a/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala b/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala new file mode 100644 index 00000000..71d426d5 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala @@ -0,0 +1,59 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import docspell.common +import docspell.common._ +import docspell.files.TikaMimetype + +import binny._ +import scodec.bits.ByteVector + +private[store] object BinnyUtils { + + def fileKeyToBinaryId(fk: FileKey): BinaryId = + BinaryId(s"${fk.collective.id}/${fk.category.id.id}/${fk.id.id}") + + def binaryIdToFileKey(bid: BinaryId): Either[String, FileKey] = + bid.id.split('/').toList match { + case cId :: catId :: fId :: Nil => + for { + coll <- Ident.fromString(cId) + cat <- FileCategory.fromString(catId) + file <- Ident.fromString(fId) + } yield common.FileKey(coll, cat, file) + case _ => + Left(s"Invalid format for file-key: $bid") + } + + def unsafeBinaryIdToFileKey(bid: BinaryId): FileKey = + binaryIdToFileKey(bid).fold( + err => throw new IllegalStateException(err), + identity + ) + + object LoggerAdapter { + def apply[F[_]](log: Logger[F]): binny.util.Logger[F] = + new binny.util.Logger[F] { + override def trace(msg: => String): F[Unit] = log.trace(msg) + override def debug(msg: => String): F[Unit] = log.debug(msg) + override def info(msg: => String): F[Unit] = log.info(msg) + override def warn(msg: => String): F[Unit] = log.warn(msg) + override def error(msg: => String): F[Unit] = log.error(msg) + override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg) + } + } + + object TikaContentTypeDetect extends ContentTypeDetect { + override def detect(data: ByteVector, hint: Hint): SimpleContentType = + SimpleContentType( + TikaMimetype + .detect(data, MimeTypeHint(hint.filename, hint.advertisedType)) + .asString + ) + } +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileMetadata.scala b/modules/store/src/main/scala/docspell/store/file/FileMetadata.scala new file mode 100644 index 00000000..b96227c0 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileMetadata.scala @@ -0,0 +1,19 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import docspell.common._ + +import scodec.bits.ByteVector + +final case class FileMetadata( + id: FileKey, + created: Timestamp, + mimetype: MimeType, + length: ByteSize, + checksum: ByteVector +) diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepository.scala b/modules/store/src/main/scala/docspell/store/file/FileRepository.scala new file mode 100644 index 00000000..7eb73f12 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileRepository.scala @@ -0,0 +1,50 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import javax.sql.DataSource + +import cats.effect._ +import fs2._ + +import docspell.common._ + +import binny.BinaryId +import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} +import doobie.Transactor + +trait FileRepository[F[_]] { + def getBytes(key: FileKey): Stream[F, Byte] + + def findMeta(key: FileKey): F[Option[FileMetadata]] + + def delete(key: FileKey): F[Unit] + + def save( + collective: Ident, + category: FileCategory, + hint: MimeTypeHint + ): Pipe[F, Byte, FileKey] +} + +object FileRepository { + private[this] val logger = org.log4s.getLogger + + def genericJDBC[F[_]: Sync]( + xa: Transactor[F], + ds: DataSource, + chunkSize: Int + ): FileRepository[F] = { + val attrStore = new AttributeStore[F](xa) + val cfg = JdbcStoreConfig("filechunk", chunkSize, BinnyUtils.TikaContentTypeDetect) + val log = Logger.log4s[F](logger) + val binStore = GenericJdbcStore[F](ds, BinnyUtils.LoggerAdapter(log), cfg, attrStore) + val keyFun: FileKey => BinaryId = BinnyUtils.fileKeyToBinaryId + + new FileRepositoryImpl[F](binStore, attrStore, keyFun) + } +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala b/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala new file mode 100644 index 00000000..7f820244 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import cats.data.OptionT +import cats.effect.Sync +import cats.implicits._ +import fs2.{Pipe, Stream} + +import docspell.common._ + +import binny._ + +final class FileRepositoryImpl[F[_]: Sync]( + bs: BinaryStore[F], + attrStore: AttributeStore[F], + keyFun: FileKey => BinaryId +) extends FileRepository[F] { + + def find(key: FileKey): OptionT[F, Stream[F, Byte]] = + bs.findBinary(keyFun(key), ByteRange.All) + + def getBytes(key: FileKey): Stream[F, Byte] = + Stream.eval(find(key).value).unNoneTerminate.flatMap(identity) + + def findMeta(key: FileKey): F[Option[FileMetadata]] = + attrStore + .findMeta(keyFun(key)) + .map(rfm => + FileMetadata(rfm.id, rfm.created, rfm.mimetype, rfm.length, rfm.checksum) + ) + .value + + def delete(key: FileKey): F[Unit] = + bs.delete(keyFun(key)) + + def save( + collective: Ident, + category: FileCategory, + hint: MimeTypeHint + ): Pipe[F, Byte, FileKey] = { + val fhint = Hint(hint.filename, hint.advertised) + in => + Stream + .eval(randomKey(collective, category)) + .flatMap(fkey => + in.through(bs.insertWith(keyFun(fkey), fhint)) ++ Stream.emit(fkey) + ) + } + + def randomKey( + collective: Ident, + category: FileCategory + ): F[FileKey] = + BinaryId.random[F].map(bid => FileKey(collective, category, Ident.unsafe(bid.id))) +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileStore.scala b/modules/store/src/main/scala/docspell/store/file/FileStore.scala deleted file mode 100644 index 3afd4216..00000000 --- a/modules/store/src/main/scala/docspell/store/file/FileStore.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2020 Eike K. 
& Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.store.file - -import javax.sql.DataSource - -import cats.data.OptionT -import cats.effect._ -import fs2.{Pipe, Stream} - -import docspell.common._ -import docspell.files.TikaMimetype -import docspell.store.records.RFileMeta - -import binny._ -import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} -import doobie._ -import scodec.bits.ByteVector - -trait FileStore[F[_]] { - - def find(id: Ident): OptionT[F, Stream[F, Byte]] - - def getBytes(id: Ident): Stream[F, Byte] - - def findMeta(id: Ident): OptionT[F, RFileMeta] - - def delete(id: Ident): F[Unit] - - def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] -} - -object FileStore { - private[this] val logger = org.log4s.getLogger - - def apply[F[_]: Sync]( - xa: Transactor[F], - ds: DataSource, - chunkSize: Int - ): FileStore[F] = { - val attrStore = new AttributeStore[F](xa) - val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect) - val log = Logger.log4s[F](logger) - val binStore = GenericJdbcStore[F](ds, LoggerAdapter(log), cfg, attrStore) - new Impl[F](binStore, attrStore) - } - - final private class Impl[F[_]](bs: BinaryStore[F], attrStore: AttributeStore[F]) - extends FileStore[F] { - def find(id: Ident): OptionT[F, Stream[F, Byte]] = - bs.findBinary(BinaryId(id.id), ByteRange.All) - - def getBytes(id: Ident): Stream[F, Byte] = - Stream.eval(find(id).value).unNoneTerminate.flatMap(identity) - - def findMeta(id: Ident): OptionT[F, RFileMeta] = - attrStore.findMeta(BinaryId(id.id)) - - def delete(id: Ident): F[Unit] = - bs.delete(BinaryId(id.id)) - - def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] = - bs.insert(Hint(hint.filename, hint.advertised)) - .andThen(_.map(bid => Ident.unsafe(bid.id))) - } - - private object LoggerAdapter { - def apply[F[_]](log: Logger[F]): binny.util.Logger[F] = - new binny.util.Logger[F] { - override def trace(msg: => String): F[Unit] = log.trace(msg) - override def debug(msg: => String): F[Unit] = log.debug(msg) - override def info(msg: => String): F[Unit] = log.info(msg) - override def warn(msg: => String): F[Unit] = log.warn(msg) - override def error(msg: => String): F[Unit] = log.error(msg) - override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg) - } - } - - private object TikaContentTypeDetect extends ContentTypeDetect { - override def detect(data: ByteVector, hint: Hint): SimpleContentType = - SimpleContentType( - TikaMimetype - .detect(data, MimeTypeHint(hint.filename, hint.advertisedType)) - .asString - ) - } -} diff --git a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala index dee20f76..738a078b 100644 --- a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala +++ b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala @@ -14,8 +14,10 @@ import docspell.common.syntax.all._ import docspell.jsonminiq.JsonMiniQuery import docspell.notification.api.{ChannelType, EventType} import docspell.query.{ItemQuery, ItemQueryParser} +import docspell.store.file.BinnyUtils import docspell.totp.Key +import binny.BinaryId import com.github.eikek.calev.CalEvent import doobie._ import doobie.implicits.legacy.instant._ @@ -27,7 +29,7 @@ import scodec.bits.ByteVector trait DoobieMeta extends EmilDoobieMeta { - implicit val sqlLogging = LogHandler { + implicit val sqlLogging: LogHandler = LogHandler { case e @ Success(_, _, _, _) => DoobieMeta.logger.trace("SQL " + e) case e => @@ 
-39,58 +41,64 @@ trait DoobieMeta extends EmilDoobieMeta { e.apply(a).noSpaces ) + implicit val metaBinaryId: Meta[BinaryId] = + Meta[String].timap(BinaryId.apply)(_.id) + + implicit val metaFileKey: Meta[FileKey] = + Meta[BinaryId].timap(BinnyUtils.unsafeBinaryIdToFileKey)(BinnyUtils.fileKeyToBinaryId) + implicit val metaAccountSource: Meta[AccountSource] = - Meta[String].imap(AccountSource.unsafeFromString)(_.name) + Meta[String].timap(AccountSource.unsafeFromString)(_.name) implicit val metaDuration: Meta[Duration] = - Meta[Long].imap(Duration.millis)(_.millis) + Meta[Long].timap(Duration.millis)(_.millis) implicit val metaCollectiveState: Meta[CollectiveState] = - Meta[String].imap(CollectiveState.unsafe)(CollectiveState.asString) + Meta[String].timap(CollectiveState.unsafe)(CollectiveState.asString) implicit val metaUserState: Meta[UserState] = - Meta[String].imap(UserState.unsafe)(UserState.asString) + Meta[String].timap(UserState.unsafe)(UserState.asString) implicit val metaPassword: Meta[Password] = - Meta[String].imap(Password(_))(_.pass) + Meta[String].timap(Password(_))(_.pass) implicit val metaIdent: Meta[Ident] = - Meta[String].imap(Ident.unsafe)(_.id) + Meta[String].timap(Ident.unsafe)(_.id) implicit val metaContactKind: Meta[ContactKind] = - Meta[String].imap(ContactKind.unsafe)(_.asString) + Meta[String].timap(ContactKind.unsafe)(_.asString) implicit val metaTimestamp: Meta[Timestamp] = - Meta[Instant].imap(Timestamp(_))(_.value) + Meta[Instant].timap(Timestamp(_))(_.value) implicit val metaJobState: Meta[JobState] = - Meta[String].imap(JobState.unsafe)(_.name) + Meta[String].timap(JobState.unsafe)(_.name) implicit val metaDirection: Meta[Direction] = - Meta[Boolean].imap(flag => + Meta[Boolean].timap(flag => if (flag) Direction.Incoming: Direction else Direction.Outgoing: Direction )(d => Direction.isIncoming(d)) implicit val metaPriority: Meta[Priority] = - Meta[Int].imap(Priority.fromInt)(Priority.toInt) + Meta[Int].timap(Priority.fromInt)(Priority.toInt) implicit val metaLogLevel: Meta[LogLevel] = - Meta[String].imap(LogLevel.unsafeString)(_.name) + Meta[String].timap(LogLevel.unsafeString)(_.name) implicit val metaLenientUri: Meta[LenientUri] = - Meta[String].imap(LenientUri.unsafe)(_.asString) + Meta[String].timap(LenientUri.unsafe)(_.asString) implicit val metaNodeType: Meta[NodeType] = - Meta[String].imap(NodeType.unsafe)(_.name) + Meta[String].timap(NodeType.unsafe)(_.name) implicit val metaLocalDate: Meta[LocalDate] = - Meta[String].imap(str => LocalDate.parse(str))(_.format(DateTimeFormatter.ISO_DATE)) + Meta[String].timap(str => LocalDate.parse(str))(_.format(DateTimeFormatter.ISO_DATE)) implicit val metaItemState: Meta[ItemState] = - Meta[String].imap(ItemState.unsafe)(_.name) + Meta[String].timap(ItemState.unsafe)(_.name) implicit val metNerTag: Meta[NerTag] = - Meta[String].imap(NerTag.unsafe)(_.name) + Meta[String].timap(NerTag.unsafe)(_.name) implicit val metaNerLabel: Meta[NerLabel] = jsonMeta[NerLabel] @@ -108,7 +116,7 @@ trait DoobieMeta extends EmilDoobieMeta { jsonMeta[List[IdRef]] implicit val metaLanguage: Meta[Language] = - Meta[String].imap(Language.unsafe)(_.iso3) + Meta[String].timap(Language.unsafe)(_.iso3) implicit val metaCalEvent: Meta[CalEvent] = Meta[String].timap(CalEvent.unsafe)(_.asString) diff --git a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala index 50c856b1..54505649 100644 --- a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala +++ 
b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala @@ -11,7 +11,7 @@ import cats.effect.Async import cats.implicits._ import cats.~> -import docspell.store.file.FileStore +import docspell.store.file.FileRepository import docspell.store.migrate.FlywayMigrate import docspell.store.{AddResult, JdbcConfig, Store} @@ -19,7 +19,7 @@ import doobie._ import doobie.implicits._ final class StoreImpl[F[_]: Async]( - val fileStore: FileStore[F], + val fileRepo: FileRepository[F], jdbc: JdbcConfig, xa: Transactor[F] ) extends Store[F] { @@ -30,10 +30,10 @@ final class StoreImpl[F[_]: Async]( def migrate: F[Int] = FlywayMigrate.run[F](jdbc).map(_.migrationsExecuted) - def transact[A](prg: doobie.ConnectionIO[A]): F[A] = + def transact[A](prg: ConnectionIO[A]): F[A] = prg.transact(xa) - def transact[A](prg: fs2.Stream[doobie.ConnectionIO, A]): fs2.Stream[F, A] = + def transact[A](prg: fs2.Stream[ConnectionIO, A]): fs2.Stream[F, A] = prg.transact(xa) def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] = diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index 78abf6a3..7768a25c 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -40,7 +40,7 @@ object QAttachment { .evalSeq(store.transact(findPreview)) .map(_.fileId) .evalTap(_ => store.transact(RAttachmentPreview.delete(attachId))) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .map(_ => 1) .compile .foldMonoid @@ -68,7 +68,7 @@ object QAttachment { f <- Stream .emits(files._1) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .map(_ => 1) .compile .foldMonoid @@ -91,7 +91,7 @@ object QAttachment { f <- Stream .emits(ra.fileId +: (s.map(_.fileId).toSeq ++ p.map(_.fileId).toSeq)) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .map(_ => 1) .compile .foldMonoid @@ -104,7 +104,7 @@ object QAttachment { _ <- OptionT.liftF( Stream .emit(aa.fileId) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .compile .drain ) diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index 22e64a0d..3b5d0c19 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -15,7 +15,7 @@ import cats.implicits._ import fs2.Stream import docspell.common.syntax.all._ -import docspell.common.{IdRef, _} +import docspell.common.{FileKey, IdRef, _} import docspell.query.ItemQuery import docspell.store.Store import docspell.store.qb.DSL._ @@ -470,7 +470,7 @@ object QItem { } yield tn + rn + n + mn + cf + im private def findByFileIdsQuery( - fileMetaIds: Nel[Ident], + fileMetaIds: Nel[FileKey], states: Option[Nel[ItemState]] ): Select.SimpleSelect = { val i = RItem.as("i") @@ -490,7 +490,7 @@ object QItem { ).distinct } - def findOneByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Option[RItem]] = + def findOneByFileIds(fileMetaIds: Seq[FileKey]): ConnectionIO[Option[RItem]] = Nel.fromList(fileMetaIds.toList) match { case Some(nel) => findByFileIdsQuery(nel, None).limit(1).build.query[RItem].option @@ -499,7 +499,7 @@ object QItem { } def findByFileIds( - fileMetaIds: Seq[Ident], + fileMetaIds: Seq[FileKey], states: Nel[ItemState] ): ConnectionIO[Vector[RItem]] = 
Nel.fromList(fileMetaIds.toList) match { @@ -512,7 +512,7 @@ object QItem { def findByChecksum( checksum: String, collective: Ident, - excludeFileMeta: Set[Ident] + excludeFileMeta: Set[FileKey] ): ConnectionIO[Vector[RItem]] = { val qq = findByChecksumQuery(checksum, collective, excludeFileMeta).build logger.debug(s"FindByChecksum: $qq") @@ -522,7 +522,7 @@ object QItem { def findByChecksumQuery( checksum: String, collective: Ident, - excludeFileMeta: Set[Ident] + excludeFileMeta: Set[FileKey] ): Select = { val m1 = RFileMeta.as("m1") val m2 = RFileMeta.as("m2") diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala index b8695176..bf99a01b 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala @@ -10,7 +10,7 @@ import cats.data.NonEmptyList import cats.implicits._ import fs2.Stream -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -20,7 +20,7 @@ import doobie.implicits._ case class RAttachment( id: Ident, itemId: Ident, - fileId: Ident, + fileId: FileKey, position: Int, created: Timestamp, name: Option[String] @@ -32,7 +32,7 @@ object RAttachment { val id = Column[Ident]("attachid", this) val itemId = Column[Ident]("itemid", this) - val fileId = Column[Ident]("filemetaid", this) + val fileId = Column[FileKey]("filemetaid", this) val position = Column[Int]("position", this) val created = Column[Timestamp]("created", this) val name = Column[String]("name", this) @@ -47,7 +47,7 @@ object RAttachment { DML.insert( T, T.all, - fr"${v.id},${v.itemId},${v.fileId.id},${v.position},${v.created},${v.name}" + fr"${v.id},${v.itemId},${v.fileId},${v.position},${v.created},${v.name}" ) def decPositions(iId: Ident, lowerBound: Int, upperBound: Int): ConnectionIO[Int] = @@ -77,7 +77,7 @@ object RAttachment { def updateFileIdAndName( attachId: Ident, - fId: Ident, + fId: FileKey, fname: Option[String] ): ConnectionIO[Int] = DML.update( @@ -88,7 +88,7 @@ object RAttachment { def updateFileId( attachId: Ident, - fId: Ident + fId: FileKey ): ConnectionIO[Int] = DML.update( T, @@ -182,7 +182,7 @@ object RAttachment { def findByItemCollectiveSource( id: Ident, coll: Ident, - fileIds: NonEmptyList[Ident] + fileIds: NonEmptyList[FileKey] ): ConnectionIO[Vector[RAttachment]] = { val i = RItem.as("i") val a = RAttachment.as("a") diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala index d8e768fc..3913c135 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala @@ -8,7 +8,7 @@ package docspell.store.records import cats.data.NonEmptyList -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb.TableDef import docspell.store.qb._ @@ -21,7 +21,7 @@ import doobie.implicits._ */ case class RAttachmentArchive( id: Ident, // same as RAttachment.id - fileId: Ident, + fileId: FileKey, name: Option[String], messageId: Option[String], created: Timestamp @@ -32,7 +32,7 @@ object RAttachmentArchive { val tableName = "attachment_archive" val id = Column[Ident]("id", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) 
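    // Columns typed as FileKey are persisted as one string via metaFileKey in
    // DoobieMeta above: BinnyUtils.fileKeyToBinaryId renders, for example,
    // FileKey(acme, attachmentsource, abc-123) as "acme/attachmentsource/abc-123"
    // (values hypothetical). The V1.33.0 migration in the second commit below
    // rewrites existing file ids into exactly this layout.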
val name = Column[String]("filename", this) val messageId = Column[String]("message_id", this) val created = Column[Timestamp]("created", this) @@ -59,7 +59,7 @@ object RAttachmentArchive { def delete(attachId: Ident): ConnectionIO[Int] = DML.delete(T, T.id === attachId) - def deleteAll(fId: Ident): ConnectionIO[Int] = + def deleteAll(fId: FileKey): ConnectionIO[Int] = DML.delete(T, T.fileId === fId) def findByIdAndCollective( diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala index 8afddeee..3b9d23aa 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala @@ -8,7 +8,7 @@ package docspell.store.records import cats.data.NonEmptyList -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -20,7 +20,7 @@ import doobie.implicits._ */ case class RAttachmentPreview( id: Ident, // same as RAttachment.id - fileId: Ident, + fileId: FileKey, name: Option[String], created: Timestamp ) @@ -30,7 +30,7 @@ object RAttachmentPreview { val tableName = "attachment_preview" val id = Column[Ident]("id", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) val name = Column[String]("filename", this) val created = Column[Timestamp]("created", this) diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala index 5579daca..a2e3f949 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala @@ -8,7 +8,7 @@ package docspell.store.records import cats.data.NonEmptyList -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -20,7 +20,7 @@ import doobie.implicits._ */ case class RAttachmentSource( id: Ident, // same as RAttachment.id - fileId: Ident, + fileId: FileKey, name: Option[String], created: Timestamp ) @@ -30,7 +30,7 @@ object RAttachmentSource { val tableName = "attachment_source" val id = Column[Ident]("id", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) val name = Column[String]("filename", this) val created = Column[Timestamp]("created", this) @@ -50,7 +50,7 @@ object RAttachmentSource { def findById(attachId: Ident): ConnectionIO[Option[RAttachmentSource]] = run(select(T.all), from(T), T.id === attachId).query[RAttachmentSource].option - def isSameFile(attachId: Ident, file: Ident): ConnectionIO[Boolean] = + def isSameFile(attachId: Ident, file: FileKey): ConnectionIO[Boolean] = Select(count(T.id).s, from(T), T.id === attachId && T.fileId === file).build .query[Int] .unique diff --git a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala index ace5ccad..89fae4df 100644 --- a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala +++ b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala @@ -21,7 +21,7 @@ final case class RClassifierModel( id: Ident, cid: Ident, name: String, - fileId: Ident, + fileId: FileKey, created: Timestamp ) {} @@ -30,7 +30,7 @@ object RClassifierModel { def 
createNew[F[_]: Sync]( cid: Ident, name: String, - fileId: Ident + fileId: FileKey ): F[RClassifierModel] = for { id <- Ident.randomId[F] @@ -43,7 +43,7 @@ object RClassifierModel { val id = Column[Ident]("id", this) val cid = Column[Ident]("cid", this) val name = Column[String]("name", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) val created = Column[Timestamp]("created", this) val all = NonEmptyList.of[Column[_]](id, cid, name, fileId, created) @@ -61,7 +61,7 @@ object RClassifierModel { fr"${v.id},${v.cid},${v.name},${v.fileId},${v.created}" ) - def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] = + def updateFile(coll: Ident, name: String, fid: FileKey): ConnectionIO[Int] = for { now <- Timestamp.current[ConnectionIO] n <- DML.update( diff --git a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala index 02ace94b..8263e875 100644 --- a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala +++ b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala @@ -9,7 +9,7 @@ package docspell.store.records import cats.data.NonEmptyList import cats.implicits._ -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -18,7 +18,7 @@ import doobie.implicits._ import scodec.bits.ByteVector final case class RFileMeta( - id: Ident, + id: FileKey, created: Timestamp, mimetype: MimeType, length: ByteSize, @@ -29,7 +29,7 @@ object RFileMeta { final case class Table(alias: Option[String]) extends TableDef { val tableName = "filemeta" - val id = Column[Ident]("file_id", this) + val id = Column[FileKey]("file_id", this) val timestamp = Column[Timestamp]("created", this) val mimetype = Column[MimeType]("mimetype", this) val length = Column[ByteSize]("length", this) @@ -47,10 +47,10 @@ object RFileMeta { def insert(r: RFileMeta): ConnectionIO[Int] = DML.insert(T, T.all, fr"${r.id},${r.created},${r.mimetype},${r.length},${r.checksum}") - def findById(fid: Ident): ConnectionIO[Option[RFileMeta]] = + def findById(fid: FileKey): ConnectionIO[Option[RFileMeta]] = run(select(T.all), from(T), T.id === fid).query[RFileMeta].option - def findByIds(ids: List[Ident]): ConnectionIO[Vector[RFileMeta]] = + def findByIds(ids: List[FileKey]): ConnectionIO[Vector[RFileMeta]] = NonEmptyList.fromList(ids) match { case Some(nel) => run(select(T.all), from(T), T.id.in(nel)).query[RFileMeta].to[Vector] @@ -58,11 +58,11 @@ object RFileMeta { Vector.empty[RFileMeta].pure[ConnectionIO] } - def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = + def findMime(fid: FileKey): ConnectionIO[Option[MimeType]] = run(select(T.mimetype), from(T), T.id === fid) .query[MimeType] .option - def delete(id: Ident): ConnectionIO[Int] = + def delete(id: FileKey): ConnectionIO[Int] = DML.delete(T, T.id === id) } diff --git a/modules/store/src/test/scala/docspell/store/StoreFixture.scala b/modules/store/src/test/scala/docspell/store/StoreFixture.scala index 6f741230..91441701 100644 --- a/modules/store/src/test/scala/docspell/store/StoreFixture.scala +++ b/modules/store/src/test/scala/docspell/store/StoreFixture.scala @@ -11,7 +11,7 @@ import javax.sql.DataSource import cats.effect._ import docspell.common.LenientUri -import docspell.store.file.FileStore +import docspell.store.file.FileRepository import docspell.store.impl.StoreImpl import docspell.store.migrate.FlywayMigrate @@ -67,7 +67,8 @@ 
object StoreFixture { for { ds <- dataSource(jdbc) xa <- makeXA(ds) - store = new StoreImpl[IO](FileStore[IO](xa, ds, 64 * 1024), jdbc, xa) + fr = FileRepository.genericJDBC[IO](xa, ds, 64 * 1024) + store = new StoreImpl[IO](fr, jdbc, xa) _ <- Resource.eval(store.migrate) } yield store } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 865d33f6..f7810fa2 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -282,7 +282,8 @@ object Dependencies { val binny = Seq( "com.github.eikek" %% "binny-core" % BinnyVersion, - "com.github.eikek" %% "binny-jdbc" % BinnyVersion + "com.github.eikek" %% "binny-jdbc" % BinnyVersion, + "com.github.eikek" %% "binny-minio" % BinnyVersion ) // https://github.com/flyway/flyway From 55700992c2fd789f3a3e084afc29e33b8c2e2e99 Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 13 Feb 2022 19:55:02 +0100 Subject: [PATCH 2/2] Migrate file ids Issue: #1379 --- .../h2/V1.33.0__reorganize_file_ids.sql | 150 ++++++++++++++++++ .../mariadb/V1.33.0__reorganize_file_ids.sql | 148 +++++++++++++++++ .../V1.33.0__reorganize_file_ids.sql | 150 ++++++++++++++++++ 3 files changed, 448 insertions(+) create mode 100644 modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql create mode 100644 modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql create mode 100644 modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql diff --git a/modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql b/modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql new file mode 100644 index 00000000..5c6161b9 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql @@ -0,0 +1,150 @@ +drop table if exists file_migration_temp; +create table file_migration_temp ( + id bigserial primary key, + original_file varchar(254) not null unique, + cid varchar(254) not null, + category varchar(254) not null, + new_file varchar(254) not null unique +); + +-- Source files +insert into file_migration_temp (original_file, cid, category, new_file) + select + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Archive files +insert into file_migration_temp (original_file, cid, category, new_file) + select distinct + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_archive rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Converted files +insert into file_migration_temp (original_file, cid, category, new_file) + select + ra.filemetaid as original_file, + i.cid, + 'attachmentconvert' as category, + i.cid || '/attachmentconvert/' || ra.filemetaid as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid + where rs.file_id <> ra.filemetaid +; + +-- Preview image +insert into file_migration_temp (original_file, cid, category, new_file) + select + ap.file_id as original_file, + i.cid, + 'previewimage' as category, + i.cid || '/previewimage/' || ap.file_id as new_file + from attachment_preview ap + inner join attachment ra on ra.attachid = ap.id + inner join item i on 
i.itemid = ra.itemid + order by id +; + +-- classifier +insert into file_migration_temp (original_file, cid, category, new_file) + select + file_id as original_file, + cid, + 'classifier' as category, + cid || '/classifier/' || file_id as new_file + from classifier_model +; + + +-- save obsolete/orphaned files +drop table if exists obsolete_files; +create table obsolete_files( + file_id varchar(254) not null, + mimetype varchar(254) not null, + length bigint not null, + checksum varchar(254) not null, + created timestamp not null +); + +with + missing_ids as ( + select file_id from filemeta + except + select original_file as file_id from file_migration_temp) +insert into obsolete_files (file_id, mimetype, length, checksum, created) + select file_id, mimetype, length, checksum, created from filemeta + where file_id in (select file_id from missing_ids) +; + + +-- duplicate each filemeta with the new id +insert into filemeta (file_id, mimetype, length, checksum, created) + select mm.new_file, fm.mimetype, fm.length, fm.checksum, fm.created + from file_migration_temp mm + inner join filemeta fm on fm.file_id = mm.original_file +; + + +-- update each reference to the new id +update attachment_source + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_source.id is not null) +; + +update attachment + set filemetaid = (select new_file + from file_migration_temp + where original_file = filemetaid and attachment.attachid is not null) +; + +update attachment_archive + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_archive.id is not null) +; + +update attachment_preview + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_preview.id is not null) +; + +update classifier_model + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and classifier_model.id is not null) +; + +-- delete old filemeta and filechunk rows +delete from filemeta +where file_id in (select original_file from file_migration_temp); + +delete from filemeta +where file_id in (select file_id from obsolete_files); + +delete from filechunk +where file_id in (select file_id from obsolete_files); + +-- update chunks +update filechunk + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and filechunk.file_id is not null) +; + +-- drop temp table +drop table file_migration_temp; +drop table obsolete_files; diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql new file mode 100644 index 00000000..62ab8670 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql @@ -0,0 +1,148 @@ +drop table if exists file_migration_temp; +create table file_migration_temp ( + id bigint auto_increment primary key, + original_file varchar(254) not null unique, + cid varchar(254) not null, + category varchar(254) not null, + new_file varchar(254) not null unique +); + +-- Source files +insert into file_migration_temp (original_file, cid, category, new_file) + select + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + concat(i.cid, '/attachmentsource/', rs.file_id) as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Archive files 
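+-- Note: archive files are filed under the same 'attachmentsource' category
+-- as the source files extracted from them; 'select distinct' is needed
+-- because several attachments may reference the same archive file.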
+insert into file_migration_temp (original_file, cid, category, new_file) + select distinct + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + concat(i.cid, '/attachmentsource/', rs.file_id) as new_file + from attachment_archive rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Converted files +insert into file_migration_temp (original_file, cid, category, new_file) + select + ra.filemetaid as original_file, + i.cid, + 'attachmentconvert' as category, + concat(i.cid, '/attachmentconvert/', ra.filemetaid) as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid + where rs.file_id <> ra.filemetaid +; + +-- Preview image +insert into file_migration_temp (original_file, cid, category, new_file) + select + ap.file_id as original_file, + i.cid, + 'previewimage' as category, + concat(i.cid, '/previewimage/', ap.file_id) as new_file + from attachment_preview ap + inner join attachment ra on ra.attachid = ap.id + inner join item i on i.itemid = ra.itemid + order by id +; + +-- classifier +insert into file_migration_temp (original_file, cid, category, new_file) + select + file_id as original_file, + cid, + 'classifier' as category, + concat(cid, '/classifier/', file_id) as new_file + from classifier_model +; + + +-- save obsolete/orphaned files +drop table if exists obsolete_files; +create table obsolete_files( + file_id varchar(254) not null, + mimetype varchar(254) not null, + length bigint not null, + checksum varchar(254) not null, + created timestamp not null +); + +insert into obsolete_files (file_id, mimetype, length, checksum, created) + select file_id, mimetype, length, checksum, created from filemeta + where file_id in ( + select file_id from filemeta + except + select original_file as file_id from file_migration_temp + ); + + +-- duplicate each filemeta with the new id +insert into filemeta (file_id, mimetype, length, checksum, created) + select mm.new_file, fm.mimetype, fm.length, fm.checksum, fm.created + from file_migration_temp mm + inner join filemeta fm on fm.file_id = mm.original_file +; + + +-- update each reference to the new id +update attachment_source + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_source.id is not null) +; + +update attachment + set filemetaid = (select new_file + from file_migration_temp + where original_file = filemetaid and attachment.attachid is not null) +; + +update attachment_archive + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_archive.id is not null) +; + +update attachment_preview + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_preview.id is not null) +; + +update classifier_model + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and classifier_model.id is not null) +; + +-- delete old filemeta and filechunk rows +delete from filemeta +where file_id in (select original_file from file_migration_temp); + +delete from filemeta +where file_id in (select file_id from obsolete_files); + +delete from filechunk +where file_id in (select file_id from obsolete_files); + +-- update chunks +update filechunk + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and filechunk.file_id is not null) +; + +-- drop temp table +drop table file_migration_temp; 
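+-- also remove the helper table of orphaned metadata collected above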
+drop table obsolete_files; diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql new file mode 100644 index 00000000..5c6161b9 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql @@ -0,0 +1,150 @@ +drop table if exists file_migration_temp; +create table file_migration_temp ( + id bigserial primary key, + original_file varchar(254) not null unique, + cid varchar(254) not null, + category varchar(254) not null, + new_file varchar(254) not null unique +); + +-- Source files +insert into file_migration_temp (original_file, cid, category, new_file) + select + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Archive files +insert into file_migration_temp (original_file, cid, category, new_file) + select distinct + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_archive rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Converted files +insert into file_migration_temp (original_file, cid, category, new_file) + select + ra.filemetaid as original_file, + i.cid, + 'attachmentconvert' as category, + i.cid || '/attachmentconvert/' || ra.filemetaid as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid + where rs.file_id <> ra.filemetaid +; + +-- Preview image +insert into file_migration_temp (original_file, cid, category, new_file) + select + ap.file_id as original_file, + i.cid, + 'previewimage' as category, + i.cid || '/previewimage/' || ap.file_id as new_file + from attachment_preview ap + inner join attachment ra on ra.attachid = ap.id + inner join item i on i.itemid = ra.itemid + order by id +; + +-- classifier +insert into file_migration_temp (original_file, cid, category, new_file) + select + file_id as original_file, + cid, + 'classifier' as category, + cid || '/classifier/' || file_id as new_file + from classifier_model +; + + +-- save obsolete/orphaned files +drop table if exists obsolete_files; +create table obsolete_files( + file_id varchar(254) not null, + mimetype varchar(254) not null, + length bigint not null, + checksum varchar(254) not null, + created timestamp not null +); + +with + missing_ids as ( + select file_id from filemeta + except + select original_file as file_id from file_migration_temp) +insert into obsolete_files (file_id, mimetype, length, checksum, created) + select file_id, mimetype, length, checksum, created from filemeta + where file_id in (select file_id from missing_ids) +; + + +-- duplicate each filemeta with the new id +insert into filemeta (file_id, mimetype, length, checksum, created) + select mm.new_file, fm.mimetype, fm.length, fm.checksum, fm.created + from file_migration_temp mm + inner join filemeta fm on fm.file_id = mm.original_file +; + + +-- update each reference to the new id +update attachment_source + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_source.id is not null) +; + +update attachment + set filemetaid = (select new_file + from file_migration_temp + 
where original_file = filemetaid and attachment.attachid is not null) +; + +update attachment_archive + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_archive.id is not null) +; + +update attachment_preview + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_preview.id is not null) +; + +update classifier_model + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and classifier_model.id is not null) +; + +-- delete old filemeta and filechunk rows +delete from filemeta +where file_id in (select original_file from file_migration_temp); + +delete from filemeta +where file_id in (select file_id from obsolete_files); + +delete from filechunk +where file_id in (select file_id from obsolete_files); + +-- update chunks +update filechunk + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and filechunk.file_id is not null) +; + +-- drop temp table +drop table file_migration_temp; +drop table obsolete_files;
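
Note on the resulting ids: all three migrations rewrite filemeta.file_id from a
flat ident into the pattern <collective>/<category>/<original-id>, i.e. the
string form of the FileKey type introduced in the first patch. A minimal sketch
of that mapping, assuming FileKey simply carries the three parts as strings
(field and method names below are illustrative, inferred from the migration SQL,
not the actual definitions in docspell.common):

  // Sketch only: inferred from the migration SQL, not the real docspell code.
  final case class FileKey(collective: String, category: String, id: String) {
    // mirrors the SQL concatenation: cid || '/' || category || '/' || file_id
    def asString: String = s"$collective/$category/$id"
  }

  object FileKey {
    // inverse mapping, useful when reading back a migrated file_id
    def fromString(s: String): Option[FileKey] =
      s.split('/') match {
        case Array(cid, cat, id) => Some(FileKey(cid, cat, id))
        case _                   => None
      }
  }

Round-tripping fromString(k.asString) == Some(k) holds as long as the original
idents never contain a '/', which appears to be the case for the ids the
migration rewrites.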