From 553b1fa249eef304dd7d96b2fd303d67fb43d76f Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 13 Feb 2022 12:08:01 +0100 Subject: [PATCH 1/2] Add a file-repository for better organizing files Docspell now must use a new api for accessing files. Issue: #1379 --- .../docspell/backend/ops/OItemSearch.scala | 36 +++++--- .../scala/docspell/backend/ops/OMail.scala | 2 +- .../scala/docspell/backend/ops/OUpload.scala | 14 ++- .../scala/docspell/common/FileCategory.scala | 46 ++++++++++ .../main/scala/docspell/common/FileKey.scala | 21 +++++ .../main/scala/docspell/common/Ident.scala | 5 +- .../docspell/common/ProcessItemArgs.scala | 2 +- .../scala/docspell/joex/learn/Classify.scala | 2 +- .../joex/learn/LearnClassifierTask.scala | 2 +- .../joex/learn/StoreClassifierModel.scala | 9 +- .../docspell/joex/pdfconv/PdfConvTask.scala | 10 +- .../joex/process/AttachmentPageCount.scala | 2 +- .../joex/process/AttachmentPreview.scala | 11 ++- .../docspell/joex/process/ConvertPdf.scala | 27 ++++-- .../docspell/joex/process/CreateItem.scala | 9 +- .../joex/process/DuplicateCheck.scala | 2 +- .../joex/process/ExtractArchive.scala | 26 +++--- .../docspell/joex/process/ItemData.scala | 2 +- .../docspell/joex/process/ItemHandler.scala | 2 +- .../joex/process/TextExtraction.scala | 10 +- .../restserver/http4s/BinaryUtil.scala | 4 +- .../src/main/scala/docspell/store/Store.scala | 8 +- .../docspell/store/file/AttributeStore.scala | 12 ++- .../docspell/store/file/BinnyUtils.scala | 59 ++++++++++++ .../docspell/store/file/FileMetadata.scala | 19 ++++ .../docspell/store/file/FileRepository.scala | 50 ++++++++++ .../store/file/FileRepositoryImpl.scala | 60 ++++++++++++ .../scala/docspell/store/file/FileStore.scala | 91 ------------------- .../docspell/store/impl/DoobieMeta.scala | 46 ++++++---- .../scala/docspell/store/impl/StoreImpl.scala | 8 +- .../docspell/store/queries/QAttachment.scala | 8 +- .../scala/docspell/store/queries/QItem.scala | 12 +-- .../docspell/store/records/RAttachment.scala | 14 +-- .../store/records/RAttachmentArchive.scala | 8 +- .../store/records/RAttachmentPreview.scala | 6 +- .../store/records/RAttachmentSource.scala | 8 +- .../store/records/RClassifierModel.scala | 8 +- .../docspell/store/records/RFileMeta.scala | 14 +-- .../scala/docspell/store/StoreFixture.scala | 5 +- project/Dependencies.scala | 3 +- 40 files changed, 451 insertions(+), 232 deletions(-) create mode 100644 modules/common/src/main/scala/docspell/common/FileCategory.scala create mode 100644 modules/common/src/main/scala/docspell/common/FileKey.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/FileMetadata.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/FileRepository.scala create mode 100644 modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala delete mode 100644 modules/store/src/main/scala/docspell/store/file/FileStore.scala diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala index 56e417e5..6bf03816 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala @@ -14,6 +14,7 @@ import fs2.Stream import docspell.backend.ops.OItemSearch._ import docspell.common._ import docspell.store._ +import docspell.store.file.FileMetadata import docspell.store.queries.{QAttachment, 
QItem} import docspell.store.records._ @@ -89,18 +90,21 @@ object OItemSearch { trait BinaryData[F[_]] { def data: Stream[F, Byte] def name: Option[String] - def meta: RFileMeta - def fileId: Ident + def meta: FileMetadata + def fileId: FileKey } - case class AttachmentData[F[_]](ra: RAttachment, meta: RFileMeta, data: Stream[F, Byte]) - extends BinaryData[F] { + case class AttachmentData[F[_]]( + ra: RAttachment, + meta: FileMetadata, + data: Stream[F, Byte] + ) extends BinaryData[F] { val name = ra.name val fileId = ra.fileId } case class AttachmentSourceData[F[_]]( rs: RAttachmentSource, - meta: RFileMeta, + meta: FileMetadata, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -109,7 +113,7 @@ object OItemSearch { case class AttachmentPreviewData[F[_]]( rs: RAttachmentPreview, - meta: RFileMeta, + meta: FileMetadata, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -118,7 +122,7 @@ object OItemSearch { case class AttachmentArchiveData[F[_]]( rs: RAttachmentArchive, - meta: RFileMeta, + meta: FileMetadata, data: Stream[F, Byte] ) extends BinaryData[F] { val name = rs.name @@ -188,7 +192,7 @@ object OItemSearch { AttachmentData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -208,7 +212,7 @@ object OItemSearch { AttachmentSourceData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -228,7 +232,7 @@ object OItemSearch { AttachmentPreviewData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -248,7 +252,7 @@ object OItemSearch { AttachmentPreviewData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -268,7 +272,7 @@ object OItemSearch { AttachmentArchiveData[F]( ra, m, - store.fileStore.getBytes(m.id) + store.fileRepo.getBytes(m.id) ) } @@ -276,9 +280,11 @@ object OItemSearch { (None: Option[AttachmentArchiveData[F]]).pure[F] } - private def makeBinaryData[A](fileId: Ident)(f: RFileMeta => A): F[Option[A]] = - store.fileStore - .findMeta(fileId) + private def makeBinaryData[A](fileId: FileKey)(f: FileMetadata => A): F[Option[A]] = + OptionT( + store.fileRepo + .findMeta(fileId) + ) .map(fm => f(fm)) .value diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala b/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala index 368477d0..ca3e0fc6 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OMail.scala @@ -249,7 +249,7 @@ object OMail { } yield { val addAttach = m.attach.filter(ras).map { a => Attach[F]( - store.fileStore.getBytes(a._2.id) + store.fileRepo.getBytes(a._2.id) ).withFilename(a._1.name) .withLength(a._2.length.bytes) .withMimeType(a._2.mimetype.toEmil) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index ddc64bb9..ec959f6c 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -126,7 +126,7 @@ object OUpload { ): F[OUpload.UploadResult] = (for { _ <- checkExistingItem(itemId, account.collective) - files <- right(data.files.traverse(saveFile).map(_.flatten)) + files <- right(data.files.traverse(saveFile(account)).map(_.flatten)) _ <- checkFileList(files) lang <- data.meta.language match { case Some(lang) => right(lang.pure[F]) @@ -200,10 +200,18 @@ object OUpload { } yield UploadResult.Success /** 
Saves the file into the database. */ - private def saveFile(file: File[F]): F[Option[ProcessItemArgs.File]] = + private def saveFile( + accountId: AccountId + )(file: File[F]): F[Option[ProcessItemArgs.File]] = logger.finfo(s"Receiving file $file") *> file.data - .through(store.fileStore.save(MimeTypeHint(file.name, None))) + .through( + store.fileRepo.save( + accountId.collective, + FileCategory.AttachmentSource, + MimeTypeHint(file.name, None) + ) + ) .compile .lastOrError .attempt diff --git a/modules/common/src/main/scala/docspell/common/FileCategory.scala b/modules/common/src/main/scala/docspell/common/FileCategory.scala new file mode 100644 index 00000000..52c29b26 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileCategory.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import cats.data.NonEmptyList + +import docspell.common + +import io.circe.{Decoder, Encoder} + +/** This is used to have a rough idea for what a file is used in the system. It is part of + * the file-key to identify a file, backends could ignore it, since the file-id (the last + * part of the file-key) should be globally unique anyways. + */ +sealed trait FileCategory { self: Product => + final def id: Ident = + Ident.unsafe(self.productPrefix.toLowerCase) + + def toFileKey(collective: Ident, fileId: Ident): FileKey = + common.FileKey(collective, this, fileId) +} + +object FileCategory { + // Impl note: Changing constants here requires a database migration! + + case object AttachmentSource extends FileCategory + case object AttachmentConvert extends FileCategory + case object PreviewImage extends FileCategory + case object Classifier extends FileCategory + + val all: NonEmptyList[FileCategory] = + NonEmptyList.of(AttachmentSource, AttachmentConvert, PreviewImage, Classifier) + + def fromString(str: String): Either[String, FileCategory] = + all.find(_.id.id == str).toRight(s"Unknown category: $str") + + implicit val jsonDecoder: Decoder[FileCategory] = + Decoder[String].emap(fromString) + + implicit val jsonEncoder: Encoder[FileCategory] = + Encoder[String].contramap(_.id.id) +} diff --git a/modules/common/src/main/scala/docspell/common/FileKey.scala b/modules/common/src/main/scala/docspell/common/FileKey.scala new file mode 100644 index 00000000..17dc009e --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileKey.scala @@ -0,0 +1,21 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.common + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} + +case class FileKey(collective: Ident, category: FileCategory, id: Ident) + +object FileKey { + + implicit val jsonDecoder: Decoder[FileKey] = + deriveDecoder[FileKey] + + implicit val jsonEncoder: Encoder[FileKey] = + deriveEncoder[FileKey] +} diff --git a/modules/common/src/main/scala/docspell/common/Ident.scala b/modules/common/src/main/scala/docspell/common/Ident.scala index 5251b16c..2f630e33 100644 --- a/modules/common/src/main/scala/docspell/common/Ident.scala +++ b/modules/common/src/main/scala/docspell/common/Ident.scala @@ -25,13 +25,15 @@ case class Ident(id: String) { !isEmpty def /(next: Ident): Ident = - new Ident(id + "." + next.id) + new Ident(id + Ident.concatChar + next.id) } object Ident { + private val concatChar = '.' 
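  // Illustration only (not part of this patch; names and values are hypothetical):
  // the new FileKey introduced above ties a file id to its collective and a
  // FileCategory. A minimal sketch of constructing one:
  //
  //   val key: FileKey =
  //     FileCategory.AttachmentSource.toFileKey(
  //       Ident.unsafe("acme"),   // collective
  //       Ident.unsafe("abc-123") // file id
  //     )
  //
  // Assuming Ident keeps its plain string codec, the derived circe codecs
  // would encode this roughly as:
  //   {"collective":"acme","category":"attachmentsource","id":"abc-123"}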
implicit val identEq: Eq[Ident] = Eq.by(_.id) + // Note, the slash *must not* be part of valid characters val chars: Set[Char] = (('A' to 'Z') ++ ('a' to 'z') ++ ('0' to '9') ++ "-_.@").toSet def randomUUID[F[_]: Sync]: F[Ident] = @@ -75,5 +77,4 @@ object Ident { implicit val order: Order[Ident] = Order.by(_.id) - } diff --git a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala index dab18e3f..046b2255 100644 --- a/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala +++ b/modules/common/src/main/scala/docspell/common/ProcessItemArgs.scala @@ -60,7 +60,7 @@ object ProcessItemArgs { implicit val jsonDecoder: Decoder[ProcessMeta] = deriveDecoder[ProcessMeta] } - case class File(name: Option[String], fileMetaId: Ident) + case class File(name: Option[String], fileMetaId: FileKey) object File { implicit val jsonEncoder: Encoder[File] = deriveEncoder[File] implicit val jsonDecoder: Decoder[File] = deriveDecoder[File] diff --git a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala index db33b251..e208c79d 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/Classify.scala @@ -31,7 +31,7 @@ object Classify { _ <- OptionT.liftF(logger.info(s"Guessing label for ${cname.name} …")) model <- OptionT(store.transact(RClassifierModel.findByName(coll, cname.name))) .flatTapNone(logger.debug("No classifier model found.")) - modelData = store.fileStore.getBytes(model.fileId) + modelData = store.fileRepo.getBytes(model.fileId) cls <- OptionT(File.withTempDir(workingDir, "classify").use { dir => val modelFile = dir.resolve("model.ser.gz") modelData diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala index 317a155a..92fbf401 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala @@ -91,7 +91,7 @@ object LearnClassifierTask { n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id))) _ <- list .map(_.fileId) - .traverse(id => ctx.store.fileStore.delete(id)) + .traverse(id => ctx.store.fileRepo.delete(id)) _ <- ctx.logger.debug(s"Deleted $n model files.") } yield () diff --git a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala index ac04f60a..af614e8b 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/StoreClassifierModel.scala @@ -42,7 +42,12 @@ object StoreClassifierModel { _ <- logger.debug(s"Storing new trained model for: ${modelName.name}") fileData = Files[F].readAll(trainedModel.model) newFileId <- - fileData.through(store.fileStore.save(MimeTypeHint.none)).compile.lastOrError + fileData + .through( + store.fileRepo.save(collective, FileCategory.Classifier, MimeTypeHint.none) + ) + .compile + .lastOrError _ <- store.transact( RClassifierModel.updateFile(collective, modelName.name, newFileId) ) @@ -50,7 +55,7 @@ object StoreClassifierModel { _ <- oldFile match { case Some(fid) => logger.debug(s"Deleting old model file ${fid.id}") *> - store.fileStore.delete(fid) + store.fileRepo.delete(fid) case None => ().pure[F] } } yield () diff --git 
a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala index e3c43c7e..ca82615a 100644 --- a/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/pdfconv/PdfConvTask.scala @@ -92,7 +92,7 @@ object PdfConvTask { ctx: Context[F, Args], in: RFileMeta ): F[Unit] = { - val fs = ctx.store.fileStore + val fs = ctx.store.fileRepo val data = fs.getBytes(in.id) val storeResult: ConversionResult.Handler[F, Unit] = @@ -141,11 +141,15 @@ object PdfConvTask { newFile: Stream[F, Byte] ): F[Unit] = { val mimeHint = MimeTypeHint.advertised(meta.mimetype) + val collective = meta.id.collective + val cat = FileCategory.AttachmentConvert for { fid <- - newFile.through(ctx.store.fileStore.save(mimeHint)).compile.lastOrError + newFile + .through(ctx.store.fileRepo.save(collective, cat, mimeHint)) + .compile + .lastOrError _ <- ctx.store.transact(RAttachment.updateFileId(ctx.args.attachId, fid)) } yield () } - } diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala index d8c08aeb..cce9400d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPageCount.scala @@ -100,5 +100,5 @@ object AttachmentPageCount { .getOrElse(MimeType.octetStream) def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.fileStore.getBytes(ra.fileId) + ctx.store.fileRepo.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala index 60bb5da4..2a55775d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala @@ -59,7 +59,7 @@ object AttachmentPreview { preview.previewPNG(loadFile(ctx)(ra)).flatMap { case Some(out) => ctx.logger.debug("Preview generated, saving to database…") *> - createRecord(ctx, out, ra).map(_.some) + createRecord(ctx, ra.fileId.collective, out, ra).map(_.some) case None => ctx.logger .info(s"Preview could not be generated. 
Maybe the pdf has no pages?") *> @@ -73,6 +73,7 @@ object AttachmentPreview { private def createRecord[F[_]: Sync]( ctx: Context[F, _], + collective: Ident, png: Stream[F, Byte], ra: RAttachment ): F[RAttachmentPreview] = { @@ -82,7 +83,11 @@ object AttachmentPreview { for { fileId <- png .through( - ctx.store.fileStore.save(MimeTypeHint(name.map(_.fullName), Some("image/png"))) + ctx.store.fileRepo.save( + collective, + FileCategory.PreviewImage, + MimeTypeHint(name.map(_.fullName), Some("image/png")) + ) ) .compile .lastOrError @@ -99,5 +104,5 @@ object AttachmentPreview { .getOrElse(MimeType.octetStream) def loadFile[F[_]](ctx: Context[F, _])(ra: RAttachment): Stream[F, Byte] = - ctx.store.fileStore.getBytes(ra.fileId) + ctx.store.fileRepo.getBytes(ra.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 0108ef98..73754127 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -32,11 +32,12 @@ import docspell.store.records._ * This step assumes an existing premature item, it traverses its attachments. */ object ConvertPdf { + type Args = ProcessItemArgs def apply[F[_]: Async]( cfg: ConvertConfig, item: ItemData - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = Task { ctx => def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = isConverted(ctx)(ra).flatMap { @@ -61,7 +62,7 @@ object ConvertPdf { } - def isConverted[F[_]](ctx: Context[F, ProcessItemArgs])( + def isConverted[F[_]](ctx: Context[F, Args])( ra: RAttachment ): F[Boolean] = ctx.store.transact(RAttachmentSource.isConverted(ra.id)) @@ -74,14 +75,14 @@ object ConvertPdf { def convertSafe[F[_]: Async]( cfg: ConvertConfig, sanitizeHtml: SanitizeHtml, - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], item: ItemData )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = loadCollectivePasswords(ctx).flatMap(collPass => Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv => mime match { case mt => - val data = ctx.store.fileStore.getBytes(ra.fileId) + val data = ctx.store.fileRepo.getBytes(ra.fileId) val handler = conversionHandler[F](ctx, cfg, ra, item) ctx.logger .info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> @@ -93,14 +94,14 @@ object ConvertPdf { ) private def loadCollectivePasswords[F[_]: Async]( - ctx: Context[F, ProcessItemArgs] + ctx: Context[F, Args] ): F[List[Password]] = ctx.store .transact(RCollectivePassword.findAll(ctx.args.meta.collective)) .map(_.map(_.password).distinct) private def conversionHandler[F[_]: Sync]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], cfg: ConvertConfig, ra: RAttachment, item: ItemData @@ -146,7 +147,7 @@ object ConvertPdf { } private def storePDF[F[_]: Sync]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], cfg: ConvertConfig, ra: RAttachment, pdf: Stream[F, Byte] @@ -160,7 +161,13 @@ object ConvertPdf { .map(_.fullName) pdf - .through(ctx.store.fileStore.save(MimeTypeHint(hint.filename, hint.advertised))) + .through( + ctx.store.fileRepo.save( + ctx.args.meta.collective, + FileCategory.AttachmentConvert, + MimeTypeHint(hint.filename, hint.advertised) + ) + ) .compile .lastOrError .flatMap(fmId => updateAttachment[F](ctx, ra, fmId, newName).map(_ => fmId)) @@ -170,7 +177,7 @@ object ConvertPdf { private def updateAttachment[F[_]: Sync]( 
ctx: Context[F, _], ra: RAttachment, - fmId: Ident, + fmId: FileKey, newName: Option[String] ): F[Unit] = for { @@ -188,7 +195,7 @@ object ConvertPdf { if (sameFile) ().pure[F] else ctx.logger.info("Deleting previous attachment file") *> - ctx.store.fileStore + ctx.store.fileRepo .delete(raPrev.fileId) .attempt .flatMap { diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 031815c2..092c8495 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -14,6 +14,7 @@ import fs2.Stream import docspell.common._ import docspell.joex.scheduler.{Context, Task} +import docspell.store.file.FileMetadata import docspell.store.queries.QItem import docspell.store.records._ @@ -28,7 +29,7 @@ object CreateItem { def createNew[F[_]: Sync]: Task[F, ProcessItemArgs, ItemData] = Task { ctx => - def isValidFile(fm: RFileMeta) = + def isValidFile(fm: FileMetadata) = ctx.args.meta.validFileTypes.isEmpty || ctx.args.meta.validFileTypes.toSet .contains(fm.mimetype) @@ -39,9 +40,7 @@ object CreateItem { .flatMap { offset => Stream .emits(ctx.args.files) - .evalMap(f => - ctx.store.fileStore.findMeta(f.fileMetaId).value.map(fm => (f, fm)) - ) + .evalMap(f => ctx.store.fileRepo.findMeta(f.fileMetaId).map(fm => (f, fm))) .collect { case (f, Some(fm)) if isValidFile(fm) => f } .zipWithIndex .evalMap { case (f, index) => @@ -198,6 +197,6 @@ object CreateItem { // TODO if no source is present, it must be saved! private def originFileTuple( t: (RAttachment, Option[RAttachmentSource]) - ): (Ident, Ident) = + ): (Ident, FileKey) = t._2.map(s => s.id -> s.fileId).getOrElse(t._1.id -> t._1.fileId) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala index 31b2dfac..41fcd0e3 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala @@ -51,7 +51,7 @@ object DuplicateCheck { val fname = ctx.args.files.find(_.fileMetaId == fd.fm.id).flatMap(_.name) if (fd.exists) ctx.logger - .info(s"Deleting duplicate file $fname!") *> ctx.store.fileStore + .info(s"Deleting duplicate file $fname!") *> ctx.store.fileRepo .delete(fd.fm.id) else ().pure[F] } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 81e2df4c..ef98b43d 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -32,16 +32,17 @@ import emil.Mail * This step assumes an existing premature item, it traverses its attachments. 
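 * Extraction runs in passes: `multiPass` applies `singlePass` until no
 * further archive attachment is found, so archives nested inside other
 * archives are unpacked as well.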
*/ object ExtractArchive { + type Args = ProcessItemArgs def apply[F[_]: Async]( item: ItemData - ): Task[F, ProcessItemArgs, ItemData] = + ): Task[F, Args, ItemData] = multiPass(item, None).map(_._2) def multiPass[F[_]: Async]( item: ItemData, archive: Option[RAttachmentArchive] - ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = + ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = singlePass(item, archive).flatMap { t => if (t._1.isEmpty) Task.pure(t) else multiPass(t._2, t._1) @@ -50,7 +51,7 @@ object ExtractArchive { def singlePass[F[_]: Async]( item: ItemData, archive: Option[RAttachmentArchive] - ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = + ): Task[F, Args, (Option[RAttachmentArchive], ItemData)] = Task { ctx => def extract(ra: RAttachment, pos: Int): F[Extracted] = findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, pos, m)) @@ -88,7 +89,7 @@ object ExtractArchive { .getOrElse(MimeType.octetStream) def extractSafe[F[_]: Async]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int, mime: MimeType): F[Extracted] = mime match { @@ -120,7 +121,7 @@ object ExtractArchive { ) _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) _ <- ctx.store.transact(RAttachment.delete(ra.id)) - _ <- ctx.store.fileStore.delete(ra.fileId) + _ <- ctx.store.fileRepo.delete(ra.fileId) } yield extracted case None => for { @@ -132,10 +133,10 @@ object ExtractArchive { } def extractZip[F[_]: Async]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val zipData = ctx.store.fileStore.getBytes(ra.fileId) + val zipData = ctx.store.fileRepo.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) ctx.logger.debug(s"Filtering zip entries with '${glob.asString}'") *> zipData @@ -148,10 +149,10 @@ object ExtractArchive { } def extractMail[F[_]: Async]( - ctx: Context[F, ProcessItemArgs], + ctx: Context[F, Args], archive: Option[RAttachmentArchive] )(ra: RAttachment, pos: Int): F[Extracted] = { - val email: Stream[F, Byte] = ctx.store.fileStore.getBytes(ra.fileId) + val email: Stream[F, Byte] = ctx.store.fileRepo.getBytes(ra.fileId) val glob = ctx.args.meta.fileFilter.getOrElse(Glob.all) val attachOnly = ctx.args.meta.attachmentsOnly.getOrElse(false) @@ -183,7 +184,7 @@ object ExtractArchive { .getOrElse(Extracted.empty) def handleEntry[F[_]: Sync]( - ctx: Context[F, _], + ctx: Context[F, Args], ra: RAttachment, pos: Int, archive: Option[RAttachmentArchive], @@ -193,7 +194,10 @@ object ExtractArchive { ): Stream[F, Extracted] = { val (entry, subPos) = tentry val mimeHint = MimeTypeHint.filename(entry.name).withAdvertised(entry.mime.asString) - val fileId = entry.data.through(ctx.store.fileStore.save(mimeHint)) + val fileId = entry.data.through( + ctx.store.fileRepo + .save(ctx.args.meta.collective, FileCategory.AttachmentSource, mimeHint) + ) Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. 
Storing as attachment.")) >> fileId.evalMap { fid => diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 7e8bd697..4d1c03b5 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -38,7 +38,7 @@ case class ItemData( attachments: Vector[RAttachment], metas: Vector[RAttachmentMeta], dateLabels: Vector[AttachmentDates], - originFile: Map[Ident, Ident], // maps RAttachment.id -> FileMeta.id + originFile: Map[Ident, FileKey], // maps RAttachment.id -> FileMeta.id givenMeta: MetaProposalList, // given meta data not associated to a specific attachment // a list of tags (names or ids) attached to the item if they exist tags: List[String], diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index 0afd3784..fa1774e0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -133,7 +133,7 @@ object ItemHandler { ctx.logger.info("Deleting input files …") *> Stream .emits(ctx.args.files.map(_.fileMetaId)) - .evalMap(id => ctx.store.fileStore.delete(id).attempt) + .evalMap(id => ctx.store.fileRepo.delete(id).attempt) .compile .drain ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index 2836638a..3f6be877 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -126,11 +126,11 @@ object TextExtraction { ctx: Context[F, _], extr: Extraction[F], lang: Language - )(fileId: Ident): F[ExtractResult] = { - val data = ctx.store.fileStore.getBytes(fileId) + )(fileId: FileKey): F[ExtractResult] = { + val data = ctx.store.fileRepo.getBytes(fileId) def findMime: F[MimeType] = - OptionT(ctx.store.transact(RFileMeta.findById(fileId))) + OptionT(ctx.store.fileRepo.findMeta(fileId)) .map(_.mimetype) .getOrElse(MimeType.octetStream) @@ -143,7 +143,7 @@ object TextExtraction { cfg: ExtractConfig, ra: RAttachment, lang: Language - )(fileIds: List[Ident]): F[Option[ExtractResult.Success]] = + )(fileIds: List[FileKey]): F[Option[ExtractResult.Success]] = fileIds match { case Nil => ctx.logger.error(s"Cannot extract text").map(_ => None) @@ -179,7 +179,7 @@ object TextExtraction { private def filesToExtract[F[_]: Sync](ctx: Context[F, _])( item: ItemData, ra: RAttachment - ): F[List[Ident]] = + ): F[List[FileKey]] = item.originFile.get(ra.id) match { case Some(sid) => ctx.store.transact(RFileMeta.findMime(sid)).map { diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala index 7ebdb9b3..d3412089 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala @@ -15,7 +15,7 @@ import docspell.backend.ops.OItemSearch.{AttachmentData, AttachmentPreviewData} import docspell.backend.ops._ import docspell.restapi.model.BasicResult import docspell.restserver.http4s.{QueryParam => QP} -import docspell.store.records.RFileMeta +import docspell.store.file.FileMetadata import org.http4s._ import 
org.http4s.circe.CirceEntityEncoder._ @@ -117,7 +117,7 @@ object BinaryUtil { } def matchETag[F[_]]( - fileData: Option[RFileMeta], + fileData: Option[FileMetadata], noneMatch: Option[NonEmptyList[EntityTag]] ): Boolean = (fileData, noneMatch) match { diff --git a/modules/store/src/main/scala/docspell/store/Store.scala b/modules/store/src/main/scala/docspell/store/Store.scala index 338b177a..3e54a7a2 100644 --- a/modules/store/src/main/scala/docspell/store/Store.scala +++ b/modules/store/src/main/scala/docspell/store/Store.scala @@ -12,7 +12,7 @@ import cats.effect._ import cats.~> import fs2._ -import docspell.store.file.FileStore +import docspell.store.file.FileRepository import docspell.store.impl.StoreImpl import com.zaxxer.hikari.HikariDataSource @@ -26,7 +26,7 @@ trait Store[F[_]] { def transact[A](prg: Stream[ConnectionIO, A]): Stream[F, A] - def fileStore: FileStore[F] + def fileRepo: FileRepository[F] def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] } @@ -50,8 +50,8 @@ object Store { ds.setDriverClassName(jdbc.driverClass) } xa = HikariTransactor(ds, connectEC) - fs = FileStore[F](xa, ds, chunkSize) - st = new StoreImpl[F](fs, jdbc, xa) + fr = FileRepository.genericJDBC(xa, ds, chunkSize) + st = new StoreImpl[F](fr, jdbc, xa) _ <- Resource.eval(st.migrate) } yield st } diff --git a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala index ab9c7a8c..4814a9dc 100644 --- a/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala +++ b/modules/store/src/main/scala/docspell/store/file/AttributeStore.scala @@ -23,8 +23,9 @@ final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) for { now <- Timestamp.current[F] a <- attrs + fileKey <- makeFileKey(id) fm = RFileMeta( - Ident.unsafe(id.id), + fileKey, now, MimeType.parse(a.contentType.contentType).getOrElse(MimeType.octetStream), ByteSize(a.length), @@ -34,7 +35,7 @@ final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) } yield () def deleteAttr(id: BinaryId): F[Boolean] = - RFileMeta.delete(Ident.unsafe(id.id)).transact(xa).map(_ > 0) + makeFileKey(id).flatMap(fileKey => RFileMeta.delete(fileKey).transact(xa).map(_ > 0)) def findAttr(id: BinaryId): OptionT[F, BinaryAttributes] = findMeta(id).map(fm => @@ -46,5 +47,10 @@ final private[file] class AttributeStore[F[_]: Sync](xa: Transactor[F]) ) def findMeta(id: BinaryId): OptionT[F, RFileMeta] = - OptionT(RFileMeta.findById(Ident.unsafe(id.id)).transact(xa)) + OptionT(makeFileKey(id).flatMap(fileKey => RFileMeta.findById(fileKey).transact(xa))) + + private def makeFileKey(binaryId: BinaryId): F[FileKey] = + Sync[F] + .pure(BinnyUtils.binaryIdToFileKey(binaryId).left.map(new IllegalStateException(_))) + .rethrow } diff --git a/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala b/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala new file mode 100644 index 00000000..71d426d5 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/BinnyUtils.scala @@ -0,0 +1,59 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import docspell.common +import docspell.common._ +import docspell.files.TikaMimetype + +import binny._ +import scodec.bits.ByteVector + +private[store] object BinnyUtils { + + def fileKeyToBinaryId(fk: FileKey): BinaryId = + BinaryId(s"${fk.collective.id}/${fk.category.id.id}/${fk.id.id}") + + def binaryIdToFileKey(bid: BinaryId): Either[String, FileKey] = + bid.id.split('/').toList match { + case cId :: catId :: fId :: Nil => + for { + coll <- Ident.fromString(cId) + cat <- FileCategory.fromString(catId) + file <- Ident.fromString(fId) + } yield common.FileKey(coll, cat, file) + case _ => + Left(s"Invalid format for file-key: $bid") + } + + def unsafeBinaryIdToFileKey(bid: BinaryId): FileKey = + binaryIdToFileKey(bid).fold( + err => throw new IllegalStateException(err), + identity + ) + + object LoggerAdapter { + def apply[F[_]](log: Logger[F]): binny.util.Logger[F] = + new binny.util.Logger[F] { + override def trace(msg: => String): F[Unit] = log.trace(msg) + override def debug(msg: => String): F[Unit] = log.debug(msg) + override def info(msg: => String): F[Unit] = log.info(msg) + override def warn(msg: => String): F[Unit] = log.warn(msg) + override def error(msg: => String): F[Unit] = log.error(msg) + override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg) + } + } + + object TikaContentTypeDetect extends ContentTypeDetect { + override def detect(data: ByteVector, hint: Hint): SimpleContentType = + SimpleContentType( + TikaMimetype + .detect(data, MimeTypeHint(hint.filename, hint.advertisedType)) + .asString + ) + } +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileMetadata.scala b/modules/store/src/main/scala/docspell/store/file/FileMetadata.scala new file mode 100644 index 00000000..b96227c0 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileMetadata.scala @@ -0,0 +1,19 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import docspell.common._ + +import scodec.bits.ByteVector + +final case class FileMetadata( + id: FileKey, + created: Timestamp, + mimetype: MimeType, + length: ByteSize, + checksum: ByteVector +) diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepository.scala b/modules/store/src/main/scala/docspell/store/file/FileRepository.scala new file mode 100644 index 00000000..7eb73f12 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileRepository.scala @@ -0,0 +1,50 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import javax.sql.DataSource + +import cats.effect._ +import fs2._ + +import docspell.common._ + +import binny.BinaryId +import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} +import doobie.Transactor + +trait FileRepository[F[_]] { + def getBytes(key: FileKey): Stream[F, Byte] + + def findMeta(key: FileKey): F[Option[FileMetadata]] + + def delete(key: FileKey): F[Unit] + + def save( + collective: Ident, + category: FileCategory, + hint: MimeTypeHint + ): Pipe[F, Byte, FileKey] +} + +object FileRepository { + private[this] val logger = org.log4s.getLogger + + def genericJDBC[F[_]: Sync]( + xa: Transactor[F], + ds: DataSource, + chunkSize: Int + ): FileRepository[F] = { + val attrStore = new AttributeStore[F](xa) + val cfg = JdbcStoreConfig("filechunk", chunkSize, BinnyUtils.TikaContentTypeDetect) + val log = Logger.log4s[F](logger) + val binStore = GenericJdbcStore[F](ds, BinnyUtils.LoggerAdapter(log), cfg, attrStore) + val keyFun: FileKey => BinaryId = BinnyUtils.fileKeyToBinaryId + + new FileRepositoryImpl[F](binStore, attrStore, keyFun) + } +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala b/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala new file mode 100644 index 00000000..7f820244 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/file/FileRepositoryImpl.scala @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.file + +import cats.data.OptionT +import cats.effect.Sync +import cats.implicits._ +import fs2.{Pipe, Stream} + +import docspell.common._ + +import binny._ + +final class FileRepositoryImpl[F[_]: Sync]( + bs: BinaryStore[F], + attrStore: AttributeStore[F], + keyFun: FileKey => BinaryId +) extends FileRepository[F] { + + def find(key: FileKey): OptionT[F, Stream[F, Byte]] = + bs.findBinary(keyFun(key), ByteRange.All) + + def getBytes(key: FileKey): Stream[F, Byte] = + Stream.eval(find(key).value).unNoneTerminate.flatMap(identity) + + def findMeta(key: FileKey): F[Option[FileMetadata]] = + attrStore + .findMeta(keyFun(key)) + .map(rfm => + FileMetadata(rfm.id, rfm.created, rfm.mimetype, rfm.length, rfm.checksum) + ) + .value + + def delete(key: FileKey): F[Unit] = + bs.delete(keyFun(key)) + + def save( + collective: Ident, + category: FileCategory, + hint: MimeTypeHint + ): Pipe[F, Byte, FileKey] = { + val fhint = Hint(hint.filename, hint.advertised) + in => + Stream + .eval(randomKey(collective, category)) + .flatMap(fkey => + in.through(bs.insertWith(keyFun(fkey), fhint)) ++ Stream.emit(fkey) + ) + } + + def randomKey( + collective: Ident, + category: FileCategory + ): F[FileKey] = + BinaryId.random[F].map(bid => FileKey(collective, category, Ident.unsafe(bid.id))) +} diff --git a/modules/store/src/main/scala/docspell/store/file/FileStore.scala b/modules/store/src/main/scala/docspell/store/file/FileStore.scala deleted file mode 100644 index 3afd4216..00000000 --- a/modules/store/src/main/scala/docspell/store/file/FileStore.scala +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2020 Eike K. 
& Contributors - * - * SPDX-License-Identifier: AGPL-3.0-or-later - */ - -package docspell.store.file - -import javax.sql.DataSource - -import cats.data.OptionT -import cats.effect._ -import fs2.{Pipe, Stream} - -import docspell.common._ -import docspell.files.TikaMimetype -import docspell.store.records.RFileMeta - -import binny._ -import binny.jdbc.{GenericJdbcStore, JdbcStoreConfig} -import doobie._ -import scodec.bits.ByteVector - -trait FileStore[F[_]] { - - def find(id: Ident): OptionT[F, Stream[F, Byte]] - - def getBytes(id: Ident): Stream[F, Byte] - - def findMeta(id: Ident): OptionT[F, RFileMeta] - - def delete(id: Ident): F[Unit] - - def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] -} - -object FileStore { - private[this] val logger = org.log4s.getLogger - - def apply[F[_]: Sync]( - xa: Transactor[F], - ds: DataSource, - chunkSize: Int - ): FileStore[F] = { - val attrStore = new AttributeStore[F](xa) - val cfg = JdbcStoreConfig("filechunk", chunkSize, TikaContentTypeDetect) - val log = Logger.log4s[F](logger) - val binStore = GenericJdbcStore[F](ds, LoggerAdapter(log), cfg, attrStore) - new Impl[F](binStore, attrStore) - } - - final private class Impl[F[_]](bs: BinaryStore[F], attrStore: AttributeStore[F]) - extends FileStore[F] { - def find(id: Ident): OptionT[F, Stream[F, Byte]] = - bs.findBinary(BinaryId(id.id), ByteRange.All) - - def getBytes(id: Ident): Stream[F, Byte] = - Stream.eval(find(id).value).unNoneTerminate.flatMap(identity) - - def findMeta(id: Ident): OptionT[F, RFileMeta] = - attrStore.findMeta(BinaryId(id.id)) - - def delete(id: Ident): F[Unit] = - bs.delete(BinaryId(id.id)) - - def save(hint: MimeTypeHint): Pipe[F, Byte, Ident] = - bs.insert(Hint(hint.filename, hint.advertised)) - .andThen(_.map(bid => Ident.unsafe(bid.id))) - } - - private object LoggerAdapter { - def apply[F[_]](log: Logger[F]): binny.util.Logger[F] = - new binny.util.Logger[F] { - override def trace(msg: => String): F[Unit] = log.trace(msg) - override def debug(msg: => String): F[Unit] = log.debug(msg) - override def info(msg: => String): F[Unit] = log.info(msg) - override def warn(msg: => String): F[Unit] = log.warn(msg) - override def error(msg: => String): F[Unit] = log.error(msg) - override def error(ex: Throwable)(msg: => String): F[Unit] = log.error(ex)(msg) - } - } - - private object TikaContentTypeDetect extends ContentTypeDetect { - override def detect(data: ByteVector, hint: Hint): SimpleContentType = - SimpleContentType( - TikaMimetype - .detect(data, MimeTypeHint(hint.filename, hint.advertisedType)) - .asString - ) - } -} diff --git a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala index dee20f76..738a078b 100644 --- a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala +++ b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala @@ -14,8 +14,10 @@ import docspell.common.syntax.all._ import docspell.jsonminiq.JsonMiniQuery import docspell.notification.api.{ChannelType, EventType} import docspell.query.{ItemQuery, ItemQueryParser} +import docspell.store.file.BinnyUtils import docspell.totp.Key +import binny.BinaryId import com.github.eikek.calev.CalEvent import doobie._ import doobie.implicits.legacy.instant._ @@ -27,7 +29,7 @@ import scodec.bits.ByteVector trait DoobieMeta extends EmilDoobieMeta { - implicit val sqlLogging = LogHandler { + implicit val sqlLogging: LogHandler = LogHandler { case e @ Success(_, _, _, _) => DoobieMeta.logger.trace("SQL " + e) case e => @@ 
-39,58 +41,64 @@ trait DoobieMeta extends EmilDoobieMeta { e.apply(a).noSpaces ) + implicit val metaBinaryId: Meta[BinaryId] = + Meta[String].timap(BinaryId.apply)(_.id) + + implicit val metaFileKey: Meta[FileKey] = + Meta[BinaryId].timap(BinnyUtils.unsafeBinaryIdToFileKey)(BinnyUtils.fileKeyToBinaryId) + implicit val metaAccountSource: Meta[AccountSource] = - Meta[String].imap(AccountSource.unsafeFromString)(_.name) + Meta[String].timap(AccountSource.unsafeFromString)(_.name) implicit val metaDuration: Meta[Duration] = - Meta[Long].imap(Duration.millis)(_.millis) + Meta[Long].timap(Duration.millis)(_.millis) implicit val metaCollectiveState: Meta[CollectiveState] = - Meta[String].imap(CollectiveState.unsafe)(CollectiveState.asString) + Meta[String].timap(CollectiveState.unsafe)(CollectiveState.asString) implicit val metaUserState: Meta[UserState] = - Meta[String].imap(UserState.unsafe)(UserState.asString) + Meta[String].timap(UserState.unsafe)(UserState.asString) implicit val metaPassword: Meta[Password] = - Meta[String].imap(Password(_))(_.pass) + Meta[String].timap(Password(_))(_.pass) implicit val metaIdent: Meta[Ident] = - Meta[String].imap(Ident.unsafe)(_.id) + Meta[String].timap(Ident.unsafe)(_.id) implicit val metaContactKind: Meta[ContactKind] = - Meta[String].imap(ContactKind.unsafe)(_.asString) + Meta[String].timap(ContactKind.unsafe)(_.asString) implicit val metaTimestamp: Meta[Timestamp] = - Meta[Instant].imap(Timestamp(_))(_.value) + Meta[Instant].timap(Timestamp(_))(_.value) implicit val metaJobState: Meta[JobState] = - Meta[String].imap(JobState.unsafe)(_.name) + Meta[String].timap(JobState.unsafe)(_.name) implicit val metaDirection: Meta[Direction] = - Meta[Boolean].imap(flag => + Meta[Boolean].timap(flag => if (flag) Direction.Incoming: Direction else Direction.Outgoing: Direction )(d => Direction.isIncoming(d)) implicit val metaPriority: Meta[Priority] = - Meta[Int].imap(Priority.fromInt)(Priority.toInt) + Meta[Int].timap(Priority.fromInt)(Priority.toInt) implicit val metaLogLevel: Meta[LogLevel] = - Meta[String].imap(LogLevel.unsafeString)(_.name) + Meta[String].timap(LogLevel.unsafeString)(_.name) implicit val metaLenientUri: Meta[LenientUri] = - Meta[String].imap(LenientUri.unsafe)(_.asString) + Meta[String].timap(LenientUri.unsafe)(_.asString) implicit val metaNodeType: Meta[NodeType] = - Meta[String].imap(NodeType.unsafe)(_.name) + Meta[String].timap(NodeType.unsafe)(_.name) implicit val metaLocalDate: Meta[LocalDate] = - Meta[String].imap(str => LocalDate.parse(str))(_.format(DateTimeFormatter.ISO_DATE)) + Meta[String].timap(str => LocalDate.parse(str))(_.format(DateTimeFormatter.ISO_DATE)) implicit val metaItemState: Meta[ItemState] = - Meta[String].imap(ItemState.unsafe)(_.name) + Meta[String].timap(ItemState.unsafe)(_.name) implicit val metNerTag: Meta[NerTag] = - Meta[String].imap(NerTag.unsafe)(_.name) + Meta[String].timap(NerTag.unsafe)(_.name) implicit val metaNerLabel: Meta[NerLabel] = jsonMeta[NerLabel] @@ -108,7 +116,7 @@ trait DoobieMeta extends EmilDoobieMeta { jsonMeta[List[IdRef]] implicit val metaLanguage: Meta[Language] = - Meta[String].imap(Language.unsafe)(_.iso3) + Meta[String].timap(Language.unsafe)(_.iso3) implicit val metaCalEvent: Meta[CalEvent] = Meta[String].timap(CalEvent.unsafe)(_.asString) diff --git a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala index 50c856b1..54505649 100644 --- a/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala +++ 
b/modules/store/src/main/scala/docspell/store/impl/StoreImpl.scala @@ -11,7 +11,7 @@ import cats.effect.Async import cats.implicits._ import cats.~> -import docspell.store.file.FileStore +import docspell.store.file.FileRepository import docspell.store.migrate.FlywayMigrate import docspell.store.{AddResult, JdbcConfig, Store} @@ -19,7 +19,7 @@ import doobie._ import doobie.implicits._ final class StoreImpl[F[_]: Async]( - val fileStore: FileStore[F], + val fileRepo: FileRepository[F], jdbc: JdbcConfig, xa: Transactor[F] ) extends Store[F] { @@ -30,10 +30,10 @@ final class StoreImpl[F[_]: Async]( def migrate: F[Int] = FlywayMigrate.run[F](jdbc).map(_.migrationsExecuted) - def transact[A](prg: doobie.ConnectionIO[A]): F[A] = + def transact[A](prg: ConnectionIO[A]): F[A] = prg.transact(xa) - def transact[A](prg: fs2.Stream[doobie.ConnectionIO, A]): fs2.Stream[F, A] = + def transact[A](prg: fs2.Stream[ConnectionIO, A]): fs2.Stream[F, A] = prg.transact(xa) def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult] = diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index 78abf6a3..7768a25c 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -40,7 +40,7 @@ object QAttachment { .evalSeq(store.transact(findPreview)) .map(_.fileId) .evalTap(_ => store.transact(RAttachmentPreview.delete(attachId))) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .map(_ => 1) .compile .foldMonoid @@ -68,7 +68,7 @@ object QAttachment { f <- Stream .emits(files._1) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .map(_ => 1) .compile .foldMonoid @@ -91,7 +91,7 @@ object QAttachment { f <- Stream .emits(ra.fileId +: (s.map(_.fileId).toSeq ++ p.map(_.fileId).toSeq)) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .map(_ => 1) .compile .foldMonoid @@ -104,7 +104,7 @@ object QAttachment { _ <- OptionT.liftF( Stream .emit(aa.fileId) - .evalMap(store.fileStore.delete) + .evalMap(store.fileRepo.delete) .compile .drain ) diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index 22e64a0d..3b5d0c19 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -15,7 +15,7 @@ import cats.implicits._ import fs2.Stream import docspell.common.syntax.all._ -import docspell.common.{IdRef, _} +import docspell.common.{FileKey, IdRef, _} import docspell.query.ItemQuery import docspell.store.Store import docspell.store.qb.DSL._ @@ -470,7 +470,7 @@ object QItem { } yield tn + rn + n + mn + cf + im private def findByFileIdsQuery( - fileMetaIds: Nel[Ident], + fileMetaIds: Nel[FileKey], states: Option[Nel[ItemState]] ): Select.SimpleSelect = { val i = RItem.as("i") @@ -490,7 +490,7 @@ object QItem { ).distinct } - def findOneByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Option[RItem]] = + def findOneByFileIds(fileMetaIds: Seq[FileKey]): ConnectionIO[Option[RItem]] = Nel.fromList(fileMetaIds.toList) match { case Some(nel) => findByFileIdsQuery(nel, None).limit(1).build.query[RItem].option @@ -499,7 +499,7 @@ object QItem { } def findByFileIds( - fileMetaIds: Seq[Ident], + fileMetaIds: Seq[FileKey], states: Nel[ItemState] ): ConnectionIO[Vector[RItem]] = 
Nel.fromList(fileMetaIds.toList) match { @@ -512,7 +512,7 @@ object QItem { def findByChecksum( checksum: String, collective: Ident, - excludeFileMeta: Set[Ident] + excludeFileMeta: Set[FileKey] ): ConnectionIO[Vector[RItem]] = { val qq = findByChecksumQuery(checksum, collective, excludeFileMeta).build logger.debug(s"FindByChecksum: $qq") @@ -522,7 +522,7 @@ object QItem { def findByChecksumQuery( checksum: String, collective: Ident, - excludeFileMeta: Set[Ident] + excludeFileMeta: Set[FileKey] ): Select = { val m1 = RFileMeta.as("m1") val m2 = RFileMeta.as("m2") diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala index b8695176..bf99a01b 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala @@ -10,7 +10,7 @@ import cats.data.NonEmptyList import cats.implicits._ import fs2.Stream -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -20,7 +20,7 @@ import doobie.implicits._ case class RAttachment( id: Ident, itemId: Ident, - fileId: Ident, + fileId: FileKey, position: Int, created: Timestamp, name: Option[String] @@ -32,7 +32,7 @@ object RAttachment { val id = Column[Ident]("attachid", this) val itemId = Column[Ident]("itemid", this) - val fileId = Column[Ident]("filemetaid", this) + val fileId = Column[FileKey]("filemetaid", this) val position = Column[Int]("position", this) val created = Column[Timestamp]("created", this) val name = Column[String]("name", this) @@ -47,7 +47,7 @@ object RAttachment { DML.insert( T, T.all, - fr"${v.id},${v.itemId},${v.fileId.id},${v.position},${v.created},${v.name}" + fr"${v.id},${v.itemId},${v.fileId},${v.position},${v.created},${v.name}" ) def decPositions(iId: Ident, lowerBound: Int, upperBound: Int): ConnectionIO[Int] = @@ -77,7 +77,7 @@ object RAttachment { def updateFileIdAndName( attachId: Ident, - fId: Ident, + fId: FileKey, fname: Option[String] ): ConnectionIO[Int] = DML.update( @@ -88,7 +88,7 @@ object RAttachment { def updateFileId( attachId: Ident, - fId: Ident + fId: FileKey ): ConnectionIO[Int] = DML.update( T, @@ -182,7 +182,7 @@ object RAttachment { def findByItemCollectiveSource( id: Ident, coll: Ident, - fileIds: NonEmptyList[Ident] + fileIds: NonEmptyList[FileKey] ): ConnectionIO[Vector[RAttachment]] = { val i = RItem.as("i") val a = RAttachment.as("a") diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala index d8e768fc..3913c135 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala @@ -8,7 +8,7 @@ package docspell.store.records import cats.data.NonEmptyList -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb.TableDef import docspell.store.qb._ @@ -21,7 +21,7 @@ import doobie.implicits._ */ case class RAttachmentArchive( id: Ident, // same as RAttachment.id - fileId: Ident, + fileId: FileKey, name: Option[String], messageId: Option[String], created: Timestamp @@ -32,7 +32,7 @@ object RAttachmentArchive { val tableName = "attachment_archive" val id = Column[Ident]("id", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) 
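    // Columns typed as FileKey are persisted as one string via metaFileKey in
    // DoobieMeta above: BinnyUtils.fileKeyToBinaryId renders, for example,
    // FileKey(acme, attachmentsource, abc-123) as "acme/attachmentsource/abc-123"
    // (values hypothetical). The V1.33.0 migration in the second commit below
    // rewrites existing file ids into exactly this layout.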
val name = Column[String]("filename", this) val messageId = Column[String]("message_id", this) val created = Column[Timestamp]("created", this) @@ -59,7 +59,7 @@ object RAttachmentArchive { def delete(attachId: Ident): ConnectionIO[Int] = DML.delete(T, T.id === attachId) - def deleteAll(fId: Ident): ConnectionIO[Int] = + def deleteAll(fId: FileKey): ConnectionIO[Int] = DML.delete(T, T.fileId === fId) def findByIdAndCollective( diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala index 8afddeee..3b9d23aa 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala @@ -8,7 +8,7 @@ package docspell.store.records import cats.data.NonEmptyList -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -20,7 +20,7 @@ import doobie.implicits._ */ case class RAttachmentPreview( id: Ident, // same as RAttachment.id - fileId: Ident, + fileId: FileKey, name: Option[String], created: Timestamp ) @@ -30,7 +30,7 @@ object RAttachmentPreview { val tableName = "attachment_preview" val id = Column[Ident]("id", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) val name = Column[String]("filename", this) val created = Column[Timestamp]("created", this) diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala index 5579daca..a2e3f949 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala @@ -8,7 +8,7 @@ package docspell.store.records import cats.data.NonEmptyList -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -20,7 +20,7 @@ import doobie.implicits._ */ case class RAttachmentSource( id: Ident, // same as RAttachment.id - fileId: Ident, + fileId: FileKey, name: Option[String], created: Timestamp ) @@ -30,7 +30,7 @@ object RAttachmentSource { val tableName = "attachment_source" val id = Column[Ident]("id", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) val name = Column[String]("filename", this) val created = Column[Timestamp]("created", this) @@ -50,7 +50,7 @@ object RAttachmentSource { def findById(attachId: Ident): ConnectionIO[Option[RAttachmentSource]] = run(select(T.all), from(T), T.id === attachId).query[RAttachmentSource].option - def isSameFile(attachId: Ident, file: Ident): ConnectionIO[Boolean] = + def isSameFile(attachId: Ident, file: FileKey): ConnectionIO[Boolean] = Select(count(T.id).s, from(T), T.id === attachId && T.fileId === file).build .query[Int] .unique diff --git a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala index ace5ccad..89fae4df 100644 --- a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala +++ b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala @@ -21,7 +21,7 @@ final case class RClassifierModel( id: Ident, cid: Ident, name: String, - fileId: Ident, + fileId: FileKey, created: Timestamp ) {} @@ -30,7 +30,7 @@ object RClassifierModel { def 
createNew[F[_]: Sync]( cid: Ident, name: String, - fileId: Ident + fileId: FileKey ): F[RClassifierModel] = for { id <- Ident.randomId[F] @@ -43,7 +43,7 @@ object RClassifierModel { val id = Column[Ident]("id", this) val cid = Column[Ident]("cid", this) val name = Column[String]("name", this) - val fileId = Column[Ident]("file_id", this) + val fileId = Column[FileKey]("file_id", this) val created = Column[Timestamp]("created", this) val all = NonEmptyList.of[Column[_]](id, cid, name, fileId, created) @@ -61,7 +61,7 @@ object RClassifierModel { fr"${v.id},${v.cid},${v.name},${v.fileId},${v.created}" ) - def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] = + def updateFile(coll: Ident, name: String, fid: FileKey): ConnectionIO[Int] = for { now <- Timestamp.current[ConnectionIO] n <- DML.update( diff --git a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala index 02ace94b..8263e875 100644 --- a/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala +++ b/modules/store/src/main/scala/docspell/store/records/RFileMeta.scala @@ -9,7 +9,7 @@ package docspell.store.records import cats.data.NonEmptyList import cats.implicits._ -import docspell.common._ +import docspell.common.{FileKey, _} import docspell.store.qb.DSL._ import docspell.store.qb._ @@ -18,7 +18,7 @@ import doobie.implicits._ import scodec.bits.ByteVector final case class RFileMeta( - id: Ident, + id: FileKey, created: Timestamp, mimetype: MimeType, length: ByteSize, @@ -29,7 +29,7 @@ object RFileMeta { final case class Table(alias: Option[String]) extends TableDef { val tableName = "filemeta" - val id = Column[Ident]("file_id", this) + val id = Column[FileKey]("file_id", this) val timestamp = Column[Timestamp]("created", this) val mimetype = Column[MimeType]("mimetype", this) val length = Column[ByteSize]("length", this) @@ -47,10 +47,10 @@ object RFileMeta { def insert(r: RFileMeta): ConnectionIO[Int] = DML.insert(T, T.all, fr"${r.id},${r.created},${r.mimetype},${r.length},${r.checksum}") - def findById(fid: Ident): ConnectionIO[Option[RFileMeta]] = + def findById(fid: FileKey): ConnectionIO[Option[RFileMeta]] = run(select(T.all), from(T), T.id === fid).query[RFileMeta].option - def findByIds(ids: List[Ident]): ConnectionIO[Vector[RFileMeta]] = + def findByIds(ids: List[FileKey]): ConnectionIO[Vector[RFileMeta]] = NonEmptyList.fromList(ids) match { case Some(nel) => run(select(T.all), from(T), T.id.in(nel)).query[RFileMeta].to[Vector] @@ -58,11 +58,11 @@ object RFileMeta { Vector.empty[RFileMeta].pure[ConnectionIO] } - def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = + def findMime(fid: FileKey): ConnectionIO[Option[MimeType]] = run(select(T.mimetype), from(T), T.id === fid) .query[MimeType] .option - def delete(id: Ident): ConnectionIO[Int] = + def delete(id: FileKey): ConnectionIO[Int] = DML.delete(T, T.id === id) } diff --git a/modules/store/src/test/scala/docspell/store/StoreFixture.scala b/modules/store/src/test/scala/docspell/store/StoreFixture.scala index 6f741230..91441701 100644 --- a/modules/store/src/test/scala/docspell/store/StoreFixture.scala +++ b/modules/store/src/test/scala/docspell/store/StoreFixture.scala @@ -11,7 +11,7 @@ import javax.sql.DataSource import cats.effect._ import docspell.common.LenientUri -import docspell.store.file.FileStore +import docspell.store.file.FileRepository import docspell.store.impl.StoreImpl import docspell.store.migrate.FlywayMigrate @@ -67,7 +67,8 @@ 
object StoreFixture { for { ds <- dataSource(jdbc) xa <- makeXA(ds) - store = new StoreImpl[IO](FileStore[IO](xa, ds, 64 * 1024), jdbc, xa) + fr = FileRepository.genericJDBC[IO](xa, ds, 64 * 1024) + store = new StoreImpl[IO](fr, jdbc, xa) _ <- Resource.eval(store.migrate) } yield store } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 865d33f6..f7810fa2 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -282,7 +282,8 @@ object Dependencies { val binny = Seq( "com.github.eikek" %% "binny-core" % BinnyVersion, - "com.github.eikek" %% "binny-jdbc" % BinnyVersion + "com.github.eikek" %% "binny-jdbc" % BinnyVersion, + "com.github.eikek" %% "binny-minio" % BinnyVersion ) // https://github.com/flyway/flyway From 55700992c2fd789f3a3e084afc29e33b8c2e2e99 Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 13 Feb 2022 19:55:02 +0100 Subject: [PATCH 2/2] Migrate file ids Issue: #1379 --- .../h2/V1.33.0__reorganize_file_ids.sql | 150 ++++++++++++++++++ .../mariadb/V1.33.0__reorganize_file_ids.sql | 148 +++++++++++++++++ .../V1.33.0__reorganize_file_ids.sql | 150 ++++++++++++++++++ 3 files changed, 448 insertions(+) create mode 100644 modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql create mode 100644 modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql create mode 100644 modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql diff --git a/modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql b/modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql new file mode 100644 index 00000000..5c6161b9 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/h2/V1.33.0__reorganize_file_ids.sql @@ -0,0 +1,150 @@ +drop table if exists file_migration_temp; +create table file_migration_temp ( + id bigserial primary key, + original_file varchar(254) not null unique, + cid varchar(254) not null, + category varchar(254) not null, + new_file varchar(254) not null unique +); + +-- Source files +insert into file_migration_temp (original_file, cid, category, new_file) + select + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Archive files +insert into file_migration_temp (original_file, cid, category, new_file) + select distinct + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_archive rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Converted files +insert into file_migration_temp (original_file, cid, category, new_file) + select + ra.filemetaid as original_file, + i.cid, + 'attachmentconvert' as category, + i.cid || '/attachmentconvert/' || ra.filemetaid as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid + where rs.file_id <> ra.filemetaid +; + +-- Preview image +insert into file_migration_temp (original_file, cid, category, new_file) + select + ap.file_id as original_file, + i.cid, + 'previewimage' as category, + i.cid || '/previewimage/' || ap.file_id as new_file + from attachment_preview ap + inner join attachment ra on ra.attachid = ap.id + inner join item i on 
i.itemid = ra.itemid + order by id +; + +-- classifier +insert into file_migration_temp (original_file, cid, category, new_file) + select + file_id as original_file, + cid, + 'classifier' as category, + cid || '/classifier/' || file_id as new_file + from classifier_model +; + + +-- save obsolete/orphaned files +drop table if exists obsolete_files; +create table obsolete_files( + file_id varchar(254) not null, + mimetype varchar(254) not null, + length bigint not null, + checksum varchar(254) not null, + created timestamp not null +); + +with + missing_ids as ( + select file_id from filemeta + except + select original_file as file_id from file_migration_temp) +insert into obsolete_files (file_id, mimetype, length, checksum, created) + select file_id, mimetype, length, checksum, created from filemeta + where file_id in (select file_id from missing_ids) +; + + +-- duplicate each filemeta with the new id +insert into filemeta (file_id, mimetype, length, checksum, created) + select mm.new_file, fm.mimetype, fm.length, fm.checksum, fm.created + from file_migration_temp mm + inner join filemeta fm on fm.file_id = mm.original_file +; + + +-- update each reference to the new id +update attachment_source + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_source.id is not null) +; + +update attachment + set filemetaid = (select new_file + from file_migration_temp + where original_file = filemetaid and attachment.attachid is not null) +; + +update attachment_archive + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_archive.id is not null) +; + +update attachment_preview + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_preview.id is not null) +; + +update classifier_model + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and classifier_model.id is not null) +; + +-- delete old filemeta and filechunk rows +delete from filemeta +where file_id in (select original_file from file_migration_temp); + +delete from filemeta +where file_id in (select file_id from obsolete_files); + +delete from filechunk +where file_id in (select file_id from obsolete_files); + +-- update chunks +update filechunk + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and filechunk.file_id is not null) +; + +-- drop temp table +drop table file_migration_temp; +drop table obsolete_files; diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql new file mode 100644 index 00000000..62ab8670 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.33.0__reorganize_file_ids.sql @@ -0,0 +1,148 @@ +drop table if exists file_migration_temp; +create table file_migration_temp ( + id bigint auto_increment primary key, + original_file varchar(254) not null unique, + cid varchar(254) not null, + category varchar(254) not null, + new_file varchar(254) not null unique +); + +-- Source files +insert into file_migration_temp (original_file, cid, category, new_file) + select + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + concat(i.cid, '/attachmentsource/', rs.file_id) as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Archive files 
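+-- Note: archive files are filed under the same 'attachmentsource' category
+-- as the source files extracted from them; 'select distinct' is needed
+-- because several attachments may reference the same archive file.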
+insert into file_migration_temp (original_file, cid, category, new_file) + select distinct + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + concat(i.cid, '/attachmentsource/', rs.file_id) as new_file + from attachment_archive rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Converted files +insert into file_migration_temp (original_file, cid, category, new_file) + select + ra.filemetaid as original_file, + i.cid, + 'attachmentconvert' as category, + concat(i.cid, '/attachmentconvert/', ra.filemetaid) as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid + where rs.file_id <> ra.filemetaid +; + +-- Preview image +insert into file_migration_temp (original_file, cid, category, new_file) + select + ap.file_id as original_file, + i.cid, + 'previewimage' as category, + concat(i.cid, '/previewimage/', ap.file_id) as new_file + from attachment_preview ap + inner join attachment ra on ra.attachid = ap.id + inner join item i on i.itemid = ra.itemid + order by id +; + +-- classifier +insert into file_migration_temp (original_file, cid, category, new_file) + select + file_id as original_file, + cid, + 'classifier' as category, + concat(cid, '/classifier/', file_id) as new_file + from classifier_model +; + + +-- save obsolete/orphaned files +drop table if exists obsolete_files; +create table obsolete_files( + file_id varchar(254) not null, + mimetype varchar(254) not null, + length bigint not null, + checksum varchar(254) not null, + created timestamp not null +); + +insert into obsolete_files (file_id, mimetype, length, checksum, created) + select file_id, mimetype, length, checksum, created from filemeta + where file_id in ( + select file_id from filemeta + except + select original_file as file_id from file_migration_temp + ); + + +-- duplicate each filemeta with the new id +insert into filemeta (file_id, mimetype, length, checksum, created) + select mm.new_file, fm.mimetype, fm.length, fm.checksum, fm.created + from file_migration_temp mm + inner join filemeta fm on fm.file_id = mm.original_file +; + + +-- update each reference to the new id +update attachment_source + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_source.id is not null) +; + +update attachment + set filemetaid = (select new_file + from file_migration_temp + where original_file = filemetaid and attachment.attachid is not null) +; + +update attachment_archive + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_archive.id is not null) +; + +update attachment_preview + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_preview.id is not null) +; + +update classifier_model + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and classifier_model.id is not null) +; + +-- delete old filemeta and filechunk rows +delete from filemeta +where file_id in (select original_file from file_migration_temp); + +delete from filemeta +where file_id in (select file_id from obsolete_files); + +delete from filechunk +where file_id in (select file_id from obsolete_files); + +-- update chunks +update filechunk + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and filechunk.file_id is not null) +; + +-- drop temp table +drop table file_migration_temp; 
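+-- also remove the helper table of orphaned metadata collected above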
+drop table obsolete_files; diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql new file mode 100644 index 00000000..5c6161b9 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.33.0__reorganize_file_ids.sql @@ -0,0 +1,150 @@ +drop table if exists file_migration_temp; +create table file_migration_temp ( + id bigserial primary key, + original_file varchar(254) not null unique, + cid varchar(254) not null, + category varchar(254) not null, + new_file varchar(254) not null unique +); + +-- Source files +insert into file_migration_temp (original_file, cid, category, new_file) + select + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Archive files +insert into file_migration_temp (original_file, cid, category, new_file) + select distinct + rs.file_id as original_file, + i.cid, + 'attachmentsource' as category, + i.cid || '/attachmentsource/' || rs.file_id as new_file + from attachment_archive rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid +; + +-- Converted files +insert into file_migration_temp (original_file, cid, category, new_file) + select + ra.filemetaid as original_file, + i.cid, + 'attachmentconvert' as category, + i.cid || '/attachmentconvert/' || ra.filemetaid as new_file + from attachment_source rs + inner join attachment ra on rs.id = ra.attachid + inner join item i on ra.itemid = i.itemid + where rs.file_id <> ra.filemetaid +; + +-- Preview image +insert into file_migration_temp (original_file, cid, category, new_file) + select + ap.file_id as original_file, + i.cid, + 'previewimage' as category, + i.cid || '/previewimage/' || ap.file_id as new_file + from attachment_preview ap + inner join attachment ra on ra.attachid = ap.id + inner join item i on i.itemid = ra.itemid + order by id +; + +-- classifier +insert into file_migration_temp (original_file, cid, category, new_file) + select + file_id as original_file, + cid, + 'classifier' as category, + cid || '/classifier/' || file_id as new_file + from classifier_model +; + + +-- save obsolete/orphaned files +drop table if exists obsolete_files; +create table obsolete_files( + file_id varchar(254) not null, + mimetype varchar(254) not null, + length bigint not null, + checksum varchar(254) not null, + created timestamp not null +); + +with + missing_ids as ( + select file_id from filemeta + except + select original_file as file_id from file_migration_temp) +insert into obsolete_files (file_id, mimetype, length, checksum, created) + select file_id, mimetype, length, checksum, created from filemeta + where file_id in (select file_id from missing_ids) +; + + +-- duplicate each filemeta with the new id +insert into filemeta (file_id, mimetype, length, checksum, created) + select mm.new_file, fm.mimetype, fm.length, fm.checksum, fm.created + from file_migration_temp mm + inner join filemeta fm on fm.file_id = mm.original_file +; + + +-- update each reference to the new id +update attachment_source + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_source.id is not null) +; + +update attachment + set filemetaid = (select new_file + from file_migration_temp + 
where original_file = filemetaid and attachment.attachid is not null) +; + +update attachment_archive + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_archive.id is not null) +; + +update attachment_preview + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and attachment_preview.id is not null) +; + +update classifier_model + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and classifier_model.id is not null) +; + +-- delete old filemeta and filechunk rows +delete from filemeta +where file_id in (select original_file from file_migration_temp); + +delete from filemeta +where file_id in (select file_id from obsolete_files); + +delete from filechunk +where file_id in (select file_id from obsolete_files); + +-- update chunks +update filechunk + set file_id = (select new_file + from file_migration_temp + where original_file = file_id and filechunk.file_id is not null) +; + +-- drop temp table +drop table file_migration_temp; +drop table obsolete_files;
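
Note on the resulting ids: all three migrations rewrite filemeta.file_id from a
flat ident into the pattern <collective>/<category>/<original-id>, i.e. the
string form of the FileKey type introduced in the first patch. A minimal sketch
of that mapping, assuming FileKey simply carries the three parts as strings
(field and method names below are illustrative, inferred from the migration SQL,
not the actual definitions in docspell.common):

  // Sketch only: inferred from the migration SQL, not the real docspell code.
  final case class FileKey(collective: String, category: String, id: String) {
    // mirrors the SQL concatenation: cid || '/' || category || '/' || file_id
    def asString: String = s"$collective/$category/$id"
  }

  object FileKey {
    // inverse mapping, useful when reading back a migrated file_id
    def fromString(s: String): Option[FileKey] =
      s.split('/') match {
        case Array(cid, cat, id) => Some(FileKey(cid, cat, id))
        case _                   => None
      }
  }

Round-tripping fromString(k.asString) == Some(k) holds as long as the original
idents never contain a '/', which appears to be the case for the ids the
migration rewrites.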