mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-03 18:00:11 +00:00 
			
		
		
		
	Add support for archive files
Each attachment is now first extracted into potentially multiple ones, if it is recognized as an archive. This is the first step in processing. The original archive file is also stored and the resulting attachments are associated with their original archive. First support is implemented for zip files.
This commit is contained in:
		
							
								
								
									
										48
									
								
								modules/files/src/main/scala/docspell/files/Zip.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								modules/files/src/main/scala/docspell/files/Zip.scala
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,48 @@
 | 
			
		||||
package docspell.files
 | 
			
		||||
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
import fs2.{Pipe, Stream}
 | 
			
		||||
import java.io.InputStream
 | 
			
		||||
import java.util.zip.ZipInputStream
 | 
			
		||||
import java.nio.file.Paths
 | 
			
		||||
 | 
			
		||||
/** Helpers for reading zip archives as fs2 streams. */
object Zip {

  /** A single file found inside an archive.
    *
    * @param name the last path segment of the zip entry's name
    * @param data the entry's bytes. They are read from the one shared
    *   `ZipInputStream` of the archive, so they should be consumed
    *   while this entry is the current element of the entry stream.
    */
  case class Entry[F[_]](name: String, data: Stream[F, Byte])

  /** Same as [[unzip]], expressed as a `Pipe` from archive bytes to
    * entries.
    */
  def unzipP[F[_]: ConcurrentEffect: ContextShift](
      chunkSize: Int,
      blocker: Blocker
  ): Pipe[F, Byte, Entry[F]] =
    s => unzip[F](chunkSize, blocker)(s)

  /** Interprets `data` as a zip archive and emits one [[Entry]] per
    * contained file.
    *
    * @param chunkSize the chunk size used when reading entry contents
    * @param blocker used for the blocking reads on the zip stream
    */
  def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)(
      data: Stream[F, Byte]
  ): Stream[F, Entry[F]] =
    data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker))

  /** Reads zip entries from a plain `InputStream`.
    *
    * Directory entries are skipped: they carry no content and would
    * otherwise show up as files with an empty body.
    */
  def unzipJava[F[_]: Sync: ContextShift](
      in: InputStream,
      chunkSize: Int,
      blocker: Blocker
  ): Stream[F, Entry[F]] = {
    val zin = new ZipInputStream(in)

    // Acquire: advance to the next entry (`null` at the end of the
    // archive becomes `None`). Release: close the entry again.
    val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) {
      case Some(_) => Sync[F].delay(zin.closeEntry())
      case None    => ().pure[F]
    }

    Stream
      .resource(nextEntry)
      .repeat
      .unNoneTerminate
      .filter(ze => !ze.isDirectory)
      .map { ze =>
        // Only the file name is kept; the directory part of the entry
        // name is dropped.
        val name = Paths.get(ze.getName()).getFileName.toString
        // `closeAfterUse = false`: the zip stream must stay open for
        // the entries that follow.
        val data =
          fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false)
        Entry(name, data)
      }
  }
}
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								modules/files/src/test/resources/letters.zip
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								modules/files/src/test/resources/letters.zip
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										30
									
								
								modules/files/src/test/scala/docspell/files/ZipTest.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								modules/files/src/test/scala/docspell/files/ZipTest.scala
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,30 @@
 | 
			
		||||
package docspell.files
 | 
			
		||||
 | 
			
		||||
import minitest._
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
import scala.concurrent.ExecutionContext
 | 
			
		||||
 | 
			
		||||
/** Checks that `Zip.unzip` yields the expected names and sizes for the
  * entries of the `letters.zip` test resource.
  */
object ZipTest extends SimpleTestSuite {

  val blocker: Blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Implicit definitions get an explicit type so that implicit
  // resolution does not depend on type inference.
  implicit val CS: ContextShift[IO] = IO.contextShift(ExecutionContext.global)

  test("unzip") {
    val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker)
    val uncomp  = zipFile.through(Zip.unzip(8192, blocker))

    uncomp
      .evalMap { entry =>
        // Count the bytes of the entry by summing a 1 for each byte.
        val x = entry.data.map(_ => 1).foldMonoid.compile.lastOrError
        x.map { size =>
          if (entry.name.endsWith(".pdf")) {
            assertEquals(entry.name, "letter-de.pdf")
            assertEquals(size, 34815)
          } else {
            assertEquals(entry.name, "letter-en.txt")
            assertEquals(size, 1131)
          }
        }
      }
      .compile
      .drain
      .unsafeRunSync()
  }
}
 | 
			
		||||
@@ -64,7 +64,7 @@ object CreateItem {
 | 
			
		||||
      } yield ItemData(it, fm, Vector.empty, Vector.empty, fm.map(a => a.id -> a.fileId).toMap)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  def insertAttachment[F[_]: Sync](ctx: Context[F, ProcessItemArgs])(ra: RAttachment): F[Int] = {
 | 
			
		||||
  def insertAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = {
 | 
			
		||||
    val rs = RAttachmentSource.of(ra)
 | 
			
		||||
    ctx.store.transact(for {
 | 
			
		||||
      n <- RAttachment.insert(ra)
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,169 @@
 | 
			
		||||
package docspell.joex.process
 | 
			
		||||
 | 
			
		||||
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
 | 
			
		||||
import cats.Functor
 | 
			
		||||
import cats.data.OptionT
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
import fs2.Stream
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.joex.scheduler._
 | 
			
		||||
import docspell.store.records._
 | 
			
		||||
import docspell.files.Zip
 | 
			
		||||
import cats.kernel.Monoid
 | 
			
		||||
 | 
			
		||||
/** Goes through all attachments and extracts archive files, like zip
 | 
			
		||||
  * files. The process is recursive, until all archives have been
 | 
			
		||||
  * extracted.
 | 
			
		||||
  *
 | 
			
		||||
  * The archive file is stored as an `attachment_archive` record that
 | 
			
		||||
  * references all its elements. If there are inner archives, only the
 | 
			
		||||
  * outer archive file is preserved.
 | 
			
		||||
  *
 | 
			
		||||
  * This step assumes an existing premature item, it traverses its
 | 
			
		||||
  * attachments.
 | 
			
		||||
  */
 | 
			
		||||
object ExtractArchive {

  /** Runs [[multiPass]] without an initial archive and returns only the
    * resulting item.
    */
  def apply[F[_]: ConcurrentEffect: ContextShift](
      item: ItemData
  ): Task[F, ProcessItemArgs, ItemData] =
    multiPass(item, None).map(_._2)

  /** Repeats [[singlePass]] until a pass reports no archive. The
    * archive reported by one pass is handed to the next one.
    */
  def multiPass[F[_]: ConcurrentEffect: ContextShift](
      item: ItemData,
      archive: Option[RAttachmentArchive]
  ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
    singlePass(item, archive).flatMap { t =>
      if (t._1.isEmpty) Task.pure(t)
      else multiPass(t._2, t._1)
    }

  /** Tries to extract every attachment of `item` once.
    *
    * The resulting attachments are renumbered by their index and
    * stored, followed by the archive records. The result is the first
    * archive record created in this pass (if any) together with the
    * item carrying the new attachments.
    */
  def singlePass[F[_]: ConcurrentEffect: ContextShift](
      item: ItemData,
      archive: Option[RAttachmentArchive]
  ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] =
    Task { ctx =>
      def extract(ra: RAttachment) =
        findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, m))

      for {
        ras <- item.attachments.traverse(extract)
        nra = ras.flatMap(_.files).zipWithIndex.map(t => t._1.copy(position = t._2))
        _ <- nra.traverse(storeAttachment(ctx))
        naa = ras.flatMap(_.archives)
        _ <- naa.traverse(storeArchive(ctx))
      } yield naa.headOption -> item.copy(
        attachments = nra,
        originFile = item.originFile ++ nra.map(a => a.id -> a.fileId).toMap
      )
    }

  /** Looks up the mime type in the file meta data of the attachment's
    * file; falls back to `application/octet-stream` if there is none.
    */
  def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
    OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
      .map(_.mimetype)
      .getOrElse(Mimetype.`application/octet-stream`)

  /** Extracts `ra` if it is a zip file, otherwise returns it unchanged.
    *
    * An attachment counts as a zip file if its mime type is
    * `application/zip` and its name ends in `.zip`; the extension is
    * compared case-insensitively, so `SCAN.ZIP` is extracted as well.
    */
  def extractSafe[F[_]: ConcurrentEffect: ContextShift](
      ctx: Context[F, ProcessItemArgs],
      archive: Option[RAttachmentArchive]
  )(ra: RAttachment, mime: Mimetype): F[Extracted] =
    mime match {
      case Mimetype.`application/zip` if hasZipExtension(ra.name) =>
        ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("<noname>")}.") *>
          extractZip(ctx, archive)(ra)
            .flatTap(_ => cleanupParents(ctx, ra, archive))

      case _ =>
        ctx.logger.debug(s"Not an archive: ${mime.asString}") *>
          Extracted.noArchive(ra).pure[F]
    }

  /** True if a name is present and ends with `.zip`, ignoring case. */
  private def hasZipExtension(name: Option[String]): Boolean =
    name.exists(_.toLowerCase(java.util.Locale.ROOT).endsWith(".zip"))

  /** Removes the attachment that has just been extracted.
    *
    * With an outer `archive` given, `ra` is an inner archive: its
    * archive record, attachment record and file are all deleted.
    * Without one, only the attachment record is deleted and the file
    * is kept.
    */
  def cleanupParents[F[_]: Sync](
      ctx: Context[F, _],
      ra: RAttachment,
      archive: Option[RAttachmentArchive]
  ): F[Unit] = {
    // Render the optional name the same way `extractSafe` does instead
    // of printing `Some(..)` / `None`.
    val name = ra.name.getOrElse("<noname>")
    archive match {
      case Some(_) =>
        for {
          _ <- ctx.logger.debug(
            s"Extracted inner attachment $name. Remove it completely."
          )
          _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id))
          _ <- ctx.store.transact(RAttachment.delete(ra.id))
          _ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain
        } yield ()
      case None =>
        for {
          _ <- ctx.logger.debug(
            s"Extracted attachment $name. Remove it from the item."
          )
          _ <- ctx.store.transact(RAttachment.delete(ra.id))
        } yield ()
    }
  }

  /** Unzips the file of `ra`, saves every entry as a new file and
    * builds a new attachment plus an archive record for each of them.
    *
    * The archive record is a copy of the given outer `archive`, or of
    * one derived from `ra` if there is none, with its id set to the id
    * of the new attachment. Nothing but the entry files is stored
    * here; the records are only returned.
    */
  def extractZip[F[_]: ConcurrentEffect: ContextShift](
      ctx: Context[F, _],
      archive: Option[RAttachmentArchive]
  )(ra: RAttachment): F[Extracted] = {
    val zipData = ctx.store.bitpeace
      .get(ra.fileId.id)
      .unNoneTerminate
      .through(ctx.store.bitpeace.fetchData2(RangeDef.all))

    zipData
      .through(Zip.unzipP[F](8192, ctx.blocker))
      .flatMap { entry =>
        val mimeHint = MimetypeHint.filename(entry.name)
        val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint)
        Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. Storing as attachment.")) >>
          fileMeta.evalMap { fm =>
            Ident.randomId.map { id =>
              val nra = RAttachment(
                id,
                ra.itemId,
                Ident.unsafe(fm.id),
                0, //position is updated afterwards
                ra.created,
                Option(entry.name).map(_.trim).filter(_.nonEmpty)
              )
              val aa = archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = id)
              Extracted.of(nra, aa)
            }
          }
      }
      .foldMonoid
      .compile
      .lastOrError
  }

  /** Updates the position of the attachment; if that changed no row,
    * the attachment is inserted instead.
    */
  def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = {
    val insert = CreateItem.insertAttachment(ctx)(ra)
    for {
      n1 <- ctx.store.transact(RAttachment.updatePosition(ra.id, ra.position))
      n2 <- if (n1 > 0) 0.pure[F] else insert
    } yield n1 + n2
  }

  /** Inserts the given archive record. */
  def storeArchive[F[_]: Sync](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] =
    ctx.store.transact(RAttachmentArchive.insert(aa))

  /** The attachments resulting from an extraction step together with
    * the archive records created for them.
    */
  case class Extracted(files: Vector[RAttachment], archives: Vector[RAttachmentArchive]) {
    def ++(e: Extracted): Extracted =
      Extracted(files ++ e.files, archives ++ e.archives)
  }
  object Extracted {
    val empty: Extracted = Extracted(Vector.empty, Vector.empty)

    /** Result for an attachment that is not an archive. */
    def noArchive(ra: RAttachment): Extracted =
      Extracted(Vector(ra), Vector.empty)

    /** Result for one extracted file and its archive record. */
    def of(ra: RAttachment, aa: RAttachmentArchive): Extracted =
      Extracted(Vector(ra), Vector(aa))

    implicit val extractedMonoid: Monoid[Extracted] =
      Monoid.instance(empty, _ ++ _)
  }

}
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
package docspell.joex.process
 | 
			
		||||
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
import cats.effect.{ContextShift, Sync}
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import docspell.common.{ItemState, ProcessItemArgs}
 | 
			
		||||
import docspell.joex.Config
 | 
			
		||||
import docspell.joex.scheduler.{Context, Task}
 | 
			
		||||
@@ -12,7 +12,7 @@ object ItemHandler {
 | 
			
		||||
  def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
 | 
			
		||||
    logWarn("Now cancelling. Deleting potentially created data.").flatMap(_ => deleteByFileIds)
 | 
			
		||||
 | 
			
		||||
  def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, ProcessItemArgs, Unit] =
 | 
			
		||||
  def apply[F[_]: ConcurrentEffect: ContextShift](cfg: Config): Task[F, ProcessItemArgs, Unit] =
 | 
			
		||||
    CreateItem[F]
 | 
			
		||||
      .flatMap(itemStateTask(ItemState.Processing))
 | 
			
		||||
      .flatMap(safeProcess[F](cfg))
 | 
			
		||||
@@ -27,7 +27,7 @@ object ItemHandler {
 | 
			
		||||
      last = ctx.config.retries == current.getOrElse(0)
 | 
			
		||||
    } yield last
 | 
			
		||||
 | 
			
		||||
  def safeProcess[F[_]: Sync: ContextShift](
 | 
			
		||||
  def safeProcess[F[_]: ConcurrentEffect: ContextShift](
 | 
			
		||||
      cfg: Config
 | 
			
		||||
  )(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
 | 
			
		||||
    Task(isLastRetry[F, ProcessItemArgs] _).flatMap {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,23 +1,20 @@
 | 
			
		||||
package docspell.joex.process
 | 
			
		||||
 | 
			
		||||
import cats.effect.{ContextShift, Sync}
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import docspell.common.ProcessItemArgs
 | 
			
		||||
import docspell.joex.scheduler.Task
 | 
			
		||||
import docspell.joex.Config
 | 
			
		||||
 | 
			
		||||
object ProcessItem {
 | 
			
		||||
 | 
			
		||||
  def apply[F[_]: Sync: ContextShift](
 | 
			
		||||
  def apply[F[_]: ConcurrentEffect: ContextShift](
 | 
			
		||||
      cfg: Config
 | 
			
		||||
  )(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
 | 
			
		||||
    ConvertPdf(cfg.convert, item)
 | 
			
		||||
    ExtractArchive(item)
 | 
			
		||||
      .flatMap(ConvertPdf(cfg.convert, _))
 | 
			
		||||
      .flatMap(TextExtraction(cfg.extraction, _))
 | 
			
		||||
      .flatMap(Task.setProgress(25))
 | 
			
		||||
      .flatMap(TextAnalysis[F])
 | 
			
		||||
      .flatMap(Task.setProgress(50))
 | 
			
		||||
      .flatMap(FindProposal[F])
 | 
			
		||||
      .flatMap(EvalProposals[F])
 | 
			
		||||
      .flatMap(SaveProposals[F])
 | 
			
		||||
      .flatMap(analysisOnly[F])
 | 
			
		||||
      .flatMap(Task.setProgress(75))
 | 
			
		||||
      .flatMap(LinkProposal[F])
 | 
			
		||||
      .flatMap(Task.setProgress(99))
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,8 @@
 | 
			
		||||
CREATE TABLE `attachment_archive` (
 | 
			
		||||
  `id` varchar(254) not null primary key,
 | 
			
		||||
  `file_id` varchar(254) not null,
 | 
			
		||||
  `filename` varchar(254),
 | 
			
		||||
  `created` timestamp not null,
 | 
			
		||||
  foreign key (`file_id`) references `filemeta`(`id`),
 | 
			
		||||
  foreign key (`id`) references `attachment`(`attachid`)
 | 
			
		||||
);
 | 
			
		||||
@@ -0,0 +1,8 @@
 | 
			
		||||
CREATE TABLE "attachment_archive" (
 | 
			
		||||
  "id" varchar(254) not null primary key,
 | 
			
		||||
  "file_id" varchar(254) not null,
 | 
			
		||||
  "filename" varchar(254),
 | 
			
		||||
  "created" timestamp not null,
 | 
			
		||||
  foreign key ("file_id") references "filemeta"("id"),
 | 
			
		||||
  foreign key ("id") references "attachment"("attachid")
 | 
			
		||||
);
 | 
			
		||||
@@ -3,14 +3,17 @@ package docspell.store.queries
 | 
			
		||||
import fs2.Stream
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
import cats.effect.Sync
 | 
			
		||||
import cats.data.OptionT
 | 
			
		||||
import doobie._
 | 
			
		||||
import doobie.implicits._
 | 
			
		||||
import docspell.common.{Ident, MetaProposalList}
 | 
			
		||||
import docspell.store.Store
 | 
			
		||||
import docspell.store.impl.Implicits._
 | 
			
		||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RAttachmentSource, RItem}
 | 
			
		||||
import docspell.store.records._
 | 
			
		||||
import docspell.common.syntax.all._
 | 
			
		||||
 | 
			
		||||
object QAttachment {
 | 
			
		||||
  private[this] val logger = org.log4s.getLogger
 | 
			
		||||
 | 
			
		||||
  def deleteById[F[_]: Sync](store: Store[F])(attachId: Ident, coll: Ident): F[Int] =
 | 
			
		||||
    for {
 | 
			
		||||
@@ -20,9 +23,12 @@ object QAttachment {
 | 
			
		||||
      rsFile <- store
 | 
			
		||||
        .transact(RAttachmentSource.findByIdAndCollective(attachId, coll))
 | 
			
		||||
        .map(_.map(_.fileId))
 | 
			
		||||
      aaFile <- store
 | 
			
		||||
        .transact(RAttachmentArchive.findByIdAndCollective(attachId, coll))
 | 
			
		||||
        .map(_.map(_.fileId))
 | 
			
		||||
      n <- store.transact(RAttachment.delete(attachId))
 | 
			
		||||
      f <- Stream
 | 
			
		||||
        .emits(raFile.toSeq ++ rsFile.toSeq)
 | 
			
		||||
        .emits(raFile.toSeq ++ rsFile.toSeq ++ aaFile.toSeq)
 | 
			
		||||
        .map(_.id)
 | 
			
		||||
        .flatMap(store.bitpeace.delete)
 | 
			
		||||
        .map(flag => if (flag) 1 else 0)
 | 
			
		||||
@@ -32,20 +38,45 @@ object QAttachment {
 | 
			
		||||
 | 
			
		||||
  def deleteAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] =
 | 
			
		||||
    for {
 | 
			
		||||
      _ <- logger.fdebug[F](s"Deleting attachment: ${ra.id.id}")
 | 
			
		||||
      s <- store.transact(RAttachmentSource.findById(ra.id))
 | 
			
		||||
      n <- store.transact(RAttachment.delete(ra.id))
 | 
			
		||||
      _ <- logger.fdebug[F](
 | 
			
		||||
        s"Deleted $n meta records (source, meta, archive). Deleting binaries now."
 | 
			
		||||
      )
 | 
			
		||||
      f <- Stream
 | 
			
		||||
        .emits(ra.fileId.id +: s.map(_.fileId.id).toSeq)
 | 
			
		||||
        .emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq))
 | 
			
		||||
        .flatMap(store.bitpeace.delete)
 | 
			
		||||
        .map(flag => if (flag) 1 else 0)
 | 
			
		||||
        .compile
 | 
			
		||||
        .foldMonoid
 | 
			
		||||
    } yield n + f
 | 
			
		||||
 | 
			
		||||
  def deleteItemAttachments[F[_]: Sync](store: Store[F])(itemId: Ident, coll: Ident): F[Int] =
 | 
			
		||||
  /** Looks up the archive record of the given attachment. If there is
    * one, all archive records referring to the same file are deleted
    * and the file itself is removed afterwards.
    *
    * @return the number of deleted archive records, or 0 if the
    *   attachment has no archive record
    */
  def deleteArchive[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] =
    store.transact(RAttachmentArchive.findById(attachId)).flatMap {
      case Some(archive) =>
        for {
          deleted <- store.transact(RAttachmentArchive.deleteAll(archive.fileId))
          _       <- store.bitpeace.delete(archive.fileId.id).compile.drain
        } yield deleted
      case None =>
        0.pure[F]
    }
 | 
			
		||||
 | 
			
		||||
  def deleteItemAttachments[F[_]: Sync](
 | 
			
		||||
      store: Store[F]
 | 
			
		||||
  )(itemId: Ident, coll: Ident): F[Int] =
 | 
			
		||||
    for {
 | 
			
		||||
      ras <- store.transact(RAttachment.findByItemAndCollective(itemId, coll))
 | 
			
		||||
      ns  <- ras.traverse(deleteAttachment[F](store))
 | 
			
		||||
      _ <- logger.finfo[F](
 | 
			
		||||
        s"Have ${ras.size} attachments to delete. Must first delete archive entries"
 | 
			
		||||
      )
 | 
			
		||||
      a  <- ras.traverse(a => deleteArchive(store)(a.id))
 | 
			
		||||
      _  <- logger.fdebug[F](s"Deleted ${a.sum} archive entries")
 | 
			
		||||
      ns <- ras.traverse(deleteAttachment[F](store))
 | 
			
		||||
    } yield ns.sum
 | 
			
		||||
 | 
			
		||||
  def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = {
 | 
			
		||||
@@ -56,8 +87,12 @@ object QAttachment {
 | 
			
		||||
    val q = fr"SELECT" ++ MC.proposals
 | 
			
		||||
      .prefix("m")
 | 
			
		||||
      .f ++ fr"FROM" ++ RAttachmentMeta.table ++ fr"m" ++
 | 
			
		||||
      fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.id.prefix("a").is(MC.id.prefix("m")) ++
 | 
			
		||||
      fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ AC.itemId.prefix("a").is(IC.id.prefix("i")) ++
 | 
			
		||||
      fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.id
 | 
			
		||||
      .prefix("a")
 | 
			
		||||
      .is(MC.id.prefix("m")) ++
 | 
			
		||||
      fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ AC.itemId
 | 
			
		||||
      .prefix("a")
 | 
			
		||||
      .is(IC.id.prefix("i")) ++
 | 
			
		||||
      fr"WHERE" ++ and(AC.itemId.prefix("a").is(itemId), IC.cid.prefix("i").is(coll))
 | 
			
		||||
 | 
			
		||||
    for {
 | 
			
		||||
@@ -73,14 +108,18 @@ object QAttachment {
 | 
			
		||||
    val MC = RAttachmentMeta.Columns
 | 
			
		||||
    val IC = RItem.Columns
 | 
			
		||||
 | 
			
		||||
    val q = fr"SELECT" ++ commas(MC.all.map(_.prefix("m").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++
 | 
			
		||||
      fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ IC.id
 | 
			
		||||
      .prefix("i")
 | 
			
		||||
      .is(AC.itemId.prefix("a")) ++
 | 
			
		||||
      fr"INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ AC.id
 | 
			
		||||
      .prefix("a")
 | 
			
		||||
      .is(MC.id.prefix("m")) ++
 | 
			
		||||
      fr"WHERE" ++ and(AC.id.prefix("a").is(attachId), IC.cid.prefix("i").is(collective))
 | 
			
		||||
    val q =
 | 
			
		||||
      fr"SELECT" ++ commas(MC.all.map(_.prefix("m").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++
 | 
			
		||||
        fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ IC.id
 | 
			
		||||
        .prefix("i")
 | 
			
		||||
        .is(AC.itemId.prefix("a")) ++
 | 
			
		||||
        fr"INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ AC.id
 | 
			
		||||
        .prefix("a")
 | 
			
		||||
        .is(MC.id.prefix("m")) ++
 | 
			
		||||
        fr"WHERE" ++ and(
 | 
			
		||||
        AC.id.prefix("a").is(attachId),
 | 
			
		||||
        IC.cid.prefix("i").is(collective)
 | 
			
		||||
      )
 | 
			
		||||
 | 
			
		||||
    q.query[RAttachmentMeta].option
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -41,6 +41,9 @@ object RAttachment {
 | 
			
		||||
  def updateFileIdAndName(attachId: Ident, fId: Ident, fname: Option[String]): ConnectionIO[Int] =
 | 
			
		||||
    updateRow(table, id.is(attachId), commas(fileId.setTo(fId), name.setTo(fname))).update.run
 | 
			
		||||
 | 
			
		||||
  /** Sets the position of the attachment with the given id and yields
    * the result of running that update.
    */
  def updatePosition(attachId: Ident, pos: Int): ConnectionIO[Int] = {
    val thisAttachment = id.is(attachId)
    val newPosition    = position.setTo(pos)
    updateRow(table, thisAttachment, newPosition).update.run
  }
 | 
			
		||||
 | 
			
		||||
  def findById(attachId: Ident): ConnectionIO[Option[RAttachment]] =
 | 
			
		||||
    selectSimple(all, table, id.is(attachId)).query[RAttachment].option
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,90 @@
 | 
			
		||||
package docspell.store.records
 | 
			
		||||
 | 
			
		||||
import bitpeace.FileMeta
 | 
			
		||||
import doobie._
 | 
			
		||||
import doobie.implicits._
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.store.impl._
 | 
			
		||||
import docspell.store.impl.Implicits._
 | 
			
		||||
 | 
			
		||||
/** The archive file of some attachment. The `id` is shared with the
 | 
			
		||||
  * attachment, to create a 0..1-1 relationship.
 | 
			
		||||
  */
 | 
			
		||||
case class RAttachmentArchive(
    id: Ident, // identical to the id of the corresponding RAttachment
    fileId: Ident,
    name: Option[String],
    created: Timestamp
)

object RAttachmentArchive {

  val table = fr"attachment_archive"

  object Columns {
    val id      = Column("id")
    val fileId  = Column("file_id")
    val name    = Column("filename")
    val created = Column("created")

    val all = List(id, fileId, name, created)
  }

  import Columns._

  /** Builds an archive record that takes over id, file, name and
    * creation time of the given attachment.
    */
  def of(ra: RAttachment): RAttachmentArchive =
    RAttachmentArchive(
      id = ra.id,
      fileId = ra.fileId,
      name = ra.name,
      created = ra.created
    )

  /** Inserts the record as a new row. */
  def insert(v: RAttachmentArchive): ConnectionIO[Int] = {
    val values = fr"${v.id},${v.fileId},${v.name},${v.created}"
    insertRow(table, all, values).update.run
  }

  /** Finds the archive record with the given (attachment) id. */
  def findById(attachId: Ident): ConnectionIO[Option[RAttachmentArchive]] = {
    val select = selectSimple(all, table, id.is(attachId))
    select.query[RAttachmentArchive].option
  }

  /** Deletes the archive record with the given (attachment) id. */
  def delete(attachId: Ident): ConnectionIO[Int] = {
    val byId = id.is(attachId)
    deleteFrom(table, byId).update.run
  }

  /** Deletes all archive records that refer to the given file. */
  def deleteAll(fId: Ident): ConnectionIO[Int] = {
    val byFile = fileId.is(fId)
    deleteFrom(table, byFile).update.run
  }

  /** Finds the archive record of an attachment, provided the item of
    * that attachment belongs to the given collective.
    */
  def findByIdAndCollective(
      attachId: Ident,
      collective: Ident
  ): ConnectionIO[Option[RAttachmentArchive]] = {
    // table aliases: a = attachment_archive, b = attachment, i = item
    val archiveId      = Columns.id.prefix("a")
    val attachmentId   = RAttachment.Columns.id.prefix("b")
    val attachmentItem = RAttachment.Columns.itemId.prefix("b")
    val itemId         = RItem.Columns.id.prefix("i")
    val itemCollective = RItem.Columns.cid.prefix("i")

    val joinAttachment =
      fr"a INNER JOIN" ++ RAttachment.table ++ fr"b ON" ++ archiveId.is(attachmentId)
    val joinItem =
      fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ attachmentItem.is(itemId)
    val from = table ++ joinAttachment ++ joinItem

    val where = and(
      archiveId.is(attachId),
      attachmentId.is(attachId),
      itemCollective.is(collective)
    )

    val select = selectSimple(all.map(_.prefix("a")), from, where)
    select.query[RAttachmentArchive].option
  }

  /** Loads the archive records of all attachments of the given item
    * together with the meta data of the archive files, ordered by the
    * attachment position.
    */
  def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachmentArchive, FileMeta)]] = {
    import bitpeace.sql._

    // table aliases: a = attachment_archive, m = file meta, b = attachment
    val archiveId          = Columns.id.prefix("a")
    val archiveFile        = fileId.prefix("a")
    val metaId             = RFileMeta.Columns.id.prefix("m")
    val attachmentId       = RAttachment.Columns.id.prefix("b")
    val attachmentItem     = RAttachment.Columns.itemId.prefix("b")
    val attachmentPosition = RAttachment.Columns.position.prefix("b")

    val archiveCols = all.map(_.prefix("a"))
    val metaCols    = RFileMeta.Columns.all.map(_.prefix("m"))
    val cols        = archiveCols ++ metaCols

    val joinMeta =
      fr"a INNER JOIN" ++ RFileMeta.table ++ fr"m ON" ++ archiveFile.is(metaId)
    val joinAttachment =
      fr"INNER JOIN" ++ RAttachment.table ++ fr"b ON" ++ archiveId.is(attachmentId)
    val from  = table ++ joinMeta ++ joinAttachment
    val where = attachmentItem.is(id)

    val select = selectSimple(cols, from, where) ++ orderBy(attachmentPosition.asc)
    select
      .query[(RAttachmentArchive, FileMeta)]
      .to[Vector]
  }

}
 | 
			
		||||
		Reference in New Issue
	
	Block a user