Add support for archive files

If an attachment is recognized as an archive, it is now first extracted
into potentially multiple attachments. This is the first step in
processing. The original archive file is also stored, and the resulting
attachments are associated with their original archive.

Initial support is implemented for zip files.
This commit is contained in:
Eike Kettner
2020-03-19 22:42:27 +01:00
parent 2a7066650f
commit 4ed7a137f7
12 changed files with 419 additions and 27 deletions

View File

@ -0,0 +1,8 @@
-- Stores the original archive file (e.g. a zip) that an attachment was
-- extracted from. `id` is the primary key and at the same time a foreign key
-- to `attachment`.`attachid`, so an attachment has at most one archive row.
-- `file_id` points to the stored archive file in `filemeta`; `filename` is
-- optional.
CREATE TABLE `attachment_archive` (
`id` varchar(254) not null primary key,
`file_id` varchar(254) not null,
`filename` varchar(254),
`created` timestamp not null,
foreign key (`file_id`) references `filemeta`(`id`),
foreign key (`id`) references `attachment`(`attachid`)
);

View File

@ -0,0 +1,8 @@
-- Stores the original archive file (e.g. a zip) that an attachment was
-- extracted from. "id" is the primary key and at the same time a foreign key
-- to "attachment"."attachid", so an attachment has at most one archive row.
-- "file_id" points to the stored archive file in "filemeta"; "filename" is
-- optional.
CREATE TABLE "attachment_archive" (
"id" varchar(254) not null primary key,
"file_id" varchar(254) not null,
"filename" varchar(254),
"created" timestamp not null,
foreign key ("file_id") references "filemeta"("id"),
foreign key ("id") references "attachment"("attachid")
);

View File

@ -3,14 +3,17 @@ package docspell.store.queries
import fs2.Stream
import cats.implicits._
import cats.effect.Sync
import cats.data.OptionT
import doobie._
import doobie.implicits._
import docspell.common.{Ident, MetaProposalList}
import docspell.store.Store
import docspell.store.impl.Implicits._
import docspell.store.records.{RAttachment, RAttachmentMeta, RAttachmentSource, RItem}
import docspell.store.records._
import docspell.common.syntax.all._
object QAttachment {
private[this] val logger = org.log4s.getLogger
def deleteById[F[_]: Sync](store: Store[F])(attachId: Ident, coll: Ident): F[Int] =
for {
@ -20,9 +23,12 @@ object QAttachment {
rsFile <- store
.transact(RAttachmentSource.findByIdAndCollective(attachId, coll))
.map(_.map(_.fileId))
aaFile <- store
.transact(RAttachmentArchive.findByIdAndCollective(attachId, coll))
.map(_.map(_.fileId))
n <- store.transact(RAttachment.delete(attachId))
f <- Stream
.emits(raFile.toSeq ++ rsFile.toSeq)
.emits(raFile.toSeq ++ rsFile.toSeq ++ aaFile.toSeq)
.map(_.id)
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
@ -32,20 +38,45 @@ object QAttachment {
/** Deletes a single attachment together with its binary files.
  *
  * Looks up the attachment's source record, deletes the attachment via
  * `RAttachment.delete` and then removes the attachment's file and, if a
  * source record was found, the source file through `store.bitpeace.delete`.
  *
  * NOTE(review): the log message suggests that `RAttachment.delete` also
  * removes source, meta and archive records -- confirm against its definition.
  *
  * NOTE(review): the two consecutive `.emits(...)` calls build the same
  * sequence and look like the old and the new form of a single line;
  * presumably only one of them is meant to remain -- verify.
  *
  * @param store the store used for database and binary access
  * @param ra the attachment to delete
  * @return the count reported by `RAttachment.delete` plus the number of
  *         binaries for which the deletion reported `true`
  */
def deleteAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] =
for {
_ <- logger.fdebug[F](s"Deleting attachment: ${ra.id.id}")
s <- store.transact(RAttachmentSource.findById(ra.id))
n <- store.transact(RAttachment.delete(ra.id))
_ <- logger.fdebug[F](
s"Deleted $n meta records (source, meta, archive). Deleting binaries now."
)
f <- Stream
.emits(ra.fileId.id +: s.map(_.fileId.id).toSeq)
.emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq))
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
.compile
.foldMonoid
} yield n + f
def deleteItemAttachments[F[_]: Sync](store: Store[F])(itemId: Ident, coll: Ident): F[Int] =
/** Removes the archive that belongs to the given attachment.
  *
  * The archive record is looked up by the attachment id. When there is
  * none, nothing is deleted and `0` is returned. Otherwise all archive
  * records referring to the same file id are deleted first and the binary
  * of that file is removed afterwards.
  *
  * @param store the store used for database and binary access
  * @param attachId id of the attachment whose archive should be removed
  * @return the number of deleted archive records
  */
def deleteArchive[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] =
  store.transact(RAttachmentArchive.findById(attachId)).flatMap {
    case Some(archive) =>
      for {
        deleted <- store.transact(RAttachmentArchive.deleteAll(archive.fileId))
        // the records are gone, now drop the archive's binary
        _ <- Stream
          .emit(archive.fileId.id)
          .flatMap(store.bitpeace.delete)
          .compile
          .drain
      } yield deleted
    case None =>
      0.pure[F]
  }
/** Deletes all attachments of an item that belongs to the given collective.
  *
  * Loads the item's attachments, removes the archive entries of each one
  * via `deleteArchive` and deletes every attachment via `deleteAttachment`.
  *
  * NOTE(review): `deleteAttachment` is traversed twice here, once before and
  * once after the archive entries are removed, while the log message states
  * that archive entries must be deleted first. The first traversal looks
  * like a leftover of the previous version of this method -- verify.
  *
  * @param store the store used for database and binary access
  * @param itemId the item whose attachments are deleted
  * @param coll the collective the item must belong to
  * @return the sum of the counts returned by the last `deleteAttachment`
  *         traversal
  */
def deleteItemAttachments[F[_]: Sync](
store: Store[F]
)(itemId: Ident, coll: Ident): F[Int] =
for {
ras <- store.transact(RAttachment.findByItemAndCollective(itemId, coll))
ns <- ras.traverse(deleteAttachment[F](store))
_ <- logger.finfo[F](
s"Have ${ras.size} attachments to delete. Must first delete archive entries"
)
a <- ras.traverse(a => deleteArchive(store)(a.id))
_ <- logger.fdebug[F](s"Deleted ${a.sum} archive entries")
ns <- ras.traverse(deleteAttachment[F](store))
} yield ns.sum
def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = {
@ -56,8 +87,12 @@ object QAttachment {
val q = fr"SELECT" ++ MC.proposals
.prefix("m")
.f ++ fr"FROM" ++ RAttachmentMeta.table ++ fr"m" ++
fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.id.prefix("a").is(MC.id.prefix("m")) ++
fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ AC.itemId.prefix("a").is(IC.id.prefix("i")) ++
fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.id
.prefix("a")
.is(MC.id.prefix("m")) ++
fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ AC.itemId
.prefix("a")
.is(IC.id.prefix("i")) ++
fr"WHERE" ++ and(AC.itemId.prefix("a").is(itemId), IC.cid.prefix("i").is(coll))
for {
@ -73,14 +108,18 @@ object QAttachment {
val MC = RAttachmentMeta.Columns
val IC = RItem.Columns
val q = fr"SELECT" ++ commas(MC.all.map(_.prefix("m").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++
fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ IC.id
.prefix("i")
.is(AC.itemId.prefix("a")) ++
fr"INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ AC.id
.prefix("a")
.is(MC.id.prefix("m")) ++
fr"WHERE" ++ and(AC.id.prefix("a").is(attachId), IC.cid.prefix("i").is(collective))
val q =
fr"SELECT" ++ commas(MC.all.map(_.prefix("m").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++
fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ IC.id
.prefix("i")
.is(AC.itemId.prefix("a")) ++
fr"INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ AC.id
.prefix("a")
.is(MC.id.prefix("m")) ++
fr"WHERE" ++ and(
AC.id.prefix("a").is(attachId),
IC.cid.prefix("i").is(collective)
)
q.query[RAttachmentMeta].option
}

View File

@ -41,6 +41,9 @@ object RAttachment {
/** Sets a new file id and name on the attachment with the given id.
  *
  * @return the result of running the update statement
  */
def updateFileIdAndName(attachId: Ident, fId: Ident, fname: Option[String]): ConnectionIO[Int] = {
  val assignments = commas(fileId.setTo(fId), name.setTo(fname))
  val statement   = updateRow(table, id.is(attachId), assignments)
  statement.update.run
}
/** Sets the position of the attachment with the given id.
  *
  * @return the result of running the update statement
  */
def updatePosition(attachId: Ident, pos: Int): ConnectionIO[Int] = {
  val statement = updateRow(table, id.is(attachId), position.setTo(pos))
  statement.update.run
}
/** Looks up an attachment by its id; yields `None` if there is no match. */
def findById(attachId: Ident): ConnectionIO[Option[RAttachment]] = {
  val lookup = selectSimple(all, table, id.is(attachId))
  lookup.query[RAttachment].option
}

View File

@ -0,0 +1,90 @@
package docspell.store.records
import bitpeace.FileMeta
import doobie._
import doobie.implicits._
import docspell.common._
import docspell.store.impl._
import docspell.store.impl.Implicits._
/** The archive file of some attachment. The `id` is shared with the
  * attachment, to create a 0..1-1 relationship: an attachment has at most
  * one archive record, and every archive record belongs to exactly one
  * attachment.
  *
  * @param id the id of this record, identical to the id of the attachment
  * @param fileId the id of the stored archive file (column `file_id`)
  * @param name the optional file name of the archive (column `filename`)
  * @param created the creation timestamp of this record
  */
case class RAttachmentArchive(
id: Ident, //same as RAttachment.id
fileId: Ident,
name: Option[String],
created: Timestamp
)
/** Queries and statements for the `attachment_archive` table. */
object RAttachmentArchive {

  /** SQL fragment naming the table that holds the archive records. */
  val table = fr"attachment_archive"

  /** The columns of the `attachment_archive` table. */
  object Columns {
    val id      = Column("id")
    val fileId  = Column("file_id")
    val name    = Column("filename")
    val created = Column("created")
    val all     = List(id, fileId, name, created)
  }
  import Columns._

  /** Creates an archive record that takes over id, file id, name and
    * creation time of the given attachment.
    */
  def of(ra: RAttachment): RAttachmentArchive =
    RAttachmentArchive(ra.id, ra.fileId, ra.name, ra.created)

  /** Inserts the given archive record. */
  def insert(v: RAttachmentArchive): ConnectionIO[Int] = {
    val values = fr"${v.id},${v.fileId},${v.name},${v.created}"
    insertRow(table, all, values).update.run
  }

  /** Finds the archive record that shares its id with the given attachment. */
  def findById(attachId: Ident): ConnectionIO[Option[RAttachmentArchive]] = {
    val lookup = selectSimple(all, table, id.is(attachId))
    lookup.query[RAttachmentArchive].option
  }

  /** Deletes the archive record of the given attachment. */
  def delete(attachId: Ident): ConnectionIO[Int] = {
    val statement = deleteFrom(table, id.is(attachId))
    statement.update.run
  }

  /** Deletes every archive record that refers to the given file. */
  def deleteAll(fId: Ident): ConnectionIO[Int] = {
    val statement = deleteFrom(table, fileId.is(fId))
    statement.update.run
  }

  /** Finds the archive record of an attachment, restricted to attachments
    * whose item belongs to the given collective.
    *
    * The archive table (alias `a`) is joined with the attachment table
    * (alias `b`) and the item table (alias `i`).
    */
  def findByIdAndCollective(
      attachId: Ident,
      collective: Ident
  ): ConnectionIO[Option[RAttachmentArchive]] = {
    val archiveIdCol      = Columns.id.prefix("a")
    val attachIdCol       = RAttachment.Columns.id.prefix("b")
    val attachItemCol     = RAttachment.Columns.itemId.prefix("b")
    val itemIdCol         = RItem.Columns.id.prefix("i")
    val itemCollectiveCol = RItem.Columns.cid.prefix("i")

    val selectedCols = all.map(_.prefix("a"))
    val joinedTables =
      table ++ fr"a INNER JOIN" ++
        RAttachment.table ++ fr"b ON" ++ archiveIdCol.is(attachIdCol) ++
        fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ attachItemCol.is(itemIdCol)
    val condition = and(
      archiveIdCol.is(attachId),
      attachIdCol.is(attachId),
      itemCollectiveCol.is(collective)
    )

    selectSimple(selectedCols, joinedTables, condition)
      .query[RAttachmentArchive]
      .option
  }

  /** Loads all archive records of the attachments of an item, each paired
    * with the metadata of its file, ordered by the attachment position.
    *
    * The archive table (alias `a`) is joined with the file metadata table
    * (alias `m`) and the attachment table (alias `b`).
    */
  def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachmentArchive, FileMeta)]] = {
    import bitpeace.sql._

    val archiveIdCol     = Columns.id.prefix("a")
    val archiveFileCol   = fileId.prefix("a")
    val attachPosCol     = RAttachment.Columns.position.prefix("b")
    val attachIdCol      = RAttachment.Columns.id.prefix("b")
    val attachItemCol    = RAttachment.Columns.itemId.prefix("b")
    val fileMetaIdCol    = RFileMeta.Columns.id.prefix("m")

    val selectedCols = all.map(_.prefix("a")) ++ RFileMeta.Columns.all.map(_.prefix("m"))
    val joinedTables =
      table ++ fr"a INNER JOIN" ++
        RFileMeta.table ++ fr"m ON" ++ archiveFileCol.is(fileMetaIdCol) ++ fr"INNER JOIN" ++
        RAttachment.table ++ fr"b ON" ++ archiveIdCol.is(attachIdCol)
    val condition = attachItemCol.is(id)

    val ordered = selectSimple(selectedCols, joinedTables, condition) ++ orderBy(attachPosCol.asc)
    ordered
      .query[(RAttachmentArchive, FileMeta)]
      .to[Vector]
  }
}