From 97dfcece975ffd1c925ff05cb32a4cdb34d0b4fe Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sat, 2 Jan 2021 21:08:16 +0100 Subject: [PATCH] Fix duplicate check on restarts Issue: #530 --- .../scala/docspell/backend/ops/OItemSearch.scala | 4 ++-- .../docspell/joex/process/DuplicateCheck.scala | 6 ++++-- .../main/scala/docspell/store/queries/QItem.scala | 15 +++++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala index 9061b87a..46ec929d 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala @@ -258,12 +258,12 @@ object OItemSearch { store.transact(QAttachment.getAttachmentMeta(id, collective)) def findByFileCollective(checksum: String, collective: Ident): F[Vector[RItem]] = - store.transact(QItem.findByChecksum(checksum, collective)) + store.transact(QItem.findByChecksum(checksum, collective, Set.empty)) def findByFileSource(checksum: String, sourceId: Ident): F[Vector[RItem]] = store.transact((for { coll <- OptionT(RSource.findCollective(sourceId)) - items <- OptionT.liftF(QItem.findByChecksum(checksum, coll)) + items <- OptionT.liftF(QItem.findByChecksum(checksum, coll, Set.empty)) } yield items).getOrElse(Vector.empty)) }) diff --git a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala index 5b30d3ab..eee06755 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/DuplicateCheck.scala @@ -63,10 +63,12 @@ object DuplicateCheck { private def checkDuplicate[F[_]]( ctx: Context[F, Args] - )(fm: FileMeta): ConnectionIO[FileMetaDupes] = + )(fm: FileMeta): ConnectionIO[FileMetaDupes] = { + val excludes = ctx.args.files.map(_.fileMetaId).toSet QItem - .findByChecksum(fm.checksum, ctx.args.meta.collective) + .findByChecksum(fm.checksum, ctx.args.meta.collective, excludes) .map(v => FileMetaDupes(fm, v.nonEmpty)) + } case class FileMetaDupes(fm: FileMeta, exists: Boolean) } diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index c3807351..80c7bf2d 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -454,7 +454,7 @@ object QItem { (a.fileId.in(fileMetaIds) || s.fileId.in(fileMetaIds) || r.fileId.in(fileMetaIds)) &&? states.map(nel => i.state.in(nel)) - ) + ).distinct } def findOneByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Option[RItem]] = @@ -476,7 +476,11 @@ object QItem { Vector.empty[RItem].pure[ConnectionIO] } - def findByChecksum(checksum: String, collective: Ident): ConnectionIO[Vector[RItem]] = { + def findByChecksum( + checksum: String, + collective: Ident, + excludeFileMeta: Set[Ident] + ): ConnectionIO[Vector[RItem]] = { val m1 = RFileMeta.as("m1") val m2 = RFileMeta.as("m2") val m3 = RFileMeta.as("m3") @@ -496,9 +500,12 @@ object QItem { .leftJoin(m3, m3.id === r.fileId), where( i.cid === collective && - (m1.checksum === checksum || m2.checksum === checksum || m3.checksum === checksum) + (m1.checksum === checksum || m2.checksum === checksum || m3.checksum === checksum) &&? + Nel + .fromList(excludeFileMeta.toList) + .map(excl => m1.id.notIn(excl) && m2.id.notIn(excl) && m3.id.notIn(excl)) ) - ).build.query[RItem].to[Vector] + ).distinct.build.query[RItem].to[Vector] } final case class NameAndNotes(