Prepend a duplicate check when uploading files

This commit is contained in:
Eike Kettner
2020-09-23 23:00:19 +02:00
parent 10c16bf319
commit f6f63000be
9 changed files with 123 additions and 11 deletions

View File

@ -0,0 +1,62 @@
package docspell.joex.process
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.scheduler.{Context, Task}
import docspell.store.queries.QItem
import docspell.store.records.RFileMeta
import bitpeace.FileMeta
import doobie._
/** Pre-processing step that removes uploaded files which are reported as
  * already present in the collective.
  *
  * The check compares the checksum of each uploaded file (via
  * `QItem.findByChecksum`) against the collective given in the task
  * arguments. Files reported as duplicates are deleted from the file store
  * and dropped from the returned arguments.
  */
object DuplicateCheck {
  type Args = ProcessItemArgs

  /** Creates the task.
    *
    * The check only runs when `meta.skipDuplicate` is set, i.e. when
    * duplicate files should be skipped. Otherwise the arguments are returned
    * unchanged.
    */
  def apply[F[_]: Sync]: Task[F, Args, Args] =
    Task { ctx =>
      if (ctx.args.meta.skipDuplicate)
        ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx)
      else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F]
    }

  /** Deletes all files flagged as duplicates and returns a copy of the
    * arguments whose `files` no longer contain them.
    */
  def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] =
    for {
      fileMetas <- findDuplicates(ctx)
      // only the effects matter here, so don't collect a Vector[Unit]
      _ <- fileMetas.traverse_(deleteDuplicate(ctx))
      ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
    } yield ctx.args.copy(files =
      ctx.args.files.filterNot(f => ids.contains(f.fileMetaId.id))
    )

  /** Deletes the file from the file store if it was flagged as a duplicate;
    * does nothing otherwise.
    */
  private def deleteDuplicate[F[_]: Sync](
      ctx: Context[F, Args]
  )(fd: FileMetaDupes): F[Unit] =
    if (fd.exists) {
      // The name is optional; fall back to the file id so the log line never
      // shows an `Option` wrapper such as `Some(x.pdf)` or `None`.
      val fname = ctx.args.files
        .find(_.fileMetaId.id == fd.fm.id)
        .flatMap(_.name)
        .getOrElse(fd.fm.id)
      ctx.logger.info(s"Deleting duplicate file ${fname}!") *>
        ctx.store.bitpeace.delete(fd.fm.id).compile.drain
    } else ().pure[F]

  /** Loads the file metadata of all files in the arguments and pairs each
    * with its duplicate flag, all within one transaction.
    */
  private def findDuplicates[F[_]: Sync](
      ctx: Context[F, Args]
  ): F[Vector[FileMetaDupes]] =
    ctx.store.transact(for {
      fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId))
      dupes     <- fileMetas.traverse(checkDuplicate(ctx))
    } yield dupes)

  /** Flags the file as a duplicate when `QItem.findByChecksum` returns a
    * non-empty result for its checksum and the collective.
    */
  private def checkDuplicate[F[_]](
      ctx: Context[F, Args]
  )(fm: FileMeta): ConnectionIO[FileMetaDupes] =
    QItem
      .findByChecksum(fm.checksum, ctx.args.meta.collective)
      .map(v => FileMetaDupes(fm, v.nonEmpty))

  /** File metadata together with the result of the duplicate lookup. */
  case class FileMetaDupes(fm: FileMeta, exists: Boolean)
}

View File

@ -35,10 +35,18 @@ object ItemHandler {
analyser: TextAnalyser[F],
regexNer: RegexNerFile[F]
): Task[F, Args, Unit] =
CreateItem[F]
.flatMap(itemStateTask(ItemState.Processing))
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
.map(_ => ())
DuplicateCheck[F]
.flatMap(args =>
if (args.files.isEmpty) logNoFiles
else {
val create: Task[F, Args, ItemData] =
CreateItem[F].contramap(_ => args.pure[F])
create
.flatMap(itemStateTask(ItemState.Processing))
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
.map(_ => ())
}
)
def itemStateTask[F[_]: Sync, A](
state: ItemState
@ -121,4 +129,10 @@ object ItemHandler {
/** Builds a task that writes `msg` to the task logger at warn level. */
private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
  Task(ctx => ctx.logger.warn(msg))
/** Task that warns that there is nothing to process. */
private def logNoFiles[F[_]]: Task[F, Args, Unit] = {
  val message =
    "No files to process! Either no files were given or duplicate check removed all."
  logWarn(message)
}
}

View File

@ -90,7 +90,8 @@ object ReProcessItem {
None, //direction
"", //source-id
None, //folder
Seq.empty
Seq.empty,
false
),
Nil
).pure[F]

View File

@ -254,7 +254,8 @@ object ScanMailboxTask {
Some(dir),
s"mailbox-${ctx.args.account.user.id}",
args.itemFolder,
Seq.empty
Seq.empty,
true
)
data = OUpload.UploadData(
multiple = false,