Prepend a duplicate check when uploading files
This commit is contained in:
parent
10c16bf319
commit
f6f63000be
@@ -59,7 +59,8 @@ object OUpload {
       direction: Option[Direction],
       sourceAbbrev: String,
       folderId: Option[Ident],
-      validFileTypes: Seq[MimeType]
+      validFileTypes: Seq[MimeType],
+      skipDuplicates: Boolean
   )

   case class UploadData[F[_]](
@@ -125,7 +126,8 @@ object OUpload {
             data.meta.direction,
             data.meta.sourceAbbrev,
             data.meta.folderId,
-            data.meta.validFileTypes
+            data.meta.validFileTypes,
+            data.meta.skipDuplicates
           )
         args =
           if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f)))
@@ -37,7 +37,8 @@ object ProcessItemArgs {
       direction: Option[Direction],
       sourceAbbrev: String,
       folderId: Option[Ident],
-      validFileTypes: Seq[MimeType]
+      validFileTypes: Seq[MimeType],
+      skipDuplicate: Boolean
   )

   object ProcessMeta {
@@ -0,0 +1,62 @@
+package docspell.joex.process
+
+import cats.effect._
+import cats.implicits._
+
+import docspell.common._
+import docspell.joex.scheduler.{Context, Task}
+import docspell.store.queries.QItem
+import docspell.store.records.RFileMeta
+
+import bitpeace.FileMeta
+import doobie._
+
+object DuplicateCheck {
+  type Args = ProcessItemArgs
+
+  def apply[F[_]: Sync]: Task[F, Args, Args] =
+    Task { ctx =>
+      if (ctx.args.meta.skipDuplicate)
+        ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx)
+      else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F]
+    }
+
+  def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] =
+    for {
+      fileMetas <- findDuplicates(ctx)
+      _         <- fileMetas.traverse(deleteDuplicate(ctx))
+      ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
+    } yield ctx.args.copy(files =
+      ctx.args.files.filterNot(f => ids.contains(f.fileMetaId.id))
+    )
+
+  private def deleteDuplicate[F[_]: Sync](
+      ctx: Context[F, Args]
+  )(fd: FileMetaDupes): F[Unit] = {
+    val fname = ctx.args.files.find(_.fileMetaId.id == fd.fm.id).flatMap(_.name)
+    if (fd.exists)
+      ctx.logger
+        .info(s"Deleting duplicate file ${fname}!") *> ctx.store.bitpeace
+        .delete(fd.fm.id)
+        .compile
+        .drain
+    else ().pure[F]
+  }
+
+  private def findDuplicates[F[_]: Sync](
+      ctx: Context[F, Args]
+  ): F[Vector[FileMetaDupes]] =
+    ctx.store.transact(for {
+      fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId))
+      dupes     <- fileMetas.traverse(checkDuplicate(ctx))
+    } yield dupes)
+
+  private def checkDuplicate[F[_]](
+      ctx: Context[F, Args]
+  )(fm: FileMeta): ConnectionIO[FileMetaDupes] =
+    QItem
+      .findByChecksum(fm.checksum, ctx.args.meta.collective)
+      .map(v => FileMetaDupes(fm, v.nonEmpty))
+
+  case class FileMetaDupes(fm: FileMeta, exists: Boolean)
+}
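Read on its own, the new task boils down to: look up the stored file metadata for the uploaded files, delete every binary whose checksum already exists for the collective, and hand only the remaining files to the next processing stage. A minimal, self-contained sketch of that filtering step (hypothetical names, none of docspell's real types):

object DuplicateFilterSketch {
  // Stand-in for ProcessItemArgs.File; the real code resolves checksums through
  // RFileMeta.findByIds and QItem.findByChecksum instead of carrying them inline.
  final case class UploadedFile(fileMetaId: String, checksum: String)

  // Keep only files whose checksum is not already known for the collective.
  def dropKnown(files: List[UploadedFile], known: Set[String]): List[UploadedFile] =
    files.filterNot(f => known.contains(f.checksum))

  def main(args: Array[String]): Unit = {
    val files = List(UploadedFile("f1", "sha-1"), UploadedFile("f2", "sha-2"))
    // "sha-2" is already stored, so only f1 survives and f2 would be deleted again.
    println(dropKnown(files, Set("sha-2")))
  }
}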
@@ -35,10 +35,18 @@ object ItemHandler {
       analyser: TextAnalyser[F],
       regexNer: RegexNerFile[F]
   ): Task[F, Args, Unit] =
-    CreateItem[F]
-      .flatMap(itemStateTask(ItemState.Processing))
-      .flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
-      .map(_ => ())
+    DuplicateCheck[F]
+      .flatMap(args =>
+        if (args.files.isEmpty) logNoFiles
+        else {
+          val create: Task[F, Args, ItemData] =
+            CreateItem[F].contramap(_ => args.pure[F])
+          create
+            .flatMap(itemStateTask(ItemState.Processing))
+            .flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
+            .map(_ => ())
+        }
+      )

   def itemStateTask[F[_]: Sync, A](
       state: ItemState
@@ -121,4 +129,10 @@ object ItemHandler {

   private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
     Task(_.logger.warn(msg))
+
+  private def logNoFiles[F[_]]: Task[F, Args, Unit] =
+    logWarn(
+      "No files to process! Either no files were given or duplicate check removed all."
+    )
+
 }
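The wiring above is the interesting part of the handler change: DuplicateCheck produces the possibly reduced ProcessItemArgs as its output, while CreateItem expects those args as its input, so the output is fed back in via contramap. A toy sketch of that shape (SimpleTask is an assumed stand-in for illustration only, not docspell's scheduler Task, which also carries a Context and an effect type F):

final case class SimpleTask[In, Out](run: In => Out) {
  def flatMap[Out2](f: Out => SimpleTask[In, Out2]): SimpleTask[In, Out2] =
    SimpleTask(in => f(run(in)).run(in))
  def contramap[In2](g: In2 => In): SimpleTask[In2, Out] =
    SimpleTask(in2 => run(g(in2)))
}

object HandlerSketch {
  // Toy "duplicate check": removes repeated file names from the input.
  val duplicateCheck: SimpleTask[List[String], List[String]] =
    SimpleTask(files => files.distinct)

  val createItem: SimpleTask[List[String], Int] =
    SimpleTask(_.size)

  // Mirrors the ItemHandler wiring: downstream work sees the reduced file list.
  val handler: SimpleTask[List[String], Int] =
    duplicateCheck.flatMap(args => createItem.contramap[List[String]](_ => args))

  def main(a: Array[String]): Unit =
    println(handler.run(List("a.pdf", "a.pdf", "b.pdf"))) // prints 2
}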
@@ -90,7 +90,8 @@ object ReProcessItem {
         None, //direction
         "", //source-id
         None, //folder
-        Seq.empty
+        Seq.empty,
+        false
       ),
       Nil
     ).pure[F]
@@ -254,7 +254,8 @@ object ScanMailboxTask {
           Some(dir),
           s"mailbox-${ctx.args.account.user.id}",
           args.itemFolder,
-          Seq.empty
+          Seq.empty,
+          true
         )
       data = OUpload.UploadData(
         multiple = false,
@@ -3660,6 +3660,7 @@ components:
         description: DateTime
         type: integer
         format: date-time

     ItemUploadMeta:
       description: |
         Meta information for an item upload. The user can specify some
@@ -3674,6 +3675,7 @@ components:

         A folderId can be given, the item is placed into this folder
         after creation.

       required:
         - multiple
       properties:
@@ -3686,6 +3688,10 @@ components:
         folder:
           type: string
           format: ident
+        skipDuplicates:
+          type: boolean
+          default: false
+
     Collective:
       description: |
         Information about a collective.
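For API clients, the meta part of a multipart upload can now carry the flag. A hypothetical example payload based only on the ItemUploadMeta fields visible above (the field values, including the direction value, are illustrative; omitting skipDuplicates keeps the old behaviour because it defaults to false):

object UploadMetaExample {
  // Illustrative JSON for the upload "meta" part; field names follow the schema above.
  val meta: String =
    """{
      |  "multiple": true,
      |  "direction": "incoming",
      |  "folder": null,
      |  "skipDuplicates": true
      |}""".stripMargin

  def main(args: Array[String]): Unit = println(meta)
}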
@@ -294,10 +294,21 @@ trait Conversions {
       .map(p => parseMeta(p.body))
       .map(fm =>
         fm.map(m =>
-          (m.multiple, UploadMeta(m.direction, "webapp", m.folder, validFileTypes))
+          (
+            m.multiple,
+            UploadMeta(
+              m.direction,
+              "webapp",
+              m.folder,
+              validFileTypes,
+              m.skipDuplicates.getOrElse(false)
+            )
+          )
         )
       )
-      .getOrElse((true, UploadMeta(None, "webapp", None, validFileTypes)).pure[F])
+      .getOrElse(
+        (true, UploadMeta(None, "webapp", None, validFileTypes, false)).pure[F]
+      )

     val files = mp.parts
       .filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
@@ -1,5 +1,8 @@
 package docspell.store.records

+import cats.data.NonEmptyList
+import cats.implicits._
+
 import docspell.common._
 import docspell.store.impl.Implicits._
 import docspell.store.impl._
@@ -33,6 +36,17 @@ object RFileMeta {
     selectSimple(Columns.all, table, Columns.id.is(fid)).query[FileMeta].option
   }

+  def findByIds(ids: List[Ident]): ConnectionIO[Vector[FileMeta]] = {
+    import bitpeace.sql._
+
+    NonEmptyList.fromList(ids) match {
+      case Some(nel) =>
+        selectSimple(Columns.all, table, Columns.id.isIn(nel)).query[FileMeta].to[Vector]
+      case None =>
+        Vector.empty[FileMeta].pure[ConnectionIO]
+    }
+  }
+
   def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = {
     import bitpeace.sql._

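The new findByIds helper shows a common doobie pattern: an SQL IN clause is only valid for a non-empty list, so NonEmptyList.fromList forces the empty case to be handled before any query is built. A dependency-light sketch of the same pattern, using plain strings instead of doobie fragments (assumes only cats on the classpath; names are hypothetical):

import cats.data.NonEmptyList

object InClauseSketch {
  // Returns None for an empty id list instead of emitting an invalid "IN ()" query.
  def findByIdsSql(table: String, ids: List[String]): Option[String] =
    NonEmptyList.fromList(ids).map { nel =>
      val placeholders = nel.toList.map(_ => "?").mkString(", ")
      s"SELECT * FROM $table WHERE id IN ($placeholders)"
    }

  def main(args: Array[String]): Unit = {
    println(findByIdsSql("filemeta", List("a", "b"))) // Some(... IN (?, ?))
    println(findByIdsSql("filemeta", Nil))            // None: skip the query entirely
  }
}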