mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-01 04:52:52 +00:00
Prepend a duplicate check when uploading files
This commit is contained in:
parent
10c16bf319
commit
f6f63000be
@ -59,7 +59,8 @@ object OUpload {
|
|||||||
direction: Option[Direction],
|
direction: Option[Direction],
|
||||||
sourceAbbrev: String,
|
sourceAbbrev: String,
|
||||||
folderId: Option[Ident],
|
folderId: Option[Ident],
|
||||||
validFileTypes: Seq[MimeType]
|
validFileTypes: Seq[MimeType],
|
||||||
|
skipDuplicates: Boolean
|
||||||
)
|
)
|
||||||
|
|
||||||
case class UploadData[F[_]](
|
case class UploadData[F[_]](
|
||||||
@ -125,7 +126,8 @@ object OUpload {
|
|||||||
data.meta.direction,
|
data.meta.direction,
|
||||||
data.meta.sourceAbbrev,
|
data.meta.sourceAbbrev,
|
||||||
data.meta.folderId,
|
data.meta.folderId,
|
||||||
data.meta.validFileTypes
|
data.meta.validFileTypes,
|
||||||
|
data.meta.skipDuplicates
|
||||||
)
|
)
|
||||||
args =
|
args =
|
||||||
if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f)))
|
if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f)))
|
||||||
|
@ -37,7 +37,8 @@ object ProcessItemArgs {
|
|||||||
direction: Option[Direction],
|
direction: Option[Direction],
|
||||||
sourceAbbrev: String,
|
sourceAbbrev: String,
|
||||||
folderId: Option[Ident],
|
folderId: Option[Ident],
|
||||||
validFileTypes: Seq[MimeType]
|
validFileTypes: Seq[MimeType],
|
||||||
|
skipDuplicate: Boolean
|
||||||
)
|
)
|
||||||
|
|
||||||
object ProcessMeta {
|
object ProcessMeta {
|
||||||
|
@ -0,0 +1,62 @@
|
|||||||
|
package docspell.joex.process
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.joex.scheduler.{Context, Task}
|
||||||
|
import docspell.store.queries.QItem
|
||||||
|
import docspell.store.records.RFileMeta
|
||||||
|
|
||||||
|
import bitpeace.FileMeta
|
||||||
|
import doobie._
|
||||||
|
|
||||||
|
/** Processing step that can drop files from the job arguments when a
  * file with the same checksum is already known for the collective
  * (as reported by `QItem.findByChecksum`).
  *
  * The check only runs when `meta.skipDuplicate` is set on the
  * arguments; otherwise the arguments are passed through untouched.
  */
object DuplicateCheck {
  type Args = ProcessItemArgs

  /** Creates the task. The resulting arguments contain only those files
    * that were not detected as duplicates.
    */
  def apply[F[_]: Sync]: Task[F, Args, Args] =
    Task { ctx =>
      if (ctx.args.meta.skipDuplicate)
        ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx)
      else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F]
    }

  /** Deletes every file flagged as duplicate from the file store and
    * returns a copy of the arguments without these files.
    */
  def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] =
    for {
      fileMetas <- findDuplicates(ctx)
      // Only the effects matter here, so do not collect a Vector[Unit].
      _ <- fileMetas.traverse_(deleteDuplicate(ctx))
      ids = fileMetas.filter(_.exists).map(_.fm.id).toSet
    } yield ctx.args.copy(files =
      ctx.args.files.filterNot(f => ids.contains(f.fileMetaId.id))
    )

  /** Removes the binary of a single duplicate; does nothing when the
    * file was not flagged as duplicate.
    */
  private def deleteDuplicate[F[_]: Sync](
      ctx: Context[F, Args]
  )(fd: FileMetaDupes): F[Unit] = {
    // The file name is optional. Unwrap it so the log line does not read
    // "Some(name)" / "None"; fall back to the file id if no name is known.
    val fname = ctx.args.files
      .find(_.fileMetaId.id == fd.fm.id)
      .flatMap(_.name)
      .getOrElse(fd.fm.id)
    if (fd.exists)
      ctx.logger
        .info(s"Deleting duplicate file ${fname}!") *> ctx.store.bitpeace
        .delete(fd.fm.id)
        .compile
        .drain
    else ().pure[F]
  }

  /** Loads the file metadata of all files in the arguments and checks
    * each one for an existing checksum, all within one transaction.
    */
  private def findDuplicates[F[_]: Sync](
      ctx: Context[F, Args]
  ): F[Vector[FileMetaDupes]] =
    ctx.store.transact(for {
      fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId))
      dupes     <- fileMetas.traverse(checkDuplicate(ctx))
    } yield dupes)

  /** Flags `fm` as duplicate when the checksum lookup for the
    * collective of this job returns a non-empty result.
    */
  private def checkDuplicate[F[_]](
      ctx: Context[F, Args]
  )(fm: FileMeta): ConnectionIO[FileMetaDupes] =
    QItem
      .findByChecksum(fm.checksum, ctx.args.meta.collective)
      .map(v => FileMetaDupes(fm, v.nonEmpty))

  /** File metadata paired with the outcome of the duplicate lookup. */
  case class FileMetaDupes(fm: FileMeta, exists: Boolean)
}
|
@ -35,10 +35,18 @@ object ItemHandler {
|
|||||||
analyser: TextAnalyser[F],
|
analyser: TextAnalyser[F],
|
||||||
regexNer: RegexNerFile[F]
|
regexNer: RegexNerFile[F]
|
||||||
): Task[F, Args, Unit] =
|
): Task[F, Args, Unit] =
|
||||||
CreateItem[F]
|
DuplicateCheck[F]
|
||||||
.flatMap(itemStateTask(ItemState.Processing))
|
.flatMap(args =>
|
||||||
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
|
if (args.files.isEmpty) logNoFiles
|
||||||
.map(_ => ())
|
else {
|
||||||
|
val create: Task[F, Args, ItemData] =
|
||||||
|
CreateItem[F].contramap(_ => args.pure[F])
|
||||||
|
create
|
||||||
|
.flatMap(itemStateTask(ItemState.Processing))
|
||||||
|
.flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer))
|
||||||
|
.map(_ => ())
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
def itemStateTask[F[_]: Sync, A](
|
def itemStateTask[F[_]: Sync, A](
|
||||||
state: ItemState
|
state: ItemState
|
||||||
@ -121,4 +129,10 @@ object ItemHandler {
|
|||||||
|
|
||||||
private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
|
private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] =
|
||||||
Task(_.logger.warn(msg))
|
Task(_.logger.warn(msg))
|
||||||
|
|
||||||
|
/** Task that emits a warning when there is nothing left to process. */
private def logNoFiles[F[_]]: Task[F, Args, Unit] = {
  val message =
    "No files to process! Either no files were given or duplicate check removed all."
  logWarn[F](message)
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -90,7 +90,8 @@ object ReProcessItem {
|
|||||||
None, //direction
|
None, //direction
|
||||||
"", //source-id
|
"", //source-id
|
||||||
None, //folder
|
None, //folder
|
||||||
Seq.empty
|
Seq.empty,
|
||||||
|
false
|
||||||
),
|
),
|
||||||
Nil
|
Nil
|
||||||
).pure[F]
|
).pure[F]
|
||||||
|
@ -254,7 +254,8 @@ object ScanMailboxTask {
|
|||||||
Some(dir),
|
Some(dir),
|
||||||
s"mailbox-${ctx.args.account.user.id}",
|
s"mailbox-${ctx.args.account.user.id}",
|
||||||
args.itemFolder,
|
args.itemFolder,
|
||||||
Seq.empty
|
Seq.empty,
|
||||||
|
true
|
||||||
)
|
)
|
||||||
data = OUpload.UploadData(
|
data = OUpload.UploadData(
|
||||||
multiple = false,
|
multiple = false,
|
||||||
|
@ -3660,6 +3660,7 @@ components:
|
|||||||
description: DateTime
|
description: DateTime
|
||||||
type: integer
|
type: integer
|
||||||
format: date-time
|
format: date-time
|
||||||
|
|
||||||
ItemUploadMeta:
|
ItemUploadMeta:
|
||||||
description: |
|
description: |
|
||||||
Meta information for an item upload. The user can specify some
|
Meta information for an item upload. The user can specify some
|
||||||
@ -3674,6 +3675,7 @@ components:
|
|||||||
|
|
||||||
A folderId can be given, the item is placed into this folder
|
A folderId can be given, the item is placed into this folder
|
||||||
after creation.
|
after creation.
|
||||||
|
|
||||||
required:
|
required:
|
||||||
- multiple
|
- multiple
|
||||||
properties:
|
properties:
|
||||||
@ -3686,6 +3688,10 @@ components:
|
|||||||
folder:
|
folder:
|
||||||
type: string
|
type: string
|
||||||
format: ident
|
format: ident
|
||||||
|
skipDuplicates:
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
|
||||||
Collective:
|
Collective:
|
||||||
description: |
|
description: |
|
||||||
Information about a collective.
|
Information about a collective.
|
||||||
|
@ -294,10 +294,21 @@ trait Conversions {
|
|||||||
.map(p => parseMeta(p.body))
|
.map(p => parseMeta(p.body))
|
||||||
.map(fm =>
|
.map(fm =>
|
||||||
fm.map(m =>
|
fm.map(m =>
|
||||||
(m.multiple, UploadMeta(m.direction, "webapp", m.folder, validFileTypes))
|
(
|
||||||
|
m.multiple,
|
||||||
|
UploadMeta(
|
||||||
|
m.direction,
|
||||||
|
"webapp",
|
||||||
|
m.folder,
|
||||||
|
validFileTypes,
|
||||||
|
m.skipDuplicates.getOrElse(false)
|
||||||
|
)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.getOrElse((true, UploadMeta(None, "webapp", None, validFileTypes)).pure[F])
|
.getOrElse(
|
||||||
|
(true, UploadMeta(None, "webapp", None, validFileTypes, false)).pure[F]
|
||||||
|
)
|
||||||
|
|
||||||
val files = mp.parts
|
val files = mp.parts
|
||||||
.filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
|
.filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
package docspell.store.records
|
package docspell.store.records
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.store.impl.Implicits._
|
import docspell.store.impl.Implicits._
|
||||||
import docspell.store.impl._
|
import docspell.store.impl._
|
||||||
@ -33,6 +36,17 @@ object RFileMeta {
|
|||||||
selectSimple(Columns.all, table, Columns.id.is(fid)).query[FileMeta].option
|
selectSimple(Columns.all, table, Columns.id.is(fid)).query[FileMeta].option
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Loads the metadata of all files with one of the given ids. An empty
  * id list yields an empty result without running a query.
  */
def findByIds(ids: List[Ident]): ConnectionIO[Vector[FileMeta]] = {
  import bitpeace.sql._

  val nothing: ConnectionIO[Vector[FileMeta]] =
    Vector.empty[FileMeta].pure[ConnectionIO]

  NonEmptyList
    .fromList(ids)
    .fold(nothing) { nonEmptyIds =>
      selectSimple(Columns.all, table, Columns.id.isIn(nonEmptyIds))
        .query[FileMeta]
        .to[Vector]
    }
}
|
||||||
|
|
||||||
def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = {
|
def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = {
|
||||||
import bitpeace.sql._
|
import bitpeace.sql._
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user