	Prepend a duplicate check when uploading files
		| @@ -59,7 +59,8 @@ object OUpload { | ||||
|       direction: Option[Direction], | ||||
|       sourceAbbrev: String, | ||||
|       folderId: Option[Ident], | ||||
|       validFileTypes: Seq[MimeType] | ||||
|       validFileTypes: Seq[MimeType], | ||||
|       skipDuplicates: Boolean | ||||
|   ) | ||||
|  | ||||
|   case class UploadData[F[_]]( | ||||
| @@ -125,7 +126,8 @@ object OUpload { | ||||
|             data.meta.direction, | ||||
|             data.meta.sourceAbbrev, | ||||
|             data.meta.folderId, | ||||
|             data.meta.validFileTypes | ||||
|             data.meta.validFileTypes, | ||||
|             data.meta.skipDuplicates | ||||
|           ) | ||||
|           args = | ||||
|             if (data.multiple) files.map(f => ProcessItemArgs(meta, List(f))) | ||||
|   | ||||
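OUpload.UploadMeta gains a skipDuplicates flag as its last field. A hypothetical caller-side construction, assuming the usual docspell imports; the field list is taken from the hunk above, the concrete values are placeholders:

```scala
// Hypothetical only: field names come from the diff above, values are
// illustrative. Direction.Incoming is docspell.common.Direction's "incoming".
val meta = OUpload.UploadMeta(
  direction = Some(Direction.Incoming),
  sourceAbbrev = "webapp",
  folderId = None,
  validFileTypes = Seq.empty,
  skipDuplicates = true
)
```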
| @@ -37,7 +37,8 @@ object ProcessItemArgs { | ||||
|       direction: Option[Direction], | ||||
|       sourceAbbrev: String, | ||||
|       folderId: Option[Ident], | ||||
|       validFileTypes: Seq[MimeType] | ||||
|       validFileTypes: Seq[MimeType], | ||||
|       skipDuplicate: Boolean | ||||
|   ) | ||||
|  | ||||
|   object ProcessMeta { | ||||
|   | ||||
| @@ -0,0 +1,62 @@ | ||||
| package docspell.joex.process | ||||
|  | ||||
| import cats.effect._ | ||||
| import cats.implicits._ | ||||
|  | ||||
| import docspell.common._ | ||||
| import docspell.joex.scheduler.{Context, Task} | ||||
| import docspell.store.queries.QItem | ||||
| import docspell.store.records.RFileMeta | ||||
|  | ||||
| import bitpeace.FileMeta | ||||
| import doobie._ | ||||
|  | ||||
| object DuplicateCheck { | ||||
|   type Args = ProcessItemArgs | ||||
|  | ||||
|   def apply[F[_]: Sync]: Task[F, Args, Args] = | ||||
|     Task { ctx => | ||||
|       if (ctx.args.meta.skipDuplicate) | ||||
|         ctx.logger.debug("Checking for duplicate files") *> removeDuplicates(ctx) | ||||
|       else ctx.logger.debug("Not checking for duplicates") *> ctx.args.pure[F] | ||||
|     } | ||||
|  | ||||
|   def removeDuplicates[F[_]: Sync](ctx: Context[F, Args]): F[ProcessItemArgs] = | ||||
|     for { | ||||
|       fileMetas <- findDuplicates(ctx) | ||||
|       _         <- fileMetas.traverse(deleteDuplicate(ctx)) | ||||
|       ids = fileMetas.filter(_.exists).map(_.fm.id).toSet | ||||
|     } yield ctx.args.copy(files = | ||||
|       ctx.args.files.filterNot(f => ids.contains(f.fileMetaId.id)) | ||||
|     ) | ||||
|  | ||||
|   private def deleteDuplicate[F[_]: Sync]( | ||||
|       ctx: Context[F, Args] | ||||
|   )(fd: FileMetaDupes): F[Unit] = { | ||||
|     val fname = ctx.args.files.find(_.fileMetaId.id == fd.fm.id).flatMap(_.name) | ||||
|     if (fd.exists) | ||||
|       ctx.logger | ||||
|         .info(s"Deleting duplicate file ${fname}!") *> ctx.store.bitpeace | ||||
|         .delete(fd.fm.id) | ||||
|         .compile | ||||
|         .drain | ||||
|     else ().pure[F] | ||||
|   } | ||||
|  | ||||
|   private def findDuplicates[F[_]: Sync]( | ||||
|       ctx: Context[F, Args] | ||||
|   ): F[Vector[FileMetaDupes]] = | ||||
|     ctx.store.transact(for { | ||||
|       fileMetas <- RFileMeta.findByIds(ctx.args.files.map(_.fileMetaId)) | ||||
|       dupes     <- fileMetas.traverse(checkDuplicate(ctx)) | ||||
|     } yield dupes) | ||||
|  | ||||
|   private def checkDuplicate[F[_]]( | ||||
|       ctx: Context[F, Args] | ||||
|   )(fm: FileMeta): ConnectionIO[FileMetaDupes] = | ||||
|     QItem | ||||
|       .findByChecksum(fm.checksum, ctx.args.meta.collective) | ||||
|       .map(v => FileMetaDupes(fm, v.nonEmpty)) | ||||
|  | ||||
|   case class FileMetaDupes(fm: FileMeta, exists: Boolean) | ||||
| } | ||||
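At its core, DuplicateCheck marks a file as a duplicate when an item of the same collective already references content with the same checksum, deletes that file from storage, and drops it from the job arguments. A minimal, dependency-free sketch of that decision; the names here are illustrative, not docspell's API:

```scala
// Illustrative only: the essence of the duplicate filter. In docspell the
// checksum lookup is QItem.findByChecksum and the deletion goes through
// bitpeace; here both are reduced to plain values.
final case class IncomingFile(fileMetaId: String, checksum: String)

def dropDuplicates(
    files: List[IncomingFile],
    knownChecksums: Set[String] // checksums already stored for the collective
): List[IncomingFile] =
  files.filterNot(f => knownChecksums.contains(f.checksum))
```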
| @@ -35,10 +35,18 @@ object ItemHandler { | ||||
|       analyser: TextAnalyser[F], | ||||
|       regexNer: RegexNerFile[F] | ||||
|   ): Task[F, Args, Unit] = | ||||
|     CreateItem[F] | ||||
|       .flatMap(itemStateTask(ItemState.Processing)) | ||||
|       .flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer)) | ||||
|       .map(_ => ()) | ||||
|     DuplicateCheck[F] | ||||
|       .flatMap(args => | ||||
|         if (args.files.isEmpty) logNoFiles | ||||
|         else { | ||||
|           val create: Task[F, Args, ItemData] = | ||||
|             CreateItem[F].contramap(_ => args.pure[F]) | ||||
|           create | ||||
|             .flatMap(itemStateTask(ItemState.Processing)) | ||||
|             .flatMap(safeProcess[F](cfg, itemOps, fts, analyser, regexNer)) | ||||
|             .map(_ => ()) | ||||
|         } | ||||
|       ) | ||||
|  | ||||
|   def itemStateTask[F[_]: Sync, A]( | ||||
|       state: ItemState | ||||
| @@ -121,4 +129,10 @@ object ItemHandler { | ||||
|  | ||||
|   private def logWarn[F[_]](msg: => String): Task[F, Args, Unit] = | ||||
|     Task(_.logger.warn(msg)) | ||||
|  | ||||
|   private def logNoFiles[F[_]]: Task[F, Args, Unit] = | ||||
|     logWarn( | ||||
|       "No files to process! Either no files were given or duplicate check removed all." | ||||
|     ) | ||||
|  | ||||
| } | ||||
|   | ||||
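The handler now runs DuplicateCheck first and only creates an item when files remain; CreateItem is adapted with contramap so it consumes the filtered arguments instead of the original ones. A minimal, self-contained model of the combinators used here, not docspell's actual Task (which additionally carries a Context with logger, store, and config):

```scala
import cats.{FlatMap, Functor}

// Toy model of a Task[F, In, Out]: a job step from an input to an effectful
// output. Only the combinators used in the hunk above are sketched.
final case class Task[F[_], A, B](run: A => F[B]) {
  def map[C](f: B => C)(implicit F: Functor[F]): Task[F, A, C] =
    Task(a => F.map(run(a))(f))

  def flatMap[C](f: B => Task[F, A, C])(implicit F: FlatMap[F]): Task[F, A, C] =
    Task(a => F.flatMap(run(a))(b => f(b).run(a)))

  // contramap changes the input; in ItemHandler it lets CreateItem ignore the
  // original args and run on the duplicate-filtered ones instead.
  def contramap[C](f: C => F[A])(implicit F: FlatMap[F]): Task[F, C, B] =
    Task(c => F.flatMap(f(c))(run))
}
```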
| @@ -90,7 +90,8 @@ object ReProcessItem { | ||||
|               None, //direction | ||||
|               "",   //source-id | ||||
|               None, //folder | ||||
|               Seq.empty | ||||
|               Seq.empty, | ||||
|               false | ||||
|             ), | ||||
|             Nil | ||||
|           ).pure[F] | ||||
|   | ||||
| @@ -254,7 +254,8 @@ object ScanMailboxTask { | ||||
|           Some(dir), | ||||
|           s"mailbox-${ctx.args.account.user.id}", | ||||
|           args.itemFolder, | ||||
|           Seq.empty | ||||
|           Seq.empty, | ||||
|           true | ||||
|         ) | ||||
|         data = OUpload.UploadData( | ||||
|           multiple = false, | ||||
|   | ||||
| @@ -3660,6 +3660,7 @@ components: | ||||
|           description: DateTime | ||||
|           type: integer | ||||
|           format: date-time | ||||
|  | ||||
|     ItemUploadMeta: | ||||
|       description: | | ||||
|         Meta information for an item upload. The user can specify some | ||||
| @@ -3674,6 +3675,7 @@ components: | ||||
|  | ||||
|         A folderId can be given, the item is placed into this folder | ||||
|         after creation. | ||||
|  | ||||
|       required: | ||||
|         - multiple | ||||
|       properties: | ||||
| @@ -3686,6 +3688,10 @@ components: | ||||
|         folder: | ||||
|           type: string | ||||
|           format: ident | ||||
|         skipDuplicates: | ||||
|           type: boolean | ||||
|           default: false | ||||
|  | ||||
|     Collective: | ||||
|       description: | | ||||
|         Information about a collective. | ||||
|   | ||||
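For clients, the new field is an optional boolean in the meta part of the multipart upload and defaults to false. A hypothetical meta payload that opts into the duplicate check might look like this; the field values are illustrative:

```json
{
  "multiple": true,
  "direction": "incoming",
  "folder": null,
  "skipDuplicates": true
}
```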
| @@ -294,10 +294,21 @@ trait Conversions { | ||||
|       .map(p => parseMeta(p.body)) | ||||
|       .map(fm => | ||||
|         fm.map(m => | ||||
|           (m.multiple, UploadMeta(m.direction, "webapp", m.folder, validFileTypes)) | ||||
|           ( | ||||
|             m.multiple, | ||||
|             UploadMeta( | ||||
|               m.direction, | ||||
|               "webapp", | ||||
|               m.folder, | ||||
|               validFileTypes, | ||||
|               m.skipDuplicates.getOrElse(false) | ||||
|             ) | ||||
|           ) | ||||
|         ) | ||||
|       ) | ||||
|       .getOrElse((true, UploadMeta(None, "webapp", None, validFileTypes)).pure[F]) | ||||
|       .getOrElse( | ||||
|         (true, UploadMeta(None, "webapp", None, validFileTypes, false)).pure[F] | ||||
|       ) | ||||
|  | ||||
|     val files = mp.parts | ||||
|       .filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta"))) | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| package docspell.store.records | ||||
|  | ||||
| import cats.data.NonEmptyList | ||||
| import cats.implicits._ | ||||
|  | ||||
| import docspell.common._ | ||||
| import docspell.store.impl.Implicits._ | ||||
| import docspell.store.impl._ | ||||
| @@ -33,6 +36,17 @@ object RFileMeta { | ||||
|     selectSimple(Columns.all, table, Columns.id.is(fid)).query[FileMeta].option | ||||
|   } | ||||
|  | ||||
|   def findByIds(ids: List[Ident]): ConnectionIO[Vector[FileMeta]] = { | ||||
|     import bitpeace.sql._ | ||||
|  | ||||
|     NonEmptyList.fromList(ids) match { | ||||
|       case Some(nel) => | ||||
|         selectSimple(Columns.all, table, Columns.id.isIn(nel)).query[FileMeta].to[Vector] | ||||
|       case None => | ||||
|         Vector.empty[FileMeta].pure[ConnectionIO] | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   def findMime(fid: Ident): ConnectionIO[Option[MimeType]] = { | ||||
|     import bitpeace.sql._ | ||||
|  | ||||
|   | ||||
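findByIds guards the IN query with a NonEmptyList because SQL's IN clause cannot take an empty value list; with no ids it answers without touching the database. The same pattern in plain doobie, as an illustrative sketch with placeholder table and column names:

```scala
import cats.data.NonEmptyList
import cats.implicits._
import doobie._
import doobie.implicits._

// Sketch only: table and column names are placeholders, not docspell's schema.
def findByIdsSketch(ids: List[String]): ConnectionIO[Vector[String]] =
  NonEmptyList.fromList(ids) match {
    case Some(nel) =>
      // Fragments.in renders "id IN (?, ?, ...)" for the non-empty case
      (fr"SELECT id FROM filemeta WHERE" ++ Fragments.in(fr"id", nel))
        .query[String]
        .to[Vector]
    case None =>
      Vector.empty[String].pure[ConnectionIO]
  }
```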