mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-10-30 21:40:12 +00:00 
			
		
		
		
	Skip pdf conversion if a converted file exists
For images, the conversion also returns the extracted text. If saving that text failed, the text is extracted again in the following text-extraction step.
This commit is contained in:
		| @@ -38,10 +38,20 @@ object ConvertPdf { | ||||
|       item: ItemData | ||||
|   ): Task[F, ProcessItemArgs, ItemData] = | ||||
|     Task { ctx => | ||||
|       // Resolves the attachment's mime type first and hands both to `convertSafe`. | ||||
|       def convert(ra: RAttachment) = { | ||||
|         val mimetype = findMime(ctx)(ra) | ||||
|         mimetype.flatMap { mime => | ||||
|           convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, mime) | ||||
|         } | ||||
|       } | ||||
|       // Converts one attachment unless `isConverted` reports that this already | ||||
|       // happened; in that case the stored meta record (if any) is returned instead. | ||||
|       def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = { | ||||
|         // Branch taken when a converted file is already present. | ||||
|         def reuseStored: F[(RAttachment, Option[RAttachmentMeta])] = | ||||
|           ctx.logger.info( | ||||
|             s"Conversion to pdf already done for attachment ${ra.name}." | ||||
|           ) *> | ||||
|             ctx.store | ||||
|               .transact(RAttachmentMeta.findById(ra.id)) | ||||
|               .map(meta => (ra, meta)) | ||||
|  | ||||
|         // Branch taken when the attachment still has to be converted. | ||||
|         def runConversion: F[(RAttachment, Option[RAttachmentMeta])] = | ||||
|           findMime(ctx)(ra).flatMap { mime => | ||||
|             convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, mime) | ||||
|           } | ||||
|  | ||||
|         isConverted(ctx)(ra).flatMap { done => | ||||
|           if (done) reuseStored else runConversion | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       for { | ||||
|         ras <- item.attachments.traverse(convert) | ||||
| @@ -51,6 +61,11 @@ object ConvertPdf { | ||||
|  | ||||
|     } | ||||
|  | ||||
|   /** Tells whether the given attachment has already been converted, by running | ||||
|     * `RAttachmentSource.isConverted` against the store of the given context. | ||||
|     */ | ||||
|   def isConverted[F[_]: Sync](ctx: Context[F, ProcessItemArgs])( | ||||
|       ra: RAttachment | ||||
|   ): F[Boolean] = { | ||||
|     val query = RAttachmentSource.isConverted(ra.id) | ||||
|     ctx.store.transact(query) | ||||
|   } | ||||
|  | ||||
|   def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = | ||||
|     OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) | ||||
|       .map(_.mimetype) | ||||
|   | ||||
| @@ -44,6 +44,9 @@ case class Column(name: String, ns: String = "", alias: String = "") { | ||||
|   /** Appends a `<>` comparison against the given value to this column's fragment `f`. */ | ||||
|   def isNot[A: Put](value: A): Fragment = { | ||||
|     val comparison = fr"<> $value" | ||||
|     f ++ comparison | ||||
|   } | ||||
|  | ||||
|   /** Appends a `<>` comparison against the fragment of another column `c`. */ | ||||
|   def isNot(c: Column): Fragment = { | ||||
|     val operator = fr"<>" | ||||
|     f ++ operator ++ c.f | ||||
|   } | ||||
|  | ||||
|   /** Appends an `is null` test to this column's fragment `f`. */ | ||||
|   def isNull: Fragment = { | ||||
|     val nullCheck = fr"is null" | ||||
|     f ++ nullCheck | ||||
|   } | ||||
|  | ||||
|   | ||||
| @@ -46,6 +46,9 @@ object RAttachmentMeta { | ||||
|   /** True if counting the rows whose id equals `attachId` yields more than zero. */ | ||||
|   def exists(attachId: Ident): ConnectionIO[Boolean] = { | ||||
|     val countQuery = selectCount(id, table, id.is(attachId)).query[Int] | ||||
|     countQuery.unique.map(count => count > 0) | ||||
|   } | ||||
|  | ||||
|   /** Loads the record whose id equals `attachId`, if there is one. */ | ||||
|   def findById(attachId: Ident): ConnectionIO[Option[RAttachmentMeta]] = { | ||||
|     val byId = id.is(attachId) | ||||
|     selectSimple(all, table, byId).query[RAttachmentMeta].option | ||||
|   } | ||||
|  | ||||
|   def upsert(v: RAttachmentMeta): ConnectionIO[Int] = | ||||
|     for { | ||||
|       n0 <- update(v) | ||||
|   | ||||
| @@ -48,6 +48,21 @@ object RAttachmentSource { | ||||
|       .unique | ||||
|       .map(_ > 0) | ||||
|  | ||||
|   /** Tells whether the attachment with the given id has been converted. | ||||
|     * | ||||
|     * This table (alias `s`) is joined with the attachment table (alias `a`) on | ||||
|     * their ids. The result is true when at least one joined row for `attachId` | ||||
|     * has an attachment file id that differs from the file id stored here. | ||||
|     */ | ||||
|   def isConverted(attachId: Ident): ConnectionIO[Boolean] = { | ||||
|     val sourceId     = Columns.id.prefix("s") | ||||
|     val sourceFile   = Columns.fileId.prefix("s") | ||||
|     val attachmentId = RAttachment.Columns.id.prefix("a") | ||||
|     val attachFile   = RAttachment.Columns.fileId.prefix("a") | ||||
|  | ||||
|     val joined = | ||||
|       table ++ fr"s INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ attachmentId.is(sourceId) | ||||
|     val condition = and(attachmentId.is(attachId), attachFile.isNot(sourceFile)) | ||||
|  | ||||
|     selectCount(attachmentId, joined, condition).query[Int].unique.map(n => n > 0) | ||||
|   } | ||||
|  | ||||
|   /** Deletes the rows whose id equals `attachId`; yields the `Int` produced by the update. */ | ||||
|   def delete(attachId: Ident): ConnectionIO[Int] = { | ||||
|     val statement = deleteFrom(table, id.is(attachId)) | ||||
|     statement.update.run | ||||
|   } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user