diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 572a18bb..17cca3e0 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -38,10 +38,20 @@ object ConvertPdf { item: ItemData ): Task[F, ProcessItemArgs, ItemData] = Task { ctx => - def convert(ra: RAttachment) = - findMime(ctx)(ra).flatMap(m => - convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m) - ) + def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] = + isConverted(ctx)(ra).flatMap { + case true => + ctx.logger.info( + s"Conversion to pdf already done for attachment ${ra.name}." + ) *> + ctx.store + .transact(RAttachmentMeta.findById(ra.id)) + .map(rmOpt => (ra, rmOpt)) + case false => + findMime(ctx)(ra).flatMap(m => + convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m) + ) + } for { ras <- item.attachments.traverse(convert) @@ -51,6 +61,11 @@ object ConvertPdf { } + def isConverted[F[_]: Sync](ctx: Context[F, ProcessItemArgs])( + ra: RAttachment + ): F[Boolean] = + ctx.store.transact(RAttachmentSource.isConverted(ra.id)) + def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) .map(_.mimetype) diff --git a/modules/store/src/main/scala/docspell/store/impl/Column.scala b/modules/store/src/main/scala/docspell/store/impl/Column.scala index 578dd213..2357664b 100644 --- a/modules/store/src/main/scala/docspell/store/impl/Column.scala +++ b/modules/store/src/main/scala/docspell/store/impl/Column.scala @@ -44,6 +44,9 @@ case class Column(name: String, ns: String = "", alias: String = "") { def isNot[A: Put](value: A): Fragment = f ++ fr"<> $value" + def isNot(c: Column): Fragment = + f ++ fr"<>" ++ c.f + def isNull: Fragment = f ++ fr"is null" diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala index 72223180..d1cb79ea 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentMeta.scala @@ -46,6 +46,9 @@ object RAttachmentMeta { def exists(attachId: Ident): ConnectionIO[Boolean] = selectCount(id, table, id.is(attachId)).query[Int].unique.map(_ > 0) + def findById(attachId: Ident): ConnectionIO[Option[RAttachmentMeta]] = + selectSimple(all, table, id.is(attachId)).query[RAttachmentMeta].option + def upsert(v: RAttachmentMeta): ConnectionIO[Int] = for { n0 <- update(v) diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala index d732ecff..f67a805f 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentSource.scala @@ -48,6 +48,21 @@ object RAttachmentSource { .unique .map(_ > 0) + def isConverted(attachId: Ident): ConnectionIO[Boolean] = { + val sId = Columns.id.prefix("s") + val sFile = Columns.fileId.prefix("s") + val aId = RAttachment.Columns.id.prefix("a") + val aFile = RAttachment.Columns.fileId.prefix("a") + + val from = table ++ fr"s INNER JOIN" ++ + RAttachment.table ++ fr"a ON" ++ aId.is(sId) + + selectCount(aId, from, and(aId.is(attachId), aFile.isNot(sFile))) + .query[Int] + .unique + .map(_ > 0) + } + def delete(attachId: Ident): ConnectionIO[Int] = deleteFrom(table, id.is(attachId)).update.run