mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Skip pdf conversion if a converted file exists
For images, the conversion also returns the extracted text. If saving that text failed, it is extracted in the following text-extraction step.
This commit is contained in:
parent
b6f23b038a
commit
d4354b8b49
@ -38,10 +38,20 @@ object ConvertPdf {
|
|||||||
item: ItemData
|
item: ItemData
|
||||||
): Task[F, ProcessItemArgs, ItemData] =
|
): Task[F, ProcessItemArgs, ItemData] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
def convert(ra: RAttachment) =
|
// Yields the attachment paired with its optional RAttachmentMeta; the
// conversion is skipped when isConverted reports true for the attachment.
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
|
||||||
findMime(ctx)(ra).flatMap(m =>
|
isConverted(ctx)(ra).flatMap {
|
||||||
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m)
|
// Already converted: log it and load the stored RAttachmentMeta (if any)
// instead of running the conversion again.
case true =>
|
||||||
)
|
ctx.logger.info(
|
||||||
|
s"Conversion to pdf already done for attachment ${ra.name}."
|
||||||
|
) *>
|
||||||
|
ctx.store
|
||||||
|
.transact(RAttachmentMeta.findById(ra.id))
|
||||||
|
.map(rmOpt => (ra, rmOpt))
|
||||||
|
// Not converted yet: look up the mimetype and run the conversion.
case false =>
|
||||||
|
findMime(ctx)(ra).flatMap(m =>
|
||||||
|
convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, m)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
ras <- item.attachments.traverse(convert)
|
ras <- item.attachments.traverse(convert)
|
||||||
@ -51,6 +61,11 @@ object ConvertPdf {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Tells whether the given attachment has already been converted.
  *
  * Runs `RAttachmentSource.isConverted` for the attachment's id through the
  * store of the given context.
  *
  * @param ctx the processing context providing the store
  * @param ra  the attachment to check
  * @return the result of the `RAttachmentSource.isConverted` query
  */
def isConverted[F[_]: Sync](ctx: Context[F, ProcessItemArgs])(
    ra: RAttachment
): F[Boolean] = {
  val convertedQuery = RAttachmentSource.isConverted(ra.id)
  ctx.store.transact(convertedQuery)
}
|
||||||
|
|
||||||
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
||||||
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
|
@ -44,6 +44,9 @@ case class Column(name: String, ns: String = "", alias: String = "") {
|
|||||||
def isNot[A: Put](value: A): Fragment =
|
def isNot[A: Put](value: A): Fragment =
|
||||||
f ++ fr"<> $value"
|
f ++ fr"<> $value"
|
||||||
|
|
||||||
|
/** Builds a fragment comparing this column with another column for
  * inequality: this column's fragment, `<>`, then the fragment of `c`.
  *
  * @param c the column on the right-hand side of `<>`
  */
def isNot(c: Column): Fragment = {
  val notEqual = fr"<>"
  f ++ notEqual ++ c.f
}
|
||||||
|
|
||||||
def isNull: Fragment =
|
def isNull: Fragment =
|
||||||
f ++ fr"is null"
|
f ++ fr"is null"
|
||||||
|
|
||||||
|
@ -46,6 +46,9 @@ object RAttachmentMeta {
|
|||||||
def exists(attachId: Ident): ConnectionIO[Boolean] =
|
def exists(attachId: Ident): ConnectionIO[Boolean] =
|
||||||
selectCount(id, table, id.is(attachId)).query[Int].unique.map(_ > 0)
|
selectCount(id, table, id.is(attachId)).query[Int].unique.map(_ > 0)
|
||||||
|
|
||||||
|
/** Looks up the attachment metadata row whose id equals `attachId`.
  *
  * @param attachId the attachment id to search for
  * @return the matching row, or `None` if the query yields no row
  */
def findById(attachId: Ident): ConnectionIO[Option[RAttachmentMeta]] = {
  val byId = id.is(attachId)
  selectSimple(all, table, byId)
    .query[RAttachmentMeta]
    .option
}
|
||||||
|
|
||||||
def upsert(v: RAttachmentMeta): ConnectionIO[Int] =
|
def upsert(v: RAttachmentMeta): ConnectionIO[Int] =
|
||||||
for {
|
for {
|
||||||
n0 <- update(v)
|
n0 <- update(v)
|
||||||
|
@ -48,6 +48,21 @@ object RAttachmentSource {
|
|||||||
.unique
|
.unique
|
||||||
.map(_ > 0)
|
.map(_ > 0)
|
||||||
|
|
||||||
|
/** Tells whether the attachment with the given id has been converted.
  *
  * Joins this table (alias `s`) with the attachment table (alias `a`) on
  * equal ids and counts the rows for `attachId` whose attachment file id
  * differs from the source file id. The result is `true` when that count is
  * greater than zero.
  *
  * @param attachId the attachment id to check
  */
def isConverted(attachId: Ident): ConnectionIO[Boolean] = {
  // Columns of the source table, aliased as "s".
  val srcId   = Columns.id.prefix("s")
  val srcFile = Columns.fileId.prefix("s")
  // Columns of the attachment table, aliased as "a".
  val attId   = RAttachment.Columns.id.prefix("a")
  val attFile = RAttachment.Columns.fileId.prefix("a")

  val joinCond = attId.is(srcId)
  val joined =
    table ++ fr"s INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ joinCond

  // A differing file id means the attachment no longer points at its source file.
  val cond = and(attId.is(attachId), attFile.isNot(srcFile))

  selectCount(attId, joined, cond)
    .query[Int]
    .unique
    .map(count => count > 0)
}
|
||||||
|
|
||||||
def delete(attachId: Ident): ConnectionIO[Int] =
|
def delete(attachId: Ident): ConnectionIO[Int] =
|
||||||
deleteFrom(table, id.is(attachId)).update.run
|
deleteFrom(table, id.is(attachId)).update.run
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user