mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 10:29:34 +00:00
Skip pdf conversion if a converted file exists
For images, the conversion also returns the extracted text. If saving that text failed, the text is extracted again in the following text-extraction step.
This commit is contained in:
parent
b6f23b038a
commit
d4354b8b49
@ -38,10 +38,20 @@ object ConvertPdf {
|
||||
item: ItemData
|
||||
): Task[F, ProcessItemArgs, ItemData] =
|
||||
Task { ctx =>
|
||||
/** Converts a single attachment: looks up the mime type of its file and
  * hands both to `convertSafe`.
  */
def convert(ra: RAttachment) =
  for {
    mime   <- findMime(ctx)(ra)
    result <- convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, mime)
  } yield result
|
||||
/** Converts a single attachment unless that has been done before.
  *
  * When `isConverted` reports `true`, the conversion is skipped: a log
  * message is written and the attachment is paired with whatever metadata
  * `RAttachmentMeta.findById` returns for it (possibly none). Otherwise the
  * mime type is looked up and the attachment is passed to `convertSafe`.
  */
def convert(ra: RAttachment): F[(RAttachment, Option[RAttachmentMeta])] =
  isConverted(ctx)(ra).flatMap { alreadyConverted =>
    if (alreadyConverted)
      for {
        _ <- ctx.logger.info(
          s"Conversion to pdf already done for attachment ${ra.name}."
        )
        existingMeta <- ctx.store.transact(RAttachmentMeta.findById(ra.id))
      } yield (ra, existingMeta)
    else
      findMime(ctx)(ra).flatMap { mime =>
        convertSafe(cfg, JsoupSanitizer.clean, ctx, item)(ra, mime)
      }
  }
|
||||
|
||||
for {
|
||||
ras <- item.attachments.traverse(convert)
|
||||
@ -51,6 +61,11 @@ object ConvertPdf {
|
||||
|
||||
}
|
||||
|
||||
/** Runs the `RAttachmentSource.isConverted` query for the given attachment
  * against the store of the task context.
  *
  * @param ctx the task context providing the store
  * @param ra  the attachment whose id is checked
  * @return the boolean result of the query, in `F`
  */
def isConverted[F[_]: Sync](ctx: Context[F, ProcessItemArgs])(
    ra: RAttachment
): F[Boolean] = {
  val convertedQuery = RAttachmentSource.isConverted(ra.id)
  ctx.store.transact(convertedQuery)
}
|
||||
|
||||
def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] =
|
||||
OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId)))
|
||||
.map(_.mimetype)
|
||||
|
@ -44,6 +44,9 @@ case class Column(name: String, ns: String = "", alias: String = "") {
|
||||
/** Builds the fragment `<column> <> <value>`, with `value` interpolated
  * through the `fr` interpolator.
  */
def isNot[A: Put](value: A): Fragment = {
  val notEqualValue = fr"<> $value"
  f ++ notEqualValue
}
|
||||
|
||||
/** Builds the fragment `<column> <> <other column>`, comparing this column
  * against column `c`.
  */
def isNot(c: Column): Fragment = {
  val notEqual = fr"<>"
  f ++ notEqual ++ c.f
}
|
||||
|
||||
/** Builds the fragment `<column> is null`. */
def isNull: Fragment = {
  val nullCheck = fr"is null"
  f ++ nullCheck
}
|
||||
|
||||
|
@ -46,6 +46,9 @@ object RAttachmentMeta {
|
||||
/** Checks whether at least one row exists for the given attachment id.
  *
  * @param attachId the id to look for
  * @return `true` if the count query yields a value greater than zero
  */
def exists(attachId: Ident): ConnectionIO[Boolean] = {
  val matchesId = id.is(attachId)
  val rowCount  = selectCount(id, table, matchesId).query[Int].unique
  rowCount.map(count => count > 0)
}
|
||||
|
||||
/** Selects the row with the given attachment id, if there is one.
  *
  * @param attachId the id to look for
  * @return the matching record, or `None` when the query returns no row
  */
def findById(attachId: Ident): ConnectionIO[Option[RAttachmentMeta]] = {
  val matchesId = id.is(attachId)
  val select    = selectSimple(all, table, matchesId)
  select.query[RAttachmentMeta].option
}
|
||||
|
||||
def upsert(v: RAttachmentMeta): ConnectionIO[Int] =
|
||||
for {
|
||||
n0 <- update(v)
|
||||
|
@ -48,6 +48,21 @@ object RAttachmentSource {
|
||||
.unique
|
||||
.map(_ > 0)
|
||||
|
||||
/** Checks whether the attachment's file differs from its source file.
  *
  * Joins this table (alias `s`) with the `RAttachment` table (alias `a`)
  * on their id columns and counts the rows where the attachment id equals
  * `attachId` and the two file id columns are not equal.
  *
  * @param attachId the attachment id to check
  * @return `true` if the count is greater than zero
  */
def isConverted(attachId: Ident): ConnectionIO[Boolean] = {
  // Columns of this (source) table, qualified with alias "s".
  val sourceId   = Columns.id.prefix("s")
  val sourceFile = Columns.fileId.prefix("s")
  // Columns of the attachment table, qualified with alias "a".
  val attachmentId   = RAttachment.Columns.id.prefix("a")
  val attachmentFile = RAttachment.Columns.fileId.prefix("a")

  val joinedTables =
    table ++ fr"s INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ attachmentId.is(sourceId)

  val condition =
    and(attachmentId.is(attachId), attachmentFile.isNot(sourceFile))

  val rowCount = selectCount(attachmentId, joinedTables, condition).query[Int].unique
  rowCount.map(count => count > 0)
}
|
||||
|
||||
/** Deletes the rows whose id equals `attachId`.
  *
  * @param attachId the id of the rows to delete
  * @return the `Int` produced by running the delete statement
  */
def delete(attachId: Ident): ConnectionIO[Int] = {
  val matchesId = id.is(attachId)
  val statement = deleteFrom(table, matchesId)
  statement.update.run
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user