mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-28 17:55:06 +00:00
Create tasks to generate all previews
There is a task that generates the preview image for a single attachment. It can either add the preview (if not present yet) or overwrite it (e.g. when some config has changed). A second task selects all attachments without previews and submits one such task for each of them. This second task is submitted automatically on start to generate previews for all existing attachments.
This commit is contained in:
parent
eede194352
commit
709848244c
@ -0,0 +1,26 @@
|
|||||||
|
package docspell.common
|
||||||
|
|
||||||
|
import io.circe.generic.semiauto._
|
||||||
|
import io.circe.{Decoder, Encoder}
|
||||||
|
|
||||||
|
/** Arguments for the `AllPreviewsTask`, which submits jobs that each
  * generate the preview image of one attachment.
  *
  * The submitted jobs can replace the current preview image or only
  * generate one if it is missing.
  *
  * @param collective limits the run to the attachments of this
  *   collective; if `None`, all attachments are considered
  * @param storeMode passed on to every submitted job to decide between
  *   replacing an existing preview and creating only a missing one
  */
case class AllPreviewsArgs(
    collective: Option[Ident],
    storeMode: MakePreviewArgs.StoreMode
)

object AllPreviewsArgs {

  /** Name under which the all-previews task is known to the job system. */
  val taskName = Ident.unsafe("all-previews")

  /** JSON codecs, derived from the fields of the case class. */
  implicit val jsonDecoder: Decoder[AllPreviewsArgs] = deriveDecoder[AllPreviewsArgs]
  implicit val jsonEncoder: Encoder[AllPreviewsArgs] = deriveEncoder[AllPreviewsArgs]
}
|
@ -2,8 +2,8 @@ package docspell.common
|
|||||||
|
|
||||||
object DocspellSystem {
|
object DocspellSystem {
|
||||||
|
|
||||||
val user = Ident.unsafe("docspell-system")
|
val user = Ident.unsafe("docspell-system")
|
||||||
val taskGroup = user
|
val taskGroup = user
|
||||||
val migrationTaskTracker = Ident.unsafe("full-text-index-tracker")
|
val migrationTaskTracker = Ident.unsafe("full-text-index-tracker")
|
||||||
|
val allPreviewTaskTracker = Ident.unsafe("generate-all-previews")
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,53 @@
|
|||||||
|
package docspell.common
|
||||||
|
|
||||||
|
import io.circe.generic.semiauto._
|
||||||
|
import io.circe.{Decoder, Encoder}
|
||||||
|
|
||||||
|
/** Arguments for the `MakePreviewTask` that generates a preview image
  * for an attachment.
  *
  * It can replace the current preview image or only generate one, if
  * it is missing.
  *
  * @param attachment id of the attachment to create the preview for
  * @param store whether an existing preview is replaced or kept
  */
case class MakePreviewArgs(
    attachment: Ident,
    store: MakePreviewArgs.StoreMode
)

object MakePreviewArgs {

  /** Name under which the make-preview task is known to the job system. */
  val taskName = Ident.unsafe("make-preview")

  /** Decides what happens if a preview already exists for an attachment. */
  sealed trait StoreMode extends Product {

    /** Lower-cased name of the case object; this is the JSON representation.
      *
      * `Locale.ROOT` keeps the result independent of the JVM's default locale.
      */
    final def name: String =
      productPrefix.toLowerCase(java.util.Locale.ROOT)
  }

  object StoreMode {

    /** Replace any preview file that may already exist. */
    case object Replace extends StoreMode

    /** Only create a preview image, if it is missing. */
    case object WhenMissing extends StoreMode

    /** Parses a store mode, ignoring case and surrounding whitespace.
      *
      * The input is lower-cased with `Locale.ROOT`: the default-locale
      * variant maps an upper-case `I` to a dotless `ı` under e.g. a Turkish
      * locale, so that an input like "WHENMISSING" would be rejected.
      *
      * @param str the textual store mode; `null` is treated as invalid
      * @return the store mode, or an error message naming the invalid input
      */
    def fromString(str: String): Either[String, StoreMode] =
      Option(str).map(_.trim.toLowerCase(java.util.Locale.ROOT)) match {
        case Some("replace")     => Right(Replace)
        case Some("whenmissing") => Right(WhenMissing)
        case _                   => Left(s"Invalid store mode: $str")
      }

    /** Encodes a store mode as its lower-case [[StoreMode.name]]. */
    implicit val jsonEncoder: Encoder[StoreMode] =
      Encoder.encodeString.contramap(_.name)

    /** Decodes a store mode from a JSON string via [[fromString]]. */
    implicit val jsonDecoder: Decoder[StoreMode] =
      Decoder.decodeString.emap(fromString)
  }

  /** JSON codecs, derived from the fields of the case class. */
  implicit val jsonEncoder: Encoder[MakePreviewArgs] =
    deriveEncoder[MakePreviewArgs]

  implicit val jsonDecoder: Decoder[MakePreviewArgs] =
    deriveDecoder[MakePreviewArgs]

}
|
@ -18,6 +18,7 @@ import docspell.joex.learn.LearnClassifierTask
|
|||||||
import docspell.joex.notify._
|
import docspell.joex.notify._
|
||||||
import docspell.joex.pdfconv.ConvertAllPdfTask
|
import docspell.joex.pdfconv.ConvertAllPdfTask
|
||||||
import docspell.joex.pdfconv.PdfConvTask
|
import docspell.joex.pdfconv.PdfConvTask
|
||||||
|
import docspell.joex.preview._
|
||||||
import docspell.joex.process.ItemHandler
|
import docspell.joex.process.ItemHandler
|
||||||
import docspell.joex.process.ReProcessItem
|
import docspell.joex.process.ReProcessItem
|
||||||
import docspell.joex.scanmailbox._
|
import docspell.joex.scanmailbox._
|
||||||
@ -68,7 +69,10 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
|
|||||||
HouseKeepingTask
|
HouseKeepingTask
|
||||||
.periodicTask[F](cfg.houseKeeping.schedule)
|
.periodicTask[F](cfg.houseKeeping.schedule)
|
||||||
.flatMap(pstore.insert) *>
|
.flatMap(pstore.insert) *>
|
||||||
MigrationTask.job.flatMap(queue.insertIfNew)
|
MigrationTask.job.flatMap(queue.insertIfNew) *>
|
||||||
|
AllPreviewsTask
|
||||||
|
.job(MakePreviewArgs.StoreMode.WhenMissing, None)
|
||||||
|
.flatMap(queue.insertIfNew)
|
||||||
}
|
}
|
||||||
|
|
||||||
object JoexAppImpl {
|
object JoexAppImpl {
|
||||||
@ -167,6 +171,20 @@ object JoexAppImpl {
|
|||||||
LearnClassifierTask.onCancel[F]
|
LearnClassifierTask.onCancel[F]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
.withTask(
|
||||||
|
JobTask.json(
|
||||||
|
MakePreviewArgs.taskName,
|
||||||
|
MakePreviewTask[F](cfg.convert),
|
||||||
|
MakePreviewTask.onCancel[F]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.withTask(
|
||||||
|
JobTask.json(
|
||||||
|
AllPreviewsArgs.taskName,
|
||||||
|
AllPreviewsTask[F](queue, joex),
|
||||||
|
AllPreviewsTask.onCancel[F]
|
||||||
|
)
|
||||||
|
)
|
||||||
.resource
|
.resource
|
||||||
psch <- PeriodicScheduler.create(
|
psch <- PeriodicScheduler.create(
|
||||||
cfg.periodicScheduler,
|
cfg.periodicScheduler,
|
||||||
|
@ -0,0 +1,86 @@
|
|||||||
|
package docspell.joex.preview
|
||||||
|
|
||||||
|
import fs2.{Chunk, Stream}
|
||||||
|
import docspell.common._
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
import docspell.store.queue.JobQueue
|
||||||
|
import docspell.backend.ops.OJoex
|
||||||
|
import docspell.joex.scheduler.Task
|
||||||
|
import docspell.joex.scheduler.Context
|
||||||
|
import docspell.store.records.RAttachment
|
||||||
|
import docspell.store.records.RJob
|
||||||
|
|
||||||
|
/** Task that looks up attachments without a preview image and submits one
  * `MakePreviewTask` job for each of them.
  *
  * NOTE(review): the attachments are always selected with
  * `RAttachment.findWithoutPreview`, whatever `storeMode` is given. With
  * `StoreMode.Replace`, attachments that already have a preview therefore
  * appear not to be picked up by this task -- confirm whether that is
  * intended.
  */
object AllPreviewsTask {

  type Args = AllPreviewsArgs

  /** Chunk size handed to the query that streams the attachments. */
  private val queryChunkSize = 50

  /** Creates the task: submits the preview jobs, logs how many were
    * submitted and then notifies all job executor nodes.
    */
  def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] =
    Task { ctx =>
      ctx.logger
        .info("Generating previews for attachments")
        .flatMap(_ => submitConversionJobs(ctx, queue))
        .flatMap(count => ctx.logger.info(s"Submitted $count jobs"))
        .flatMap(_ => joex.notifyAllNodes)
        .map(_ => ())
    }

  /** Only logs a warning when the task is cancelled. */
  def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
    Task.log(_.warn("Cancelling all-previews task"))

  /** Streams the attachments without a preview (optionally limited to the
    * collective given in the arguments), turns them into jobs and inserts
    * these chunk-wise into the queue.
    *
    * @return the total number of jobs handed to `queue.insertAllIfNew`
    */
  def submitConversionJobs[F[_]: Sync](
      ctx: Context[F, Args],
      queue: JobQueue[F]
  ): F[Int] = {
    val withoutPreview =
      ctx.store.transact(
        RAttachment.findWithoutPreview(ctx.args.collective, queryChunkSize)
      )
    val jobs = withoutPreview.chunks.flatMap(createJobs[F](ctx))

    jobs.chunks
      .evalMap(batch => queue.insertAllIfNew(batch.toVector).map(_ => batch.size))
      .evalTap(submitted => ctx.logger.debug(s"Submitted $submitted jobs …"))
      .compile
      .foldMonoid
  }

  /** Creates one low-priority make-preview job per attachment of the chunk.
    *
    * The collective from the arguments, or the system identity if there is
    * none, is used for the job. Each job carries a tracker made of the task
    * name and the attachment id.
    */
  private def createJobs[F[_]: Sync](
      ctx: Context[F, Args]
  )(ras: Chunk[RAttachment]): Stream[F, RJob] = {
    val owner = ctx.args.collective.getOrElse(DocspellSystem.taskGroup)

    def mkJob(ra: RAttachment): F[RJob] =
      Ident.randomId[F].flatMap { jobId =>
        Timestamp.current[F].map { submitted =>
          RJob.newJob(
            jobId,
            MakePreviewArgs.taskName,
            owner,
            MakePreviewArgs(ra.id, ctx.args.storeMode),
            s"Create preview ${ra.id.id}/${ra.name.getOrElse("-")}",
            submitted,
            owner,
            Priority.Low,
            Some(MakePreviewArgs.taskName / ra.id)
          )
        }
      }

    Stream.evalUnChunk(ras.traverse(mkJob))
  }

  /** Creates the low-priority job that runs this task.
    *
    * @param storeMode store mode passed on to the generated preview jobs
    * @param cid the collective to restrict the run to; if `None`, the
    *   system identity takes its place in the job and all attachments are
    *   considered
    */
  def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] =
    Ident.randomId[F].flatMap { jobId =>
      Timestamp.current[F].map { submitted =>
        RJob.newJob(
          jobId,
          AllPreviewsArgs.taskName,
          cid.getOrElse(DocspellSystem.taskGroup),
          AllPreviewsArgs(cid, storeMode),
          "Create preview images",
          submitted,
          DocspellSystem.taskGroup,
          Priority.Low,
          Some(DocspellSystem.allPreviewTaskTracker)
        )
      }
    }

}
|
@ -0,0 +1,57 @@
|
|||||||
|
package docspell.joex.preview
|
||||||
|
|
||||||
|
import cats.implicits._
|
||||||
|
import cats.effect._
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.joex.scheduler.Task
|
||||||
|
import docspell.store.records.RAttachmentPreview
|
||||||
|
import docspell.joex.scheduler.Context
|
||||||
|
import docspell.joex.process.AttachmentPreview
|
||||||
|
import docspell.convert.ConvertConfig
|
||||||
|
import docspell.extract.pdfbox.PdfboxPreview
|
||||||
|
import docspell.store.records.RAttachment
|
||||||
|
|
||||||
|
/** Task that generates the preview image of a single attachment. */
object MakePreviewTask {

  type Args = MakePreviewArgs

  /** Creates the task. If the store mode is `WhenMissing` and a preview is
    * already present, it only logs that the attachment is skipped;
    * otherwise the preview is generated.
    *
    * @param cfg only its `chunkSize` is used and passed on to
    *   `AttachmentPreview.createPreview`
    */
  def apply[F[_]: Sync](cfg: ConvertConfig): Task[F, Args, Unit] =
    Task { ctx =>
      def skip =
        ctx.logger.info(
          s"Preview already exists for attachment ${ctx.args.attachment}. Skipping."
        )

      def create(renderer: PdfboxPreview[F]) =
        ctx.logger.info(
          s"Generating preview image for attachment ${ctx.args.attachment}"
        ) *> generatePreview(ctx, renderer, cfg)

      for {
        present  <- previewExists(ctx)
        renderer <- PdfboxPreview(30)
        _        <- if (present) skip else create(renderer)
      } yield ()
    }

  /** Only logs a warning when the task is cancelled. */
  def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
    Task.log(_.warn("Cancelling make-preview task"))

  /** Loads the attachment and creates its preview; does nothing if the
    * attachment cannot be found.
    */
  private def generatePreview[F[_]: Sync](
      ctx: Context[F, Args],
      preview: PdfboxPreview[F],
      cfg: ConvertConfig
  ): F[Unit] =
    ctx.store.transact(RAttachment.findById(ctx.args.attachment)).flatMap {
      case Some(ra) =>
        AttachmentPreview.createPreview(ctx, preview, cfg.chunkSize)(ra).map(_ => ())
      case None =>
        ().pure[F]
    }

  /** Tells whether generating the preview should be skipped: the database
    * is consulted only in `WhenMissing` mode, in `Replace` mode the result
    * is always `false`.
    */
  private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] =
    ctx.args.store match {
      case MakePreviewArgs.StoreMode.WhenMissing =>
        ctx.store.transact(
          RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined)
        )
      case MakePreviewArgs.StoreMode.Replace =>
        false.pure[F]
    }
}
|
@ -15,6 +15,7 @@ import docspell.store.records._
|
|||||||
import docspell.store.syntax.MimeTypes._
|
import docspell.store.syntax.MimeTypes._
|
||||||
|
|
||||||
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
|
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
|
||||||
|
import docspell.store.queries.QAttachment
|
||||||
|
|
||||||
/** Goes through all attachments that must be already converted into a
|
/** Goes through all attachments that must be already converted into a
|
||||||
* pdf. If it is a pdf, the first page is converted into a small
|
* pdf. If it is a pdf, the first page is converted into a small
|
||||||
@ -31,14 +32,14 @@ object AttachmentPreview {
|
|||||||
s"Creating preview images for ${item.attachments.size} files…"
|
s"Creating preview images for ${item.attachments.size} files…"
|
||||||
)
|
)
|
||||||
preview <- PdfboxPreview(24)
|
preview <- PdfboxPreview(24)
|
||||||
_ <- item.attachments.traverse(createPreview(ctx, preview, cfg))
|
_ <- item.attachments.traverse(createPreview(ctx, preview, cfg.chunkSize))
|
||||||
} yield item
|
} yield item
|
||||||
}
|
}
|
||||||
|
|
||||||
def createPreview[F[_]: Sync](
|
def createPreview[F[_]: Sync](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
preview: PdfboxPreview[F],
|
preview: PdfboxPreview[F],
|
||||||
cfg: ConvertConfig
|
chunkSize: Int
|
||||||
)(
|
)(
|
||||||
ra: RAttachment
|
ra: RAttachment
|
||||||
): F[Option[RAttachmentPreview]] =
|
): F[Option[RAttachmentPreview]] =
|
||||||
@ -46,7 +47,7 @@ object AttachmentPreview {
|
|||||||
case MimeType.PdfMatch(_) =>
|
case MimeType.PdfMatch(_) =>
|
||||||
preview.previewPNG(loadFile(ctx)(ra)).flatMap {
|
preview.previewPNG(loadFile(ctx)(ra)).flatMap {
|
||||||
case Some(out) =>
|
case Some(out) =>
|
||||||
createRecord(ctx, out, ra, cfg.chunkSize).map(_.some)
|
createRecord(ctx, out, ra, chunkSize).map(_.some)
|
||||||
case None =>
|
case None =>
|
||||||
(None: Option[RAttachmentPreview]).pure[F]
|
(None: Option[RAttachmentPreview]).pure[F]
|
||||||
}
|
}
|
||||||
@ -55,7 +56,7 @@ object AttachmentPreview {
|
|||||||
(None: Option[RAttachmentPreview]).pure[F]
|
(None: Option[RAttachmentPreview]).pure[F]
|
||||||
}
|
}
|
||||||
|
|
||||||
def createRecord[F[_]: Sync](
|
private def createRecord[F[_]: Sync](
|
||||||
ctx: Context[F, _],
|
ctx: Context[F, _],
|
||||||
png: Stream[F, Byte],
|
png: Stream[F, Byte],
|
||||||
ra: RAttachment,
|
ra: RAttachment,
|
||||||
@ -75,6 +76,7 @@ object AttachmentPreview {
|
|||||||
.lastOrError
|
.lastOrError
|
||||||
now <- Timestamp.current[F]
|
now <- Timestamp.current[F]
|
||||||
rp = RAttachmentPreview(ra.id, Ident.unsafe(fileMeta.id), name.map(_.fullName), now)
|
rp = RAttachmentPreview(ra.id, Ident.unsafe(fileMeta.id), name.map(_.fullName), now)
|
||||||
|
_ <- QAttachment.deletePreview(ctx.store)(ra.id)
|
||||||
_ <- ctx.store.transact(RAttachmentPreview.insert(rp))
|
_ <- ctx.store.transact(RAttachmentPreview.insert(rp))
|
||||||
} yield rp
|
} yield rp
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,22 @@ import doobie.implicits._
|
|||||||
object QAttachment {
|
object QAttachment {
|
||||||
private[this] val logger = org.log4s.getLogger
|
private[this] val logger = org.log4s.getLogger
|
||||||
|
|
||||||
|
/** Deletes the preview of the given attachment: first the stored file,
  * then the preview record.
  *
  * NOTE(review): the file is removed before the record that points to
  * it. If the schema has a foreign key from the preview record to the
  * file, this order may need to be reversed -- confirm.
  *
  * @return the summed results of the record deletions; `0` if the
  *   attachment has no preview
  */
def deletePreview[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = {
  val existing = RAttachmentPreview.findById(attachId).map(_.toSeq)

  Stream
    .evalSeq(store.transact(existing))
    .flatMap(preview => store.bitpeace.delete(preview.fileId.id))
    // the outcome of the file deletion is not used; the record is removed either way
    .evalMap(_ => store.transact(RAttachmentPreview.delete(attachId)))
    .compile
    .foldMonoid
}
|
||||||
|
|
||||||
/** Deletes an attachment, its related source and meta data records.
|
/** Deletes an attachment, its related source and meta data records.
|
||||||
* It will only delete a related archive file, if this is the last
|
* It will only delete a related archive file, if this is the last
|
||||||
* attachment in that archive.
|
* attachment in that archive.
|
||||||
|
@ -231,6 +231,38 @@ object RAttachment {
|
|||||||
def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] =
|
def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] =
|
||||||
selectSimple(Seq(itemId), table, id.is(attachId)).query[Ident].option
|
selectSimple(Seq(itemId), table, id.is(attachId)).query[Ident].option
|
||||||
|
|
||||||
|
/** Streams all attachments that have no preview record.
  *
  * The attachment table is left-outer-joined with the preview table and
  * only rows without a matching preview are kept.
  *
  * @param coll if given, the item table is joined as well and only
  *   attachments of items in this collective are returned
  * @param chunkSize chunk size used for streaming the query result
  */
def findWithoutPreview(
    coll: Option[Ident],
    chunkSize: Int
): Stream[ConnectionIO, RAttachment] = {
  val aId   = Columns.id.prefix("a")
  val aItem = Columns.itemId.prefix("a")
  val pId   = RAttachmentPreview.Columns.id.prefix("p")
  val iId   = RItem.Columns.id.prefix("i")
  val iColl = RItem.Columns.cid.prefix("i")

  val cols = all.map(_.prefix("a"))
  val attachmentWithPreview =
    table ++ fr"a LEFT OUTER JOIN" ++
      RAttachmentPreview.table ++ fr"p ON" ++ pId.is(aId)
  val previewMissing = Seq(pId.isNull)

  // Only the FROM and WHERE parts depend on the collective; the query itself is shared.
  val (from, where) = coll match {
    case Some(cid) =>
      (
        attachmentWithPreview ++ fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem),
        and(previewMissing ++ Seq(iColl.is(cid)))
      )
    case None =>
      (attachmentWithPreview, and(previewMissing))
  }

  selectSimple(cols, from, where)
    .query[RAttachment]
    .streamWithChunkSize(chunkSize)
}
|
||||||
|
|
||||||
def findNonConvertedPdf(
|
def findNonConvertedPdf(
|
||||||
coll: Option[Ident],
|
coll: Option[Ident],
|
||||||
chunkSize: Int
|
chunkSize: Int
|
||||||
|
Loading…
x
Reference in New Issue
Block a user