mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Provide endpoints to submit tasks to re-generate previews
The scaling factor (dpi) used for preview images can now be given in the config file. When this value changes, existing preview images can be regenerated by POSTing to dedicated endpoints. It is possible to regenerate the preview of just one attachment or the previews of all attachments within a collective.
This commit is contained in:
@ -172,6 +172,18 @@ docspell.joex {
|
||||
min-text-len = 500
|
||||
}
|
||||
|
||||
preview {
|
||||
# When rendering a pdf page, use this dpi. This results in
|
||||
# scaling the image. A standard A4 page rendered at 96dpi
|
||||
# results in a roughly 790x1100px image. Using 32 results in
|
||||
# a roughly 200x300px image.
|
||||
#
|
||||
# Note, when this is changed, you might want to re-generate
|
||||
# preview images. Check the API for this; there is an endpoint
|
||||
# to regenerate all for a collective.
|
||||
dpi = 32
|
||||
}
|
||||
|
||||
# Extracting text using OCR works for image and pdf files. It will
|
||||
# first run ghostscript to create a gray image from a pdf. Then
|
||||
# unpaper is run to optimize the image for the upcoming ocr, which
|
||||
|
@ -174,7 +174,7 @@ object JoexAppImpl {
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
MakePreviewArgs.taskName,
|
||||
MakePreviewTask[F](cfg.convert),
|
||||
MakePreviewTask[F](cfg.convert, cfg.extraction.preview),
|
||||
MakePreviewTask.onCancel[F]
|
||||
)
|
||||
)
|
||||
|
@ -1,13 +1,16 @@
|
||||
package docspell.joex.preview
|
||||
|
||||
import fs2.{Chunk, Stream}
|
||||
import docspell.common._
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.store.queue.JobQueue
|
||||
import fs2.{Chunk, Stream}
|
||||
|
||||
import docspell.backend.JobFactory
|
||||
import docspell.backend.ops.OJoex
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.common.MakePreviewArgs.StoreMode
|
||||
import docspell.common._
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.queue.JobQueue
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records.RJob
|
||||
|
||||
@ -33,7 +36,7 @@ object AllPreviewsTask {
|
||||
queue: JobQueue[F]
|
||||
): F[Int] =
|
||||
ctx.store
|
||||
.transact(RAttachment.findWithoutPreview(ctx.args.collective, 50))
|
||||
.transact(findAttachments(ctx))
|
||||
.chunks
|
||||
.flatMap(createJobs[F](ctx))
|
||||
.chunks
|
||||
@ -42,6 +45,14 @@ object AllPreviewsTask {
|
||||
.compile
|
||||
.foldMonoid
|
||||
|
||||
private def findAttachments[F[_]](ctx: Context[F, Args]) =
|
||||
ctx.args.storeMode match {
|
||||
case StoreMode.Replace =>
|
||||
RAttachment.findAll(ctx.args.collective, 50)
|
||||
case StoreMode.WhenMissing =>
|
||||
RAttachment.findWithoutPreview(ctx.args.collective, 50)
|
||||
}
|
||||
|
||||
private def createJobs[F[_]: Sync](
|
||||
ctx: Context[F, Args]
|
||||
)(ras: Chunk[RAttachment]): Stream[F, RJob] = {
|
||||
@ -68,19 +79,6 @@ object AllPreviewsTask {
|
||||
}
|
||||
|
||||
def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] =
|
||||
for {
|
||||
id <- Ident.randomId[F]
|
||||
now <- Timestamp.current[F]
|
||||
} yield RJob.newJob(
|
||||
id,
|
||||
AllPreviewsArgs.taskName,
|
||||
cid.getOrElse(DocspellSystem.taskGroup),
|
||||
AllPreviewsArgs(cid, storeMode),
|
||||
"Create preview images",
|
||||
now,
|
||||
DocspellSystem.taskGroup,
|
||||
Priority.Low,
|
||||
Some(DocspellSystem.allPreviewTaskTracker)
|
||||
)
|
||||
JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None)
|
||||
|
||||
}
|
||||
|
@ -1,25 +1,27 @@
|
||||
package docspell.joex.preview
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common._
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.records.RAttachmentPreview
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.process.AttachmentPreview
|
||||
import docspell.convert.ConvertConfig
|
||||
import docspell.extract.pdfbox.PdfboxPreview
|
||||
import docspell.extract.pdfbox.PreviewConfig
|
||||
import docspell.joex.process.AttachmentPreview
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records.RAttachmentPreview
|
||||
|
||||
object MakePreviewTask {
|
||||
|
||||
type Args = MakePreviewArgs
|
||||
|
||||
def apply[F[_]: Sync](cfg: ConvertConfig): Task[F, Args, Unit] =
|
||||
def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig): Task[F, Args, Unit] =
|
||||
Task { ctx =>
|
||||
for {
|
||||
exists <- previewExists(ctx)
|
||||
preview <- PdfboxPreview(30)
|
||||
preview <- PdfboxPreview(pcfg)
|
||||
_ <-
|
||||
if (exists)
|
||||
ctx.logger.info(
|
||||
@ -44,7 +46,9 @@ object MakePreviewTask {
|
||||
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
|
||||
_ <- ra
|
||||
.map(AttachmentPreview.createPreview(ctx, preview, cfg.chunkSize))
|
||||
.getOrElse(().pure[F])
|
||||
.getOrElse(
|
||||
ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}")
|
||||
)
|
||||
} yield ()
|
||||
|
||||
private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] =
|
||||
|
@ -9,13 +9,14 @@ import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.convert._
|
||||
import docspell.extract.pdfbox.PdfboxPreview
|
||||
import docspell.extract.pdfbox.PreviewConfig
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.queries.QAttachment
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
|
||||
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
|
||||
import docspell.store.queries.QAttachment
|
||||
|
||||
/** Goes through all attachments that must be already converted into a
|
||||
* pdf. If it is a pdf, the first page is converted into a small
|
||||
@ -23,7 +24,7 @@ import docspell.store.queries.QAttachment
|
||||
*/
|
||||
object AttachmentPreview {
|
||||
|
||||
def apply[F[_]: Sync: ContextShift](cfg: ConvertConfig)(
|
||||
def apply[F[_]: Sync: ContextShift](cfg: ConvertConfig, pcfg: PreviewConfig)(
|
||||
item: ItemData
|
||||
): Task[F, ProcessItemArgs, ItemData] =
|
||||
Task { ctx =>
|
||||
@ -31,7 +32,7 @@ object AttachmentPreview {
|
||||
_ <- ctx.logger.info(
|
||||
s"Creating preview images for ${item.attachments.size} files…"
|
||||
)
|
||||
preview <- PdfboxPreview(24)
|
||||
preview <- PdfboxPreview(pcfg)
|
||||
_ <- item.attachments
|
||||
.traverse(createPreview(ctx, preview, cfg.chunkSize))
|
||||
.attempt
|
||||
|
@ -54,7 +54,7 @@ object ProcessItem {
|
||||
ConvertPdf(cfg.convert, item)
|
||||
.flatMap(Task.setProgress(progress._1))
|
||||
.flatMap(TextExtraction(cfg.extraction, fts))
|
||||
.flatMap(AttachmentPreview(cfg.convert))
|
||||
.flatMap(AttachmentPreview(cfg.convert, cfg.extraction.preview))
|
||||
.flatMap(Task.setProgress(progress._2))
|
||||
.flatMap(analysisOnly[F](cfg, analyser, regexNer))
|
||||
.flatMap(Task.setProgress(progress._3))
|
||||
|
Reference in New Issue
Block a user