mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-25 16:45:05 +00:00
Provide endpoints to submit tasks to re-generate previews
The scaling factor (dpi) can be given in the config file. When it changes, preview images can be regenerated by sending a POST request to the new endpoints. It is possible to regenerate the preview of a single attachment or the previews of all attachments within a collective.
This commit is contained in:
parent
6037b54959
commit
f4e50c5229
@ -8,6 +8,45 @@ import docspell.store.records.RJob
|
||||
|
||||
object JobFactory {
|
||||
|
||||
/** Creates a low-priority job that generates the preview image of a
  * single attachment.
  *
  * The job is grouped under the collective of `account` and submitted by
  * its user. Without an account, `DocspellSystem.taskGroup` and
  * `DocspellSystem.user` are used instead. The tracker is built from the
  * task name and the attachment id.
  *
  * The job is only created here; it is not inserted into any queue.
  *
  * @param args    the task arguments, naming the attachment to process
  * @param account the account on whose behalf the job is created, if any
  */
def makePreview[F[_]: Sync](
    args: MakePreviewArgs,
    account: Option[AccountId]
): F[RJob] =
  for {
    id  <- Ident.randomId[F]
    now <- Timestamp.current[F]
  } yield RJob.newJob(
    id,
    MakePreviewArgs.taskName,
    account.map(_.collective).getOrElse(DocspellSystem.taskGroup),
    args,
    // a plain literal: there is nothing to interpolate in this subject
    "Generate preview image",
    now,
    account.map(_.user).getOrElse(DocspellSystem.user),
    Priority.Low,
    Some(MakePreviewArgs.taskName / args.attachment)
  )
|
||||
|
||||
/** Creates a low-priority job that generates preview images for many
  * attachments at once.
  *
  * The job is grouped under `args.collective`, falling back to
  * `DocspellSystem.taskGroup`, and always uses
  * `DocspellSystem.allPreviewTaskTracker` as its tracker.
  *
  * The job is only created here; it is not inserted into any queue.
  *
  * @param args      the task arguments, including the optional collective
  * @param submitter the user submitting the job; the system user is used
  *                  when absent
  */
def allPreviews[F[_]: Sync](
    args: AllPreviewsArgs,
    submitter: Option[Ident]
): F[RJob] =
  for {
    id  <- Ident.randomId[F]
    now <- Timestamp.current[F]
  } yield RJob.newJob(
    id,
    AllPreviewsArgs.taskName,
    args.collective.getOrElse(DocspellSystem.taskGroup),
    args,
    "Create preview images",
    now,
    // The submitter is a user, so fall back to the system user (as
    // `makePreview` does) rather than to the task group.
    // NOTE(review): presumably both constants hold the same id - confirm.
    submitter.getOrElse(DocspellSystem.user),
    Priority.Low,
    Some(DocspellSystem.allPreviewTaskTracker)
  )
|
||||
|
||||
def convertAllPdfs[F[_]: Sync](
|
||||
collective: Option[Ident],
|
||||
account: AccountId,
|
||||
|
@ -4,9 +4,11 @@ import cats.effect.{Effect, Resource}
|
||||
import cats.implicits._
|
||||
import fs2.Stream
|
||||
|
||||
import docspell.backend.JobFactory
|
||||
import docspell.backend.PasswordCrypt
|
||||
import docspell.backend.ops.OCollective._
|
||||
import docspell.common._
|
||||
import docspell.store.UpdateResult
|
||||
import docspell.store.queries.QCollective
|
||||
import docspell.store.queue.JobQueue
|
||||
import docspell.store.records._
|
||||
@ -51,6 +53,15 @@ trait OCollective[F[_]] {
|
||||
def findEnabledSource(sourceId: Ident): F[Option[RSource]]
|
||||
|
||||
def startLearnClassifier(collective: Ident): F[Unit]
|
||||
|
||||
/** Submits a task that (re)generates the preview images for all
* attachments of the given collective.
*
* @param storeMode passed on to the task as part of its arguments; it
*   selects whether existing previews are replaced or only missing
*   ones are created
* @param account the collective of this account is processed and its
*   user is given as the submitter of the task
* @param notifyJoex whether to notify all joex nodes after the task has
*   been submitted
*/
|
||||
def generatePreviews(
|
||||
storeMode: MakePreviewArgs.StoreMode,
|
||||
account: AccountId,
|
||||
notifyJoex: Boolean
|
||||
): F[UpdateResult]
|
||||
}
|
||||
|
||||
object OCollective {
|
||||
@ -210,5 +221,20 @@ object OCollective {
|
||||
|
||||
/** Runs the `RSource.findEnabled` query for the given id against the store. */
def findEnabledSource(sourceId: Ident): F[Option[RSource]] = {
  val lookup = RSource.findEnabled(sourceId)
  store.transact(lookup)
}
|
||||
|
||||
/** Creates an "all previews" job for the collective of `account`,
  * inserts it into the queue via `insertIfNew` and, if requested,
  * notifies all joex nodes. Always yields `UpdateResult.success`.
  */
def generatePreviews(
    storeMode: MakePreviewArgs.StoreMode,
    account: AccountId,
    notifyJoex: Boolean
): F[UpdateResult] = {
  val jobArgs = AllPreviewsArgs(Some(account.collective), storeMode)

  JobFactory
    .allPreviews[F](jobArgs, Some(account.user))
    .flatMap(job => queue.insertIfNew(job))
    .flatMap(_ => if (notifyJoex) joex.notifyAllNodes else ().pure[F])
    .map(_ => UpdateResult.success)
}
|
||||
|
||||
})
|
||||
}
|
||||
|
@ -175,6 +175,15 @@ trait OItem[F[_]] {
|
||||
account: AccountId,
|
||||
notifyJoex: Boolean
|
||||
): F[UpdateResult]
|
||||
|
||||
/** Submits a task that (re)generates the preview image for an
* attachment.
*
* @param args the task arguments, naming the attachment to process
* @param account the account on whose behalf the task is submitted
* @param notifyJoex whether to notify all joex nodes after the task has
*   been submitted
*/
|
||||
def generatePreview(
|
||||
args: MakePreviewArgs,
|
||||
account: AccountId,
|
||||
notifyJoex: Boolean
|
||||
): F[UpdateResult]
|
||||
}
|
||||
|
||||
object OItem {
|
||||
@ -656,6 +665,17 @@ object OItem {
|
||||
_ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F]
|
||||
} yield UpdateResult.success
|
||||
|
||||
/** Creates a preview job for the attachment named in `args`, inserts it
  * into the queue via `insertIfNew` and, if requested, notifies all joex
  * nodes. Always yields `UpdateResult.success`.
  */
def generatePreview(
    args: MakePreviewArgs,
    account: AccountId,
    notifyJoex: Boolean
): F[UpdateResult] =
  JobFactory
    .makePreview[F](args, Some(account))
    .flatMap(job => queue.insertIfNew(job))
    .flatMap(_ => if (notifyJoex) joex.notifyAllNodes else ().pure[F])
    .map(_ => UpdateResult.success)
|
||||
|
||||
private def onSuccessIgnoreError(update: F[Unit])(ar: UpdateResult): F[Unit] =
|
||||
ar match {
|
||||
case UpdateResult.Success =>
|
||||
|
@ -18,6 +18,12 @@ object MakePreviewArgs {
|
||||
|
||||
val taskName = Ident.unsafe("make-preview")
|
||||
|
||||
/** Arguments for the given attachment using `StoreMode.Replace`. */
def replace(attach: Ident): MakePreviewArgs = {
  val mode: StoreMode = StoreMode.Replace
  MakePreviewArgs(attach, mode)
}
|
||||
|
||||
/** Arguments for the given attachment using `StoreMode.WhenMissing`. */
def whenMissing(attach: Ident): MakePreviewArgs = {
  val mode: StoreMode = StoreMode.WhenMissing
  MakePreviewArgs(attach, mode)
}
|
||||
|
||||
sealed trait StoreMode extends Product {
|
||||
final def name: String =
|
||||
productPrefix.toLowerCase()
|
||||
|
@ -1,5 +1,6 @@
|
||||
package docspell.extract
|
||||
|
||||
import docspell.extract.ocr.OcrConfig
|
||||
import docspell.extract.pdfbox.PreviewConfig
|
||||
|
||||
case class ExtractConfig(ocr: OcrConfig, pdf: PdfConfig)
|
||||
case class ExtractConfig(ocr: OcrConfig, pdf: PdfConfig, preview: PreviewConfig)
|
||||
|
@ -21,11 +21,13 @@ trait PdfboxPreview[F[_]] {
|
||||
|
||||
object PdfboxPreview {
|
||||
|
||||
def apply[F[_]: Sync](dpi: Float): F[PdfboxPreview[F]] =
|
||||
def apply[F[_]: Sync](cfg: PreviewConfig): F[PdfboxPreview[F]] =
|
||||
Sync[F].pure(new PdfboxPreview[F] {
|
||||
|
||||
def previewImage(pdf: Stream[F, Byte]): F[Option[BufferedImage]] =
|
||||
PdfLoader.withDocumentStream(pdf)(doc => Sync[F].delay(getPageImage(doc, 0, dpi)))
|
||||
PdfLoader.withDocumentStream(pdf)(doc =>
|
||||
Sync[F].delay(getPageImage(doc, 0, cfg.dpi))
|
||||
)
|
||||
|
||||
def previewPNG(pdf: Stream[F, Byte]): F[Option[Stream[F, Byte]]] =
|
||||
previewImage(pdf).map(_.map(pngStream[F]))
|
||||
|
@ -0,0 +1,3 @@
|
||||
package docspell.extract.pdfbox
|
||||
|
||||
/** Settings for creating preview images.
  *
  * @param dpi the dpi used when rendering a pdf page into an image
  */
case class PreviewConfig(dpi: Float)
|
@ -21,7 +21,7 @@ object PdfboxPreviewTest extends SimpleTestSuite {
|
||||
val data = file.readURL[IO](8192, blocker)
|
||||
val sha256out =
|
||||
Stream
|
||||
.eval(PdfboxPreview[IO](48))
|
||||
.eval(PdfboxPreview[IO](PreviewConfig(48)))
|
||||
.evalMap(_.previewPNG(data))
|
||||
.flatMap(_.get)
|
||||
.through(fs2.hash.sha256)
|
||||
|
@ -172,6 +172,18 @@ docspell.joex {
|
||||
min-text-len = 500
|
||||
}
|
||||
|
||||
preview {
|
||||
# When rendering a pdf page, use this dpi. This results in
|
||||
# scaling the image. A standard A4 page rendered at 96dpi
|
||||
# results in an image of roughly 790x1100px. Using 32 results in
|
||||
# an image of roughly 265x375px.
|
||||
#
|
||||
# Note, when this is changed, you might want to re-generate
|
||||
# preview images. Check the API for this; there is an endpoint
|
||||
# to regenerate all for a collective.
|
||||
dpi = 32
|
||||
}
|
||||
|
||||
# Extracting text using OCR works for image and pdf files. It will
|
||||
# first run ghostscript to create a gray image from a pdf. Then
|
||||
# unpaper is run to optimize the image for the upcoming ocr, which
|
||||
|
@ -174,7 +174,7 @@ object JoexAppImpl {
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
MakePreviewArgs.taskName,
|
||||
MakePreviewTask[F](cfg.convert),
|
||||
MakePreviewTask[F](cfg.convert, cfg.extraction.preview),
|
||||
MakePreviewTask.onCancel[F]
|
||||
)
|
||||
)
|
||||
|
@ -1,13 +1,16 @@
|
||||
package docspell.joex.preview
|
||||
|
||||
import fs2.{Chunk, Stream}
|
||||
import docspell.common._
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.store.queue.JobQueue
|
||||
import fs2.{Chunk, Stream}
|
||||
|
||||
import docspell.backend.JobFactory
|
||||
import docspell.backend.ops.OJoex
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.common.MakePreviewArgs.StoreMode
|
||||
import docspell.common._
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.queue.JobQueue
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records.RJob
|
||||
|
||||
@ -33,7 +36,7 @@ object AllPreviewsTask {
|
||||
queue: JobQueue[F]
|
||||
): F[Int] =
|
||||
ctx.store
|
||||
.transact(RAttachment.findWithoutPreview(ctx.args.collective, 50))
|
||||
.transact(findAttachments(ctx))
|
||||
.chunks
|
||||
.flatMap(createJobs[F](ctx))
|
||||
.chunks
|
||||
@ -42,6 +45,14 @@ object AllPreviewsTask {
|
||||
.compile
|
||||
.foldMonoid
|
||||
|
||||
/** Selects the attachments to create preview jobs for, depending on the
  * store mode of the task arguments: every attachment for `Replace`,
  * only those found by `findWithoutPreview` for `WhenMissing`. Results
  * are streamed with a chunk size of 50.
  */
private def findAttachments[F[_]](ctx: Context[F, Args]) = {
  val collective = ctx.args.collective

  ctx.args.storeMode match {
    case StoreMode.WhenMissing =>
      RAttachment.findWithoutPreview(collective, 50)
    case StoreMode.Replace =>
      RAttachment.findAll(collective, 50)
  }
}
|
||||
|
||||
private def createJobs[F[_]: Sync](
|
||||
ctx: Context[F, Args]
|
||||
)(ras: Chunk[RAttachment]): Stream[F, RJob] = {
|
||||
@ -68,19 +79,6 @@ object AllPreviewsTask {
|
||||
}
|
||||
|
||||
def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] =
|
||||
for {
|
||||
id <- Ident.randomId[F]
|
||||
now <- Timestamp.current[F]
|
||||
} yield RJob.newJob(
|
||||
id,
|
||||
AllPreviewsArgs.taskName,
|
||||
cid.getOrElse(DocspellSystem.taskGroup),
|
||||
AllPreviewsArgs(cid, storeMode),
|
||||
"Create preview images",
|
||||
now,
|
||||
DocspellSystem.taskGroup,
|
||||
Priority.Low,
|
||||
Some(DocspellSystem.allPreviewTaskTracker)
|
||||
)
|
||||
JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None)
|
||||
|
||||
}
|
||||
|
@ -1,25 +1,27 @@
|
||||
package docspell.joex.preview
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common._
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.records.RAttachmentPreview
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.process.AttachmentPreview
|
||||
import docspell.convert.ConvertConfig
|
||||
import docspell.extract.pdfbox.PdfboxPreview
|
||||
import docspell.extract.pdfbox.PreviewConfig
|
||||
import docspell.joex.process.AttachmentPreview
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records.RAttachmentPreview
|
||||
|
||||
object MakePreviewTask {
|
||||
|
||||
type Args = MakePreviewArgs
|
||||
|
||||
def apply[F[_]: Sync](cfg: ConvertConfig): Task[F, Args, Unit] =
|
||||
def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig): Task[F, Args, Unit] =
|
||||
Task { ctx =>
|
||||
for {
|
||||
exists <- previewExists(ctx)
|
||||
preview <- PdfboxPreview(30)
|
||||
preview <- PdfboxPreview(pcfg)
|
||||
_ <-
|
||||
if (exists)
|
||||
ctx.logger.info(
|
||||
@ -44,7 +46,9 @@ object MakePreviewTask {
|
||||
ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment))
|
||||
_ <- ra
|
||||
.map(AttachmentPreview.createPreview(ctx, preview, cfg.chunkSize))
|
||||
.getOrElse(().pure[F])
|
||||
.getOrElse(
|
||||
ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}")
|
||||
)
|
||||
} yield ()
|
||||
|
||||
private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] =
|
||||
|
@ -9,13 +9,14 @@ import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.convert._
|
||||
import docspell.extract.pdfbox.PdfboxPreview
|
||||
import docspell.extract.pdfbox.PreviewConfig
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.queries.QAttachment
|
||||
import docspell.store.records.RAttachment
|
||||
import docspell.store.records._
|
||||
import docspell.store.syntax.MimeTypes._
|
||||
|
||||
import bitpeace.{Mimetype, MimetypeHint, RangeDef}
|
||||
import docspell.store.queries.QAttachment
|
||||
|
||||
/** Goes through all attachments that must be already converted into a
|
||||
* pdf. If it is a pdf, the first page is converted into a small
|
||||
@ -23,7 +24,7 @@ import docspell.store.queries.QAttachment
|
||||
*/
|
||||
object AttachmentPreview {
|
||||
|
||||
def apply[F[_]: Sync: ContextShift](cfg: ConvertConfig)(
|
||||
def apply[F[_]: Sync: ContextShift](cfg: ConvertConfig, pcfg: PreviewConfig)(
|
||||
item: ItemData
|
||||
): Task[F, ProcessItemArgs, ItemData] =
|
||||
Task { ctx =>
|
||||
@ -31,7 +32,7 @@ object AttachmentPreview {
|
||||
_ <- ctx.logger.info(
|
||||
s"Creating preview images for ${item.attachments.size} files…"
|
||||
)
|
||||
preview <- PdfboxPreview(24)
|
||||
preview <- PdfboxPreview(pcfg)
|
||||
_ <- item.attachments
|
||||
.traverse(createPreview(ctx, preview, cfg.chunkSize))
|
||||
.attempt
|
||||
|
@ -54,7 +54,7 @@ object ProcessItem {
|
||||
ConvertPdf(cfg.convert, item)
|
||||
.flatMap(Task.setProgress(progress._1))
|
||||
.flatMap(TextExtraction(cfg.extraction, fts))
|
||||
.flatMap(AttachmentPreview(cfg.convert))
|
||||
.flatMap(AttachmentPreview(cfg.convert, cfg.extraction.preview))
|
||||
.flatMap(Task.setProgress(progress._2))
|
||||
.flatMap(analysisOnly[F](cfg, analyser, regexNer))
|
||||
.flatMap(Task.setProgress(progress._3))
|
||||
|
@ -2526,6 +2526,24 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: binary
|
||||
post:
|
||||
tags: [ Attachment ]
|
||||
summary: (Re)generate a preview image.
|
||||
description: |
|
||||
Submits a task that generates a preview image for this
|
||||
attachment. The existing preview will be replaced.
|
||||
security:
|
||||
- authTokenHeader: []
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/id"
|
||||
responses:
|
||||
200:
|
||||
description: Ok
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/BasicResult"
|
||||
|
||||
/sec/attachment/{id}/meta:
|
||||
get:
|
||||
tags: [ Attachment ]
|
||||
|
@ -7,6 +7,7 @@ import docspell.backend.BackendApp
|
||||
import docspell.backend.auth.AuthToken
|
||||
import docspell.backend.ops._
|
||||
import docspell.common.Ident
|
||||
import docspell.common.MakePreviewArgs
|
||||
import docspell.restapi.model._
|
||||
import docspell.restserver.conv.Conversions
|
||||
import docspell.restserver.http4s.BinaryUtil
|
||||
@ -129,6 +130,18 @@ object AttachmentRoutes {
|
||||
.getOrElse(NotFound(BasicResult(false, "Not found")))
|
||||
} yield resp
|
||||
|
||||
// Submits a task that regenerates the preview of this attachment,
// replacing an existing one, and notifies the joex nodes.
case POST -> Root / Ident(id) / "preview" =>
  backend.item
    .generatePreview(MakePreviewArgs.replace(id), user.account, notifyJoex = true)
    .flatMap(res =>
      Ok(Conversions.basicResult(res, "Generating preview image task submitted."))
    )
|
||||
|
||||
case GET -> Root / Ident(id) / "view" =>
|
||||
// this route exists to provide a stable url
|
||||
// it redirects currently to viewerjs
|
||||
|
@ -6,6 +6,7 @@ import cats.implicits._
|
||||
import docspell.backend.BackendApp
|
||||
import docspell.backend.auth.AuthToken
|
||||
import docspell.backend.ops.OCollective
|
||||
import docspell.common.MakePreviewArgs
|
||||
import docspell.restapi.model._
|
||||
import docspell.restserver.conv.Conversions
|
||||
import docspell.restserver.http4s._
|
||||
@ -94,6 +95,18 @@ object CollectiveRoutes {
|
||||
resp <- Ok(BasicResult(true, "Task submitted"))
|
||||
} yield resp
|
||||
|
||||
// Submits a task that regenerates the previews of all attachments of
// the user's collective, replacing existing ones, and notifies the
// joex nodes.
case POST -> Root / "previews" =>
  backend.collective
    .generatePreviews(
      MakePreviewArgs.StoreMode.Replace,
      user.account,
      notifyJoex = true
    )
    .flatMap(res =>
      Ok(Conversions.basicResult(res, "Generate all previews task submitted."))
    )
|
||||
|
||||
case GET -> Root =>
|
||||
for {
|
||||
collDb <- backend.collective.find(user.account.collective)
|
||||
|
@ -26,9 +26,9 @@ object QAttachment {
|
||||
Stream
|
||||
.evalSeq(store.transact(findPreview))
|
||||
.map(_.fileId.id)
|
||||
.evalTap(_ => store.transact(RAttachmentPreview.delete(attachId)))
|
||||
.flatMap(store.bitpeace.delete)
|
||||
.map(flag => if (flag) 1 else 0)
|
||||
.evalMap(_ => store.transact(RAttachmentPreview.delete(attachId)))
|
||||
.compile
|
||||
.foldMonoid
|
||||
}
|
||||
|
@ -231,6 +231,30 @@ object RAttachment {
|
||||
def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] =
|
||||
selectSimple(Seq(itemId), table, id.is(attachId)).query[Ident].option
|
||||
|
||||
/** Streams attachments, optionally restricted to one collective.
  *
  * @param coll      when defined, only attachments whose item belongs to
  *                  this collective are selected; otherwise no condition
  *                  is applied
  * @param chunkSize the chunk size used when streaming the query results
  */
def findAll(
    coll: Option[Ident],
    chunkSize: Int
): Stream[ConnectionIO, RAttachment] = {
  val aItem = Columns.itemId.prefix("a")
  val iId   = RItem.Columns.id.prefix("i")
  val iColl = RItem.Columns.cid.prefix("i")

  // Every selected column is qualified with the alias "a", so each FROM
  // clause below has to introduce that alias for the attachment table.
  val cols = all.map(_.prefix("a"))

  val select = coll match {
    case Some(cid) =>
      val join = table ++ fr"a INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem)
      selectSimple(cols, join, iColl.is(cid))
    case None =>
      // Without the alias the statement would select `a.<column>` from an
      // unaliased table, referencing a table name that is not in scope.
      selectSimple(cols, table ++ fr"a", Fragment.empty)
  }

  select
    .query[RAttachment]
    .streamWithChunkSize(chunkSize)
}
|
||||
|
||||
def findWithoutPreview(
|
||||
coll: Option[Ident],
|
||||
chunkSize: Int
|
||||
|
@ -97,7 +97,7 @@
|
||||
background: #fff;
|
||||
}
|
||||
.default-layout img.preview-image {
|
||||
max-width: 200px;
|
||||
max-width: 160px;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user