mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 10:59:33 +00:00
Merge pull request #1282 from eikek/fix/high-dpi
Allow subsampling when generating preview images
This commit is contained in:
commit
147b6f9023
@ -6,10 +6,13 @@
|
|||||||
|
|
||||||
package docspell.extract.pdfbox
|
package docspell.extract.pdfbox
|
||||||
|
|
||||||
|
import java.awt.RenderingHints
|
||||||
import java.awt.image.BufferedImage
|
import java.awt.image.BufferedImage
|
||||||
import java.awt.image.RenderedImage
|
import java.awt.image.RenderedImage
|
||||||
import javax.imageio.ImageIO
|
import javax.imageio.ImageIO
|
||||||
|
|
||||||
|
import scala.jdk.CollectionConverters._
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import fs2.Chunk
|
import fs2.Chunk
|
||||||
@ -17,6 +20,8 @@ import fs2.Stream
|
|||||||
|
|
||||||
import org.apache.commons.io.output.ByteArrayOutputStream
|
import org.apache.commons.io.output.ByteArrayOutputStream
|
||||||
import org.apache.pdfbox.pdmodel.PDDocument
|
import org.apache.pdfbox.pdmodel.PDDocument
|
||||||
|
import org.apache.pdfbox.pdmodel.PDPage
|
||||||
|
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject
|
||||||
import org.apache.pdfbox.rendering.PDFRenderer
|
import org.apache.pdfbox.rendering.PDFRenderer
|
||||||
import scodec.bits.ByteVector
|
import scodec.bits.ByteVector
|
||||||
|
|
||||||
@ -27,6 +32,7 @@ trait PdfboxPreview[F[_]] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
object PdfboxPreview {
|
object PdfboxPreview {
|
||||||
|
private[this] val logger = org.log4s.getLogger
|
||||||
|
|
||||||
def apply[F[_]: Sync](cfg: PreviewConfig): F[PdfboxPreview[F]] =
|
def apply[F[_]: Sync](cfg: PreviewConfig): F[PdfboxPreview[F]] =
|
||||||
Sync[F].pure(new PdfboxPreview[F] {
|
Sync[F].pure(new PdfboxPreview[F] {
|
||||||
@ -50,14 +56,52 @@ object PdfboxPreview {
|
|||||||
if (count <= 0 || page < 0 || count <= page) None
|
if (count <= 0 || page < 0 || count <= page) None
|
||||||
else {
|
else {
|
||||||
val renderer = new PDFRenderer(pdoc)
|
val renderer = new PDFRenderer(pdoc)
|
||||||
|
renderer.setImageDownscalingOptimizationThreshold(0.85f)
|
||||||
|
val hints = new RenderingHints(
|
||||||
|
RenderingHints.KEY_RENDERING,
|
||||||
|
RenderingHints.VALUE_RENDER_QUALITY
|
||||||
|
)
|
||||||
|
hints.put(
|
||||||
|
RenderingHints.KEY_COLOR_RENDERING,
|
||||||
|
RenderingHints.VALUE_COLOR_RENDER_QUALITY
|
||||||
|
)
|
||||||
|
hints.put(
|
||||||
|
RenderingHints.KEY_INTERPOLATION,
|
||||||
|
RenderingHints.VALUE_INTERPOLATION_BICUBIC
|
||||||
|
)
|
||||||
|
hints.put(
|
||||||
|
RenderingHints.KEY_TEXT_ANTIALIASING,
|
||||||
|
RenderingHints.VALUE_TEXT_ANTIALIAS_ON
|
||||||
|
)
|
||||||
|
hints.put(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON)
|
||||||
|
renderer.setRenderingHints(hints)
|
||||||
|
renderer.setSubsamplingAllowed(enableSubsampling(pdoc.getPage(page)))
|
||||||
|
logger.debug(s"Converting pdf page $page to image at dpi $dpi")
|
||||||
Option(renderer.renderImageWithDPI(page, dpi))
|
Option(renderer.renderImageWithDPI(page, dpi))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Decides whether the renderer may subsample images while rendering the
  * given page.
  *
  * All image XObjects referenced directly by the page resources are
  * inspected; subsampling is enabled when the largest one has more than
  * `10 * 1024 * 1024` pixels.
  *
  * @param page
  *   the pdf page that is about to be rendered
  * @return
  *   `true` if the page contains at least one image above the pixel
  *   threshold, `false` otherwise (including pages without resources or
  *   without images)
  */
private def enableSubsampling(page: PDPage): Boolean = {
  // Pixel count above which an embedded image is considered large.
  val pixelThreshold = 10L * 1024 * 1024

  // `getResources` is a Java API and may hand back null if the page has no
  // resource dictionary; treat that the same as "no images".
  val largestImage =
    Option(page.getResources()).flatMap { res =>
      res
        .getXObjectNames()
        .asScala
        .map(name => res.getXObject(name))
        .collect { case xobj: PDImageXObject => xobj }
        .map { imgobj =>
          val w = imgobj.getWidth()
          val h = imgobj.getHeight()
          // Multiply as Long: an Int product overflows for very large images
          // and could turn negative, which would wrongly disable subsampling.
          val pixels = w.toLong * h.toLong
          logger.debug(s"Found image in pdf of size ${w}x${h} (${pixels}px)")
          pixels
        }
        .maxOption
    }

  largestImage.exists(_ > pixelThreshold)
}
|
||||||
|
|
||||||
/** Encodes the given image as PNG and exposes the bytes as a stream.
  *
  * The image is written into an in-memory buffer right when this method is
  * called (not when the stream is evaluated); the resulting stream emits
  * the whole buffer as a single chunk.
  *
  * @param img
  *   the rendered image to encode
  * @return
  *   a stream of the PNG encoded bytes
  */
private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = {
  val buffer = new ByteArrayOutputStream()
  ImageIO.write(img, "PNG", buffer)
  // Wrap the array without copying it and emit it as one chunk.
  val pngBytes = ByteVector.view(buffer.toByteArray())
  val chunk = Chunk.byteVector(pngBytes)
  Stream.chunk(chunk)
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user