Add simple pdf page preview function

This commit is contained in:
Eike Kettner
2020-11-07 23:27:31 +01:00
parent 36317a3a95
commit 350a271b22
3 changed files with 124 additions and 0 deletions

View File

@ -0,0 +1,24 @@
package docspell.extract.pdfbox
import cats.effect._
import cats.implicits._
import fs2.Stream
import org.apache.pdfbox.pdmodel.PDDocument
/** Helpers for running code against a PDFBox document that is closed afterwards. */
object PdfLoader {

  /** Loads a `PDDocument` from the given bytes, applies `f` to it and closes
    * the document once `f` has finished, whether it succeeded or failed.
    *
    * @param pdf the complete PDF file contents
    * @param f the action to run with the opened document
    * @return the result of `f`
    */
  def withDocumentBytes[F[_]: Sync, A](pdf: Array[Byte])(f: PDDocument => F[A]): F[A] = {
    val open: F[PDDocument] = Sync[F].delay(PDDocument.load(pdf))
    val close: PDDocument => F[Unit] = doc => Sync[F].delay(doc.close())
    Sync[F].bracket(open)(f)(close)
  }

  /** Collects the whole stream into a byte array and delegates to
    * [[withDocumentBytes]].
    *
    * @param pdf the PDF file contents as a byte stream
    * @param f the action to run with the opened document
    * @return the result of `f`
    */
  def withDocumentStream[F[_]: Sync, A](pdf: Stream[F, Byte])(
      f: PDDocument => F[A]
  ): F[A] = {
    val collected: F[Array[Byte]] = pdf.compile.to(Array)
    collected.flatMap(bytes => withDocumentBytes[F, A](bytes)(f))
  }
}

View File

@ -0,0 +1,54 @@
package docspell.extract.pdfbox
import java.awt.image.BufferedImage
import java.awt.image.RenderedImage
import javax.imageio.ImageIO
import cats.effect._
import cats.implicits._
import fs2.Chunk
import fs2.Stream
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.rendering.PDFRenderer
/** Produces preview images for PDF documents. */
trait PdfboxPreview[F[_]] {
/** Creates a preview of the given PDF; judging by the name the result is
  * PNG encoded.
  *
  * @param pdf the PDF file contents as a byte stream
  * @return the preview bytes as a stream, or `None` if no preview is available
  */
def previewPNG(pdf: Stream[F, Byte]): F[Option[Stream[F, Byte]]]
}
/** Factory for [[PdfboxPreview]] instances that render the first page of a
  * PDF with PDFBox.
  */
object PdfboxPreview {

  /** Creates a preview generator.
    *
    * @param dpi the resolution passed to the PDFBox renderer
    */
  def apply[F[_]: Sync](dpi: Float): F[PdfboxPreview[F]] =
    Sync[F].pure(new PdfboxPreview[F] {

      /** Renders the first page (index 0) of the PDF; `None` if there is no
        * such page.
        */
      def previewImage(pdf: Stream[F, Byte]): F[Option[BufferedImage]] =
        PdfLoader.withDocumentStream(pdf)(doc => Sync[F].delay(getPageImage(doc, 0, dpi)))

      /** Renders the first page and encodes it as PNG.
        *
        * The encoding is side-effecting and may throw, so it is suspended
        * with `delay` instead of being run inside a plain `map`; this way any
        * exception is raised in `F`.
        */
      def previewPNG(pdf: Stream[F, Byte]): F[Option[Stream[F, Byte]]] =
        previewImage(pdf).flatMap(_.traverse(img => Sync[F].delay(pngStream[F](img))))
    })

  /** Renders a single page of the document to an image.
    *
    * @param pdoc the opened document
    * @param page zero-based page index
    * @param dpi the resolution used for rendering
    * @return the rendered image, or `None` if `page` is outside the document
    */
  private def getPageImage(
      pdoc: PDDocument,
      page: Int,
      dpi: Float
  ): Option[BufferedImage] = {
    val count = pdoc.getNumberOfPages
    if (count <= 0 || page < 0 || count <= page) None
    else {
      val renderer = new PDFRenderer(pdoc)
      // Option(...) guards against a null result from the Java API.
      Option(renderer.renderImageWithDPI(page, dpi))
    }
  }

  /** Encodes the image as PNG into memory and returns the bytes as a
    * single-chunk stream.
    *
    * The encoding happens eagerly when this method is called, not when the
    * returned stream is run; callers must suspend the call themselves.
    */
  private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = {
    val out = new ByteArrayOutputStream()
    // The boolean result (false when no suitable writer is found) is ignored.
    ImageIO.write(img, "PNG", out)
    Stream.chunk(Chunk.bytes(out.toByteArray()))
  }
}