mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Update pdfbox to 3.0.0
This commit is contained in:
@ -11,11 +11,12 @@ import cats.implicits._
|
||||
import fs2.Stream
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument
|
||||
import org.apache.pdfbox.{Loader => PdfboxLoader}
|
||||
|
||||
object PdfLoader {
|
||||
|
||||
/** Builds a `PDDocument` from an in-memory byte array, with the load suspended in `F` via
  * `Sync[F].delay` so it only runs when the effect is evaluated.
  *
  * NOTE(review): this text is a diff rendering whose +/- markers were lost. The two bodies
  * below look like the removed line (`PDDocument.load`) followed by its replacement
  * (`PdfboxLoader.loadPDF`) from the "Update pdfbox to 3.0.0" commit — confirm against the repo.
  *
  * @param bytes the raw bytes handed to the PDF loader
  * @return an effect yielding the loaded document
  */
private def readBytes1[F[_]: Sync](bytes: Array[Byte]): F[PDDocument] =
|
||||
Sync[F].delay(PDDocument.load(bytes))
|
||||
Sync[F].delay(PdfboxLoader.loadPDF(bytes))
|
||||
|
||||
/** Closes the given document, with the `close()` call suspended in `F` via `Sync[F].delay`.
  *
  * @param pd the document to close
  * @return an effect that calls `pd.close()` when evaluated
  */
private def closePDDocument[F[_]: Sync](pd: PDDocument): F[Unit] =
|
||||
Sync[F].delay(pd.close())
|
||||
|
@ -6,8 +6,6 @@
|
||||
|
||||
package docspell.extract.pdfbox
|
||||
|
||||
import java.io.InputStream
|
||||
|
||||
import scala.util.{Try, Using}
|
||||
|
||||
import cats.effect.Sync
|
||||
@ -20,6 +18,7 @@ import docspell.extract.internal.Text
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument
|
||||
import org.apache.pdfbox.text.PDFTextStripper
|
||||
import org.apache.pdfbox.{Loader => PdfboxLoader}
|
||||
|
||||
object PdfboxExtract {
|
||||
|
||||
@ -44,11 +43,8 @@ object PdfboxExtract {
|
||||
.attempt
|
||||
.map(_.flatten)
|
||||
|
||||
/** Loads a PDF from an input stream inside `Using`, applies `readText` to it and flattens
  * the nested result (`Try` of `Either`) into a single `Either`.
  *
  * NOTE(review): the enclosing hunk header (11 old lines, 8 new) suggests this
  * `InputStream` overload is deleted by the commit shown here — confirm against the repo.
  *
  * @param is stream the PDF is read from
  * @return the text from `readText`, or the error raised while loading or reading
  */
def getText(is: InputStream): Either[Throwable, Text] =
|
||||
Using(PDDocument.load(is))(readText).toEither.flatten
|
||||
|
||||
/** Loads the PDF at `inFile` (converted with `toNioPath.toFile`) inside `Using`, applies
  * `readText` to it and flattens the nested result into a single `Either`.
  *
  * NOTE(review): this text is a diff rendering whose +/- markers were lost. The two bodies
  * below look like the removed line (`PDDocument.load`) followed by its replacement
  * (`PdfboxLoader.loadPDF`) — confirm against the repo.
  *
  * @param inFile location of the PDF file
  * @return the text from `readText`, or the error raised while loading or reading
  */
def getText(inFile: Path): Either[Throwable, Text] =
|
||||
Using(PDDocument.load(inFile.toNioPath.toFile))(readText).toEither.flatten
|
||||
Using(PdfboxLoader.loadPDF(inFile.toNioPath.toFile))(readText).toEither.flatten
|
||||
|
||||
private def readText(doc: PDDocument): Either[Throwable, Text] =
|
||||
Try {
|
||||
@ -64,11 +60,8 @@ object PdfboxExtract {
|
||||
.attempt
|
||||
.map(_.flatten)
|
||||
|
||||
/** Loads a PDF from an input stream inside `Using`, applies `readMetaData` to it and
  * flattens the nested result (`Try` of `Either`) into a single `Either`.
  *
  * NOTE(review): the enclosing hunk header (11 old lines, 8 new) suggests this
  * `InputStream` overload is deleted by the commit shown here — confirm against the repo.
  *
  * @param is stream the PDF is read from
  * @return the metadata from `readMetaData`, or the error raised while loading or reading
  */
def getMetaData(is: InputStream): Either[Throwable, PdfMetaData] =
|
||||
Using(PDDocument.load(is))(readMetaData).toEither.flatten
|
||||
|
||||
/** Loads the PDF at `inFile` (converted with `toNioPath.toFile`) inside `Using`, applies
  * `readMetaData` to it and flattens the nested result into a single `Either`.
  *
  * NOTE(review): this text is a diff rendering whose +/- markers were lost. The two bodies
  * below look like the removed line (`PDDocument.load`) followed by its replacement
  * (`PdfboxLoader.loadPDF`) — confirm against the repo.
  *
  * @param inFile location of the PDF file
  * @return the metadata from `readMetaData`, or the error raised while loading or reading
  */
def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] =
|
||||
Using(PDDocument.load(inFile.toNioPath.toFile))(readMetaData).toEither.flatten
|
||||
Using(PdfboxLoader.loadPDF(inFile.toNioPath.toFile))(readMetaData).toEither.flatten
|
||||
|
||||
private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] =
|
||||
Try {
|
||||
@ -83,7 +76,7 @@ object PdfboxExtract {
|
||||
mkValue(info.getKeywords),
|
||||
mkValue(info.getCreator),
|
||||
Option(info.getCreationDate).map(c => Timestamp(c.toInstant)),
|
||||
doc.getNumberOfPages()
|
||||
doc.getNumberOfPages
|
||||
)
|
||||
}.toEither
|
||||
}
|
||||
|
Reference in New Issue
Block a user