Use passwords when reading PDFs

This commit is contained in:
eikek
2021-09-30 11:11:08 +02:00
parent f74624485f
commit aa8f3b82fc
9 changed files with 73 additions and 37 deletions

View File

@ -595,8 +595,12 @@ Docspell Update Check
# docspell needs to read it. It also requires specifying a
# password here. All passwords are tried when reading a PDF.
#
# This is enabled by default, using an empty password list. This
# This is enabled by default with an empty password list. This
# removes protection from PDFs, which is better for processing.
#
# Passwords can be given here and each collective can maintain
# their passwords as well. But if the `enabled` setting below is
# `false`, then no attempt at decrypting is done.
decrypt-pdf = {
enabled = true
passwords = []

View File

@ -77,17 +77,27 @@ object ConvertPdf {
ctx: Context[F, ProcessItemArgs],
item: ItemData
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv =>
mime match {
case mt =>
val data = ctx.store.fileStore.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, cfg, ra, item)
ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
data
)
loadCollectivePasswords(ctx).flatMap(collPass =>
Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv =>
mime match {
case mt =>
val data = ctx.store.fileStore.getBytes(ra.fileId)
val handler = conversionHandler[F](ctx, cfg, ra, item)
ctx.logger
.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
data
)
}
}
}
)
/** Fetches the passwords stored for the collective named in the job
  * arguments (`ctx.args.meta.collective`).
  *
  * All `RCollectivePassword` records found for that collective are read
  * through `ctx.store.transact`; only the `password` field of each record
  * is kept and duplicate passwords are dropped.
  *
  * @param ctx the processing context providing the store and job arguments
  * @return the distinct passwords of the collective
  */
private def loadCollectivePasswords[F[_]: Async](
    ctx: Context[F, ProcessItemArgs]
): F[List[Password]] = {
  val collective = ctx.args.meta.collective
  val storedRecords = ctx.store.transact(RCollectivePassword.findAll(collective))
  // Reduce each record to its password and remove duplicates.
  storedRecords.map(records => records.map(_.password).distinct)
}
private def conversionHandler[F[_]: Sync](
ctx: Context[F, ProcessItemArgs],