mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Merge pull request #1095 from eikek/fix/1074-encrypted-pdf
Fix/1074 encrypted pdf
This commit is contained in:
commit
3921bb88de
@ -275,6 +275,7 @@ val common = project
|
|||||||
.settings(testSettingsMUnit)
|
.settings(testSettingsMUnit)
|
||||||
.settings(
|
.settings(
|
||||||
name := "docspell-common",
|
name := "docspell-common",
|
||||||
|
addCompilerPlugin(Dependencies.kindProjectorPlugin),
|
||||||
libraryDependencies ++=
|
libraryDependencies ++=
|
||||||
Dependencies.fs2 ++
|
Dependencies.fs2 ++
|
||||||
Dependencies.circe ++
|
Dependencies.circe ++
|
||||||
@ -409,7 +410,8 @@ val convert = project
|
|||||||
name := "docspell-convert",
|
name := "docspell-convert",
|
||||||
libraryDependencies ++=
|
libraryDependencies ++=
|
||||||
Dependencies.flexmark ++
|
Dependencies.flexmark ++
|
||||||
Dependencies.twelvemonkeys
|
Dependencies.twelvemonkeys ++
|
||||||
|
Dependencies.pdfbox
|
||||||
)
|
)
|
||||||
.dependsOn(common, files % "compile->compile;test->test")
|
.dependsOn(common, files % "compile->compile;test->test")
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ services:
|
|||||||
image: docspell/joex:latest
|
image: docspell/joex:latest
|
||||||
container_name: docspell-joex
|
container_name: docspell-joex
|
||||||
command:
|
command:
|
||||||
|
- -J-Xmx3G
|
||||||
- /opt/docspell.conf
|
- /opt/docspell.conf
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
env_file: ./.env
|
env_file: ./.env
|
||||||
|
@ -63,6 +63,12 @@ trait OCollective[F[_]] {
|
|||||||
|
|
||||||
def findEnabledSource(sourceId: Ident): F[Option[RSource]]
|
def findEnabledSource(sourceId: Ident): F[Option[RSource]]
|
||||||
|
|
||||||
|
def addPassword(collective: Ident, pw: Password): F[Unit]
|
||||||
|
|
||||||
|
def getPasswords(collective: Ident): F[List[RCollectivePassword]]
|
||||||
|
|
||||||
|
def removePassword(id: Ident): F[Unit]
|
||||||
|
|
||||||
def startLearnClassifier(collective: Ident): F[Unit]
|
def startLearnClassifier(collective: Ident): F[Unit]
|
||||||
|
|
||||||
def startEmptyTrash(args: EmptyTrashArgs): F[Unit]
|
def startEmptyTrash(args: EmptyTrashArgs): F[Unit]
|
||||||
@ -149,7 +155,7 @@ object OCollective {
|
|||||||
private def updateLearnClassifierTask(coll: Ident, sett: Settings): F[Unit] =
|
private def updateLearnClassifierTask(coll: Ident, sett: Settings): F[Unit] =
|
||||||
for {
|
for {
|
||||||
id <- Ident.randomId[F]
|
id <- Ident.randomId[F]
|
||||||
on = sett.classifier.map(_.enabled).getOrElse(false)
|
on = sett.classifier.exists(_.enabled)
|
||||||
timer = sett.classifier.map(_.schedule).getOrElse(CalEvent.unsafe(""))
|
timer = sett.classifier.map(_.schedule).getOrElse(CalEvent.unsafe(""))
|
||||||
args = LearnClassifierArgs(coll)
|
args = LearnClassifierArgs(coll)
|
||||||
ut = UserTask(
|
ut = UserTask(
|
||||||
@ -174,6 +180,18 @@ object OCollective {
|
|||||||
_ <- joex.notifyAllNodes
|
_ <- joex.notifyAllNodes
|
||||||
} yield ()
|
} yield ()
|
||||||
|
|
||||||
|
/** Creates a fresh password record for the collective and stores it through
  * `RCollectivePassword.upsert`; the value returned by the store is discarded.
  */
def addPassword(collective: Ident, pw: Password): F[Unit] =
  RCollectivePassword
    .createNew[F](collective, pw)
    .flatMap(record => store.transact(RCollectivePassword.upsert(record)))
    .map(_ => ())
|
||||||
|
|
||||||
|
/** Loads all password records stored for the given collective. */
def getPasswords(collective: Ident): F[List[RCollectivePassword]] = {
  val query = RCollectivePassword.findAll(collective)
  store.transact(query)
}
|
||||||
|
|
||||||
|
/** Deletes the password record with the given id; the value returned by the
  * delete is ignored.
  */
def removePassword(id: Ident): F[Unit] = {
  val deletion = RCollectivePassword.deleteById(id)
  store.transact(deletion).map(_ => ())
}
|
||||||
|
|
||||||
def startLearnClassifier(collective: Ident): F[Unit] =
|
def startLearnClassifier(collective: Ident): F[Unit] =
|
||||||
for {
|
for {
|
||||||
id <- Ident.randomId[F]
|
id <- Ident.randomId[F]
|
||||||
|
@ -7,12 +7,13 @@
|
|||||||
package docspell.common
|
package docspell.common
|
||||||
|
|
||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
|
import fs2.Stream
|
||||||
|
|
||||||
import docspell.common.syntax.all._
|
import docspell.common.syntax.all._
|
||||||
|
|
||||||
import org.log4s.{Logger => Log4sLogger}
|
import org.log4s.{Logger => Log4sLogger}
|
||||||
|
|
||||||
trait Logger[F[_]] {
|
trait Logger[F[_]] { self =>
|
||||||
|
|
||||||
def trace(msg: => String): F[Unit]
|
def trace(msg: => String): F[Unit]
|
||||||
def debug(msg: => String): F[Unit]
|
def debug(msg: => String): F[Unit]
|
||||||
@ -21,6 +22,25 @@ trait Logger[F[_]] {
|
|||||||
def error(ex: Throwable)(msg: => String): F[Unit]
|
def error(ex: Throwable)(msg: => String): F[Unit]
|
||||||
def error(msg: => String): F[Unit]
|
def error(msg: => String): F[Unit]
|
||||||
|
|
||||||
|
/** A view of this logger whose operations are streams: every method wraps the
  * corresponding call on the enclosing logger in `Stream.eval`.
  */
final def s: Logger[Stream[F, *]] = {
  // Single place where an effect becomes a one-element stream.
  def lift(effect: F[Unit]): Stream[F, Unit] = Stream.eval(effect)

  new Logger[Stream[F, *]] {
    def trace(msg: => String): Stream[F, Unit] = lift(self.trace(msg))

    def debug(msg: => String): Stream[F, Unit] = lift(self.debug(msg))

    def info(msg: => String): Stream[F, Unit] = lift(self.info(msg))

    def warn(msg: => String): Stream[F, Unit] = lift(self.warn(msg))

    def error(msg: => String): Stream[F, Unit] = lift(self.error(msg))

    def error(ex: Throwable)(msg: => String): Stream[F, Unit] =
      lift(self.error(ex)(msg))
  }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
object Logger {
|
object Logger {
|
||||||
|
@ -33,6 +33,7 @@ object Conversion {
|
|||||||
def create[F[_]: Async](
|
def create[F[_]: Async](
|
||||||
cfg: ConvertConfig,
|
cfg: ConvertConfig,
|
||||||
sanitizeHtml: SanitizeHtml,
|
sanitizeHtml: SanitizeHtml,
|
||||||
|
additionalPasswords: List[Password],
|
||||||
logger: Logger[F]
|
logger: Logger[F]
|
||||||
): Resource[F, Conversion[F]] =
|
): Resource[F, Conversion[F]] =
|
||||||
Resource.pure[F, Conversion[F]](new Conversion[F] {
|
Resource.pure[F, Conversion[F]](new Conversion[F] {
|
||||||
@ -42,8 +43,16 @@ object Conversion {
|
|||||||
): F[A] =
|
): F[A] =
|
||||||
TikaMimetype.resolve(dataType, in).flatMap {
|
TikaMimetype.resolve(dataType, in).flatMap {
|
||||||
case MimeType.PdfMatch(_) =>
|
case MimeType.PdfMatch(_) =>
|
||||||
|
val allPass = cfg.decryptPdf.passwords ++ additionalPasswords
|
||||||
|
val pdfStream =
|
||||||
|
if (cfg.decryptPdf.enabled) {
|
||||||
|
logger.s
|
||||||
|
.debug(s"Trying to read the PDF using ${allPass.size} passwords")
|
||||||
|
.drain ++
|
||||||
|
in.through(RemovePdfEncryption(logger, allPass))
|
||||||
|
} else in
|
||||||
OcrMyPdf
|
OcrMyPdf
|
||||||
.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(in, handler)
|
.toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(pdfStream, handler)
|
||||||
|
|
||||||
case MimeType.HtmlMatch(mt) =>
|
case MimeType.HtmlMatch(mt) =>
|
||||||
val cs = mt.charsetOrUtf8
|
val cs = mt.charsetOrUtf8
|
||||||
|
@ -6,11 +6,13 @@
|
|||||||
|
|
||||||
package docspell.convert
|
package docspell.convert
|
||||||
|
|
||||||
|
import docspell.common.Password
|
||||||
|
import docspell.convert.ConvertConfig.DecryptPdf
|
||||||
import docspell.convert.extern.OcrMyPdfConfig
|
import docspell.convert.extern.OcrMyPdfConfig
|
||||||
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
||||||
import docspell.convert.flexmark.MarkdownConfig
|
import docspell.convert.flexmark.MarkdownConfig
|
||||||
|
|
||||||
case class ConvertConfig(
|
final case class ConvertConfig(
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
convertedFilenamePart: String,
|
convertedFilenamePart: String,
|
||||||
maxImageSize: Int,
|
maxImageSize: Int,
|
||||||
@ -18,5 +20,11 @@ case class ConvertConfig(
|
|||||||
wkhtmlpdf: WkHtmlPdfConfig,
|
wkhtmlpdf: WkHtmlPdfConfig,
|
||||||
tesseract: TesseractConfig,
|
tesseract: TesseractConfig,
|
||||||
unoconv: UnoconvConfig,
|
unoconv: UnoconvConfig,
|
||||||
ocrmypdf: OcrMyPdfConfig
|
ocrmypdf: OcrMyPdfConfig,
|
||||||
|
decryptPdf: DecryptPdf
|
||||||
)
|
)
|
||||||
|
|
||||||
|
object ConvertConfig {

  /** Settings for decrypting PDFs during conversion.
    *
    * @param enabled
    *   switch for the decryption step
    * @param passwords
    *   passwords given in the configuration
    */
  final case class DecryptPdf(
      enabled: Boolean,
      passwords: List[Password]
  )
}
|
||||||
|
@ -0,0 +1,88 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.convert
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import fs2.{Chunk, Pipe, Stream}
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument
|
||||||
|
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException
|
||||||
|
|
||||||
|
/** Using PDFBox, the incoming pdf is loaded while trying the given passwords.
  *
  * `Password.empty` is prepended to the candidates, so it is tried first. The first
  * password that opens the document is used. If the opened document reports
  * `isEncrypted`, all security is removed and the document is written out again;
  * otherwise the input is passed through unchanged. If no password opens the
  * document, an info message is logged and the input is returned as is.
  */
object RemovePdfEncryption {

  /** Variant taking a fixed list of passwords. */
  def apply[F[_]: Sync](
      logger: Logger[F],
      passwords: List[Password]
  ): Pipe[F, Byte, Byte] =
    apply(logger, Stream.emits(passwords))

  /** Variant taking a (possibly effectful) stream of passwords. Only `.head` of the
    * opened documents is taken, so later passwords are not pulled once one succeeded.
    */
  def apply[F[_]: Sync](
      logger: Logger[F],
      passwords: Stream[F, Password]
  ): Pipe[F, Byte, Byte] = {
    val pws = passwords.cons1(Password.empty)
    // NOTE(review): `in` is evaluated once per tried password and possibly once more
    // for the pass-through branches — confirm the source stream may be re-read.
    in =>
      pws
        .flatMap(pw => in.through(openPdf[F](logger, pw)))
        .head
        .flatMap { doc =>
          if (doc.isEncrypted) {
            logger.s.debug("Removing protection/encryption from PDF").drain ++
              Stream.eval(Sync[F].delay(doc.setAllSecurityToBeRemoved(true))).drain ++
              toStream[F](doc)
          } else {
            in
          }
        }
        .ifEmpty(
          logger.s
            .info(
              s"None of the passwords helped to read the given PDF!"
            )
            .drain ++ in
        )
  }

  /** Reads the complete input into memory and tries to open it with `pw`. Emits the
    * document if it could be opened, nothing otherwise. The document is closed when
    * the bracket is released.
    */
  private def openPdf[F[_]: Sync](
      logger: Logger[F],
      pw: Password
  ): Pipe[F, Byte, PDDocument] = {
    def alloc(bytes: Array[Byte]): F[Option[PDDocument]] =
      Sync[F].delay(load(bytes, pw))

    def free(doc: Option[PDDocument]): F[Unit] =
      Sync[F].delay(doc.foreach(_.close()))

    // Only the first two characters of the password are written to the log.
    val log =
      if (pw.isEmpty) Stream.empty
      else logger.s.debug(s"Try opening PDF with password: ${pw.pass.take(2)}***").drain

    in =>
      Stream
        .eval(in.compile.to(Array))
        .flatMap(bytes => log ++ Stream.bracket(alloc(bytes))(free))
        .flatMap(opt => opt.map(Stream.emit).getOrElse(Stream.empty))
  }

  /** Loads the document; a wrong password yields `None`, any other exception
    * propagates to the caller.
    */
  private def load(bytes: Array[Byte], pw: Password): Option[PDDocument] =
    try Option(PDDocument.load(bytes, pw.pass))
    catch {
      case _: InvalidPasswordException =>
        None
    }

  /** Serialises the document into memory and emits the bytes as a single chunk.
    *
    * Saving is a side effect that may throw, so it is suspended in `F` rather than
    * being executed while the stream is assembled.
    */
  private def toStream[F[_]: Sync](doc: PDDocument): Stream[F, Byte] =
    Stream
      .eval(Sync[F].delay {
        val baos = new ByteArrayOutputStream()
        doc.save(baos)
        Chunk.array(baos.toByteArray)
      })
      .flatMap(bytes => Stream.chunk(bytes))
}
|
@ -74,11 +74,12 @@ class ConversionTest extends FunSuite with FileChecks {
|
|||||||
Duration.seconds(20)
|
Duration.seconds(20)
|
||||||
),
|
),
|
||||||
target
|
target
|
||||||
)
|
),
|
||||||
|
ConvertConfig.DecryptPdf(true, Nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
val conversion =
|
val conversion =
|
||||||
Conversion.create[IO](convertConfig, SanitizeHtml.none, logger)
|
Conversion.create[IO](convertConfig, SanitizeHtml.none, Nil, logger)
|
||||||
|
|
||||||
val bombs = List(
|
val bombs = List(
|
||||||
ExampleFiles.bombs_20K_gray_jpeg,
|
ExampleFiles.bombs_20K_gray_jpeg,
|
||||||
|
@ -9,6 +9,8 @@ package docspell.convert
|
|||||||
import java.nio.charset.StandardCharsets
|
import java.nio.charset.StandardCharsets
|
||||||
import java.nio.file.Files
|
import java.nio.file.Files
|
||||||
|
|
||||||
|
import scala.util.Try
|
||||||
|
|
||||||
import cats.data.Kleisli
|
import cats.data.Kleisli
|
||||||
import cats.effect.IO
|
import cats.effect.IO
|
||||||
import cats.effect.unsafe.implicits.global
|
import cats.effect.unsafe.implicits.global
|
||||||
@ -19,6 +21,9 @@ import docspell.common._
|
|||||||
import docspell.convert.ConversionResult.Handler
|
import docspell.convert.ConversionResult.Handler
|
||||||
import docspell.files.TikaMimetype
|
import docspell.files.TikaMimetype
|
||||||
|
|
||||||
|
import org.apache.pdfbox.pdmodel.PDDocument
|
||||||
|
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException
|
||||||
|
|
||||||
trait FileChecks {
|
trait FileChecks {
|
||||||
|
|
||||||
implicit class FileCheckOps(p: Path) {
|
implicit class FileCheckOps(p: Path) {
|
||||||
@ -34,15 +39,46 @@ trait FileChecks {
|
|||||||
|
|
||||||
def isPlainText: Boolean =
|
def isPlainText: Boolean =
|
||||||
isType(MimeType.text("plain"))
|
isType(MimeType.text("plain"))
|
||||||
|
|
||||||
|
/** True if PDFBox can load the file and close it again without an exception. */
def isUnencryptedPDF: Boolean = {
  val attempt = Try {
    val doc = PDDocument.load(p.toNioPath.toFile)
    doc.close()
  }
  attempt.isSuccess
}
|
||||||
|
}
|
||||||
|
|
||||||
|
implicit class ByteStreamOps(delegate: Stream[IO, Byte]) {
|
||||||
|
/** True if the stream emits at least one byte. */
def isNonEmpty: IO[Boolean] = {
  val firstByte = delegate.head.compile.last
  firstByte.map(_.isDefined)
}
|
||||||
|
|
||||||
|
/** Detects the mime type of the stream (without a hint) and compares it to `mime`. */
def isType(mime: MimeType): IO[Boolean] = {
  val detected = TikaMimetype.detect(delegate, MimeTypeHint.none)
  detected.map(found => found == mime)
}
|
||||||
|
|
||||||
|
/** True if the detected mime type equals `MimeType.pdf`. */
def isPDF: IO[Boolean] = {
  val expected = MimeType.pdf
  isType(expected)
}
|
||||||
|
|
||||||
|
/** Loads all bytes with PDFBox and closes the document again. Yields `true` on
  * success; a failure to load surfaces as a failed `IO`, not as `false`.
  */
def isUnencryptedPDF: IO[Boolean] =
  delegate.compile.to(Array).map { bytes =>
    val doc = PDDocument.load(bytes)
    doc.close()
    true
  }
|
||||||
|
|
||||||
|
/** True only if loading the bytes with PDFBox fails with an
  * `InvalidPasswordException`. Any other failure, or a successful load, yields
  * `false`; a successfully loaded document is closed.
  */
def isEncryptedPDF: IO[Boolean] = {
  val loaded = delegate.compile.to(Array).map(bytes => PDDocument.load(bytes))
  loaded.attempt.map {
    case Left(_: InvalidPasswordException) => true
    case Left(_)                           => false
    case Right(doc) =>
      doc.close()
      false
  }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def storeFile(file: Path): Pipe[IO, Byte, Path] =
|
def storeFile(file: Path): Pipe[IO, Byte, Path] =
|
||||||
in =>
|
fs2.io.file.Files[IO].writeAll(file).andThen(s => s ++ Stream.emit(file))
|
||||||
Stream
|
|
||||||
.eval(
|
|
||||||
in.compile.to(Array).flatMap(bytes => IO(Files.write(file.toNioPath, bytes)))
|
|
||||||
)
|
|
||||||
.map(p => File.path(p))
|
|
||||||
|
|
||||||
def storePdfHandler(file: Path): Handler[IO, Path] =
|
def storePdfHandler(file: Path): Handler[IO, Path] =
|
||||||
storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1)
|
storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1)
|
||||||
|
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.convert
|
||||||
|
|
||||||
|
import cats.effect.IO
|
||||||
|
import fs2.Stream
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.files.ExampleFiles
|
||||||
|
|
||||||
|
import munit.CatsEffectSuite
|
||||||
|
|
||||||
|
/** Tests for [[RemovePdfEncryption]] using the sample PDFs from `ExampleFiles`. */
class RemovePdfEncryptionTest extends CatsEffectSuite with FileChecks {
  val logger: Logger[IO] = Logger.log4s(org.log4s.getLogger)

  private val protectedPdf =
    ExampleFiles.secured_protected_test123_pdf.readURL[IO](16 * 1024)
  private val encryptedPdf =
    ExampleFiles.secured_encrypted_test123_pdf.readURL[IO](16 * 1024)
  private val plainPdf = ExampleFiles.letter_en_pdf.readURL[IO](16 * 1024)

  // Sanity check of the fixture itself. The same assertion used to be repeated
  // twice; one is sufficient.
  // NOTE(review): if the second check was meant for `protectedPdf`, add it with
  // the expected value confirmed against the fixture.
  test("have encrypted pdfs") {
    assertIO(encryptedPdf.isEncryptedPDF, true)
  }

  test("decrypt pdf") {
    encryptedPdf
      .through(RemovePdfEncryption(logger, List(Password("test123"))))
      .isUnencryptedPDF
      .map(assert(_))
  }

  test("decrypt pdf with multiple passwords") {
    encryptedPdf
      .through(
        RemovePdfEncryption(
          logger,
          List("xy123", "123xy", "test123", "abc123").map(Password(_))
        )
      )
      .isUnencryptedPDF
      .map(assert(_))
  }

  test("remove protection") {
    protectedPdf
      .through(RemovePdfEncryption(logger, Nil))
      .isUnencryptedPDF
      .map(assert(_))
  }

  test("read unprotected pdf") {
    plainPdf
      .through(RemovePdfEncryption(logger, Nil))
      .isUnencryptedPDF
      .map(assert(_))
  }

  // The password stream fails after its first element; the test only passes if
  // the stream is not pulled beyond the matching password.
  test("decrypt with multiple passwords, stop on first") {
    val passwords: Stream[IO, String] =
      Stream("test123") ++ Stream.raiseError[IO](new Exception("is not called"))
    val decrypt = RemovePdfEncryption(logger, passwords.map(Password(_)))
    encryptedPdf
      .through(decrypt)
      .isUnencryptedPDF
      .map(assert(_))
  }

  test("return input stream if nothing helps") {
    encryptedPdf
      .through(RemovePdfEncryption(logger, List("a", "b").map(Password(_))))
      .isEncryptedPDF
      .map(assert(_))
  }
}
|
BIN
modules/files/src/test/resources/secured/encrypted-test123.pdf
Normal file
BIN
modules/files/src/test/resources/secured/encrypted-test123.pdf
Normal file
Binary file not shown.
BIN
modules/files/src/test/resources/secured/protected-test123.pdf
Normal file
BIN
modules/files/src/test/resources/secured/protected-test123.pdf
Normal file
Binary file not shown.
@ -586,6 +586,25 @@ Docpell Update Check
|
|||||||
}
|
}
|
||||||
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
working-dir = ${java.io.tmpdir}"/docspell-convert"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Tries to decrypt PDFs that are encrypted or protected. If
|
||||||
|
# enabled, a PDF's encryption or protection will be removed during
|
||||||
|
# conversion.
|
||||||
|
#
|
||||||
|
# Encrypted PDFs can only be processed with this enabled, because
|
||||||
|
# docspell needs to read them. It also requires specifying a
|
||||||
|
# password here. All passwords are tried when reading a PDF.
|
||||||
|
#
|
||||||
|
# This is enabled by default with an empty password list. This
|
||||||
|
# removes protection from PDFs, which is better for processing.
|
||||||
|
#
|
||||||
|
# Passwords can be given here and each collective can maintain
|
||||||
|
# their passwords as well. But if the `enabled` setting below is
|
||||||
|
# `false`, then no attempt at decrypting is done.
|
||||||
|
decrypt-pdf = {
|
||||||
|
enabled = true
|
||||||
|
passwords = []
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# The same section is also present in the rest-server config. It is
|
# The same section is also present in the rest-server config. It is
|
||||||
|
@ -77,17 +77,27 @@ object ConvertPdf {
|
|||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
item: ItemData
|
item: ItemData
|
||||||
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
|
)(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] =
|
||||||
Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv =>
|
loadCollectivePasswords(ctx).flatMap(collPass =>
|
||||||
mime match {
|
Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv =>
|
||||||
case mt =>
|
mime match {
|
||||||
val data = ctx.store.fileStore.getBytes(ra.fileId)
|
case mt =>
|
||||||
val handler = conversionHandler[F](ctx, cfg, ra, item)
|
val data = ctx.store.fileStore.getBytes(ra.fileId)
|
||||||
ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
|
val handler = conversionHandler[F](ctx, cfg, ra, item)
|
||||||
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
|
ctx.logger
|
||||||
data
|
.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
|
||||||
)
|
conv.toPDF(DataType(mt), ctx.args.meta.language, handler)(
|
||||||
|
data
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
)
|
||||||
|
|
||||||
|
/** Loads the passwords stored for the collective of the current job and removes
  * duplicates.
  */
private def loadCollectivePasswords[F[_]: Async](
    ctx: Context[F, ProcessItemArgs]
): F[List[Password]] = {
  val collective = ctx.args.meta.collective
  val stored = ctx.store.transact(RCollectivePassword.findAll(collective))
  stored.map(records => records.map(_.password).distinct)
}
|
||||||
|
|
||||||
private def conversionHandler[F[_]: Sync](
|
private def conversionHandler[F[_]: Sync](
|
||||||
ctx: Context[F, ProcessItemArgs],
|
ctx: Context[F, ProcessItemArgs],
|
||||||
|
@ -5635,6 +5635,7 @@ components:
|
|||||||
- integrationEnabled
|
- integrationEnabled
|
||||||
- classifier
|
- classifier
|
||||||
- emptyTrash
|
- emptyTrash
|
||||||
|
- passwords
|
||||||
properties:
|
properties:
|
||||||
language:
|
language:
|
||||||
type: string
|
type: string
|
||||||
@ -5648,6 +5649,11 @@ components:
|
|||||||
$ref: "#/components/schemas/ClassifierSetting"
|
$ref: "#/components/schemas/ClassifierSetting"
|
||||||
emptyTrash:
|
emptyTrash:
|
||||||
$ref: "#/components/schemas/EmptyTrashSetting"
|
$ref: "#/components/schemas/EmptyTrashSetting"
|
||||||
|
passwords:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
format: password
|
||||||
|
|
||||||
EmptyTrashSetting:
|
EmptyTrashSetting:
|
||||||
description: |
|
description: |
|
||||||
|
@ -12,8 +12,7 @@ import cats.implicits._
|
|||||||
import docspell.backend.BackendApp
|
import docspell.backend.BackendApp
|
||||||
import docspell.backend.auth.AuthToken
|
import docspell.backend.auth.AuthToken
|
||||||
import docspell.backend.ops.OCollective
|
import docspell.backend.ops.OCollective
|
||||||
import docspell.common.EmptyTrashArgs
|
import docspell.common._
|
||||||
import docspell.common.ListType
|
|
||||||
import docspell.restapi.model._
|
import docspell.restapi.model._
|
||||||
import docspell.restserver.conv.Conversions
|
import docspell.restserver.conv.Conversions
|
||||||
import docspell.restserver.http4s._
|
import docspell.restserver.http4s._
|
||||||
@ -62,7 +61,8 @@ object CollectiveRoutes {
|
|||||||
settings.emptyTrash.schedule,
|
settings.emptyTrash.schedule,
|
||||||
settings.emptyTrash.minAge
|
settings.emptyTrash.minAge
|
||||||
)
|
)
|
||||||
)
|
),
|
||||||
|
settings.passwords.map(Password.apply)
|
||||||
)
|
)
|
||||||
res <-
|
res <-
|
||||||
backend.collective
|
backend.collective
|
||||||
@ -89,7 +89,8 @@ object CollectiveRoutes {
|
|||||||
EmptyTrashSetting(
|
EmptyTrashSetting(
|
||||||
trash.schedule,
|
trash.schedule,
|
||||||
trash.minAge
|
trash.minAge
|
||||||
)
|
),
|
||||||
|
settDb.map(_.passwords).getOrElse(Nil).map(_.pass)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
resp <- sett.toResponse()
|
resp <- sett.toResponse()
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
CREATE TABLE "collective_password" (
|
||||||
|
"id" varchar(254) not null primary key,
|
||||||
|
"cid" varchar(254) not null,
|
||||||
|
"pass" varchar(254) not null,
|
||||||
|
"created" timestamp not null,
|
||||||
|
foreign key ("cid") references "collective"("cid") on delete cascade
|
||||||
|
)
|
@ -0,0 +1,7 @@
|
|||||||
|
CREATE TABLE `collective_password` (
|
||||||
|
`id` varchar(254) not null primary key,
|
||||||
|
`cid` varchar(254) not null,
|
||||||
|
`pass` varchar(254) not null,
|
||||||
|
`created` timestamp not null,
|
||||||
|
foreign key (`cid`) references `collective`(`cid`) on delete cascade
|
||||||
|
)
|
@ -0,0 +1,7 @@
|
|||||||
|
CREATE TABLE "collective_password" (
|
||||||
|
"id" varchar(254) not null primary key,
|
||||||
|
"cid" varchar(254) not null,
|
||||||
|
"pass" varchar(254) not null,
|
||||||
|
"created" timestamp not null,
|
||||||
|
foreign key ("cid") references "collective"("cid") on delete cascade
|
||||||
|
)
|
@ -89,7 +89,8 @@ object RCollective {
|
|||||||
case None =>
|
case None =>
|
||||||
REmptyTrashSetting.delete(cid)
|
REmptyTrashSetting.delete(cid)
|
||||||
}
|
}
|
||||||
} yield n1 + n2 + n3
|
n4 <- RCollectivePassword.replaceAll(cid, settings.passwords)
|
||||||
|
} yield n1 + n2 + n3 + n4
|
||||||
|
|
||||||
// this hides categories that have been deleted in the meantime
|
// this hides categories that have been deleted in the meantime
|
||||||
// they are finally removed from the json array once the learn classifier task is run
|
// they are finally removed from the json array once the learn classifier task is run
|
||||||
@ -99,10 +100,12 @@ object RCollective {
|
|||||||
prev <- OptionT.fromOption[ConnectionIO](sett.classifier)
|
prev <- OptionT.fromOption[ConnectionIO](sett.classifier)
|
||||||
cats <- OptionT.liftF(RTag.listCategories(coll))
|
cats <- OptionT.liftF(RTag.listCategories(coll))
|
||||||
next = prev.copy(categories = prev.categories.intersect(cats))
|
next = prev.copy(categories = prev.categories.intersect(cats))
|
||||||
} yield sett.copy(classifier = Some(next))).value
|
pws <- OptionT.liftF(RCollectivePassword.findAll(coll))
|
||||||
|
} yield sett.copy(classifier = Some(next), passwords = pws.map(_.password))).value
|
||||||
|
|
||||||
private def getRawSettings(coll: Ident): ConnectionIO[Option[Settings]] = {
|
private def getRawSettings(coll: Ident): ConnectionIO[Option[Settings]] = {
|
||||||
import RClassifierSetting.stringListMeta
|
import RClassifierSetting.stringListMeta
|
||||||
|
|
||||||
val c = RCollective.as("c")
|
val c = RCollective.as("c")
|
||||||
val cs = RClassifierSetting.as("cs")
|
val cs = RClassifierSetting.as("cs")
|
||||||
val es = REmptyTrashSetting.as("es")
|
val es = REmptyTrashSetting.as("es")
|
||||||
@ -116,7 +119,8 @@ object RCollective {
|
|||||||
cs.categories.s,
|
cs.categories.s,
|
||||||
cs.listType.s,
|
cs.listType.s,
|
||||||
es.schedule.s,
|
es.schedule.s,
|
||||||
es.minAge.s
|
es.minAge.s,
|
||||||
|
const(0) //dummy value to load Nil as list of passwords
|
||||||
),
|
),
|
||||||
from(c).leftJoin(cs, cs.cid === c.id).leftJoin(es, es.cid === c.id),
|
from(c).leftJoin(cs, cs.cid === c.id).leftJoin(es, es.cid === c.id),
|
||||||
c.id === coll
|
c.id === coll
|
||||||
@ -170,7 +174,11 @@ object RCollective {
|
|||||||
language: Language,
|
language: Language,
|
||||||
integrationEnabled: Boolean,
|
integrationEnabled: Boolean,
|
||||||
classifier: Option[RClassifierSetting.Classifier],
|
classifier: Option[RClassifierSetting.Classifier],
|
||||||
emptyTrash: Option[REmptyTrashSetting.EmptyTrash]
|
emptyTrash: Option[REmptyTrashSetting.EmptyTrash],
|
||||||
|
passwords: List[Password]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/** Reads an `Int` column, ignores its value and always yields an empty password
  * list.
  */
implicit val passwordListMeta: Read[List[Password]] =
  Read[Int].map(_ => List.empty[Password])
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.store.records
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.store.qb.DSL._
|
||||||
|
import docspell.store.qb._
|
||||||
|
|
||||||
|
import doobie._
|
||||||
|
import doobie.implicits._
|
||||||
|
|
||||||
|
/** A password record belonging to a collective.
  *
  * @param id
  *   identifier of this record
  * @param cid
  *   identifier of the owning collective
  * @param password
  *   the password value
  * @param created
  *   creation time of the record
  */
final case class RCollectivePassword(
    id: Ident,
    cid: Ident,
    password: Password,
    created: Timestamp
)
|
||||||
|
|
||||||
|
object RCollectivePassword {

  /** Table definition for `collective_password`. */
  final case class Table(alias: Option[String]) extends TableDef {
    val tableName: String = "collective_password"

    val id = Column[Ident]("id", this)
    val cid = Column[Ident]("cid", this)
    val password = Column[Password]("pass", this)
    val created = Column[Timestamp]("created", this)

    val all: NonEmptyList[Column[_]] =
      NonEmptyList.of(id, cid, password, created)
  }

  val T = Table(None)

  def as(alias: String): Table = Table(Some(alias))

  /** Builds a record with a random id and the current timestamp; nothing is stored. */
  def createNew[F[_]: Sync](cid: Ident, pw: Password): F[RCollectivePassword] =
    Ident.randomId[F].flatMap { newId =>
      Timestamp.current[F].map(now => RCollectivePassword(newId, cid, pw, now))
    }

  def insert(v: RCollectivePassword): ConnectionIO[Int] = {
    val values = fr"${v.id}, ${v.cid},${v.password},${v.created}"
    DML.insert(T, T.all, values)
  }

  /** Deletes rows with the same collective and password, then inserts `v`. The
    * result is the sum of both returned values.
    */
  def upsert(v: RCollectivePassword): ConnectionIO[Int] =
    deleteByPassword(v.cid, v.password).flatMap { deleted =>
      insert(v).map(inserted => inserted + deleted)
    }

  def deleteById(id: Ident): ConnectionIO[Int] = {
    val byId = T.id === id
    DML.delete(T, byId)
  }

  def deleteByPassword(cid: Ident, pw: Password): ConnectionIO[Int] = {
    val samePasswordAndCollective = T.password === pw && T.cid === cid
    DML.delete(T, samePasswordAndCollective)
  }

  def findAll(cid: Ident): ConnectionIO[List[RCollectivePassword]] = {
    val query = Select(select(T.all), from(T), T.cid === cid)
    query.build.query[RCollectivePassword].to[List]
  }

  /** Deletes all rows of the collective and inserts one new record per given
    * password. The insert is skipped for an empty list. The result is the sum of
    * the values returned by the delete and the insert.
    */
  def replaceAll(cid: Ident, pws: List[Password]): ConnectionIO[Int] =
    for {
      deleted <- DML.delete(T, T.cid === cid)
      records <- pws.traverse(p => createNew[ConnectionIO](cid, p))
      inserted <-
        if (pws.nonEmpty)
          DML.insertMany(
            T,
            T.all,
            records.map(r => fr"${r.id},${r.cid},${r.password},${r.created}")
          )
        else 0.pure[ConnectionIO]
    } yield deleted + inserted
}
|
@ -22,6 +22,7 @@ import Comp.ClassifierSettingsForm
|
|||||||
import Comp.Dropdown
|
import Comp.Dropdown
|
||||||
import Comp.EmptyTrashForm
|
import Comp.EmptyTrashForm
|
||||||
import Comp.MenuBar as MB
|
import Comp.MenuBar as MB
|
||||||
|
import Comp.StringListInput
|
||||||
import Data.DropdownStyle as DS
|
import Data.DropdownStyle as DS
|
||||||
import Data.Flags exposing (Flags)
|
import Data.Flags exposing (Flags)
|
||||||
import Data.Language exposing (Language)
|
import Data.Language exposing (Language)
|
||||||
@ -30,6 +31,7 @@ import Html exposing (..)
|
|||||||
import Html.Attributes exposing (..)
|
import Html.Attributes exposing (..)
|
||||||
import Html.Events exposing (onCheck, onClick, onInput)
|
import Html.Events exposing (onCheck, onClick, onInput)
|
||||||
import Http
|
import Http
|
||||||
|
import Markdown
|
||||||
import Messages.Comp.CollectiveSettingsForm exposing (Texts)
|
import Messages.Comp.CollectiveSettingsForm exposing (Texts)
|
||||||
import Styles as S
|
import Styles as S
|
||||||
|
|
||||||
@ -44,6 +46,8 @@ type alias Model =
|
|||||||
, startClassifierResult : ClassifierResult
|
, startClassifierResult : ClassifierResult
|
||||||
, emptyTrashModel : Comp.EmptyTrashForm.Model
|
, emptyTrashModel : Comp.EmptyTrashForm.Model
|
||||||
, startEmptyTrashResult : EmptyTrashResult
|
, startEmptyTrashResult : EmptyTrashResult
|
||||||
|
, passwordModel : Comp.StringListInput.Model
|
||||||
|
, passwords : List String
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -96,6 +100,8 @@ init flags settings =
|
|||||||
, startClassifierResult = ClassifierResultInitial
|
, startClassifierResult = ClassifierResultInitial
|
||||||
, emptyTrashModel = em
|
, emptyTrashModel = em
|
||||||
, startEmptyTrashResult = EmptyTrashResultInitial
|
, startEmptyTrashResult = EmptyTrashResultInitial
|
||||||
|
, passwordModel = Comp.StringListInput.init
|
||||||
|
, passwords = settings.passwords
|
||||||
}
|
}
|
||||||
, Cmd.batch [ Cmd.map ClassifierSettingMsg cc, Cmd.map EmptyTrashMsg ec ]
|
, Cmd.batch [ Cmd.map ClassifierSettingMsg cc, Cmd.map EmptyTrashMsg ec ]
|
||||||
)
|
)
|
||||||
@ -114,6 +120,7 @@ getSettings model =
|
|||||||
, integrationEnabled = model.intEnabled
|
, integrationEnabled = model.intEnabled
|
||||||
, classifier = cls
|
, classifier = cls
|
||||||
, emptyTrash = trash
|
, emptyTrash = trash
|
||||||
|
, passwords = model.passwords
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
(Comp.ClassifierSettingsForm.getSettings model.classifierModel)
|
(Comp.ClassifierSettingsForm.getSettings model.classifierModel)
|
||||||
@ -133,6 +140,7 @@ type Msg
|
|||||||
| StartEmptyTrashTask
|
| StartEmptyTrashTask
|
||||||
| StartClassifierResp (Result Http.Error BasicResult)
|
| StartClassifierResp (Result Http.Error BasicResult)
|
||||||
| StartEmptyTrashResp (Result Http.Error BasicResult)
|
| StartEmptyTrashResp (Result Http.Error BasicResult)
|
||||||
|
| PasswordMsg Comp.StringListInput.Msg
|
||||||
|
|
||||||
|
|
||||||
update : Flags -> Msg -> Model -> ( Model, Cmd Msg, Maybe CollectiveSettings )
|
update : Flags -> Msg -> Model -> ( Model, Cmd Msg, Maybe CollectiveSettings )
|
||||||
@ -285,6 +293,27 @@ update flags msg model =
|
|||||||
, Nothing
|
, Nothing
|
||||||
)
|
)
|
||||||
|
|
||||||
|
PasswordMsg lm ->
|
||||||
|
let
|
||||||
|
( pm, action ) =
|
||||||
|
Comp.StringListInput.update lm model.passwordModel
|
||||||
|
|
||||||
|
pws =
|
||||||
|
case action of
|
||||||
|
Comp.StringListInput.AddAction pw ->
|
||||||
|
pw :: model.passwords
|
||||||
|
|
||||||
|
Comp.StringListInput.RemoveAction pw ->
|
||||||
|
List.filter (\e -> e /= pw) model.passwords
|
||||||
|
|
||||||
|
Comp.StringListInput.NoAction ->
|
||||||
|
model.passwords
|
||||||
|
in
|
||||||
|
( { model | passwordModel = pm, passwords = pws }
|
||||||
|
, Cmd.none
|
||||||
|
, Nothing
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--- View2
|
--- View2
|
||||||
@ -460,6 +489,18 @@ view2 flags texts settings model =
|
|||||||
]
|
]
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
, div []
|
||||||
|
[ h2 [ class S.header2 ]
|
||||||
|
[ text texts.passwords
|
||||||
|
]
|
||||||
|
, div [ class "mb-4" ]
|
||||||
|
[ div [ class "opacity-50 text-sm" ]
|
||||||
|
[ Markdown.toHtml [] texts.passwordsInfo
|
||||||
|
]
|
||||||
|
, Html.map PasswordMsg
|
||||||
|
(Comp.StringListInput.view2 model.passwords model.passwordModel)
|
||||||
|
]
|
||||||
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,6 +44,8 @@ type alias Texts =
|
|||||||
, fulltextReindexSubmitted : String
|
, fulltextReindexSubmitted : String
|
||||||
, fulltextReindexOkMissing : String
|
, fulltextReindexOkMissing : String
|
||||||
, emptyTrash : String
|
, emptyTrash : String
|
||||||
|
, passwords : String
|
||||||
|
, passwordsInfo : String
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -77,6 +79,8 @@ gb =
|
|||||||
, fulltextReindexOkMissing =
|
, fulltextReindexOkMissing =
|
||||||
"Please type OK in the field if you really want to start re-indexing your data."
|
"Please type OK in the field if you really want to start re-indexing your data."
|
||||||
, emptyTrash = "Empty Trash"
|
, emptyTrash = "Empty Trash"
|
||||||
|
, passwords = "Passwords"
|
||||||
|
, passwordsInfo = "These passwords are used when encrypted PDFs are being processed. Please note that they are stored in the database as **plain text**!"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -110,4 +114,6 @@ de =
|
|||||||
, fulltextReindexOkMissing =
|
, fulltextReindexOkMissing =
|
||||||
"Bitte tippe OK in das Feld ein, wenn Du wirklich den Index neu erzeugen möchtest."
|
"Bitte tippe OK in das Feld ein, wenn Du wirklich den Index neu erzeugen möchtest."
|
||||||
, emptyTrash = "Papierkorb löschen"
|
, emptyTrash = "Papierkorb löschen"
|
||||||
|
, passwords = "Passwörter"
|
||||||
|
, passwordsInfo = "Diese Passwörter werden zum Lesen von verschlüsselten PDFs verwendet. Diese Passwörter werden in der Datenbank **in Klartext** gespeichert!"
|
||||||
}
|
}
|
||||||
|
@ -56,6 +56,7 @@ description = "A list of features and limitations."
|
|||||||
- Everything stored in a SQL database: PostgreSQL, MariaDB or H2
|
- Everything stored in a SQL database: PostgreSQL, MariaDB or H2
|
||||||
- H2 is embedded, a "one-file-only" database, avoids installing db
|
- H2 is embedded, a "one-file-only" database, avoids installing db
|
||||||
servers
|
servers
|
||||||
|
- Support for encrypted PDFs
|
||||||
- Files supported:
|
- Files supported:
|
||||||
- Documents:
|
- Documents:
|
||||||
- PDF
|
- PDF
|
||||||
|
Loading…
x
Reference in New Issue
Block a user