diff --git a/build.sbt b/build.sbt index bd15be3f..c4837e71 100644 --- a/build.sbt +++ b/build.sbt @@ -275,6 +275,7 @@ val common = project .settings(testSettingsMUnit) .settings( name := "docspell-common", + addCompilerPlugin(Dependencies.kindProjectorPlugin), libraryDependencies ++= Dependencies.fs2 ++ Dependencies.circe ++ @@ -409,7 +410,8 @@ val convert = project name := "docspell-convert", libraryDependencies ++= Dependencies.flexmark ++ - Dependencies.twelvemonkeys + Dependencies.twelvemonkeys ++ + Dependencies.pdfbox ) .dependsOn(common, files % "compile->compile;test->test") diff --git a/docker/docker-compose/docker-compose.yml b/docker/docker-compose/docker-compose.yml index 18712b4c..d63ed86d 100644 --- a/docker/docker-compose/docker-compose.yml +++ b/docker/docker-compose/docker-compose.yml @@ -19,6 +19,7 @@ services: image: docspell/joex:latest container_name: docspell-joex command: + - -J-Xmx3G - /opt/docspell.conf restart: unless-stopped env_file: ./.env diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala b/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala index 1b2825fe..907bfcef 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala @@ -63,6 +63,12 @@ trait OCollective[F[_]] { def findEnabledSource(sourceId: Ident): F[Option[RSource]] + def addPassword(collective: Ident, pw: Password): F[Unit] + + def getPasswords(collective: Ident): F[List[RCollectivePassword]] + + def removePassword(id: Ident): F[Unit] + def startLearnClassifier(collective: Ident): F[Unit] def startEmptyTrash(args: EmptyTrashArgs): F[Unit] @@ -149,7 +155,7 @@ object OCollective { private def updateLearnClassifierTask(coll: Ident, sett: Settings): F[Unit] = for { id <- Ident.randomId[F] - on = sett.classifier.map(_.enabled).getOrElse(false) + on = sett.classifier.exists(_.enabled) timer = 
sett.classifier.map(_.schedule).getOrElse(CalEvent.unsafe("")) args = LearnClassifierArgs(coll) ut = UserTask( @@ -174,6 +180,18 @@ object OCollective { _ <- joex.notifyAllNodes } yield () + def addPassword(collective: Ident, pw: Password): F[Unit] = + for { + cpass <- RCollectivePassword.createNew[F](collective, pw) + _ <- store.transact(RCollectivePassword.upsert(cpass)) + } yield () + + def getPasswords(collective: Ident): F[List[RCollectivePassword]] = + store.transact(RCollectivePassword.findAll(collective)) + + def removePassword(id: Ident): F[Unit] = + store.transact(RCollectivePassword.deleteById(id)).map(_ => ()) + def startLearnClassifier(collective: Ident): F[Unit] = for { id <- Ident.randomId[F] diff --git a/modules/common/src/main/scala/docspell/common/Logger.scala b/modules/common/src/main/scala/docspell/common/Logger.scala index df1dba26..936c9d34 100644 --- a/modules/common/src/main/scala/docspell/common/Logger.scala +++ b/modules/common/src/main/scala/docspell/common/Logger.scala @@ -7,12 +7,13 @@ package docspell.common import cats.effect.Sync +import fs2.Stream import docspell.common.syntax.all._ import org.log4s.{Logger => Log4sLogger} -trait Logger[F[_]] { +trait Logger[F[_]] { self => def trace(msg: => String): F[Unit] def debug(msg: => String): F[Unit] @@ -21,6 +22,25 @@ trait Logger[F[_]] { def error(ex: Throwable)(msg: => String): F[Unit] def error(msg: => String): F[Unit] + final def s: Logger[Stream[F, *]] = new Logger[Stream[F, *]] { + def trace(msg: => String): Stream[F, Unit] = + Stream.eval(self.trace(msg)) + + def debug(msg: => String): Stream[F, Unit] = + Stream.eval(self.debug(msg)) + + def info(msg: => String): Stream[F, Unit] = + Stream.eval(self.info(msg)) + + def warn(msg: => String): Stream[F, Unit] = + Stream.eval(self.warn(msg)) + + def error(msg: => String): Stream[F, Unit] = + Stream.eval(self.error(msg)) + + def error(ex: Throwable)(msg: => String): Stream[F, Unit] = + Stream.eval(self.error(ex)(msg)) + } } object Logger 
{ diff --git a/modules/convert/src/main/scala/docspell/convert/Conversion.scala b/modules/convert/src/main/scala/docspell/convert/Conversion.scala index 54ee526a..b1a05aa4 100644 --- a/modules/convert/src/main/scala/docspell/convert/Conversion.scala +++ b/modules/convert/src/main/scala/docspell/convert/Conversion.scala @@ -33,6 +33,7 @@ object Conversion { def create[F[_]: Async]( cfg: ConvertConfig, sanitizeHtml: SanitizeHtml, + additionalPasswords: List[Password], logger: Logger[F] ): Resource[F, Conversion[F]] = Resource.pure[F, Conversion[F]](new Conversion[F] { @@ -42,8 +43,16 @@ object Conversion { ): F[A] = TikaMimetype.resolve(dataType, in).flatMap { case MimeType.PdfMatch(_) => + val allPass = cfg.decryptPdf.passwords ++ additionalPasswords + val pdfStream = + if (cfg.decryptPdf.enabled) { + logger.s + .debug(s"Trying to read the PDF using ${allPass.size} passwords") + .drain ++ + in.through(RemovePdfEncryption(logger, allPass)) + } else in OcrMyPdf - .toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(in, handler) + .toPDF(cfg.ocrmypdf, lang, cfg.chunkSize, logger)(pdfStream, handler) case MimeType.HtmlMatch(mt) => val cs = mt.charsetOrUtf8 diff --git a/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala b/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala index fc4e4cd6..a4f3c224 100644 --- a/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala +++ b/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala @@ -6,11 +6,13 @@ package docspell.convert +import docspell.common.Password +import docspell.convert.ConvertConfig.DecryptPdf import docspell.convert.extern.OcrMyPdfConfig import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig} import docspell.convert.flexmark.MarkdownConfig -case class ConvertConfig( +final case class ConvertConfig( chunkSize: Int, convertedFilenamePart: String, maxImageSize: Int, @@ -18,5 +20,11 @@ case class ConvertConfig( wkhtmlpdf: WkHtmlPdfConfig, 
tesseract: TesseractConfig, unoconv: UnoconvConfig, - ocrmypdf: OcrMyPdfConfig + ocrmypdf: OcrMyPdfConfig, + decryptPdf: DecryptPdf ) + +object ConvertConfig { + + final case class DecryptPdf(enabled: Boolean, passwords: List[Password]) +} diff --git a/modules/convert/src/main/scala/docspell/convert/RemovePdfEncryption.scala b/modules/convert/src/main/scala/docspell/convert/RemovePdfEncryption.scala new file mode 100644 index 00000000..4d7a469f --- /dev/null +++ b/modules/convert/src/main/scala/docspell/convert/RemovePdfEncryption.scala @@ -0,0 +1,88 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.convert + +import java.io.ByteArrayOutputStream + +import cats.effect._ +import fs2.{Chunk, Pipe, Stream} + +import docspell.common._ + +import org.apache.pdfbox.pdmodel.PDDocument +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException + +/** Using PDFBox, the incoming pdf is loaded while trying the given passwords. */ +object RemovePdfEncryption { + + def apply[F[_]: Sync]( + logger: Logger[F], + passwords: List[Password] + ): Pipe[F, Byte, Byte] = + apply(logger, Stream.emits(passwords)) + + def apply[F[_]: Sync]( + logger: Logger[F], + passwords: Stream[F, Password] + ): Pipe[F, Byte, Byte] = { + val pws = passwords.cons1(Password.empty) + in => + pws + .flatMap(pw => in.through(openPdf[F](logger, pw))) + .head + .flatMap { doc => + if (doc.isEncrypted) { + logger.s.debug("Removing protection/encryption from PDF").drain ++ + Stream.eval(Sync[F].delay(doc.setAllSecurityToBeRemoved(true))).drain ++ + toStream[F](doc) + } else { + in + } + } + .ifEmpty( + logger.s + .info( + s"None of the passwords helped to read the given PDF!" 
+ ) + .drain ++ in + ) + } + + private def openPdf[F[_]: Sync]( + logger: Logger[F], + pw: Password + ): Pipe[F, Byte, PDDocument] = { + def alloc(bytes: Array[Byte]): F[Option[PDDocument]] = + Sync[F].delay(load(bytes, pw)) + + def free(doc: Option[PDDocument]): F[Unit] = + Sync[F].delay(doc.foreach(_.close())) + + val log = + if (pw.isEmpty) Stream.empty + else logger.s.debug(s"Try opening PDF with password: ${pw.pass.take(2)}***").drain + + in => + Stream + .eval(in.compile.to(Array)) + .flatMap(bytes => log ++ Stream.bracket(alloc(bytes))(free)) + .flatMap(opt => opt.map(Stream.emit).getOrElse(Stream.empty)) + } + + private def load(bytes: Array[Byte], pw: Password): Option[PDDocument] = + try Option(PDDocument.load(bytes, pw.pass)) + catch { + case _: InvalidPasswordException => + None + } + + private def toStream[F[_]](doc: PDDocument): Stream[F, Byte] = { + val baos = new ByteArrayOutputStream() + doc.save(baos) + Stream.chunk(Chunk.array(baos.toByteArray)) + } +} diff --git a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala index fa4360e8..8f9f191f 100644 --- a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala +++ b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala @@ -74,11 +74,12 @@ class ConversionTest extends FunSuite with FileChecks { Duration.seconds(20) ), target - ) + ), + ConvertConfig.DecryptPdf(true, Nil) ) val conversion = - Conversion.create[IO](convertConfig, SanitizeHtml.none, logger) + Conversion.create[IO](convertConfig, SanitizeHtml.none, Nil, logger) val bombs = List( ExampleFiles.bombs_20K_gray_jpeg, diff --git a/modules/convert/src/test/scala/docspell/convert/FileChecks.scala b/modules/convert/src/test/scala/docspell/convert/FileChecks.scala index a6a62462..96f251ff 100644 --- a/modules/convert/src/test/scala/docspell/convert/FileChecks.scala +++ 
b/modules/convert/src/test/scala/docspell/convert/FileChecks.scala @@ -9,6 +9,8 @@ package docspell.convert import java.nio.charset.StandardCharsets import java.nio.file.Files +import scala.util.Try + import cats.data.Kleisli import cats.effect.IO import cats.effect.unsafe.implicits.global @@ -19,6 +21,9 @@ import docspell.common._ import docspell.convert.ConversionResult.Handler import docspell.files.TikaMimetype +import org.apache.pdfbox.pdmodel.PDDocument +import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException + trait FileChecks { implicit class FileCheckOps(p: Path) { @@ -34,15 +39,46 @@ trait FileChecks { def isPlainText: Boolean = isType(MimeType.text("plain")) + + def isUnencryptedPDF: Boolean = + Try(PDDocument.load(p.toNioPath.toFile)).map(_.close()).isSuccess + } + + implicit class ByteStreamOps(delegate: Stream[IO, Byte]) { + def isNonEmpty: IO[Boolean] = + delegate.head.compile.last.map(_.isDefined) + + def isType(mime: MimeType): IO[Boolean] = + TikaMimetype.detect(delegate, MimeTypeHint.none).map(_ == mime) + + def isPDF: IO[Boolean] = + isType(MimeType.pdf) + + def isUnencryptedPDF: IO[Boolean] = + delegate.compile + .to(Array) + .map(PDDocument.load(_)) + .map(_.close()) + .map(_ => true) + + def isEncryptedPDF: IO[Boolean] = + delegate.compile + .to(Array) + .map(PDDocument.load(_)) + .attempt + .map(e => + e.fold( + _.isInstanceOf[InvalidPasswordException], + doc => { + doc.close(); + false + } + ) + ) } def storeFile(file: Path): Pipe[IO, Byte, Path] = - in => - Stream - .eval( - in.compile.to(Array).flatMap(bytes => IO(Files.write(file.toNioPath, bytes))) - ) - .map(p => File.path(p)) + fs2.io.file.Files[IO].writeAll(file).andThen(s => s ++ Stream.emit(file)) def storePdfHandler(file: Path): Handler[IO, Path] = storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1) diff --git a/modules/convert/src/test/scala/docspell/convert/RemovePdfEncryptionTest.scala 
b/modules/convert/src/test/scala/docspell/convert/RemovePdfEncryptionTest.scala new file mode 100644 index 00000000..803f3174 --- /dev/null +++ b/modules/convert/src/test/scala/docspell/convert/RemovePdfEncryptionTest.scala @@ -0,0 +1,82 @@ +/* + * Copyright 2020 Eike K. & Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.convert + +import cats.effect.IO +import fs2.Stream + +import docspell.common._ +import docspell.files.ExampleFiles + +import munit.CatsEffectSuite + +class RemovePdfEncryptionTest extends CatsEffectSuite with FileChecks { + val logger: Logger[IO] = Logger.log4s(org.log4s.getLogger) + + private val protectedPdf = + ExampleFiles.secured_protected_test123_pdf.readURL[IO](16 * 1024) + private val encryptedPdf = + ExampleFiles.secured_encrypted_test123_pdf.readURL[IO](16 * 1024) + private val plainPdf = ExampleFiles.letter_en_pdf.readURL[IO](16 * 1024) + + test("have encrypted pdfs") { + for { + _ <- assertIO(encryptedPdf.isEncryptedPDF, true) + _ <- assertIO(encryptedPdf.isEncryptedPDF, true) + } yield () + } + + test("decrypt pdf") { + encryptedPdf + .through(RemovePdfEncryption(logger, List(Password("test123")))) + .isUnencryptedPDF + .map(assert(_)) + } + + test("decrypt pdf with multiple passwords") { + encryptedPdf + .through( + RemovePdfEncryption( + logger, + List("xy123", "123xy", "test123", "abc123").map(Password(_)) + ) + ) + .isUnencryptedPDF + .map(assert(_)) + } + + test("remove protection") { + protectedPdf + .through(RemovePdfEncryption(logger, Nil)) + .isUnencryptedPDF + .map(assert(_)) + } + + test("read unprotected pdf") { + plainPdf + .through(RemovePdfEncryption(logger, Nil)) + .isUnencryptedPDF + .map(assert(_)) + } + + test("decrypt with multiple passwords, stop on first") { + val passwords: Stream[IO, String] = + Stream("test123") ++ Stream.raiseError[IO](new Exception("is not called")) + val decrypt = RemovePdfEncryption(logger, passwords.map(Password(_))) + encryptedPdf + 
.through(decrypt) + .isUnencryptedPDF + .map(assert(_)) + } + + test("return input stream if nothing helps") { + encryptedPdf + .through(RemovePdfEncryption(logger, List("a", "b").map(Password(_)))) + .isEncryptedPDF + .map(assert(_)) + } +} diff --git a/modules/files/src/test/resources/secured/encrypted-test123.pdf b/modules/files/src/test/resources/secured/encrypted-test123.pdf new file mode 100644 index 00000000..2750d634 Binary files /dev/null and b/modules/files/src/test/resources/secured/encrypted-test123.pdf differ diff --git a/modules/files/src/test/resources/secured/protected-test123.pdf b/modules/files/src/test/resources/secured/protected-test123.pdf new file mode 100644 index 00000000..6261e2e8 Binary files /dev/null and b/modules/files/src/test/resources/secured/protected-test123.pdf differ diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index c6ad2cdd..4313771a 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -586,6 +586,25 @@ Docpell Update Check } working-dir = ${java.io.tmpdir}"/docspell-convert" } + + # Allows trying to decrypt a PDF with encryption or protection. If + # enabled, a PDF's encryption or protection will be removed during + # conversion. + # + # For encrypted PDFs, this is necessary for processing, because + # docspell needs to read them. It also requires specifying a + # password here. All passwords are tried when reading a PDF. + # + # This is enabled by default with an empty password list. This + # removes protection from PDFs, which is better for processing. + # + # Passwords can be given here and each collective can maintain + # their passwords as well. But if the `enabled` setting below is + # `false`, then no attempt at decrypting is done. + decrypt-pdf = { + enabled = true + passwords = [] + } } # The same section is also present in the rest-server config.
It is diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 44e2613f..0108ef98 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -77,17 +77,27 @@ object ConvertPdf { ctx: Context[F, ProcessItemArgs], item: ItemData )(ra: RAttachment, mime: MimeType): F[(RAttachment, Option[RAttachmentMeta])] = - Conversion.create[F](cfg, sanitizeHtml, ctx.logger).use { conv => - mime match { - case mt => - val data = ctx.store.fileStore.getBytes(ra.fileId) - val handler = conversionHandler[F](ctx, cfg, ra, item) - ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> - conv.toPDF(DataType(mt), ctx.args.meta.language, handler)( - data - ) + loadCollectivePasswords(ctx).flatMap(collPass => + Conversion.create[F](cfg, sanitizeHtml, collPass, ctx.logger).use { conv => + mime match { + case mt => + val data = ctx.store.fileStore.getBytes(ra.fileId) + val handler = conversionHandler[F](ctx, cfg, ra, item) + ctx.logger + .info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *> + conv.toPDF(DataType(mt), ctx.args.meta.language, handler)( + data + ) + } } - } + ) + + private def loadCollectivePasswords[F[_]: Async]( + ctx: Context[F, ProcessItemArgs] + ): F[List[Password]] = + ctx.store + .transact(RCollectivePassword.findAll(ctx.args.meta.collective)) + .map(_.map(_.password).distinct) private def conversionHandler[F[_]: Sync]( ctx: Context[F, ProcessItemArgs], diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 4c6b31ee..c5e95d29 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -5635,6 +5635,7 @@ components: - integrationEnabled - classifier - emptyTrash + - passwords properties: 
language: type: string @@ -5648,6 +5649,11 @@ components: $ref: "#/components/schemas/ClassifierSetting" emptyTrash: $ref: "#/components/schemas/EmptyTrashSetting" + passwords: + type: array + items: + type: string + format: password EmptyTrashSetting: description: | diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala index 75690d64..f1cd03d8 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala @@ -12,8 +12,7 @@ import cats.implicits._ import docspell.backend.BackendApp import docspell.backend.auth.AuthToken import docspell.backend.ops.OCollective -import docspell.common.EmptyTrashArgs -import docspell.common.ListType +import docspell.common._ import docspell.restapi.model._ import docspell.restserver.conv.Conversions import docspell.restserver.http4s._ @@ -62,7 +61,8 @@ object CollectiveRoutes { settings.emptyTrash.schedule, settings.emptyTrash.minAge ) - ) + ), + settings.passwords.map(Password.apply) ) res <- backend.collective @@ -89,7 +89,8 @@ object CollectiveRoutes { EmptyTrashSetting( trash.schedule, trash.minAge - ) + ), + settDb.map(_.passwords).getOrElse(Nil).map(_.pass) ) ) resp <- sett.toResponse() diff --git a/modules/store/src/main/resources/db/migration/h2/V1.27.0__collective_passwords.sql b/modules/store/src/main/resources/db/migration/h2/V1.27.0__collective_passwords.sql new file mode 100644 index 00000000..223fa8d5 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/h2/V1.27.0__collective_passwords.sql @@ -0,0 +1,7 @@ +CREATE TABLE "collective_password" ( + "id" varchar(254) not null primary key, + "cid" varchar(254) not null, + "pass" varchar(254) not null, + "created" timestamp not null, + foreign key ("cid") references "collective"("cid") on delete cascade +) diff --git 
a/modules/store/src/main/resources/db/migration/mariadb/V1.27.0__collective_passwords.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.27.0__collective_passwords.sql new file mode 100644 index 00000000..2224e560 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.27.0__collective_passwords.sql @@ -0,0 +1,7 @@ +CREATE TABLE `collective_password` ( + `id` varchar(254) not null primary key, + `cid` varchar(254) not null, + `pass` varchar(254) not null, + `created` timestamp not null, + foreign key (`cid`) references `collective`(`cid`) on delete cascade +) diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.27.0__collective_passwords.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.27.0__collective_passwords.sql new file mode 100644 index 00000000..223fa8d5 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.27.0__collective_passwords.sql @@ -0,0 +1,7 @@ +CREATE TABLE "collective_password" ( + "id" varchar(254) not null primary key, + "cid" varchar(254) not null, + "pass" varchar(254) not null, + "created" timestamp not null, + foreign key ("cid") references "collective"("cid") on delete cascade +) diff --git a/modules/store/src/main/scala/docspell/store/records/RCollective.scala b/modules/store/src/main/scala/docspell/store/records/RCollective.scala index dd0afce2..906277c6 100644 --- a/modules/store/src/main/scala/docspell/store/records/RCollective.scala +++ b/modules/store/src/main/scala/docspell/store/records/RCollective.scala @@ -89,7 +89,8 @@ object RCollective { case None => REmptyTrashSetting.delete(cid) } - } yield n1 + n2 + n3 + n4 <- RCollectivePassword.replaceAll(cid, settings.passwords) + } yield n1 + n2 + n3 + n4 // this hides categories that have been deleted in the meantime // they are finally removed from the json array once the learn classifier task is run @@ -99,10 +100,12 @@ object RCollective { prev <- 
OptionT.fromOption[ConnectionIO](sett.classifier) cats <- OptionT.liftF(RTag.listCategories(coll)) next = prev.copy(categories = prev.categories.intersect(cats)) - } yield sett.copy(classifier = Some(next))).value + pws <- OptionT.liftF(RCollectivePassword.findAll(coll)) + } yield sett.copy(classifier = Some(next), passwords = pws.map(_.password))).value private def getRawSettings(coll: Ident): ConnectionIO[Option[Settings]] = { import RClassifierSetting.stringListMeta + val c = RCollective.as("c") val cs = RClassifierSetting.as("cs") val es = REmptyTrashSetting.as("es") @@ -116,7 +119,8 @@ object RCollective { cs.categories.s, cs.listType.s, es.schedule.s, - es.minAge.s + es.minAge.s, + const(0) //dummy value to load Nil as list of passwords ), from(c).leftJoin(cs, cs.cid === c.id).leftJoin(es, es.cid === c.id), c.id === coll @@ -170,7 +174,11 @@ object RCollective { language: Language, integrationEnabled: Boolean, classifier: Option[RClassifierSetting.Classifier], - emptyTrash: Option[REmptyTrashSetting.EmptyTrash] + emptyTrash: Option[REmptyTrashSetting.EmptyTrash], + passwords: List[Password] ) + implicit val passwordListMeta: Read[List[Password]] = + Read[Int].map(_ => Nil: List[Password]) + } diff --git a/modules/store/src/main/scala/docspell/store/records/RCollectivePassword.scala b/modules/store/src/main/scala/docspell/store/records/RCollectivePassword.scala new file mode 100644 index 00000000..c7931d20 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/records/RCollectivePassword.scala @@ -0,0 +1,87 @@ +/* + * Copyright 2020 Eike K. 
& Contributors + * + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package docspell.store.records + +import cats.data.NonEmptyList +import cats.effect._ +import cats.implicits._ + +import docspell.common._ +import docspell.store.qb.DSL._ +import docspell.store.qb._ + +import doobie._ +import doobie.implicits._ + +final case class RCollectivePassword( + id: Ident, + cid: Ident, + password: Password, + created: Timestamp +) {} + +object RCollectivePassword { + final case class Table(alias: Option[String]) extends TableDef { + val tableName: String = "collective_password" + + val id = Column[Ident]("id", this) + val cid = Column[Ident]("cid", this) + val password = Column[Password]("pass", this) + val created = Column[Timestamp]("created", this) + + val all: NonEmptyList[Column[_]] = + NonEmptyList.of(id, cid, password, created) + } + + val T = Table(None) + def as(alias: String): Table = + Table(Some(alias)) + + def createNew[F[_]: Sync](cid: Ident, pw: Password): F[RCollectivePassword] = + for { + id <- Ident.randomId[F] + time <- Timestamp.current[F] + } yield RCollectivePassword(id, cid, pw, time) + + def insert(v: RCollectivePassword): ConnectionIO[Int] = + DML.insert( + T, + T.all, + fr"${v.id}, ${v.cid},${v.password},${v.created}" + ) + + def upsert(v: RCollectivePassword): ConnectionIO[Int] = + for { + k <- deleteByPassword(v.cid, v.password) + n <- insert(v) + } yield n + k + + def deleteById(id: Ident): ConnectionIO[Int] = + DML.delete(T, T.id === id) + + def deleteByPassword(cid: Ident, pw: Password): ConnectionIO[Int] = + DML.delete(T, T.password === pw && T.cid === cid) + + def findAll(cid: Ident): ConnectionIO[List[RCollectivePassword]] = + Select(select(T.all), from(T), T.cid === cid).build + .query[RCollectivePassword] + .to[List] + + def replaceAll(cid: Ident, pws: List[Password]): ConnectionIO[Int] = + for { + k <- DML.delete(T, T.cid === cid) + pw <- pws.traverse(p => createNew[ConnectionIO](cid, p)) + n <- + if (pws.isEmpty) 
0.pure[ConnectionIO] + else + DML.insertMany( + T, + T.all, + pw.map(p => fr"${p.id},${p.cid},${p.password},${p.created}") + ) + } yield k + n +} diff --git a/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm b/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm index fee15790..f7994b4a 100644 --- a/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm +++ b/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm @@ -22,6 +22,7 @@ import Comp.ClassifierSettingsForm import Comp.Dropdown import Comp.EmptyTrashForm import Comp.MenuBar as MB +import Comp.StringListInput import Data.DropdownStyle as DS import Data.Flags exposing (Flags) import Data.Language exposing (Language) @@ -30,6 +31,7 @@ import Html exposing (..) import Html.Attributes exposing (..) import Html.Events exposing (onCheck, onClick, onInput) import Http +import Markdown import Messages.Comp.CollectiveSettingsForm exposing (Texts) import Styles as S @@ -44,6 +46,8 @@ type alias Model = , startClassifierResult : ClassifierResult , emptyTrashModel : Comp.EmptyTrashForm.Model , startEmptyTrashResult : EmptyTrashResult + , passwordModel : Comp.StringListInput.Model + , passwords : List String } @@ -96,6 +100,8 @@ init flags settings = , startClassifierResult = ClassifierResultInitial , emptyTrashModel = em , startEmptyTrashResult = EmptyTrashResultInitial + , passwordModel = Comp.StringListInput.init + , passwords = settings.passwords } , Cmd.batch [ Cmd.map ClassifierSettingMsg cc, Cmd.map EmptyTrashMsg ec ] ) @@ -114,6 +120,7 @@ getSettings model = , integrationEnabled = model.intEnabled , classifier = cls , emptyTrash = trash + , passwords = model.passwords } ) (Comp.ClassifierSettingsForm.getSettings model.classifierModel) @@ -133,6 +140,7 @@ type Msg | StartEmptyTrashTask | StartClassifierResp (Result Http.Error BasicResult) | StartEmptyTrashResp (Result Http.Error BasicResult) + | PasswordMsg Comp.StringListInput.Msg update : Flags -> Msg -> Model -> ( Model, Cmd Msg, 
Maybe CollectiveSettings ) @@ -285,6 +293,27 @@ update flags msg model = , Nothing ) + PasswordMsg lm -> + let + ( pm, action ) = + Comp.StringListInput.update lm model.passwordModel + + pws = + case action of + Comp.StringListInput.AddAction pw -> + pw :: model.passwords + + Comp.StringListInput.RemoveAction pw -> + List.filter (\e -> e /= pw) model.passwords + + Comp.StringListInput.NoAction -> + model.passwords + in + ( { model | passwordModel = pm, passwords = pws } + , Cmd.none + , Nothing + ) + --- View2 @@ -460,6 +489,18 @@ view2 flags texts settings model = ] ] ] + , div [] + [ h2 [ class S.header2 ] + [ text texts.passwords + ] + , div [ class "mb-4" ] + [ div [ class "opacity-50 text-sm" ] + [ Markdown.toHtml [] texts.passwordsInfo + ] + , Html.map PasswordMsg + (Comp.StringListInput.view2 model.passwords model.passwordModel) + ] + ] ] diff --git a/modules/webapp/src/main/elm/Messages/Comp/CollectiveSettingsForm.elm b/modules/webapp/src/main/elm/Messages/Comp/CollectiveSettingsForm.elm index 79de30cc..ea6248f1 100644 --- a/modules/webapp/src/main/elm/Messages/Comp/CollectiveSettingsForm.elm +++ b/modules/webapp/src/main/elm/Messages/Comp/CollectiveSettingsForm.elm @@ -44,6 +44,8 @@ type alias Texts = , fulltextReindexSubmitted : String , fulltextReindexOkMissing : String , emptyTrash : String + , passwords : String + , passwordsInfo : String } @@ -77,6 +79,8 @@ gb = , fulltextReindexOkMissing = "Please type OK in the field if you really want to start re-indexing your data." , emptyTrash = "Empty Trash" + , passwords = "Passwords" + , passwordsInfo = "These passwords are used when encrypted PDFs are being processed. Please note, that they are stored in the database as **plain text**!" } @@ -110,4 +114,6 @@ de = , fulltextReindexOkMissing = "Bitte tippe OK in das Feld ein, wenn Du wirklich den Index neu erzeugen möchtest." 
, emptyTrash = "Papierkorb löschen" + , passwords = "Passwörter" + , passwordsInfo = "Diese Passwörter werden zum Lesen von verschlüsselten PDFs verwendet. Diese Passwörter werden in der Datenbank **in Klartext** gespeichert!" } diff --git a/website/site/content/docs/features/_index.md b/website/site/content/docs/features/_index.md index b5855066..03da2d7d 100644 --- a/website/site/content/docs/features/_index.md +++ b/website/site/content/docs/features/_index.md @@ -56,6 +56,7 @@ description = "A list of features and limitations." - Everything stored in a SQL database: PostgreSQL, MariaDB or H2 - H2 is embedded, a "one-file-only" database, avoids installing db servers +- Support for encrypted PDFs - Files supported: - Documents: - PDF