diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala index 74ba6374..091d9e16 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala @@ -37,14 +37,19 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift]( def classify( logger: Logger[F], model: ClassifierModel, - text: String + txt: String ): F[Option[String]] = - Sync[F].delay { - val cls = ColumnDataClassifier.getClassifier( - model.model.normalize().toAbsolutePath().toString() - ) - val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text))) - Option(cat) + Option(txt).map(_.trim).filter(_.nonEmpty) match { + case Some(text) => + Sync[F].delay { + val cls = ColumnDataClassifier.getClassifier( + model.model.normalize().toAbsolutePath().toString() + ) + val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text))) + Option(cat) + } + case None => + (None: Option[String]).pure[F] } // --- helpers diff --git a/modules/backend/src/main/scala/docspell/backend/JobFactory.scala b/modules/backend/src/main/scala/docspell/backend/JobFactory.scala index bc05a188..fdb0d860 100644 --- a/modules/backend/src/main/scala/docspell/backend/JobFactory.scala +++ b/modules/backend/src/main/scala/docspell/backend/JobFactory.scala @@ -8,6 +8,45 @@ import docspell.store.records.RJob object JobFactory { + def makePreview[F[_]: Sync]( + args: MakePreviewArgs, + account: Option[AccountId] + ): F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + job = RJob.newJob( + id, + MakePreviewArgs.taskName, + account.map(_.collective).getOrElse(DocspellSystem.taskGroup), + args, + s"Generate preview image", + now, + account.map(_.user).getOrElse(DocspellSystem.user), + Priority.Low, + Some(MakePreviewArgs.taskName / args.attachment) + 
) + } yield job + + def allPreviews[F[_]: Sync]( + args: AllPreviewsArgs, + submitter: Option[Ident] + ): F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + } yield RJob.newJob( + id, + AllPreviewsArgs.taskName, + args.collective.getOrElse(DocspellSystem.taskGroup), + args, + "Create preview images", + now, + submitter.getOrElse(DocspellSystem.taskGroup), + Priority.Low, + Some(DocspellSystem.allPreviewTaskTracker) + ) + def convertAllPdfs[F[_]: Sync]( collective: Option[Ident], account: AccountId, diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala b/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala index 5e9b5aaf..a4f06986 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OCollective.scala @@ -4,9 +4,11 @@ import cats.effect.{Effect, Resource} import cats.implicits._ import fs2.Stream +import docspell.backend.JobFactory import docspell.backend.PasswordCrypt import docspell.backend.ops.OCollective._ import docspell.common._ +import docspell.store.UpdateResult import docspell.store.queries.QCollective import docspell.store.queue.JobQueue import docspell.store.records._ @@ -51,6 +53,15 @@ trait OCollective[F[_]] { def findEnabledSource(sourceId: Ident): F[Option[RSource]] def startLearnClassifier(collective: Ident): F[Unit] + + /** Submits a task that (re)generates the preview images for all + * attachments of the given collective. 
+ */ + def generatePreviews( + storeMode: MakePreviewArgs.StoreMode, + account: AccountId, + notifyJoex: Boolean + ): F[UpdateResult] } object OCollective { @@ -210,5 +221,20 @@ object OCollective { def findEnabledSource(sourceId: Ident): F[Option[RSource]] = store.transact(RSource.findEnabled(sourceId)) + + def generatePreviews( + storeMode: MakePreviewArgs.StoreMode, + account: AccountId, + notifyJoex: Boolean + ): F[UpdateResult] = + for { + job <- JobFactory.allPreviews[F]( + AllPreviewsArgs(Some(account.collective), storeMode), + Some(account.user) + ) + _ <- queue.insertIfNew(job) + _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] + } yield UpdateResult.success + }) } diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala index 492d613a..13ee91c7 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala @@ -175,6 +175,15 @@ trait OItem[F[_]] { account: AccountId, notifyJoex: Boolean ): F[UpdateResult] + + /** Submits a task that (re)generates the preview image for an + * attachment. 
+ */ + def generatePreview( + args: MakePreviewArgs, + account: AccountId, + notifyJoex: Boolean + ): F[UpdateResult] } object OItem { @@ -656,6 +665,17 @@ object OItem { _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] } yield UpdateResult.success + def generatePreview( + args: MakePreviewArgs, + account: AccountId, + notifyJoex: Boolean + ): F[UpdateResult] = + for { + job <- JobFactory.makePreview[F](args, account.some) + _ <- queue.insertIfNew(job) + _ <- if (notifyJoex) joex.notifyAllNodes else ().pure[F] + } yield UpdateResult.success + private def onSuccessIgnoreError(update: F[Unit])(ar: UpdateResult): F[Unit] = ar match { case UpdateResult.Success => diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala index 44fe2e71..6a5cb49b 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala @@ -36,6 +36,13 @@ trait OItemSearch[F[_]] { collective: Ident ): F[Option[AttachmentArchiveData[F]]] + def findAttachmentPreview( + id: Ident, + collective: Ident + ): F[Option[AttachmentPreviewData[F]]] + + def findItemPreview(item: Ident, collective: Ident): F[Option[AttachmentPreviewData[F]]] + def findAttachmentMeta(id: Ident, collective: Ident): F[Option[RAttachmentMeta]] def findByFileCollective(checksum: String, collective: Ident): F[Vector[RItem]] @@ -82,6 +89,15 @@ object OItemSearch { val fileId = rs.fileId } + case class AttachmentPreviewData[F[_]]( + rs: RAttachmentPreview, + meta: FileMeta, + data: Stream[F, Byte] + ) extends BinaryData[F] { + val name = rs.name + val fileId = rs.fileId + } + case class AttachmentArchiveData[F[_]]( rs: RAttachmentArchive, meta: FileMeta, @@ -154,6 +170,46 @@ object OItemSearch { (None: Option[AttachmentSourceData[F]]).pure[F] }) + def findAttachmentPreview( + id: Ident, + collective: Ident + ): 
F[Option[AttachmentPreviewData[F]]] = + store + .transact(RAttachmentPreview.findByIdAndCollective(id, collective)) + .flatMap({ + case Some(ra) => + makeBinaryData(ra.fileId) { m => + AttachmentPreviewData[F]( + ra, + m, + store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + ) + } + + case None => + (None: Option[AttachmentPreviewData[F]]).pure[F] + }) + + def findItemPreview( + item: Ident, + collective: Ident + ): F[Option[AttachmentPreviewData[F]]] = + store + .transact(RAttachmentPreview.findByItemAndCollective(item, collective)) + .flatMap({ + case Some(ra) => + makeBinaryData(ra.fileId) { m => + AttachmentPreviewData[F]( + ra, + m, + store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m)) + ) + } + + case None => + (None: Option[AttachmentPreviewData[F]]).pure[F] + }) + def findAttachmentArchive( id: Ident, collective: Ident diff --git a/modules/common/src/main/scala/docspell/common/AllPreviewsArgs.scala b/modules/common/src/main/scala/docspell/common/AllPreviewsArgs.scala new file mode 100644 index 00000000..b4ee054f --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/AllPreviewsArgs.scala @@ -0,0 +1,26 @@ +package docspell.common + +import io.circe.generic.semiauto._ +import io.circe.{Decoder, Encoder} + +/** Arguments for the `AllPreviewsTask` that submits tasks to + * generates a preview image for attachments. + * + * It can replace the current preview image or only generate one, if + * it is missing. If no collective is specified, it considers all + * attachments. 
+ */ +case class AllPreviewsArgs( + collective: Option[Ident], + storeMode: MakePreviewArgs.StoreMode +) + +object AllPreviewsArgs { + + val taskName = Ident.unsafe("all-previews") + + implicit val jsonEncoder: Encoder[AllPreviewsArgs] = + deriveEncoder[AllPreviewsArgs] + implicit val jsonDecoder: Decoder[AllPreviewsArgs] = + deriveDecoder[AllPreviewsArgs] +} diff --git a/modules/common/src/main/scala/docspell/common/DocspellSystem.scala b/modules/common/src/main/scala/docspell/common/DocspellSystem.scala index 52cbb717..ad410281 100644 --- a/modules/common/src/main/scala/docspell/common/DocspellSystem.scala +++ b/modules/common/src/main/scala/docspell/common/DocspellSystem.scala @@ -2,8 +2,8 @@ package docspell.common object DocspellSystem { - val user = Ident.unsafe("docspell-system") - val taskGroup = user - val migrationTaskTracker = Ident.unsafe("full-text-index-tracker") - + val user = Ident.unsafe("docspell-system") + val taskGroup = user + val migrationTaskTracker = Ident.unsafe("full-text-index-tracker") + val allPreviewTaskTracker = Ident.unsafe("generate-all-previews") } diff --git a/modules/common/src/main/scala/docspell/common/FileName.scala b/modules/common/src/main/scala/docspell/common/FileName.scala new file mode 100644 index 00000000..1bc9184c --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/FileName.scala @@ -0,0 +1,48 @@ +package docspell.common + +case class FileName private (name: String) { + + private[this] val (base, ext) = + name.lastIndexOf('.') match { + case -1 => (name, None) + case n => (name.take(n), Some(name.drop(n + 1))) + } + + /** Returns the name part without the extension. If there is no + * extension, it is the same as fullname. + */ + def baseName: String = + base + + /** Returns the extension part if available without the dot. 
*/ + def extension: Option[String] = + ext + + def fullName: String = + name + + /** Creates a new name where part is spliced into the name before the + * extension, separated by separator. + */ + def withPart(part: String, sep: Char): FileName = + if (part.isEmpty()) this + else + ext + .map(e => new FileName(s"${base}${sep}${part}.${e}")) + .getOrElse(new FileName(s"${base}${sep}${part}")) + + /** Create a new name using the given extension. */ + def withExtension(newExt: String): FileName = + if (newExt.isEmpty()) new FileName(base) + else new FileName(s"${base}.${newExt}") + +} +object FileName { + + def apply(name: String): FileName = + Option(name) + .map(_.trim) + .filter(_.nonEmpty) + .map(n => new FileName(n)) + .getOrElse(new FileName("unknown-file")) +} diff --git a/modules/common/src/main/scala/docspell/common/MakePreviewArgs.scala b/modules/common/src/main/scala/docspell/common/MakePreviewArgs.scala new file mode 100644 index 00000000..ebe94107 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/MakePreviewArgs.scala @@ -0,0 +1,59 @@ +package docspell.common + +import io.circe.generic.semiauto._ +import io.circe.{Decoder, Encoder} + +/** Arguments for the `MakePreviewTask` that generates a preview image + * for an attachment. + * + * It can replace the current preview image or only generate one, if + * it is missing. + */ +case class MakePreviewArgs( + attachment: Ident, + store: MakePreviewArgs.StoreMode +) + +object MakePreviewArgs { + + val taskName = Ident.unsafe("make-preview") + + def replace(attach: Ident): MakePreviewArgs = + MakePreviewArgs(attach, StoreMode.Replace) + + def whenMissing(attach: Ident): MakePreviewArgs = + MakePreviewArgs(attach, StoreMode.WhenMissing) + + sealed trait StoreMode extends Product { + final def name: String = + productPrefix.toLowerCase() + } + object StoreMode { + + /** Replace any preview file that may already exist. 
*/ + case object Replace extends StoreMode + + /** Only create a preview image, if it is missing. */ + case object WhenMissing extends StoreMode + + def fromString(str: String): Either[String, StoreMode] = + Option(str).map(_.trim.toLowerCase()) match { + case Some("replace") => Right(Replace) + case Some("whenmissing") => Right(WhenMissing) + case _ => Left(s"Invalid store mode: $str") + } + + implicit val jsonEncoder: Encoder[StoreMode] = + Encoder.encodeString.contramap(_.name) + + implicit val jsonDecoder: Decoder[StoreMode] = + Decoder.decodeString.emap(fromString) + } + + implicit val jsonEncoder: Encoder[MakePreviewArgs] = + deriveEncoder[MakePreviewArgs] + + implicit val jsonDecoder: Decoder[MakePreviewArgs] = + deriveDecoder[MakePreviewArgs] + +} diff --git a/modules/common/src/test/scala/docspell/common/FileNameTest.scala b/modules/common/src/test/scala/docspell/common/FileNameTest.scala new file mode 100644 index 00000000..8b2778d7 --- /dev/null +++ b/modules/common/src/test/scala/docspell/common/FileNameTest.scala @@ -0,0 +1,58 @@ +package docspell.common + +import minitest._ + +object FileNameTest extends SimpleTestSuite { + + test("make filename") { + val data = List( + (FileName("test"), "test", None), + (FileName("test.pdf"), "test", Some("pdf")), + (FileName("bla.xml.gz"), "bla.xml", Some("gz")), + (FileName(""), "unknown-file", None) + ) + + data.foreach { case (fn, base, ext) => + assertEquals(fn.baseName, base) + assertEquals(fn.extension, ext) + } + } + + test("with part") { + assertEquals( + FileName("test.pdf").withPart("converted", '_'), + FileName("test_converted.pdf") + ) + assertEquals( + FileName("bla.xml.gz").withPart("converted", '_'), + FileName("bla.xml_converted.gz") + ) + assertEquals( + FileName("test").withPart("converted", '_'), + FileName("test_converted") + ) + assertEquals( + FileName("test").withPart("", '_'), + FileName("test") + ) + } + + test("with extension") { + assertEquals( + FileName("test.pdf").withExtension("xml"), 
+ FileName("test.xml") + ) + assertEquals( + FileName("test").withExtension("xml"), + FileName("test.xml") + ) + assertEquals( + FileName("test.pdf.gz").withExtension("xml"), + FileName("test.pdf.xml") + ) + assertEquals( + FileName("test.pdf.gz").withExtension(""), + FileName("test.pdf") + ) + } +} diff --git a/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala b/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala index f51791c0..8013f3bb 100644 --- a/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala +++ b/modules/convert/src/main/scala/docspell/convert/ConvertConfig.scala @@ -6,6 +6,7 @@ import docspell.convert.flexmark.MarkdownConfig case class ConvertConfig( chunkSize: Int, + convertedFilenamePart: String, maxImageSize: Int, markdown: MarkdownConfig, wkhtmlpdf: WkHtmlPdfConfig, diff --git a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala index 31dadd88..4d7e80ed 100644 --- a/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala +++ b/modules/convert/src/test/scala/docspell/convert/ConversionTest.scala @@ -23,6 +23,7 @@ object ConversionTest extends SimpleTestSuite with FileChecks { val convertConfig = ConvertConfig( 8192, + "converted", 3000 * 3000, MarkdownConfig("body { padding: 2em 5em; }"), WkHtmlPdfConfig( diff --git a/modules/extract/src/main/scala/docspell/extract/ExtractConfig.scala b/modules/extract/src/main/scala/docspell/extract/ExtractConfig.scala index b4951686..e283b720 100644 --- a/modules/extract/src/main/scala/docspell/extract/ExtractConfig.scala +++ b/modules/extract/src/main/scala/docspell/extract/ExtractConfig.scala @@ -1,5 +1,6 @@ package docspell.extract import docspell.extract.ocr.OcrConfig +import docspell.extract.pdfbox.PreviewConfig -case class ExtractConfig(ocr: OcrConfig, pdf: PdfConfig) +case class ExtractConfig(ocr: OcrConfig, pdf: PdfConfig, preview: PreviewConfig) diff --git 
a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfLoader.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfLoader.scala new file mode 100644 index 00000000..47e04543 --- /dev/null +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfLoader.scala @@ -0,0 +1,24 @@ +package docspell.extract.pdfbox + +import cats.effect._ +import cats.implicits._ +import fs2.Stream + +import org.apache.pdfbox.pdmodel.PDDocument + +object PdfLoader { + + private def readBytes1[F[_]: Sync](bytes: Array[Byte]): F[PDDocument] = + Sync[F].delay(PDDocument.load(bytes)) + + private def closePDDocument[F[_]: Sync](pd: PDDocument): F[Unit] = + Sync[F].delay(pd.close()) + + def withDocumentBytes[F[_]: Sync, A](pdf: Array[Byte])(f: PDDocument => F[A]): F[A] = + Sync[F].bracket(readBytes1(pdf))(f)(pd => closePDDocument(pd)) + + def withDocumentStream[F[_]: Sync, A](pdf: Stream[F, Byte])( + f: PDDocument => F[A] + ): F[A] = + pdf.compile.to(Array).flatMap(bytes => withDocumentBytes(bytes)(f)) +} diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxPreview.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxPreview.scala new file mode 100644 index 00000000..226c6e82 --- /dev/null +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxPreview.scala @@ -0,0 +1,56 @@ +package docspell.extract.pdfbox + +import java.awt.image.BufferedImage +import java.awt.image.RenderedImage +import javax.imageio.ImageIO + +import cats.effect._ +import cats.implicits._ +import fs2.Chunk +import fs2.Stream + +import org.apache.commons.io.output.ByteArrayOutputStream +import org.apache.pdfbox.pdmodel.PDDocument +import org.apache.pdfbox.rendering.PDFRenderer + +trait PdfboxPreview[F[_]] { + + def previewPNG(pdf: Stream[F, Byte]): F[Option[Stream[F, Byte]]] + +} + +object PdfboxPreview { + + def apply[F[_]: Sync](cfg: PreviewConfig): F[PdfboxPreview[F]] = + Sync[F].pure(new PdfboxPreview[F] { + + def previewImage(pdf: Stream[F, Byte]): 
F[Option[BufferedImage]] = + PdfLoader.withDocumentStream(pdf)(doc => + Sync[F].delay(getPageImage(doc, 0, cfg.dpi)) + ) + + def previewPNG(pdf: Stream[F, Byte]): F[Option[Stream[F, Byte]]] = + previewImage(pdf).map(_.map(pngStream[F])) + + }) + + private def getPageImage( + pdoc: PDDocument, + page: Int, + dpi: Float + ): Option[BufferedImage] = { + val count = pdoc.getNumberOfPages + if (count <= 0 || page < 0 || count <= page) None + else { + val renderer = new PDFRenderer(pdoc) + Option(renderer.renderImageWithDPI(page, dpi)) + } + } + + private def pngStream[F[_]](img: RenderedImage): Stream[F, Byte] = { + val out = new ByteArrayOutputStream() + ImageIO.write(img, "PNG", out) + Stream.chunk(Chunk.bytes(out.toByteArray())) + } + +} diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PreviewConfig.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PreviewConfig.scala new file mode 100644 index 00000000..db3bc56b --- /dev/null +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PreviewConfig.scala @@ -0,0 +1,3 @@ +package docspell.extract.pdfbox + +case class PreviewConfig(dpi: Float) diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala new file mode 100644 index 00000000..c07c4c64 --- /dev/null +++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxPreviewTest.scala @@ -0,0 +1,46 @@ +package docspell.extract.pdfbox + +import cats.effect._ +import docspell.files.{ExampleFiles, TestFiles} +import minitest.SimpleTestSuite +import java.nio.file.Path +import fs2.Stream + +object PdfboxPreviewTest extends SimpleTestSuite { + val blocker = TestFiles.blocker + implicit val CS = TestFiles.CS + + val testPDFs = List( + ExampleFiles.letter_de_pdf -> "83bdb379fe9ce86e830adfbe11238808bed9da6e31c1b66687d70b6b59a0d815", + ExampleFiles.letter_en_pdf -> 
"699655a162c0c21dd9f19d8638f4e03811c6626a52bb30a1ac733d7fa5638932", + ExampleFiles.scanner_pdf13_pdf -> "a1680b80b42d8e04365ffd1e806ea2a8adb0492104cc41d8b40435b0fe4d4e65" + ) + + test("extract first page image from PDFs") { + testPDFs.foreach { case (file, checksum) => + val data = file.readURL[IO](8192, blocker) + val sha256out = + Stream + .eval(PdfboxPreview[IO](PreviewConfig(48))) + .evalMap(_.previewPNG(data)) + .flatMap(_.get) + .through(fs2.hash.sha256) + .chunks + .map(_.toByteVector) + .fold1(_ ++ _) + .compile + .lastOrError + .map(_.toHex.toLowerCase) + + assertEquals(sha256out.unsafeRunSync(), checksum) + } + } + + def writeToFile(data: Stream[IO, Byte], file: Path): IO[Unit] = + data + .through( + fs2.io.file.writeAll(file, blocker) + ) + .compile + .drain +} diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index 23ec5b47..f8deb8e7 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -172,6 +172,18 @@ docspell.joex { min-text-len = 500 } + preview { + # When rendering a pdf page, use this dpi. This results in + # scaling the image. A standard A4 page rendered at 96dpi + # results in roughly 790x1100px image. Using 32 results in + # roughly 200x300px image. + # + # Note, when this is changed, you might want to re-generate + # preview images. Check the api for this, there is an endpoint + # to regenerate all for a collective. + dpi = 32 + } + # Extracting text using OCR works for image and pdf files. It will # first run ghostscript to create a gray image from a pdf. Then # unpaper is run to optimize the image for the upcoming ocr, which @@ -328,6 +340,11 @@ docspell.joex { # as used with the rest server. chunk-size = 524288 + # A string used to change the filename of the converted pdf file. + # If empty, the original file name is used for the pdf file ( the + # extension is always replaced with `pdf`). 
+ converted-filename-part = "converted" + # When reading images, this is the maximum size. Images that are # larger are not processed. max-image-size = ${docspell.joex.extraction.ocr.max-image-size} diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index 7c3f57fc..2b9b96c5 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -18,6 +18,7 @@ import docspell.joex.learn.LearnClassifierTask import docspell.joex.notify._ import docspell.joex.pdfconv.ConvertAllPdfTask import docspell.joex.pdfconv.PdfConvTask +import docspell.joex.preview._ import docspell.joex.process.ItemHandler import docspell.joex.process.ReProcessItem import docspell.joex.scanmailbox._ @@ -68,7 +69,10 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer]( HouseKeepingTask .periodicTask[F](cfg.houseKeeping.schedule) .flatMap(pstore.insert) *> - MigrationTask.job.flatMap(queue.insertIfNew) + MigrationTask.job.flatMap(queue.insertIfNew) *> + AllPreviewsTask + .job(MakePreviewArgs.StoreMode.WhenMissing, None) + .flatMap(queue.insertIfNew) } object JoexAppImpl { @@ -167,6 +171,20 @@ object JoexAppImpl { LearnClassifierTask.onCancel[F] ) ) + .withTask( + JobTask.json( + MakePreviewArgs.taskName, + MakePreviewTask[F](cfg.convert, cfg.extraction.preview), + MakePreviewTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + AllPreviewsArgs.taskName, + AllPreviewsTask[F](queue, joex), + AllPreviewsTask.onCancel[F] + ) + ) .resource psch <- PeriodicScheduler.create( cfg.periodicScheduler, diff --git a/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala b/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala new file mode 100644 index 00000000..70d87fdb --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/preview/AllPreviewsTask.scala @@ -0,0 +1,84 @@ +package 
docspell.joex.preview + +import cats.effect._ +import cats.implicits._ +import fs2.{Chunk, Stream} + +import docspell.backend.JobFactory +import docspell.backend.ops.OJoex +import docspell.common.MakePreviewArgs.StoreMode +import docspell.common._ +import docspell.joex.scheduler.Context +import docspell.joex.scheduler.Task +import docspell.store.queue.JobQueue +import docspell.store.records.RAttachment +import docspell.store.records.RJob + +object AllPreviewsTask { + + type Args = AllPreviewsArgs + + def apply[F[_]: Sync](queue: JobQueue[F], joex: OJoex[F]): Task[F, Args, Unit] = + Task { ctx => + for { + _ <- ctx.logger.info("Generating previews for attachments") + n <- submitConversionJobs(ctx, queue) + _ <- ctx.logger.info(s"Submitted $n jobs") + _ <- joex.notifyAllNodes + } yield () + } + + def onCancel[F[_]: Sync]: Task[F, Args, Unit] = + Task.log(_.warn("Cancelling all-previews task")) + + def submitConversionJobs[F[_]: Sync]( + ctx: Context[F, Args], + queue: JobQueue[F] + ): F[Int] = + ctx.store + .transact(findAttachments(ctx)) + .chunks + .flatMap(createJobs[F](ctx)) + .chunks + .evalMap(jobs => queue.insertAllIfNew(jobs.toVector).map(_ => jobs.size)) + .evalTap(n => ctx.logger.debug(s"Submitted $n jobs …")) + .compile + .foldMonoid + + private def findAttachments[F[_]](ctx: Context[F, Args]) = + ctx.args.storeMode match { + case StoreMode.Replace => + RAttachment.findAll(ctx.args.collective, 50) + case StoreMode.WhenMissing => + RAttachment.findWithoutPreview(ctx.args.collective, 50) + } + + private def createJobs[F[_]: Sync]( + ctx: Context[F, Args] + )(ras: Chunk[RAttachment]): Stream[F, RJob] = { + val collectiveOrSystem = ctx.args.collective.getOrElse(DocspellSystem.taskGroup) + + def mkJob(ra: RAttachment): F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + } yield RJob.newJob( + id, + MakePreviewArgs.taskName, + collectiveOrSystem, + MakePreviewArgs(ra.id, ctx.args.storeMode), + s"Create preview 
${ra.id.id}/${ra.name.getOrElse("-")}", + now, + collectiveOrSystem, + Priority.Low, + Some(MakePreviewArgs.taskName / ra.id) + ) + + val jobs = ras.traverse(mkJob) + Stream.evalUnChunk(jobs) + } + + def job[F[_]: Sync](storeMode: MakePreviewArgs.StoreMode, cid: Option[Ident]): F[RJob] = + JobFactory.allPreviews(AllPreviewsArgs(cid, storeMode), None) + +} diff --git a/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala b/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala new file mode 100644 index 00000000..ba9671f5 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/preview/MakePreviewTask.scala @@ -0,0 +1,61 @@ +package docspell.joex.preview + +import cats.effect._ +import cats.implicits._ + +import docspell.common._ +import docspell.convert.ConvertConfig +import docspell.extract.pdfbox.PdfboxPreview +import docspell.extract.pdfbox.PreviewConfig +import docspell.joex.process.AttachmentPreview +import docspell.joex.scheduler.Context +import docspell.joex.scheduler.Task +import docspell.store.records.RAttachment +import docspell.store.records.RAttachmentPreview + +object MakePreviewTask { + + type Args = MakePreviewArgs + + def apply[F[_]: Sync](cfg: ConvertConfig, pcfg: PreviewConfig): Task[F, Args, Unit] = + Task { ctx => + for { + exists <- previewExists(ctx) + preview <- PdfboxPreview(pcfg) + _ <- + if (exists) + ctx.logger.info( + s"Preview already exists for attachment ${ctx.args.attachment}. Skipping." 
+ ) + else + ctx.logger.info( + s"Generating preview image for attachment ${ctx.args.attachment}" + ) *> generatePreview(ctx, preview, cfg) + } yield () + } + + def onCancel[F[_]: Sync]: Task[F, Args, Unit] = + Task.log(_.warn("Cancelling make-preview task")) + + private def generatePreview[F[_]: Sync]( + ctx: Context[F, Args], + preview: PdfboxPreview[F], + cfg: ConvertConfig + ): F[Unit] = + for { + ra <- ctx.store.transact(RAttachment.findById(ctx.args.attachment)) + _ <- ra + .map(AttachmentPreview.createPreview(ctx, preview, cfg.chunkSize)) + .getOrElse( + ctx.logger.warn(s"No attachment found with id: ${ctx.args.attachment}") + ) + } yield () + + private def previewExists[F[_]: Sync](ctx: Context[F, Args]): F[Boolean] = + if (ctx.args.store == MakePreviewArgs.StoreMode.WhenMissing) + ctx.store.transact( + RAttachmentPreview.findById(ctx.args.attachment).map(_.isDefined) + ) + else + false.pure[F] +} diff --git a/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala new file mode 100644 index 00000000..e42e67ab --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/process/AttachmentPreview.scala @@ -0,0 +1,106 @@ +package docspell.joex.process + +import cats.Functor +import cats.data.OptionT +import cats.effect._ +import cats.implicits._ +import fs2.Stream + +import docspell.common._ +import docspell.convert._ +import docspell.extract.pdfbox.PdfboxPreview +import docspell.extract.pdfbox.PreviewConfig +import docspell.joex.scheduler._ +import docspell.store.queries.QAttachment +import docspell.store.records.RAttachment +import docspell.store.records._ +import docspell.store.syntax.MimeTypes._ + +import bitpeace.{Mimetype, MimetypeHint, RangeDef} + +/** Goes through all attachments that must be already converted into a + * pdf. If it is a pdf, the first page is converted into a small + * preview png image and linked to the attachment. 
+ */ +object AttachmentPreview { + + def apply[F[_]: Sync: ContextShift](cfg: ConvertConfig, pcfg: PreviewConfig)( + item: ItemData + ): Task[F, ProcessItemArgs, ItemData] = + Task { ctx => + for { + _ <- ctx.logger.info( + s"Creating preview images for ${item.attachments.size} files…" + ) + preview <- PdfboxPreview(pcfg) + _ <- item.attachments + .traverse(createPreview(ctx, preview, cfg.chunkSize)) + .attempt + .flatMap { + case Right(_) => ().pure[F] + case Left(ex) => + ctx.logger.error(ex)( + s"Creating preview images failed, continuing without it." + ) + } + } yield item + } + + def createPreview[F[_]: Sync]( + ctx: Context[F, _], + preview: PdfboxPreview[F], + chunkSize: Int + )( + ra: RAttachment + ): F[Option[RAttachmentPreview]] = + findMime[F](ctx)(ra).flatMap { + case MimeType.PdfMatch(_) => + preview.previewPNG(loadFile(ctx)(ra)).flatMap { + case Some(out) => + createRecord(ctx, out, ra, chunkSize).map(_.some) + case None => + (None: Option[RAttachmentPreview]).pure[F] + } + + case _ => + (None: Option[RAttachmentPreview]).pure[F] + } + + private def createRecord[F[_]: Sync]( + ctx: Context[F, _], + png: Stream[F, Byte], + ra: RAttachment, + chunkSize: Int + ): F[RAttachmentPreview] = { + val name = ra.name + .map(FileName.apply) + .map(_.withPart("preview", '_').withExtension("png")) + for { + fileMeta <- ctx.store.bitpeace + .saveNew( + png, + chunkSize, + MimetypeHint(name.map(_.fullName), Some("image/png")) + ) + .compile + .lastOrError + now <- Timestamp.current[F] + rp = RAttachmentPreview(ra.id, Ident.unsafe(fileMeta.id), name.map(_.fullName), now) + _ <- QAttachment.deletePreview(ctx.store)(ra.id) + _ <- ctx.store.transact(RAttachmentPreview.insert(rp)) + } yield rp + } + + def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[MimeType] = + OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) + .map(_.mimetype) + .getOrElse(Mimetype.`application/octet-stream`) + .map(_.toLocal) + + def loadFile[F[_]](ctx: Context[F, _])(ra: 
RAttachment): Stream[F, Byte] = + ctx.store.bitpeace + .get(ra.fileId.id) + .unNoneTerminate + .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) + +} diff --git a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala index 17cca3e0..65ff0dda 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ConvertPdf.scala @@ -135,7 +135,11 @@ object ConvertPdf { ) = { val hint = MimeTypeHint.advertised(MimeType.pdf).withName(ra.name.getOrElse("file.pdf")) - val newName = ra.name.map(n => s"$n.pdf") + val newName = + ra.name + .map(FileName.apply) + .map(_.withExtension("pdf").withPart(cfg.convertedFilenamePart, '.')) + .map(_.fullName) ctx.store.bitpeace .saveNew(pdf, cfg.chunkSize, MimetypeHint(hint.filename, hint.advertised)) .compile diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index fb777b24..8caf25fb 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -54,6 +54,7 @@ object ProcessItem { ConvertPdf(cfg.convert, item) .flatMap(Task.setProgress(progress._1)) .flatMap(TextExtraction(cfg.extraction, fts)) + .flatMap(AttachmentPreview(cfg.convert, cfg.extraction.preview)) .flatMap(Task.setProgress(progress._2)) .flatMap(analysisOnly[F](cfg, analyser, regexNer)) .flatMap(Task.setProgress(progress._3)) diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index cc929c4b..3ef90ff3 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -1069,6 +1069,29 @@ paths: schema: $ref: "#/components/schemas/BasicResult" + /sec/collective/previews: + post: + tags: [ 
Collective ] + summary: Starts the generate previews task + description: | + Submits a task that re-generates preview images of all + attachments of the current collective. Each existing preview + image will be replaced. + + This can be used after changing the `preview` settings. + + If only preview images of selected attachments should be + regenerated, see the `/sec/attachment/{id}/preview` endpoint. + security: + - authTokenHeader: [] + responses: + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/BasicResult" + /sec/user: get: tags: [ Collective ] @@ -1847,6 +1870,47 @@ paths: application/json: schema: $ref: "#/components/schemas/ItemProposals" + /sec/item/{id}/preview: + head: + tags: [ Attachment ] + summary: Get a preview image of an attachment file. + description: | + Checks if an image file showing a preview of the item is + available. If not available, a 404 is returned. The preview is + currently an image of the first page of the first + attachment. + security: + - authTokenHeader: [] + parameters: + - $ref: "#/components/parameters/id" + responses: + 200: + description: Ok + 404: + description: NotFound + get: + tags: [ Attachment ] + summary: Get a preview image of an attachment file. + description: | + Gets an image file showing a preview of the item. Usually it is + a small image of the first page of the first attachment. If + not available, a 404 is returned. However, if the query + parameter `withFallback` is `true`, a fallback preview image + is returned. You can also use the `HEAD` method to check for + existence. 
+ security: + - authTokenHeader: [] + parameters: + - $ref: "#/components/parameters/id" + - $ref: "#/components/parameters/withFallback" + responses: + 200: + description: Ok + content: + application/octet-stream: + schema: + type: string + format: binary /sec/item/{itemId}/reprocess: post: @@ -2446,6 +2510,63 @@ paths: schema: type: string format: binary + /sec/attachment/{id}/preview: + head: + tags: [ Attachment ] + summary: Get a preview image of an attachment file. + description: | + Checks if an image file showing a preview of the attachment is + available. If not available, a 404 is returned. + security: + - authTokenHeader: [] + parameters: + - $ref: "#/components/parameters/id" + responses: + 200: + description: Ok + 404: + description: NotFound + get: + tags: [ Attachment ] + summary: Get a preview image of an attachment file. + description: | + Gets an image file showing a preview of the attachment. Usually + it is a small image of the first page of the document. If not + available, a 404 is returned. However, if the query parameter + `withFallback` is `true`, a fallback preview image is + returned. You can also use the `HEAD` method to check for + existence. + security: + - authTokenHeader: [] + parameters: + - $ref: "#/components/parameters/id" + - $ref: "#/components/parameters/withFallback" + responses: + 200: + description: Ok + content: + application/octet-stream: + schema: + type: string + format: binary + post: + tags: [ Attachment ] + summary: (Re)generate a preview image. + description: | + Submits a task that generates a preview image for this + attachment. The existing preview will be replaced. + security: + - authTokenHeader: [] + parameters: + - $ref: "#/components/parameters/id" + responses: + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/BasicResult" + /sec/attachment/{id}/meta: get: tags: [ Attachment ] @@ -4822,3 +4943,10 @@ components: One of the available contact kinds. 
schema: type: string + withFallback: + name: withFallback + in: query + description: Whether to provide a fallback or not. + required: false + schema: + type: boolean diff --git a/modules/restserver/src/main/resources/docspell/restserver/no-preview.svg b/modules/restserver/src/main/resources/docspell/restserver/no-preview.svg new file mode 100644 index 00000000..b7c093eb --- /dev/null +++ b/modules/restserver/src/main/resources/docspell/restserver/no-preview.svg @@ -0,0 +1,128 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="210mm" + height="297mm" + viewBox="0 0 210 297" + version="1.1" + id="svg8" + inkscape:version="0.92.4 (5da689c313, 2019-01-14)" + sodipodi:docname="no-preview.svg"> + <defs + id="defs2" /> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="0.7" + inkscape:cx="638.19656" + inkscape:cy="138.48596" + inkscape:document-units="mm" + inkscape:current-layer="layer1" + showgrid="false" + inkscape:window-width="1896" + inkscape:window-height="2101" + inkscape:window-x="3844" + inkscape:window-y="39" + inkscape:window-maximized="0" /> + <metadata + id="metadata5"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <rect + 
style="opacity:1;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:1.50112426;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" + id="rect4518" + width="209.3988" + height="297.08929" + x="0.37797618" + y="0.28869045" /> + <text + xml:space="preserve" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:4.23650599px;line-height:2;font-family:'DejaVu Sans';-inkscape-font-specification:'DejaVu Sans Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:center;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.26478162" + x="101.91397" + y="163.31726" + id="text4554" + transform="scale(0.99565662,1.0043623)"><tspan + sodipodi:role="line" + id="tspan4552" + x="101.91397" + y="163.31726" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:22.59469986px;line-height:2;font-family:'DejaVu Sans';-inkscape-font-specification:'DejaVu Sans Bold';text-align:center;text-anchor:middle;stroke-width:0.26478162">Preview</tspan><tspan + sodipodi:role="line" + x="101.91397" + y="208.50665" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:22.59469986px;line-height:2;font-family:'DejaVu Sans';-inkscape-font-specification:'DejaVu Sans Bold';text-align:center;text-anchor:middle;stroke-width:0.26478162" + id="tspan4556">not</tspan><tspan + sodipodi:role="line" + x="101.91397" + y="253.69606" + style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:22.59469986px;line-height:2;font-family:'DejaVu Sans';-inkscape-font-specification:'DejaVu Sans Bold';text-align:center;text-anchor:middle;stroke-width:0.26478162" + id="tspan4558">available</tspan></text> + 
<path + style="opacity:1;fill:#d7e3f4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.64033973;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" + id="path4581" + sodipodi:type="arc" + sodipodi:cx="103.00598" + sodipodi:cy="78.360054" + sodipodi:rx="34.460411" + sodipodi:ry="34.761723" + sodipodi:start="0.34567468" + sodipodi:end="0.34365009" + sodipodi:open="true" + d="M 135.42796,90.138421 A 34.460411,34.761723 0 0 1 91.34612,111.07148 34.460411,34.761723 0 0 1 70.572202,66.6148 34.460411,34.761723 0 0 1 114.63301,45.636742 a 34.460411,34.761723 0 0 1 20.81852,44.43544" /> + <rect + style="opacity:1;fill:#000055;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.78938425;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" + id="rect4583" + width="35.846756" + height="4.1953807" + x="84.785538" + y="90.746422" /> + <path + style="opacity:1;fill:#000055;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.78938425;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" + id="path4585" + sodipodi:type="arc" + sodipodi:cx="117.95863" + sodipodi:cy="66.872711" + sodipodi:rx="5.8424263" + sodipodi:ry="5.8935103" + sodipodi:start="0.34567468" + sodipodi:end="0.34365009" + sodipodi:open="true" + d="m 123.45546,68.869618 a 5.8424263,5.8935103 0 0 1 -7.47364,3.548995 5.8424263,5.8935103 0 0 1 -3.52202,-7.537195 5.8424263,5.8935103 0 0 1 7.47008,-3.556625 5.8424263,5.8935103 0 0 1 3.52958,7.533595" /> + <path + 
style="opacity:1;fill:#000055;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.78938425;stroke-linecap:square;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" + id="path4585-8" + sodipodi:type="arc" + sodipodi:cx="87.558212" + sodipodi:cy="67.172394" + sodipodi:rx="5.8424263" + sodipodi:ry="5.8935103" + sodipodi:start="0.34567468" + sodipodi:end="0.34365009" + sodipodi:open="true" + d="m 93.055042,69.169301 a 5.8424263,5.8935103 0 0 1 -7.473645,3.548995 5.8424263,5.8935103 0 0 1 -3.522015,-7.537195 5.8424263,5.8935103 0 0 1 7.47008,-3.556625 5.8424263,5.8935103 0 0 1 3.529577,7.533595" /> + </g> +</svg> diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala index de4dfbfb..9dbba2b0 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala @@ -33,7 +33,7 @@ object RestServer { "/api/info" -> routes.InfoRoutes(), "/api/v1/open/" -> openRoutes(cfg, restApp), "/api/v1/sec/" -> Authenticate(restApp.backend.login, cfg.auth) { token => - securedRoutes(cfg, restApp, token) + securedRoutes(cfg, pools, restApp, token) }, "/api/doc" -> templates.doc, "/app/assets" -> WebjarRoutes.appRoutes[F](pools.blocker), @@ -57,8 +57,9 @@ object RestServer { ) }.drain - def securedRoutes[F[_]: Effect]( + def securedRoutes[F[_]: Effect: ContextShift]( cfg: Config, + pools: Pools, restApp: RestApp[F], token: AuthToken ): HttpRoutes[F] = @@ -72,9 +73,9 @@ object RestServer { "user" -> UserRoutes(restApp.backend, token), "collective" -> CollectiveRoutes(restApp.backend, token), "queue" -> JobQueueRoutes(restApp.backend, token), - "item" -> ItemRoutes(cfg, restApp.backend, token), + "item" -> ItemRoutes(cfg, pools.blocker, restApp.backend, token), "items" -> ItemMultiRoutes(restApp.backend, token), 
- "attachment" -> AttachmentRoutes(restApp.backend, token), + "attachment" -> AttachmentRoutes(pools.blocker, restApp.backend, token), "upload" -> UploadRoutes.secured(restApp.backend, cfg, token), "checkfile" -> CheckFileRoutes.secured(restApp.backend, token), "email/send" -> MailSendRoutes(restApp.backend, token), diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala new file mode 100644 index 00000000..152391b8 --- /dev/null +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/BinaryUtil.scala @@ -0,0 +1,67 @@ +package docspell.restserver.http4s + +import cats.data.NonEmptyList +import cats.data.OptionT +import cats.effect._ +import cats.implicits._ + +import docspell.backend.ops._ + +import bitpeace.FileMeta +import org.http4s._ +import org.http4s.circe.CirceEntityEncoder._ +import org.http4s.dsl.Http4sDsl +import org.http4s.headers.ETag.EntityTag +import org.http4s.headers._ + +object BinaryUtil { + + def withResponseHeaders[F[_]: Sync](dsl: Http4sDsl[F], resp: F[Response[F]])( + data: OItemSearch.BinaryData[F] + ): F[Response[F]] = { + import dsl._ + + val mt = MediaType.unsafeParse(data.meta.mimetype.asString) + val ctype = `Content-Type`(mt) + val cntLen: Header = `Content-Length`.unsafeFromLong(data.meta.length) + val eTag: Header = ETag(data.meta.checksum) + val disp: Header = + `Content-Disposition`("inline", Map("filename" -> data.name.getOrElse(""))) + + resp.map(r => + if (r.status == NotModified) r.withHeaders(ctype, eTag, disp) + else r.withHeaders(ctype, cntLen, eTag, disp) + ) + } + + def makeByteResp[F[_]: Sync]( + dsl: Http4sDsl[F] + )(data: OItemSearch.BinaryData[F]): F[Response[F]] = { + import dsl._ + withResponseHeaders(dsl, Ok(data.data.take(data.meta.length)))(data) + } + + def matchETag[F[_]]( + fileData: Option[FileMeta], + noneMatch: Option[NonEmptyList[EntityTag]] + ): Boolean = + (fileData, noneMatch) 
match { + case (Some(meta), Some(nm)) => + nm.exists(_.tag == meta.checksum) // If-None-Match may carry several tags; any match counts + case _ => + false + } + + def noPreview[F[_]: Sync: ContextShift]( + blocker: Blocker, + req: Option[Request[F]] + ): OptionT[F, Response[F]] = + StaticFile.fromResource( + name = "/docspell/restserver/no-preview.svg", + blocker = blocker, + req = req, + preferGzipped = true, + classloader = getClass.getClassLoader().some + ) + +} diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/QueryParam.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/QueryParam.scala index b83296a1..4d91d959 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/http4s/QueryParam.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/QueryParam.scala @@ -29,4 +29,6 @@ object QueryParam { object ContactKindOpt extends OptionalQueryParamDecoderMatcher[ContactKind]("kind") object QueryOpt extends OptionalQueryParamDecoderMatcher[QueryString]("q") + + object WithFallback extends OptionalQueryParamDecoderMatcher[Boolean]("withFallback") } diff --git a/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala b/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala index 01bf9774..fbd300a3 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/http4s/Responses.scala @@ -1,5 +1,6 @@ package docspell.restserver.http4s +import cats.data.NonEmptyList import fs2.text.utf8Encode import fs2.{Pure, Stream} @@ -27,4 +28,15 @@ object Responses { def unauthorized[F[_]]: Response[F] = pureUnauthorized.copy(body = pureUnauthorized.body.covary[F]) + + /** Adds `Cache-Control: no-cache, private` to the given response. All other + * headers already set on the response are kept; only an existing `Cache-Control` is replaced. + */ + def noCache[F[_]](r: Response[F]): Response[F] = + r.putHeaders( + `Cache-Control`( + NonEmptyList.of(CacheDirective.`no-cache`(), CacheDirective.`private`()) + ) + ) + } diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/AttachmentRoutes.scala 
b/modules/restserver/src/main/scala/docspell/restserver/routes/AttachmentRoutes.scala index 91c574e7..34e09ba3 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/AttachmentRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/AttachmentRoutes.scala @@ -1,6 +1,5 @@ package docspell.restserver.routes -import cats.data.NonEmptyList import cats.effect._ import cats.implicits._ @@ -8,42 +7,36 @@ import docspell.backend.BackendApp import docspell.backend.auth.AuthToken import docspell.backend.ops._ import docspell.common.Ident +import docspell.common.MakePreviewArgs import docspell.restapi.model._ import docspell.restserver.conv.Conversions +import docspell.restserver.http4s.BinaryUtil +import docspell.restserver.http4s.{QueryParam => QP} import docspell.restserver.webapp.Webjars -import bitpeace.FileMeta import org.http4s._ import org.http4s.circe.CirceEntityDecoder._ import org.http4s.circe.CirceEntityEncoder._ import org.http4s.dsl.Http4sDsl -import org.http4s.headers.ETag.EntityTag import org.http4s.headers._ object AttachmentRoutes { - def apply[F[_]: Effect](backend: BackendApp[F], user: AuthToken): HttpRoutes[F] = { + def apply[F[_]: Effect: ContextShift]( + blocker: Blocker, + backend: BackendApp[F], + user: AuthToken + ): HttpRoutes[F] = { val dsl = new Http4sDsl[F] {} import dsl._ - def withResponseHeaders( - resp: F[Response[F]] - )(data: OItemSearch.BinaryData[F]): F[Response[F]] = { - val mt = MediaType.unsafeParse(data.meta.mimetype.asString) - val ctype = `Content-Type`(mt) - val cntLen: Header = `Content-Length`.unsafeFromLong(data.meta.length) - val eTag: Header = ETag(data.meta.checksum) - val disp: Header = - `Content-Disposition`("inline", Map("filename" -> data.name.getOrElse(""))) - - resp.map(r => - if (r.status == NotModified) r.withHeaders(ctype, eTag, disp) - else r.withHeaders(ctype, cntLen, eTag, disp) - ) - } + def withResponseHeaders(resp: F[Response[F]])( + data: OItemSearch.BinaryData[F] + 
): F[Response[F]] = + BinaryUtil.withResponseHeaders[F](dsl, resp)(data) def makeByteResp(data: OItemSearch.BinaryData[F]): F[Response[F]] = - withResponseHeaders(Ok(data.data.take(data.meta.length)))(data) + BinaryUtil.makeByteResp(dsl)(data) HttpRoutes.of { case HEAD -> Root / Ident(id) => @@ -59,7 +52,7 @@ object AttachmentRoutes { for { fileData <- backend.itemSearch.findAttachment(id, user.account.collective) inm = req.headers.get(`If-None-Match`).flatMap(_.tags) - matches = matchETag(fileData.map(_.meta), inm) + matches = BinaryUtil.matchETag(fileData.map(_.meta), inm) resp <- fileData .map { data => @@ -82,7 +75,7 @@ object AttachmentRoutes { for { fileData <- backend.itemSearch.findAttachmentSource(id, user.account.collective) inm = req.headers.get(`If-None-Match`).flatMap(_.tags) - matches = matchETag(fileData.map(_.meta), inm) + matches = BinaryUtil.matchETag(fileData.map(_.meta), inm) resp <- fileData .map { data => @@ -107,7 +100,7 @@ object AttachmentRoutes { fileData <- backend.itemSearch.findAttachmentArchive(id, user.account.collective) inm = req.headers.get(`If-None-Match`).flatMap(_.tags) - matches = matchETag(fileData.map(_.meta), inm) + matches = BinaryUtil.matchETag(fileData.map(_.meta), inm) resp <- fileData .map { data => @@ -117,6 +110,49 @@ object AttachmentRoutes { .getOrElse(NotFound(BasicResult(false, "Not found"))) } yield resp + case req @ GET -> Root / Ident(id) / "preview" :? 
QP.WithFallback(flag) => + def notFound = + NotFound(BasicResult(false, "Not found")) + for { + fileData <- + backend.itemSearch.findAttachmentPreview(id, user.account.collective) + inm = req.headers.get(`If-None-Match`).flatMap(_.tags) + matches = BinaryUtil.matchETag(fileData.map(_.meta), inm) + fallback = flag.getOrElse(false) + resp <- + fileData + .map { data => + if (matches) withResponseHeaders(NotModified())(data) + else makeByteResp(data) + } + .getOrElse( + if (fallback) BinaryUtil.noPreview(blocker, req.some).getOrElseF(notFound) + else notFound + ) + } yield resp + + case HEAD -> Root / Ident(id) / "preview" => + for { + fileData <- + backend.itemSearch.findAttachmentPreview(id, user.account.collective) + resp <- + fileData + .map(data => withResponseHeaders(Ok())(data)) + .getOrElse(NotFound(BasicResult(false, "Not found"))) + } yield resp + + case POST -> Root / Ident(id) / "preview" => + for { + res <- backend.item.generatePreview( + MakePreviewArgs.replace(id), + user.account, + true + ) + resp <- Ok( + Conversions.basicResult(res, "Generating preview image task submitted.") + ) + } yield resp + case GET -> Root / Ident(id) / "view" => // this route exists to provide a stable url // it redirects currently to viewerjs @@ -148,16 +184,4 @@ object AttachmentRoutes { } yield resp } } - - private def matchETag[F[_]]( - fileData: Option[FileMeta], - noneMatch: Option[NonEmptyList[EntityTag]] - ): Boolean = - (fileData, noneMatch) match { - case (Some(meta), Some(nm)) => - meta.checksum == nm.head.tag - case _ => - false - } - } diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala index bf7eaddd..7ecd1e90 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala @@ -6,6 +6,7 @@ import cats.implicits._ 
import docspell.backend.BackendApp import docspell.backend.auth.AuthToken import docspell.backend.ops.OCollective +import docspell.common.MakePreviewArgs import docspell.restapi.model._ import docspell.restserver.conv.Conversions import docspell.restserver.http4s._ @@ -94,6 +95,18 @@ object CollectiveRoutes { resp <- Ok(BasicResult(true, "Task submitted")) } yield resp + case POST -> Root / "previews" => + for { + res <- backend.collective.generatePreviews( + MakePreviewArgs.StoreMode.Replace, + user.account, + true + ) + resp <- Ok( + Conversions.basicResult(res, "Generate all previews task submitted.") + ) + } yield resp + case GET -> Root => for { collDb <- backend.collective.find(user.account.collective) diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala index 1966a6f1..ba0c8c08 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala @@ -13,18 +13,23 @@ import docspell.common.{Ident, ItemState} import docspell.restapi.model._ import docspell.restserver.Config import docspell.restserver.conv.Conversions +import docspell.restserver.http4s.BinaryUtil +import docspell.restserver.http4s.Responses +import docspell.restserver.http4s.{QueryParam => QP} import org.http4s.HttpRoutes import org.http4s.circe.CirceEntityDecoder._ import org.http4s.circe.CirceEntityEncoder._ import org.http4s.dsl.Http4sDsl +import org.http4s.headers._ import org.log4s._ object ItemRoutes { private[this] val logger = getLogger - def apply[F[_]: Effect]( + def apply[F[_]: Effect: ContextShift]( cfg: Config, + blocker: Blocker, backend: BackendApp[F], user: AuthToken ): HttpRoutes[F] = { @@ -315,6 +320,35 @@ object ItemRoutes { resp <- Ok(Conversions.basicResult(res, "Attachment moved.")) } yield resp + case req @ GET -> Root / Ident(id) / "preview" :? 
QP.WithFallback(flag) => + def notFound = + NotFound(BasicResult(false, "Not found")) + for { + preview <- backend.itemSearch.findItemPreview(id, user.account.collective) + inm = req.headers.get(`If-None-Match`).flatMap(_.tags) + matches = BinaryUtil.matchETag(preview.map(_.meta), inm) + fallback = flag.getOrElse(false) + resp <- + preview + .map { data => + if (matches) BinaryUtil.withResponseHeaders(dsl, NotModified())(data) + else BinaryUtil.makeByteResp(dsl)(data).map(Responses.noCache) + } + .getOrElse( + if (fallback) BinaryUtil.noPreview(blocker, req.some).getOrElseF(notFound) + else notFound + ) + } yield resp + + case HEAD -> Root / Ident(id) / "preview" => + for { + preview <- backend.itemSearch.findItemPreview(id, user.account.collective) + resp <- + preview + .map(data => BinaryUtil.withResponseHeaders(dsl, Ok())(data)) + .getOrElse(NotFound(BasicResult(false, "Not found"))) + } yield resp + case req @ POST -> Root / Ident(id) / "reprocess" => for { data <- req.as[IdList] diff --git a/modules/store/src/main/resources/db/migration/h2/V1.10.0__attachment_preview.sql b/modules/store/src/main/resources/db/migration/h2/V1.10.0__attachment_preview.sql new file mode 100644 index 00000000..c6b36cbe --- /dev/null +++ b/modules/store/src/main/resources/db/migration/h2/V1.10.0__attachment_preview.sql @@ -0,0 +1,8 @@ +CREATE TABLE "attachment_preview" ( + "id" varchar(254) not null primary key, + "file_id" varchar(254) not null, + "filename" varchar(254), + "created" timestamp not null, + foreign key ("file_id") references "filemeta"("id"), + foreign key ("id") references "attachment"("attachid") +); diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.10.0__attachment_preview.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.10.0__attachment_preview.sql new file mode 100644 index 00000000..026a0c8c --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.10.0__attachment_preview.sql @@ -0,0 +1,8 @@ +CREATE TABLE 
`attachment_preview` ( + `id` varchar(254) not null primary key, + `file_id` varchar(254) not null, + `filename` varchar(254), + `created` timestamp not null, + foreign key (`file_id`) references `filemeta`(`id`), + foreign key (`id`) references `attachment`(`attachid`) +); diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.10.0__attachment_preview.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.10.0__attachment_preview.sql new file mode 100644 index 00000000..c6b36cbe --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.10.0__attachment_preview.sql @@ -0,0 +1,8 @@ +CREATE TABLE "attachment_preview" ( + "id" varchar(254) not null primary key, + "file_id" varchar(254) not null, + "filename" varchar(254), + "created" timestamp not null, + foreign key ("file_id") references "filemeta"("id"), + foreign key ("id") references "attachment"("attachid") +); diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index 0371ff79..86ae26f4 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -17,6 +17,22 @@ import doobie.implicits._ object QAttachment { private[this] val logger = org.log4s.getLogger + def deletePreview[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = { + val findPreview = + for { + rp <- RAttachmentPreview.findById(attachId) + } yield rp.toSeq + + Stream + .evalSeq(store.transact(findPreview)) + .map(_.fileId.id) + .evalTap(_ => store.transact(RAttachmentPreview.delete(attachId))) + .flatMap(store.bitpeace.delete) + .map(flag => if (flag) 1 else 0) + .compile + .foldMonoid + } + /** Deletes an attachment, its related source and meta data records. * It will only delete an related archive file, if this is the last * attachment in that archive. 
@@ -27,18 +43,19 @@ object QAttachment { val loadFiles = for { ra <- RAttachment.findByIdAndCollective(attachId, coll).map(_.map(_.fileId)) rs <- RAttachmentSource.findByIdAndCollective(attachId, coll).map(_.map(_.fileId)) + rp <- RAttachmentPreview.findByIdAndCollective(attachId, coll).map(_.map(_.fileId)) ne <- RAttachmentArchive.countEntries(attachId) - } yield (ra, rs, ne) + } yield (ra.toSeq ++ rs.toSeq ++ rp.toSeq, ne) for { files <- store.transact(loadFiles) k <- - if (files._3 == 1) deleteArchive(store)(attachId) + if (files._2 == 1) deleteArchive(store)(attachId) else store.transact(RAttachmentArchive.delete(attachId)) n <- store.transact(RAttachment.delete(attachId)) f <- Stream - .emits(files._1.toSeq ++ files._2.toSeq) + .emits(files._1) .map(_.id) .flatMap(store.bitpeace.delete) .map(flag => if (flag) 1 else 0) @@ -55,13 +72,14 @@ object QAttachment { for { _ <- logger.fdebug[F](s"Deleting attachment: ${ra.id.id}") s <- store.transact(RAttachmentSource.findById(ra.id)) + p <- store.transact(RAttachmentPreview.findById(ra.id)) n <- store.transact(RAttachment.delete(ra.id)) _ <- logger.fdebug[F]( - s"Deleted $n meta records (source, meta, archive). Deleting binaries now." + s"Deleted $n meta records (source, meta, preview, archive). Deleting binaries now." 
) f <- Stream - .emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq)) + .emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq ++ p.map(_.fileId.id).toSeq)) .flatMap(store.bitpeace.delete) .map(flag => if (flag) 1 else 0) .compile diff --git a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala index dbdcb9e4..a1d162af 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala @@ -61,6 +61,9 @@ object QCollective { select a.file_id,m.length from attachment_source a inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs) union distinct + select p.file_id,m.length from attachment_preview p + inner join filemeta m on m.id = p.file_id where p.id in (select aid from attachs) + union distinct select a.file_id,m.length from attachment_archive a inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs) ) as t""".query[Option[Long]].unique diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala index 334ac711..fa1453b6 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala @@ -224,12 +224,69 @@ object RAttachment { for { n0 <- RAttachmentMeta.delete(attachId) n1 <- RAttachmentSource.delete(attachId) - n2 <- deleteFrom(table, id.is(attachId)).update.run - } yield n0 + n1 + n2 + n2 <- RAttachmentPreview.delete(attachId) + n3 <- deleteFrom(table, id.is(attachId)).update.run + } yield n0 + n1 + n2 + n3 def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] = selectSimple(Seq(itemId), table, id.is(attachId)).query[Ident].option + def findAll( + coll: Option[Ident], + chunkSize: Int + ): Stream[ConnectionIO, RAttachment] = { + val aItem = 
Columns.itemId.prefix("a") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + + val cols = all.map(_.prefix("a")) + + coll match { + case Some(cid) => + val join = table ++ fr"a INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) + val cond = iColl.is(cid) + selectSimple(cols, join, cond) + .query[RAttachment] + .streamWithChunkSize(chunkSize) + case None => + selectSimple(cols, table, Fragment.empty) + .query[RAttachment] + .streamWithChunkSize(chunkSize) + } + } + + def findWithoutPreview( + coll: Option[Ident], + chunkSize: Int + ): Stream[ConnectionIO, RAttachment] = { + val aId = Columns.id.prefix("a") + val aItem = Columns.itemId.prefix("a") + val pId = RAttachmentPreview.Columns.id.prefix("p") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + + val cols = all.map(_.prefix("a")) + val baseJoin = + table ++ fr"a LEFT OUTER JOIN" ++ + RAttachmentPreview.table ++ fr"p ON" ++ pId.is(aId) + + val baseCond = + Seq(pId.isNull) + + coll match { + case Some(cid) => + val join = baseJoin ++ fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) + val cond = and(baseCond ++ Seq(iColl.is(cid))) + selectSimple(cols, join, cond) + .query[RAttachment] + .streamWithChunkSize(chunkSize) + case None => + selectSimple(cols, baseJoin, and(baseCond)) + .query[RAttachment] + .streamWithChunkSize(chunkSize) + } + } + def findNonConvertedPdf( coll: Option[Ident], chunkSize: Int diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala new file mode 100644 index 00000000..c28169b7 --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/records/RAttachmentPreview.scala @@ -0,0 +1,123 @@ +package docspell.store.records + +import docspell.common._ +import docspell.store.impl.Implicits._ +import docspell.store.impl._ + +import bitpeace.FileMeta +import doobie._ +import doobie.implicits._ + 
+/** A preview image of an attachment. The `id` is shared with the + * attachment, to create a 1-1 (or 0..1-1) relationship. + */ +case class RAttachmentPreview( + id: Ident, //same as RAttachment.id + fileId: Ident, + name: Option[String], + created: Timestamp +) + +object RAttachmentPreview { + + val table = fr"attachment_preview" + + object Columns { + val id = Column("id") + val fileId = Column("file_id") + val name = Column("filename") + val created = Column("created") + + val all = List(id, fileId, name, created) + } + + import Columns._ + + def insert(v: RAttachmentPreview): ConnectionIO[Int] = + insertRow(table, all, fr"${v.id},${v.fileId},${v.name},${v.created}").update.run + + def findById(attachId: Ident): ConnectionIO[Option[RAttachmentPreview]] = + selectSimple(all, table, id.is(attachId)).query[RAttachmentPreview].option + + def delete(attachId: Ident): ConnectionIO[Int] = + deleteFrom(table, id.is(attachId)).update.run + + def findByIdAndCollective( + attachId: Ident, + collective: Ident + ): ConnectionIO[Option[RAttachmentPreview]] = { + val bId = RAttachment.Columns.id.prefix("b") + val aId = Columns.id.prefix("a") + val bItem = RAttachment.Columns.itemId.prefix("b") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + + val from = table ++ fr"a INNER JOIN" ++ + RAttachment.table ++ fr"b ON" ++ aId.is(bId) ++ + fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ bItem.is(iId) + + val where = and(aId.is(attachId), bId.is(attachId), iColl.is(collective)) + + selectSimple(all.map(_.prefix("a")), from, where).query[RAttachmentPreview].option + } + + def findByItem(itemId: Ident): ConnectionIO[Vector[RAttachmentPreview]] = { + val sId = Columns.id.prefix("s") + val aId = RAttachment.Columns.id.prefix("a") + val aItem = RAttachment.Columns.itemId.prefix("a") + + val from = table ++ fr"s INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ sId.is(aId) + selectSimple(all.map(_.prefix("s")), from, aItem.is(itemId)) + 
.query[RAttachmentPreview] + .to[Vector] + } + + def findByItemAndCollective( + itemId: Ident, + coll: Ident + ): ConnectionIO[Option[RAttachmentPreview]] = { + val sId = Columns.id.prefix("s") + val aId = RAttachment.Columns.id.prefix("a") + val aItem = RAttachment.Columns.itemId.prefix("a") + val aPos = RAttachment.Columns.position.prefix("a") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + + val from = + table ++ fr"s INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ sId.is(aId) ++ + fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) + + selectSimple( + all.map(_.prefix("s")) ++ List(aPos), + from, + and(aItem.is(itemId), iColl.is(coll)) + ) + .query[(RAttachmentPreview, Int)] + .to[Vector] + .map(_.sortBy(_._2).headOption.map(_._1)) + } + + def findByItemWithMeta( + id: Ident + ): ConnectionIO[Vector[(RAttachmentPreview, FileMeta)]] = { + import bitpeace.sql._ + + val aId = Columns.id.prefix("a") + val afileMeta = fileId.prefix("a") + val bPos = RAttachment.Columns.position.prefix("b") + val bId = RAttachment.Columns.id.prefix("b") + val bItem = RAttachment.Columns.itemId.prefix("b") + val mId = RFileMeta.Columns.id.prefix("m") + + val cols = all.map(_.prefix("a")) ++ RFileMeta.Columns.all.map(_.prefix("m")) + val from = table ++ fr"a INNER JOIN" ++ + RFileMeta.table ++ fr"m ON" ++ afileMeta.is(mId) ++ fr"INNER JOIN" ++ + RAttachment.table ++ fr"b ON" ++ aId.is(bId) + val where = bItem.is(id) + + (selectSimple(cols, from, where) ++ orderBy(bPos.asc)) + .query[(RAttachmentPreview, FileMeta)] + .to[Vector] + } + +} diff --git a/modules/webapp/src/main/elm/Api.elm b/modules/webapp/src/main/elm/Api.elm index 6ce34ec1..7230be7e 100644 --- a/modules/webapp/src/main/elm/Api.elm +++ b/modules/webapp/src/main/elm/Api.elm @@ -30,6 +30,7 @@ module Api exposing , deleteSource , deleteTag , deleteUser + , fileURL , getAttachmentMeta , getCollective , getCollectiveSettings @@ -59,6 +60,7 @@ module Api exposing , getUsers , 
itemDetail , itemIndexSearch + , itemPreviewURL , itemSearch , login , loginSession @@ -1501,6 +1503,16 @@ deleteAllItems flags ids receive = --- Item +itemPreviewURL : String -> String +itemPreviewURL itemId = + "/api/v1/sec/item/" ++ itemId ++ "/preview?withFallback=true" + + +fileURL : String -> String +fileURL attachId = + "/api/v1/sec/attachment/" ++ attachId + + setAttachmentName : Flags -> String diff --git a/modules/webapp/src/main/elm/Comp/BasicSizeField.elm b/modules/webapp/src/main/elm/Comp/BasicSizeField.elm new file mode 100644 index 00000000..1ec4f048 --- /dev/null +++ b/modules/webapp/src/main/elm/Comp/BasicSizeField.elm @@ -0,0 +1,40 @@ +module Comp.BasicSizeField exposing (Msg, update, view) + +import Data.BasicSize exposing (BasicSize) +import Html exposing (..) +import Html.Attributes exposing (..) +import Html.Events exposing (onCheck) + + +type Msg + = Toggle BasicSize + + +update : Msg -> Maybe BasicSize +update msg = + case msg of + Toggle bs -> + Just bs + + +view : String -> BasicSize -> Html Msg +view labelTxt current = + div [ class "grouped fields" ] + (label [] [ text labelTxt ] + :: List.map (makeField current) Data.BasicSize.all + ) + + +makeField : BasicSize -> BasicSize -> Html Msg +makeField current element = + div [ class "field" ] + [ div [ class "ui radio checkbox" ] + [ input + [ type_ "radio" + , checked (current == element) + , onCheck (\_ -> Toggle element) + ] + [] + , label [] [ text (Data.BasicSize.label element) ] + ] + ] diff --git a/modules/webapp/src/main/elm/Comp/ItemCardList.elm b/modules/webapp/src/main/elm/Comp/ItemCardList.elm index b2b110ec..b9595b67 100644 --- a/modules/webapp/src/main/elm/Comp/ItemCardList.elm +++ b/modules/webapp/src/main/elm/Comp/ItemCardList.elm @@ -10,6 +10,7 @@ module Comp.ItemCardList exposing , view ) +import Api import Api.Model.HighlightEntry exposing (HighlightEntry) import Api.Model.ItemLight exposing (ItemLight) import Api.Model.ItemLightGroup exposing (ItemLightGroup) @@ -230,7 
+231,19 @@ viewItem cfg settings item = ] ++ DD.draggable ItemDDMsg item.id ) - [ div [ class "content" ] + [ if fieldHidden Data.Fields.PreviewImage then + span [ class "invisible" ] [] + + else + div [ class "image" ] + [ img + [ class "preview-image" + , src (Api.itemPreviewURL item.id) + , Data.UiSettings.cardPreviewSize settings + ] + [] + ] + , div [ class "content" ] [ case cfg.selection of Data.ItemSelection.Active ids -> div [ class "header" ] diff --git a/modules/webapp/src/main/elm/Comp/ItemDetail/Update.elm b/modules/webapp/src/main/elm/Comp/ItemDetail/Update.elm index bda2e73b..578a11ef 100644 --- a/modules/webapp/src/main/elm/Comp/ItemDetail/Update.elm +++ b/modules/webapp/src/main/elm/Comp/ItemDetail/Update.elm @@ -1464,6 +1464,9 @@ resetField flags item tagger field = Data.Fields.Direction -> Cmd.none + Data.Fields.PreviewImage -> + Cmd.none + resetHiddenFields : UiSettings diff --git a/modules/webapp/src/main/elm/Comp/ItemDetail/View.elm b/modules/webapp/src/main/elm/Comp/ItemDetail/View.elm index dd154d23..04c00406 100644 --- a/modules/webapp/src/main/elm/Comp/ItemDetail/View.elm +++ b/modules/webapp/src/main/elm/Comp/ItemDetail/View.elm @@ -1,5 +1,6 @@ module Comp.ItemDetail.View exposing (view) +import Api import Api.Model.Attachment exposing (Attachment) import Comp.AttachmentMeta import Comp.DatePicker @@ -320,7 +321,7 @@ renderAttachmentView : UiSettings -> Model -> Int -> Attachment -> Html Msg renderAttachmentView settings model pos attach = let fileUrl = - "/api/v1/sec/attachment/" ++ attach.id + Api.fileURL attach.id attachName = Maybe.withDefault "No name" attach.name diff --git a/modules/webapp/src/main/elm/Comp/UiSettingsForm.elm b/modules/webapp/src/main/elm/Comp/UiSettingsForm.elm index 4bd955af..940de6e6 100644 --- a/modules/webapp/src/main/elm/Comp/UiSettingsForm.elm +++ b/modules/webapp/src/main/elm/Comp/UiSettingsForm.elm @@ -8,9 +8,11 @@ module Comp.UiSettingsForm exposing import Api import Api.Model.TagList exposing (TagList) 
+import Comp.BasicSizeField import Comp.ColorTagger import Comp.FieldListSelect import Comp.IntField +import Data.BasicSize exposing (BasicSize) import Data.Color exposing (Color) import Data.Fields exposing (Field) import Data.Flags exposing (Flags) @@ -42,6 +44,7 @@ type alias Model = , itemDetailShortcuts : Bool , searchMenuVisible : Bool , editMenuVisible : Bool + , cardPreviewSize : BasicSize } @@ -93,6 +96,7 @@ init flags settings = , itemDetailShortcuts = settings.itemDetailShortcuts , searchMenuVisible = settings.searchMenuVisible , editMenuVisible = settings.editMenuVisible + , cardPreviewSize = settings.cardPreviewSize } , Api.getTags flags "" GetTagsResp ) @@ -112,6 +116,7 @@ type Msg | ToggleItemDetailShortcuts | ToggleSearchMenuVisible | ToggleEditMenuVisible + | CardPreviewSizeMsg Comp.BasicSizeField.Msg @@ -297,6 +302,23 @@ update sett msg model = , Just { sett | editMenuVisible = flag } ) + CardPreviewSizeMsg lm -> + let + next = + Comp.BasicSizeField.update lm + |> Maybe.withDefault model.cardPreviewSize + + newSettings = + if next /= model.cardPreviewSize then + Just { sett | cardPreviewSize = next } + + else + Nothing + in + ( { model | cardPreviewSize = next } + , newSettings + ) + --- View @@ -329,6 +351,9 @@ view flags _ model = "field" model.searchPageSizeModel ) + , div [ class "ui dividing header" ] + [ text "Item Cards" + ] , Html.map NoteLengthMsg (Comp.IntField.viewWithInfo ("Maximum size of the item notes to display in card view. 
Between 0 - " @@ -339,6 +364,11 @@ view flags _ model = "field" model.searchNoteLengthModel ) + , Html.map CardPreviewSizeMsg + (Comp.BasicSizeField.view + "Size of item preview" + model.cardPreviewSize + ) , div [ class "ui dividing header" ] [ text "Search Menu" ] , div [ class "field" ] diff --git a/modules/webapp/src/main/elm/Data/BasicSize.elm b/modules/webapp/src/main/elm/Data/BasicSize.elm new file mode 100644 index 00000000..decae6b7 --- /dev/null +++ b/modules/webapp/src/main/elm/Data/BasicSize.elm @@ -0,0 +1,55 @@ +module Data.BasicSize exposing + ( BasicSize(..) + , all + , asString + , fromString + , label + ) + + +type BasicSize + = Small + | Medium + | Large + + +all : List BasicSize +all = + [ Small + , Medium + , Large + ] + + +fromString : String -> Maybe BasicSize +fromString str = + case String.toLower str of + "small" -> + Just Small + + "medium" -> + Just Medium + + "large" -> + Just Large + + _ -> + Nothing + + +asString : BasicSize -> String +asString size = + label size |> String.toLower + + +label : BasicSize -> String +label size = + case size of + Small -> + "Small" + + Medium -> + "Medium" + + Large -> + "Large" diff --git a/modules/webapp/src/main/elm/Data/Fields.elm b/modules/webapp/src/main/elm/Data/Fields.elm index 3412015a..4a0244d2 100644 --- a/modules/webapp/src/main/elm/Data/Fields.elm +++ b/modules/webapp/src/main/elm/Data/Fields.elm @@ -19,6 +19,7 @@ type Field | Date | DueDate | Direction + | PreviewImage all : List Field @@ -33,6 +34,7 @@ all = , Date , DueDate , Direction + , PreviewImage ] @@ -71,6 +73,9 @@ fromString str = "direction" -> Just Direction + "preview" -> + Just PreviewImage + _ -> Nothing @@ -105,6 +110,9 @@ toString field = Direction -> "direction" + PreviewImage -> + "preview" + label : Field -> String label field = @@ -136,6 +144,9 @@ label field = Direction -> "Direction" + PreviewImage -> + "Preview Image" + fromList : List String -> List Field fromList strings = diff --git 
a/modules/webapp/src/main/elm/Data/UiSettings.elm b/modules/webapp/src/main/elm/Data/UiSettings.elm index 38263ddf..8d955549 100644 --- a/modules/webapp/src/main/elm/Data/UiSettings.elm +++ b/modules/webapp/src/main/elm/Data/UiSettings.elm @@ -2,6 +2,7 @@ module Data.UiSettings exposing ( Pos(..) , StoredUiSettings , UiSettings + , cardPreviewSize , catColor , catColorString , defaults @@ -17,9 +18,12 @@ module Data.UiSettings exposing ) import Api.Model.Tag exposing (Tag) +import Data.BasicSize exposing (BasicSize) import Data.Color exposing (Color) import Data.Fields exposing (Field) import Dict exposing (Dict) +import Html exposing (Attribute) +import Html.Attributes as HA {-| Settings for the web ui. All fields should be optional, since it @@ -43,6 +47,7 @@ type alias StoredUiSettings = , itemDetailShortcuts : Bool , searchMenuVisible : Bool , editMenuVisible : Bool + , cardPreviewSize : Maybe String } @@ -66,6 +71,7 @@ type alias UiSettings = , itemDetailShortcuts : Bool , searchMenuVisible : Bool , editMenuVisible : Bool + , cardPreviewSize : BasicSize } @@ -111,6 +117,7 @@ defaults = , itemDetailShortcuts = False , searchMenuVisible = False , editMenuVisible = False + , cardPreviewSize = Data.BasicSize.Medium } @@ -146,6 +153,10 @@ merge given fallback = , itemDetailShortcuts = given.itemDetailShortcuts , searchMenuVisible = given.searchMenuVisible , editMenuVisible = given.editMenuVisible + , cardPreviewSize = + given.cardPreviewSize + |> Maybe.andThen Data.BasicSize.fromString + |> Maybe.withDefault fallback.cardPreviewSize } @@ -172,6 +183,10 @@ toStoredUiSettings settings = , itemDetailShortcuts = settings.itemDetailShortcuts , searchMenuVisible = settings.searchMenuVisible , editMenuVisible = settings.editMenuVisible + , cardPreviewSize = + settings.cardPreviewSize + |> Data.BasicSize.asString + |> Just } @@ -209,6 +224,19 @@ fieldHidden settings field = fieldVisible settings field |> not +cardPreviewSize : UiSettings -> Attribute msg +cardPreviewSize 
settings = + case settings.cardPreviewSize of + Data.BasicSize.Small -> + HA.style "max-width" "80px" + + Data.BasicSize.Medium -> + HA.style "max-width" "160px" + + Data.BasicSize.Large -> + HA.style "max-width" "none" + + --- Helpers diff --git a/modules/webapp/src/main/webjar/docspell.css b/modules/webapp/src/main/webjar/docspell.css index c026e157..04ec1eff 100644 --- a/modules/webapp/src/main/webjar/docspell.css +++ b/modules/webapp/src/main/webjar/docspell.css @@ -93,6 +93,11 @@ padding: 0.8em; } +.default-layout img.preview-image { + margin-left: auto; + margin-right: auto; +} + .default-layout .menu .item.active a.right-tab-icon-link { position: relative; right: -8px; diff --git a/tools/preview/regenerate-previews.sh b/tools/preview/regenerate-previews.sh new file mode 100755 index 00000000..b439a8e0 --- /dev/null +++ b/tools/preview/regenerate-previews.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# This script submits a job to regenerate all preview images. This may +# be necessary if you change the dpi setting that affects the size of +# the preview. + +set -e + +BASE_URL="${1:-http://localhost:7880}" +LOGIN_URL="$BASE_URL/api/v1/open/auth/login" +TRIGGER_URL="$BASE_URL/api/v1/sec/collective/previews" + +echo "Login to trigger regenerating preview images." +echo "Using url: $BASE_URL" +echo -n "Account: " +read USER +echo -n "Password: " +read -s PASS +echo + +auth=$(curl --fail -XPOST --silent --data-binary "{\"account\":\"$USER\", \"password\":\"$PASS\"}" "$LOGIN_URL") + +if [ "$(echo $auth | jq .success)" == "true" ]; then + echo "Login successful" + auth_token=$(echo $auth | jq -r .token) + curl --fail -XPOST -H "X-Docspell-Auth: $auth_token" "$TRIGGER_URL" +else + echo "Login failed." +fi