mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Add first drafts for converting
This commit is contained in:
108
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
108
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.{Pipe, Stream}
|
||||
import docspell.common._
|
||||
|
||||
object ExternConv {
|
||||
|
||||
/** Builds a pipe that converts its input to PDF by running an external command.
  *
  * A temporary directory (prefix `docspell-$name`) is acquired below `wd` for the
  * duration of the conversion. Every occurrence of `{{outfile}}` in the command
  * arguments is replaced by the absolute, normalized path of `out.pdf` inside that
  * directory. The input bytes are handed to `SystemCommand.execSuccess`
  * (presumably as the process' stdin -- confirm against `SystemCommand`).
  *
  * @param name      label used for the temp directory prefix and in log output
  * @param cmdCfg    the command to run; its args may contain `{{outfile}}`
  * @param wd        parent directory for the temporary directory
  * @param chunkSize chunk size used when reading the resulting file
  * @param blocker   passed to the command execution and to file reading
  * @param logger    receives the command's stdout/stderr at debug level
  * @return a pipe emitting the bytes of the produced `out.pdf`
  */
def toPDF[F[_]: Sync: ContextShift](
    name: String,
    cmdCfg: SystemCommand.Config,
    wd: Path,
    chunkSize: Int,
    blocker: Blocker,
    logger: Logger[F]
): Pipe[F, Byte, Byte] = { input =>
  // Runs the command with `tmpDir` as its working directory and streams the result.
  def convertIn(tmpDir: Path): Stream[F, Byte] = {
    val pdfFile = tmpDir.resolve("out.pdf")
    val command = cmdCfg.mapArgs { arg =>
      arg.replace("{{outfile}}", pdfFile.toAbsolutePath.normalize.toString)
    }

    SystemCommand
      .execSuccess[F](command, blocker, Some(tmpDir), input)
      .flatMap { res =>
        // Log the process output first, then emit the file contents (or fail).
        logResult(name, res, logger) ++ readResult[F](
          out = pdfFile,
          result = res,
          blocker = blocker,
          chunkSize = chunkSize,
          logger = logger
        )
      }
  }

  Stream
    .resource(File.withTempDir[F](wd, s"docspell-$name"))
    .flatMap(dir => convertIn(dir))
}
|
||||
|
||||
/** Builds a pipe that converts its input to PDF by running an external command
  * that reads its input from a file.
  *
  * A temporary directory (prefix `docspell-$name`) is acquired below `wd`. The
  * input stream is first written to `infile` inside that directory; afterwards the
  * command is executed with `{{infile}}` and `{{outfile}}` in its arguments replaced
  * by the absolute, normalized paths of `infile` and `out.pdf` respectively.
  *
  * @param name      label used for the temp directory prefix and in log output
  * @param cmdCfg    the command to run; its args may contain `{{infile}}`/`{{outfile}}`
  * @param wd        parent directory for the temporary directory
  * @param chunkSize chunk size used when reading the resulting file
  * @param blocker   passed to file writing, command execution and file reading
  * @param logger    receives progress and the command's stdout/stderr at debug level
  * @return a pipe emitting the bytes of the produced `out.pdf`
  */
def toPDFviaFile[F[_]: Sync: ContextShift](
    name: String,
    cmdCfg: SystemCommand.Config,
    wd: Path,
    chunkSize: Int,
    blocker: Blocker,
    logger: Logger[F]
): Pipe[F, Byte, Byte] = { input =>
  // Stores the input, runs the command inside `tmpDir` and streams the result.
  def convertIn(tmpDir: Path): Stream[F, Byte] = {
    val inFile  = tmpDir.resolve("infile")
    val pdfFile = tmpDir.resolve("out.pdf")
    val command = cmdCfg.mapArgs { arg =>
      arg
        .replace("{{outfile}}", pdfFile.toAbsolutePath.normalize.toString)
        .replace("{{infile}}", inFile.toAbsolutePath.normalize.toString)
    }

    // Emits exactly one unit once the input has been written to `inFile`.
    val storeInput: Stream[F, Unit] =
      Stream.eval(logger.debug(s"Storing input to file ${inFile} for running $name")).drain ++
        Stream.eval(storeFile(input, inFile, blocker))

    def runCommand: Stream[F, Byte] =
      SystemCommand
        .execSuccess[F](command, blocker, Some(tmpDir))
        .flatMap { res =>
          logResult(name, res, logger) ++ readResult[F](
            out = pdfFile,
            result = res,
            blocker = blocker,
            chunkSize = chunkSize,
            logger = logger
          )
        }

    storeInput.flatMap(_ => runCommand)
  }

  Stream
    .resource(File.withTempDir[F](wd, s"docspell-$name"))
    .flatMap(dir => convertIn(dir))
}
|
||||
|
||||
/** Streams the contents of the command's output file.
  *
  * If `out` exists and is non-empty its bytes are emitted; when the command's
  * return code was non-zero a warning is logged before doing so. If the file is
  * missing or empty, the stream fails with an exception.
  *
  * @param out       the expected output file
  * @param result    the result of the executed command (its `rc` is inspected)
  * @param blocker   passed to `File.readAll`
  * @param chunkSize passed to `File.readAll`
  * @param logger    receives the warning for a non-zero return code
  */
private def readResult[F[_]: Sync: ContextShift](
    out: Path,
    result: SystemCommand.Result,
    blocker: Blocker,
    chunkSize: Int,
    logger: Logger[F]
): Stream[F, Byte] = {
  def fileBytes: Stream[F, Byte] =
    File.readAll(out, blocker, chunkSize)

  def warnNonZero: Stream[F, Nothing] =
    Stream
      .eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
      .drain

  def missingOutput: Stream[F, Byte] =
    Stream.raiseError[F](
      new Exception(s"Command result=${result.rc}. No output file found.")
    )

  Stream.eval(File.existsNonEmpty[F](out)).flatMap { hasOutput =>
    if (!hasOutput) missingOutput
    else if (result.rc == 0) fileBytes
    else warnNonZero ++ fileBytes
  }
}
|
||||
|
||||
/** Logs the stdout and stderr of a finished command at debug level.
  *
  * The returned stream emits no elements; it only performs the two log effects,
  * stdout first, then stderr.
  *
  * @param name   label prefixed to both log lines
  * @param result the command result whose `stdout`/`stderr` are logged
  * @param logger the logger to write to
  */
private def logResult[F[_]: Sync](
    name: String,
    result: SystemCommand.Result,
    logger: Logger[F]
): Stream[F, Nothing] = {
  // By-name so the message is only built where `logger.debug` decides to use it.
  def debugOnly(msg: => String): Stream[F, Nothing] =
    Stream.eval(logger.debug(msg)).drain

  debugOnly(s"$name stdout: ${result.stdout}") ++
    debugOnly(s"$name stderr: ${result.stderr}")
}
|
||||
|
||||
/** Writes all bytes of `in` to the file `target`.
  *
  * @param in      the bytes to store
  * @param target  the file to write to
  * @param blocker passed to `fs2.io.file.writeAll`
  * @return an effect that completes once the stream has been fully written
  */
private def storeFile[F[_]: Sync: ContextShift](
    in: Stream[F, Byte],
    target: Path,
    blocker: Blocker
): F[Unit] = {
  val toTarget = fs2.io.file.writeAll[F](target, blocker)
  toTarget(in).compile.drain
}
|
||||
}
|
18
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
18
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import cats.effect._
|
||||
import fs2.Pipe
|
||||
import docspell.common._
|
||||
|
||||
/** PDF conversion using the external `unoconv` command. */
object Unoconv {

  /** Builds a pipe converting its input to PDF via `ExternConv.toPDFviaFile`,
    * using the command and working directory from `cfg` and the label "unoconv".
    *
    * @param cfg       command and working directory to use
    * @param chunkSize chunk size forwarded to the converter
    * @param blocker   forwarded to the converter
    * @param logger    forwarded to the converter
    */
  def toPDF[F[_]: Sync: ContextShift](
      cfg: UnoconvConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    ExternConv.toPDFviaFile[F](
      name = "unoconv",
      cmdCfg = cfg.cmd,
      wd = cfg.workingDir,
      chunkSize = chunkSize,
      blocker = blocker,
      logger = logger
    )
}
|
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
/** Settings for converting files with `unoconv`.
  *
  * @param cmd        the external command to execute; `Unoconv.toPDF` passes it to
  *                   `ExternConv.toPDFviaFile`
  * @param workingDir the directory passed as `wd` to `ExternConv.toPDFviaFile`,
  *                   below which the temporary conversion directory is created
  */
case class UnoconvConfig (cmd: SystemCommand.Config, workingDir: Path)
|
@ -1,9 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.{Pipe, Stream}
|
||||
import fs2.Pipe
|
||||
import docspell.common._
|
||||
|
||||
object WkHtmlPdf {
|
||||
@ -12,39 +10,9 @@ object WkHtmlPdf {
|
||||
cfg: WkHtmlPdfConfig,
|
||||
chunkSize: Int,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F]
|
||||
logger: Logger[F],
|
||||
): Pipe[F, Byte, Byte] =
|
||||
in =>
|
||||
Stream.resource(File.withTempDir[F](cfg.workingDir, "docspell-wkhtmltopdf")).flatMap { dir =>
|
||||
val out = dir.resolve("out.pdf")
|
||||
val sysCfg =
|
||||
cfg.cmd.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))
|
||||
ExternConv.toPDF[F]("wkhtmltopdf", cfg.cmd, cfg.workingDir, chunkSize, blocker, logger)
|
||||
|
||||
Stream.eval(logger.info(s"Running ${sysCfg.program}")).drain ++
|
||||
SystemCommand
|
||||
.execSuccess[F](sysCfg, blocker, Some(dir), in)
|
||||
.flatMap(result => readResult[F](out, result, blocker, chunkSize, logger))
|
||||
}
|
||||
|
||||
private def readResult[F[_]: Sync: ContextShift](
|
||||
out: Path,
|
||||
result: SystemCommand.Result,
|
||||
blocker: Blocker,
|
||||
chunkSize: Int,
|
||||
logger: Logger[F]
|
||||
): Stream[F, Byte] =
|
||||
Stream.eval(File.existsNonEmpty[F](out)).flatMap {
|
||||
case true =>
|
||||
if (result.rc == 0) File.readAll(out, blocker, chunkSize)
|
||||
else
|
||||
Stream
|
||||
.eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
|
||||
.drain ++
|
||||
File.readAll(out, blocker, chunkSize)
|
||||
|
||||
case false =>
|
||||
Stream.raiseError(
|
||||
new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -40,8 +40,10 @@ object Markdown {
|
||||
map(str => toHtml(str, cfg))
|
||||
|
||||
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
|
||||
s"""<html>
|
||||
s"""<!DOCTYPE html>
|
||||
|<html>
|
||||
|<head>
|
||||
|<meta charset="utf-8"/>
|
||||
|<style>
|
||||
|${cfg.internalCss}
|
||||
|</style>
|
||||
|
80
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
80
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.{Files, Path, Paths}
|
||||
|
||||
import fs2.Stream
|
||||
import cats.effect._
|
||||
import docspell.common._
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import fs2.Pipe
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
object ExternConvTest extends SimpleTestSuite {
|
||||
// Blocker shared with the other tests, taken from TestFiles.
val blocker = TestFiles.blocker
|
||||
// Implicit taken from TestFiles (presumably the ContextShift[IO] required by the
// converters -- confirm against TestFiles).
implicit val CS = TestFiles.CS
|
||||
|
||||
// Logger handed to the converters under test.
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||
// Relative path "target": used as parent for the temp directories of the tests and
// as working directory in the converter configs.
val target = Paths.get("target")
|
||||
|
||||
|
||||
// Converts an example HTML file with wkhtmltopdf and checks that a non-empty
// file has been written. Ignored when the command is not installed.
test("convert html to pdf") {
  val cfg = SystemCommand.Config(
    "wkhtmltopdf",
    Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
    Duration.seconds(20)
  )

  if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
  else {
    val wkCfg = WkHtmlPdfConfig(cfg, target)

    File
      .withTempDir[IO](target, "wkhtmltopdf")
      .use { dir =>
        // Compose the whole conversion as one IO instead of running a nested
        // `unsafeRunSync()` inside `IO { ... }`; the effect is run exactly once below.
        ExampleFiles.letter_de_html
          .readURL[IO](8192, blocker)
          .through(WkHtmlPdf.toPDF[IO](wkCfg, 8192, blocker, logger))
          .through(storeFile(dir.resolve("test.pdf")))
          .compile
          .lastOrError
          .map(p => assert(Files.exists(p) && Files.size(p) > 0))
      }
      .unsafeRunSync()
  }
}
|
||||
|
||||
// Converts an example docx file with unoconv and checks that a non-empty
// file has been written. Ignored when the command is not installed.
test("convert office to pdf") {
  val cfg = SystemCommand.Config(
    "unoconv",
    Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
    Duration.seconds(20)
  )

  if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
  else {
    val ucCfg = UnoconvConfig(cfg, target)

    File
      .withTempDir[IO](target, "unoconv")
      .use { dir =>
        // Compose the whole conversion as one IO instead of running a nested
        // `unsafeRunSync()` inside `IO { ... }`; the effect is run exactly once below.
        ExampleFiles.examples_sample_docx
          .readURL[IO](8192, blocker)
          .through(Unoconv.toPDF[IO](ucCfg, 8192, blocker, logger))
          .through(storeFile(dir.resolve("test.pdf")))
          .compile
          .lastOrError
          .map(p => assert(Files.exists(p) && Files.size(p) > 0))
      }
      .unsafeRunSync()
  }
}
|
||||
|
||||
|
||||
/** Pipe that collects the complete input into memory, writes it to `file` and
  * emits the path returned by `Files.write` as its single element.
  */
def storeFile(file: Path): Pipe[IO, Byte, Path] = { bytes =>
  val written: IO[Path] =
    for {
      content <- bytes.compile.to(Array)
      path    <- IO(Files.write(file, content))
    } yield path

  Stream.eval(written)
}
|
||||
|
||||
/** Checks whether `cmd` can be found by running `which cmd`.
  *
  * @param cmd the program name to look up
  * @return true if `which` exits with code 0; false if it exits with a non-zero
  *         code or if `which` itself cannot be started (for example on systems
  *         without a `which` binary), so that callers can ignore the test
  *         instead of failing with an exception
  */
def commandExists(cmd: String): Boolean =
  scala.util
    .Try(Runtime.getRuntime().exec(Array("which", cmd)).waitFor() == 0)
    .getOrElse(false)
|
||||
}
|
Reference in New Issue
Block a user