Add first drafts for converting

This commit is contained in:
Eike Kettner
2020-02-18 01:31:22 +01:00
parent c665c212a0
commit bd605b8c94
10 changed files with 266 additions and 42 deletions

View File

@ -0,0 +1,108 @@
package docspell.convert.extern
import java.nio.file.Path
import cats.effect._
import fs2.{Pipe, Stream}
import docspell.common._
/** Shared plumbing for producing a PDF by running an external command.
  *
  * Both entry points work inside a temporary directory obtained as a
  * resource below `wd`, substitute placeholders in the command arguments
  * and afterwards stream the file `out.pdf` from that directory.
  */
object ExternConv {

  /** Creates a pipe that hands the incoming bytes directly to the command
    * execution and emits the bytes of the generated `out.pdf`.
    *
    * The placeholder `{{outfile}}` in every command argument is replaced
    * by the absolute, normalized path of `out.pdf`.
    *
    * @param name      label used for the temporary directory name and in log messages
    * @param cmdCfg    the command to run; its arguments may contain `{{outfile}}`
    * @param wd        directory under which the temporary directory is requested
    * @param chunkSize chunk size used when reading the result file
    * @param blocker   passed on to the command execution and to file reading
    * @param logger    receives the command's stdout/stderr at debug level
    */
  def toPDF[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] = { in =>
    Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
      val out     = dir.resolve("out.pdf")
      val outPath = absolutePath(out)
      val sysCfg  = cmdCfg.mapArgs(arg => arg.replace("{{outfile}}", outPath))

      // NOTE(review): the 4th argument is presumably the process' stdin - confirm
      SystemCommand
        .execSuccess[F](sysCfg, blocker, Some(dir), in)
        .flatMap(result => logAndRead[F](name, out, result, blocker, chunkSize, logger))
    }
  }

  /** Creates a pipe that first writes the incoming bytes to a file named
    * `infile` in the temporary directory, then runs the command and emits
    * the bytes of the generated `out.pdf`.
    *
    * The placeholders `{{outfile}}` and `{{infile}}` in every command
    * argument are replaced by the absolute, normalized paths of the
    * respective files. Parameters have the same meaning as for `toPDF`.
    */
  def toPDFviaFile[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] = { in =>
    Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
      val inFile  = dir.resolve("infile")
      val out     = dir.resolve("out.pdf")
      val inPath  = absolutePath(inFile)
      val outPath = absolutePath(out)
      val sysCfg = cmdCfg.mapArgs { arg =>
        arg
          .replace("{{outfile}}", outPath)
          .replace("{{infile}}", inPath)
      }

      val announce =
        Stream.eval(logger.debug(s"Storing input to file ${inFile} for running $name")).drain
      val persist = Stream.eval(storeFile(in, inFile, blocker))

      // The command is only started once the complete input has been written.
      (announce ++ persist).flatMap { _ =>
        SystemCommand
          .execSuccess[F](sysCfg, blocker, Some(dir))
          .flatMap(result => logAndRead[F](name, out, result, blocker, chunkSize, logger))
      }
    }
  }

  /** Renders a path the way it is substituted into command arguments. */
  private def absolutePath(p: Path): String =
    p.toAbsolutePath.normalize.toString

  /** Logs the command output and then streams the result file. */
  private def logAndRead[F[_]: Sync: ContextShift](
      name: String,
      out: Path,
      result: SystemCommand.Result,
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  ): Stream[F, Byte] =
    logResult(name, result, logger) ++ readResult[F](out, result, blocker, chunkSize, logger)

  /** Streams the contents of `out`, provided `File.existsNonEmpty` reports true.
    *
    * A non-zero return code is tolerated in that case, but a warning is
    * logged. When the check reports false, the stream fails with an
    * exception carrying the return code.
    */
  private def readResult[F[_]: Sync: ContextShift](
      out: Path,
      result: SystemCommand.Result,
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  ): Stream[F, Byte] =
    Stream.eval(File.existsNonEmpty[F](out)).flatMap { hasOutput =>
      if (!hasOutput)
        Stream.raiseError[F](
          new Exception(s"Command result=${result.rc}. No output file found.")
        )
      else if (result.rc == 0)
        File.readAll(out, blocker, chunkSize)
      else
        Stream
          .eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
          .drain ++
          File.readAll(out, blocker, chunkSize)
    }

  /** Writes stdout and stderr of the finished command to the debug log;
    * emits no elements.
    */
  private def logResult[F[_]: Sync](
      name: String,
      result: SystemCommand.Result,
      logger: Logger[F]
  ): Stream[F, Nothing] = {
    val stdoutLog = Stream.eval(logger.debug(s"$name stdout: ${result.stdout}")).drain
    val stderrLog = Stream.eval(logger.debug(s"$name stderr: ${result.stderr}")).drain
    stdoutLog ++ stderrLog
  }

  /** Drains the given byte stream into the file `target`. */
  private def storeFile[F[_]: Sync: ContextShift](
      in: Stream[F, Byte],
      target: Path,
      blocker: Blocker
  ): F[Unit] = {
    val sink = fs2.io.file.writeAll(target, blocker)
    in.through(sink).compile.drain
  }
}

View File

@ -0,0 +1,18 @@
package docspell.convert.extern
import cats.effect._
import fs2.Pipe
import docspell.common._
/** PDF conversion through the external `unoconv` program. */
object Unoconv {

  /** Creates a pipe that converts its input to PDF by delegating to
    * `ExternConv.toPDFviaFile` with the command and working directory
    * taken from `cfg`.
    *
    * @param cfg       command and working directory to use
    * @param chunkSize chunk size forwarded to `ExternConv`
    * @param blocker   forwarded to `ExternConv`
    * @param logger    forwarded to `ExternConv`
    */
  def toPDF[F[_]: Sync: ContextShift](
      cfg: UnoconvConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    ExternConv.toPDFviaFile[F](
      name = "unoconv",
      cmdCfg = cfg.cmd,
      wd = cfg.workingDir,
      chunkSize = chunkSize,
      blocker = blocker,
      logger = logger
    )
}

View File

@ -0,0 +1,7 @@
package docspell.convert.extern
import java.nio.file.Path
import docspell.common.SystemCommand
/** Settings for running `unoconv`.
  *
  * @param cmd        the system command to execute
  * @param workingDir directory handed to `ExternConv` as the location for
  *                   its temporary directory
  */
case class UnoconvConfig(
    cmd: SystemCommand.Config,
    workingDir: Path
)

View File

@ -1,9 +1,7 @@
package docspell.convert.extern
import java.nio.file.Path
import cats.effect._
import fs2.{Pipe, Stream}
import fs2.Pipe
import docspell.common._
object WkHtmlPdf {
@ -12,39 +10,9 @@ object WkHtmlPdf {
cfg: WkHtmlPdfConfig,
chunkSize: Int,
blocker: Blocker,
logger: Logger[F]
logger: Logger[F],
): Pipe[F, Byte, Byte] =
in =>
Stream.resource(File.withTempDir[F](cfg.workingDir, "docspell-wkhtmltopdf")).flatMap { dir =>
val out = dir.resolve("out.pdf")
val sysCfg =
cfg.cmd.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))
ExternConv.toPDF[F]("wkhtmltopdf", cfg.cmd, cfg.workingDir, chunkSize, blocker, logger)
Stream.eval(logger.info(s"Running ${sysCfg.program}")).drain ++
SystemCommand
.execSuccess[F](sysCfg, blocker, Some(dir), in)
.flatMap(result => readResult[F](out, result, blocker, chunkSize, logger))
}
private def readResult[F[_]: Sync: ContextShift](
out: Path,
result: SystemCommand.Result,
blocker: Blocker,
chunkSize: Int,
logger: Logger[F]
): Stream[F, Byte] =
Stream.eval(File.existsNonEmpty[F](out)).flatMap {
case true =>
if (result.rc == 0) File.readAll(out, blocker, chunkSize)
else
Stream
.eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
.drain ++
File.readAll(out, blocker, chunkSize)
case false =>
Stream.raiseError(
new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
)
}
}

View File

@ -40,8 +40,10 @@ object Markdown {
map(str => toHtml(str, cfg))
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
s"""<html>
s"""<!DOCTYPE html>
|<html>
|<head>
|<meta charset="utf-8"/>
|<style>
|${cfg.internalCss}
|</style>

View File

@ -0,0 +1,80 @@
package docspell.convert.extern
import java.nio.file.{Files, Path, Paths}
import fs2.Stream
import cats.effect._
import docspell.common._
import docspell.files.{ExampleFiles, TestFiles}
import fs2.Pipe
import minitest.SimpleTestSuite
/** Tests for the external converters.
  *
  * Each test needs the corresponding program to be installed and is
  * ignored when `commandExists` cannot find it.
  */
object ExternConvTest extends SimpleTestSuite {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  val logger = Logger.log4s[IO](org.log4s.getLogger)
  val target = Paths.get("target")

  test("convert html to pdf") {
    val cfg = SystemCommand.Config(
      "wkhtmltopdf",
      Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      File
        .withTempDir[IO](target, "wkhtmltopdf")
        .use { dir =>
          val wkCfg = WkHtmlPdfConfig(cfg, target)
          // Compose the whole check as one effect; it is run exactly once below.
          ExampleFiles.letter_de_html
            .readURL[IO](8192, blocker)
            .through(WkHtmlPdf.toPDF[IO](wkCfg, 8192, blocker, logger))
            .through(storeFile(dir.resolve("test.pdf")))
            .compile
            .lastOrError
            .flatMap(p => assertNonEmptyFile(p))
        }
        .unsafeRunSync()
  }

  test("convert office to pdf") {
    val cfg = SystemCommand.Config(
      "unoconv",
      Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      File
        .withTempDir[IO](target, "unoconv")
        .use { dir =>
          val ucCfg = UnoconvConfig(cfg, target)
          // Compose the whole check as one effect; it is run exactly once below.
          ExampleFiles.examples_sample_docx
            .readURL[IO](8192, blocker)
            .through(Unoconv.toPDF[IO](ucCfg, 8192, blocker, logger))
            .through(storeFile(dir.resolve("test.pdf")))
            .compile
            .lastOrError
            .flatMap(p => assertNonEmptyFile(p))
        }
        .unsafeRunSync()
  }

  /** Asserts that `p` exists and contains at least one byte. */
  private def assertNonEmptyFile(p: Path): IO[Unit] =
    IO(assert(Files.exists(p) && Files.size(p) > 0))

  /** Collects all bytes of the stream, writes them to `file` and emits the
    * path returned by `Files.write`.
    */
  def storeFile(file: Path): Pipe[IO, Byte, Path] =
    in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes))))

  /** Returns true if `which cmd` exits with code 0.
    *
    * Any failure to run `which` itself (for example because it is not
    * installed) is treated as "command not found", so the tests are
    * ignored instead of failing with an error.
    */
  def commandExists(cmd: String): Boolean =
    scala.util
      .Try(Runtime.getRuntime().exec(Array("which", cmd)).waitFor() == 0)
      .getOrElse(false)
}