Add first drafts for converting

This commit is contained in:
Eike Kettner 2020-02-18 01:31:22 +01:00
parent c665c212a0
commit bd605b8c94
10 changed files with 266 additions and 42 deletions

View File

@ -17,6 +17,9 @@ object File {
/** Creates a new temporary directory below `parent`.
  *
  * `parent` is first passed through `mkDir` (presumably creating it if it is
  * missing -- confirm against `mkDir`), then a directory whose name starts
  * with `prefix` is created inside the resulting path.
  *
  * The directory creation is a filesystem side effect and is therefore
  * suspended explicitly via `Sync[F].delay` rather than being performed inside
  * a plain `map`.
  *
  * @param parent the directory to create the temporary directory in
  * @param prefix the name prefix of the new directory
  * @return the path of the newly created directory
  */
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
  Sync[F].flatMap(mkDir(parent))(p => Sync[F].delay(Files.createTempDirectory(p, prefix)))
/** Creates a new, empty temporary file below `parent`.
  *
  * `parent` is first passed through `mkDir` (presumably creating it if it is
  * missing -- confirm against `mkDir`), then the file is created inside the
  * resulting path.
  *
  * The file creation is a filesystem side effect and is therefore suspended
  * explicitly via `Sync[F].delay` rather than being performed inside a plain
  * `map`.
  *
  * @param parent the directory to create the file in
  * @param prefix the name prefix of the new file
  * @param suffix optional name suffix; `None` is handed to the JDK as `null`,
  *               which lets `Files.createTempFile` pick its default suffix
  * @return the path of the newly created file
  */
def mkTempFile[F[_]: Sync](parent: Path, prefix: String, suffix: Option[String] = None): F[Path] =
  Sync[F].flatMap(mkDir(parent))(p =>
    // `orNull` is deliberate Java interop: the JDK API accepts a null suffix.
    Sync[F].delay(Files.createTempFile(p, prefix, suffix.orNull))
  )
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay {
val count = new AtomicInteger(0)
Files.walkFileTree(

View File

@ -1,12 +1,15 @@
package docspell.common
import java.io.InputStream
import java.lang.ProcessBuilder.Redirect
import java.nio.file.Path
import java.util.concurrent.TimeUnit
import cats.implicits._
import cats.effect.{Blocker, ContextShift, Sync}
import fs2.{Stream, io, text}
import org.log4s.getLogger
import scala.jdk.CollectionConverters._
import docspell.common.syntax.all._
@ -33,7 +36,7 @@ object SystemCommand {
wd: Option[Path] = None,
stdin: Stream[F, Byte] = Stream.empty
): Stream[F, Result] =
startProcess(cmd, wd) { proc =>
startProcess(cmd, wd, stdin) { proc =>
Stream.eval {
for {
_ <- writeToProcess(stdin, proc, blocker)
@ -66,15 +69,20 @@ object SystemCommand {
else Stream.emit(r)
}
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path])(
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path], stdin: Stream[F, Byte])(
f: Process => Stream[F, A]
): Stream[F, A] = {
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
val proc = log *> Sync[F].delay {
val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
val proc = log *> hasStdin.flatMap(flag => Sync[F].delay {
val pb = new ProcessBuilder(cmd.toCmd.asJava)
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
.redirectError(Redirect.PIPE)
.redirectOutput(Redirect.PIPE)
wd.map(_.toFile).foreach(pb.directory)
pb.start()
}
})
Stream
.bracket(proc)(p =>
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>

View File

@ -0,0 +1,108 @@
package docspell.convert.extern
import java.nio.file.Path
import cats.effect._
import fs2.{Pipe, Stream}
import docspell.common._
/** Shared plumbing for converting files to PDF by running an external
  * program.
  *
  * Both entry points create a temporary directory below the given working
  * directory, substitute placeholders in the command arguments, run the
  * command with that temporary directory as its working directory and finally
  * stream the bytes of the produced `out.pdf`.
  */
object ExternConv {

  /** Converts the incoming bytes by feeding them to the command's stdin.
    *
    * Every occurrence of `{{outfile}}` in the command arguments is replaced
    * with the absolute path of the expected output file.
    *
    * @param name      short name of the tool; used for the temporary directory
    *                  prefix (`docspell-$name`) and in log messages
    * @param cmdCfg    the command to run, may contain the `{{outfile}}` placeholder
    * @param wd        parent directory for the temporary directory
    * @param chunkSize chunk size used when reading the result file
    * @param blocker   blocker used for running the command and file I/O
    * @param logger    logger receiving the command's stdout/stderr
    * @return a pipe emitting the bytes of the generated PDF
    */
  def toPDF[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    in =>
      Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
        val out    = dir.resolve("out.pdf")
        val sysCfg = cmdCfg.mapArgs(_.replace("{{outfile}}", absolute(out)))

        runAndRead(name, sysCfg, dir, out, in, chunkSize, blocker, logger)
      }

  /** Converts the incoming bytes by first storing them in a file and passing
    * that file's path to the command.
    *
    * Every occurrence of `{{outfile}}` and `{{infile}}` in the command
    * arguments is replaced with the absolute path of the expected output file
    * and the stored input file, respectively. The command is started without
    * any stdin data.
    *
    * @param name      short name of the tool; used for the temporary directory
    *                  prefix (`docspell-$name`) and in log messages
    * @param cmdCfg    the command to run, may contain the `{{outfile}}` and
    *                  `{{infile}}` placeholders
    * @param wd        parent directory for the temporary directory
    * @param chunkSize chunk size used when reading the result file
    * @param blocker   blocker used for running the command and file I/O
    * @param logger    logger receiving progress and the command's stdout/stderr
    * @return a pipe emitting the bytes of the generated PDF
    */
  def toPDFviaFile[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    in =>
      Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
        val inFile = dir.resolve("infile")
        val out    = dir.resolve("out.pdf")
        val sysCfg =
          cmdCfg.mapArgs(
            _.replace("{{outfile}}", absolute(out))
              .replace("{{infile}}", absolute(inFile))
          )

        // The input must be completely written before the command is started.
        val storeInput =
          Stream.eval(logger.debug(s"Storing input to file ${inFile} for running $name")).drain ++
            Stream.eval(storeFile(in, inFile, blocker))

        storeInput.flatMap { _ =>
          runAndRead(name, sysCfg, dir, out, Stream.empty, chunkSize, blocker, logger)
        }
      }

  /** Runs the command inside `dir`, logs its output and then streams the
    * contents of `out`.
    *
    * This is the part both public entry points have in common; they only
    * differ in how the input reaches the command (`stdin` vs. a file).
    */
  private def runAndRead[F[_]: Sync: ContextShift](
      name: String,
      sysCfg: SystemCommand.Config,
      dir: Path,
      out: Path,
      stdin: Stream[F, Byte],
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Stream[F, Byte] =
    SystemCommand
      .execSuccess[F](sysCfg, blocker, Some(dir), stdin)
      .flatMap { result =>
        logResult(name, result, logger) ++
          readResult[F](out, result, blocker, chunkSize, logger)
      }

  /** Streams the output file if it exists and is non-empty.
    *
    * A non-zero return code is tolerated as long as a file was produced; in
    * that case a warning is logged first. Without a usable output file the
    * stream fails with an exception carrying the return code.
    */
  private def readResult[F[_]: Sync: ContextShift](
      out: Path,
      result: SystemCommand.Result,
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  ): Stream[F, Byte] =
    Stream.eval(File.existsNonEmpty[F](out)).flatMap {
      case true =>
        if (result.rc == 0) File.readAll(out, blocker, chunkSize)
        else
          Stream
            .eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
            .drain ++
            File.readAll(out, blocker, chunkSize)

      case false =>
        Stream.raiseError[F](
          new Exception(s"Command result=${result.rc}. No output file found.")
        )
    }

  /** Logs stdout and stderr of a finished command at debug level; emits nothing. */
  private def logResult[F[_]](
      name: String,
      result: SystemCommand.Result,
      logger: Logger[F]
  ): Stream[F, Nothing] =
    Stream.eval(logger.debug(s"$name stdout: ${result.stdout}")).drain ++
      Stream.eval(logger.debug(s"$name stderr: ${result.stderr}")).drain

  /** Writes all bytes of `in` to the file `target`. */
  private def storeFile[F[_]: Sync: ContextShift](
      in: Stream[F, Byte],
      target: Path,
      blocker: Blocker
  ): F[Unit] =
    in.through(fs2.io.file.writeAll(target, blocker)).compile.drain

  /** Renders `p` as the absolute, normalized path string used for placeholder substitution. */
  private def absolute(p: Path): String =
    p.toAbsolutePath.normalize.toString
}

View File

@ -0,0 +1,18 @@
package docspell.convert.extern
import cats.effect._
import fs2.Pipe
import docspell.common._
/** PDF conversion backed by the external `unoconv` command. */
object Unoconv {

  /** Builds a pipe that turns the incoming bytes into a PDF.
    *
    * All work is delegated to `ExternConv.toPDFviaFile`, using the tool name
    * `"unoconv"` together with the command and working directory taken from
    * `cfg`.
    *
    * @param cfg       command and working directory for `unoconv`
    * @param chunkSize chunk size handed on to the shared conversion code
    * @param blocker   blocker handed on to the shared conversion code
    * @param logger    logger handed on to the shared conversion code
    */
  def toPDF[F[_]: Sync: ContextShift](
      cfg: UnoconvConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    ExternConv.toPDFviaFile[F](
      name = "unoconv",
      cmdCfg = cfg.cmd,
      wd = cfg.workingDir,
      chunkSize = chunkSize,
      blocker = blocker,
      logger = logger
    )
}

View File

@ -0,0 +1,7 @@
package docspell.convert.extern
import java.nio.file.Path
import docspell.common.SystemCommand
/** Settings for running the external `unoconv` program.
  *
  * @param cmd        the system command to execute
  * @param workingDir directory handed to the conversion code as the parent
  *                   for its temporary directories
  */
case class UnoconvConfig(
    cmd: SystemCommand.Config,
    workingDir: Path
)

View File

@ -1,9 +1,7 @@
package docspell.convert.extern
import java.nio.file.Path
import cats.effect._
import fs2.{Pipe, Stream}
import fs2.Pipe
import docspell.common._
object WkHtmlPdf {
@ -12,39 +10,9 @@ object WkHtmlPdf {
cfg: WkHtmlPdfConfig,
chunkSize: Int,
blocker: Blocker,
logger: Logger[F]
logger: Logger[F],
): Pipe[F, Byte, Byte] =
in =>
Stream.resource(File.withTempDir[F](cfg.workingDir, "docspell-wkhtmltopdf")).flatMap { dir =>
val out = dir.resolve("out.pdf")
val sysCfg =
cfg.cmd.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))
ExternConv.toPDF[F]("wkhtmltopdf", cfg.cmd, cfg.workingDir, chunkSize, blocker, logger)
Stream.eval(logger.info(s"Running ${sysCfg.program}")).drain ++
SystemCommand
.execSuccess[F](sysCfg, blocker, Some(dir), in)
.flatMap(result => readResult[F](out, result, blocker, chunkSize, logger))
}
private def readResult[F[_]: Sync: ContextShift](
out: Path,
result: SystemCommand.Result,
blocker: Blocker,
chunkSize: Int,
logger: Logger[F]
): Stream[F, Byte] =
Stream.eval(File.existsNonEmpty[F](out)).flatMap {
case true =>
if (result.rc == 0) File.readAll(out, blocker, chunkSize)
else
Stream
.eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
.drain ++
File.readAll(out, blocker, chunkSize)
case false =>
Stream.raiseError(
new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
)
}
}

View File

@ -40,8 +40,10 @@ object Markdown {
map(str => toHtml(str, cfg))
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
s"""<html>
s"""<!DOCTYPE html>
|<html>
|<head>
|<meta charset="utf-8"/>
|<style>
|${cfg.internalCss}
|</style>

View File

@ -0,0 +1,80 @@
package docspell.convert.extern
import java.nio.file.{Files, Path, Paths}
import fs2.Stream
import cats.effect._
import docspell.common._
import docspell.files.{ExampleFiles, TestFiles}
import fs2.Pipe
import minitest.SimpleTestSuite
/** Integration tests for the converters that shell out to external programs.
  *
  * Each test is ignored when the required program cannot be located on the
  * machine running the tests.
  */
object ExternConvTest extends SimpleTestSuite {
  val blocker                       = TestFiles.blocker
  implicit val CS: ContextShift[IO] = TestFiles.CS

  val logger = Logger.log4s[IO](org.log4s.getLogger)
  val target = Paths.get("target")

  test("convert html to pdf") {
    val cfg = SystemCommand.Config(
      "wkhtmltopdf",
      Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      assertConvertsToPdf(
        "wkhtmltopdf",
        ExampleFiles.letter_de_html.readURL[IO](8192, blocker),
        WkHtmlPdf.toPDF[IO](WkHtmlPdfConfig(cfg, target), 8192, blocker, logger)
      )
  }

  test("convert office to pdf") {
    val cfg = SystemCommand.Config(
      "unoconv",
      Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      assertConvertsToPdf(
        "unoconv",
        ExampleFiles.examples_sample_docx.readURL[IO](8192, blocker),
        Unoconv.toPDF[IO](UnoconvConfig(cfg, target), 8192, blocker, logger)
      )
  }

  /** Runs `input` through `converter`, stores the result as `test.pdf` in a
    * fresh temporary directory below `target` and asserts that the file exists
    * and is not empty.
    *
    * The assertion is part of the effect passed to `use`, so it is evaluated
    * while the temporary directory is still present.
    */
  private def assertConvertsToPdf(
      prefix: String,
      input: Stream[IO, Byte],
      converter: Pipe[IO, Byte, Byte]
  ): Unit =
    File
      .withTempDir[IO](target, prefix)
      .use { dir =>
        input
          .through(converter)
          .through(storeFile(dir.resolve("test.pdf")))
          .compile
          .lastOrError
          .map(p => assert(Files.exists(p) && Files.size(p) > 0))
      }
      .unsafeRunSync()

  /** Collects all incoming bytes and writes them to `file`, emitting the written path. */
  def storeFile(file: Path): Pipe[IO, Byte, Path] =
    in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes))))

  /** Checks via `which` whether `cmd` can be found.
    *
    * Any failure to run `which` itself (for example on systems that do not
    * provide it) is treated as "command not available", so the affected test
    * is ignored instead of failing with an exception.
    */
  def commandExists(cmd: String): Boolean =
    scala.util
      .Try(Runtime.getRuntime().exec(Array("which", cmd)).waitFor() == 0)
      .getOrElse(false)
}

View File

@ -0,0 +1,30 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<style>
body {
padding: 2em 5em;
}
</style>
</head>
<body>
<pre>
<code>
Max Mustermann
Lilienweg 21
12345 Nebendorf
E-Mail: max.muster@gmail.com
</code>
</pre>
<p>Max Mustermann, Lilienweg 21, 12345 Nebendorf</p>
<p>EasyCare AG<br> Abteilung Buchhaltung<br> Ackerweg 12<br> 12346 Ulmen<br></p>
<p>Nebendorf, 3. September 2019</p>
<h2>Sehr geehrte Damen und Herren</h2>
<p>hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum <em>nächstmöglichen</em> Termin.</p>
<p>Bitte senden Sie mir innerhalb der gesetzlichen Frist von <strong>14 Tagen</strong> eine Kündigungsbestätigung zu.</p>
<p>Vielen Dank im Vorraus!</p>
<p>Mit freundlichen Grüßen</p>
<p>Max Mustermann</p>
</body>
</html>

View File

@ -51,11 +51,11 @@ object ConvertPdf {
.map(_.mimetype)
.getOrElse(Mimetype.`application/octet-stream`)
def convertSafe[F[_]: Sync](
def convertSafe[F[_]: Sync: ContextShift](
cfg: ConvertConfig,
ctx: Context[F, ProcessItemArgs]
)(ra: RAttachment, mime: Mimetype): F[RAttachment] =
Conversion.create[F](cfg).use { conv =>
Conversion.create[F](cfg, ctx.blocker,ctx.logger).use { conv =>
ctx.logger
.info(s"File ${ra.name} has mime ${mime.asString}. conv=$conv")
.map(_ => ra)