mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-28 17:55:06 +00:00
Add first drafts for converting
This commit is contained in:
parent
c665c212a0
commit
bd605b8c94
@ -17,6 +17,9 @@ object File {
|
||||
/** Creates a new temporary directory below `parent`.
  *
  * `parent` is first passed to `mkDir`; the path that effect yields is the
  * directory handed to `Files.createTempDirectory` together with `prefix`.
  *
  * The file-system call is wrapped in `Sync[F].delay` and sequenced with
  * `flatMap`, so it is an explicitly suspended effect instead of a side
  * effect hidden inside a pure `map` function.
  *
  * @param parent directory under which the temporary directory is created
  * @param prefix name prefix given to `Files.createTempDirectory`
  * @return the path of the newly created directory
  */
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
  Sync[F].flatMap(mkDir(parent))(p => Sync[F].delay(Files.createTempDirectory(p, prefix)))
|
||||
|
||||
/** Creates a new, empty temporary file below `parent`.
  *
  * `parent` is first passed to `mkDir`; the path that effect yields is the
  * directory handed to `Files.createTempFile`.
  *
  * The file-system call is wrapped in `Sync[F].delay` and sequenced with
  * `flatMap`, so it is an explicitly suspended effect instead of a side
  * effect hidden inside a pure `map` function.
  *
  * @param parent directory under which the file is created
  * @param prefix name prefix given to `Files.createTempFile`
  * @param suffix optional name suffix; `None` is passed on as `null`, which
  *               the Java API documents as "use the default `.tmp` suffix"
  * @return the path of the newly created file
  */
def mkTempFile[F[_]: Sync](parent: Path, prefix: String, suffix: Option[String] = None): F[Path] =
  Sync[F].flatMap(mkDir(parent))(p =>
    Sync[F].delay(Files.createTempFile(p, prefix, suffix.orNull))
  )
|
||||
|
||||
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay {
|
||||
val count = new AtomicInteger(0)
|
||||
Files.walkFileTree(
|
||||
|
@ -1,12 +1,15 @@
|
||||
package docspell.common
|
||||
|
||||
import java.io.InputStream
|
||||
import java.lang.ProcessBuilder.Redirect
|
||||
import java.nio.file.Path
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.{Blocker, ContextShift, Sync}
|
||||
import fs2.{Stream, io, text}
|
||||
import org.log4s.getLogger
|
||||
|
||||
import scala.jdk.CollectionConverters._
|
||||
import docspell.common.syntax.all._
|
||||
|
||||
@ -33,7 +36,7 @@ object SystemCommand {
|
||||
wd: Option[Path] = None,
|
||||
stdin: Stream[F, Byte] = Stream.empty
|
||||
): Stream[F, Result] =
|
||||
startProcess(cmd, wd) { proc =>
|
||||
startProcess(cmd, wd, stdin) { proc =>
|
||||
Stream.eval {
|
||||
for {
|
||||
_ <- writeToProcess(stdin, proc, blocker)
|
||||
@ -66,15 +69,20 @@ object SystemCommand {
|
||||
else Stream.emit(r)
|
||||
}
|
||||
|
||||
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path])(
|
||||
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path], stdin: Stream[F, Byte])(
|
||||
f: Process => Stream[F, A]
|
||||
): Stream[F, A] = {
|
||||
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
|
||||
val proc = log *> Sync[F].delay {
|
||||
val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
|
||||
val proc = log *> hasStdin.flatMap(flag => Sync[F].delay {
|
||||
val pb = new ProcessBuilder(cmd.toCmd.asJava)
|
||||
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
|
||||
.redirectError(Redirect.PIPE)
|
||||
.redirectOutput(Redirect.PIPE)
|
||||
|
||||
wd.map(_.toFile).foreach(pb.directory)
|
||||
pb.start()
|
||||
}
|
||||
})
|
||||
Stream
|
||||
.bracket(proc)(p =>
|
||||
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
||||
|
108
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
108
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.{Pipe, Stream}
|
||||
import docspell.common._
|
||||
|
||||
/** Shared plumbing for converting a byte stream to PDF by running an
  * external program.
  *
  * Both entry points obtain a directory from `File.withTempDir` below the
  * given working directory, run the configured command with that directory
  * as its working directory and stream back the contents of `out.pdf`
  * inside it.
  */
object ExternConv {

  /** Converts the input by feeding it to the command's stdin.
    *
    * The text `{{outfile}}` in the command arguments is replaced with the
    * absolute, normalized path of the expected output file.
    *
    * @param name      short name of the tool; used in the temp-dir prefix
    *                  and in log messages
    * @param cmdCfg    the command to run, possibly containing `{{outfile}}`
    * @param wd        directory below which the temporary directory is made
    * @param chunkSize chunk size used when reading the output file
    * @param blocker   blocker passed to the command runner and file reads
    * @param logger    receives the command's stdout/stderr at debug level
    * @return a pipe from the input bytes to the bytes of the produced PDF
    */
  def toPDF[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    in =>
      Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
        val out = dir.resolve("out.pdf")
        val sysCfg =
          cmdCfg.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))

        SystemCommand
          .execSuccess[F](sysCfg, blocker, Some(dir), in)
          .through(collectOutput[F](name, out, chunkSize, blocker, logger))
      }

  /** Converts the input by first storing it in a file and handing that
    * file to the command.
    *
    * The input is written to `infile` inside the temporary directory before
    * the command starts. The texts `{{outfile}}` and `{{infile}}` in the
    * command arguments are replaced with the absolute, normalized paths of
    * the output and input file respectively.
    *
    * Parameters and result are the same as for [[toPDF]].
    */
  def toPDFviaFile[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    in =>
      Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
        val inFile = dir.resolve("infile")
        val out    = dir.resolve("out.pdf")
        val sysCfg =
          cmdCfg.mapArgs(
            _.replace("{{outfile}}", out.toAbsolutePath.normalize.toString)
              .replace("{{infile}}", inFile.toAbsolutePath.normalize.toString)
          )

        (Stream.eval(logger.debug(s"Storing input to file ${inFile} for running $name")).drain ++
          Stream.eval(storeFile(in, inFile, blocker))).flatMap { _ =>
          SystemCommand
            .execSuccess[F](sysCfg, blocker, Some(dir))
            .through(collectOutput[F](name, out, chunkSize, blocker, logger))
        }
      }

  /** For every command result: logs its stdout/stderr, then emits the
    * bytes of the output file. Shared by both entry points.
    */
  private def collectOutput[F[_]: Sync: ContextShift](
      name: String,
      out: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, SystemCommand.Result, Byte] =
    _.flatMap { result =>
      logResult(name, result, logger) ++
        readResult[F](out, result, blocker, chunkSize, logger)
    }

  /** Streams the output file if `File.existsNonEmpty` reports it present.
    *
    * A non-zero return code together with an existing file only produces a
    * warning; the file is still read. If the file is missing the stream
    * fails, and the error message carries the command's stderr so the cause
    * is visible without debug logging.
    */
  private def readResult[F[_]: Sync: ContextShift](
      out: Path,
      result: SystemCommand.Result,
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  ): Stream[F, Byte] =
    Stream.eval(File.existsNonEmpty[F](out)).flatMap {
      case true =>
        val content: Stream[F, Byte] = File.readAll(out, blocker, chunkSize)
        if (result.rc == 0) content
        else
          Stream
            .eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
            .drain ++
            content

      case false =>
        Stream.raiseError[F](
          new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
        )
    }

  /** Logs the command's stdout and stderr at debug level; emits nothing. */
  private def logResult[F[_]: Sync](
      name: String,
      result: SystemCommand.Result,
      logger: Logger[F]
  ): Stream[F, Nothing] =
    Stream.eval(logger.debug(s"$name stdout: ${result.stdout}")).drain ++
      Stream.eval(logger.debug(s"$name stderr: ${result.stderr}")).drain

  /** Writes the whole input stream to `target`. */
  private def storeFile[F[_]: Sync: ContextShift](
      in: Stream[F, Byte],
      target: Path,
      blocker: Blocker
  ): F[Unit] =
    in.through(fs2.io.file.writeAll(target, blocker)).compile.drain
}
|
18
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
18
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import cats.effect._
|
||||
import fs2.Pipe
|
||||
import docspell.common._
|
||||
|
||||
/** PDF conversion through the external `unoconv` program. */
object Unoconv {

  /** Builds a pipe that converts the incoming bytes to PDF.
    *
    * This only forwards to `ExternConv.toPDFviaFile`, using the name
    * "unoconv" and taking command and working directory from `cfg`.
    *
    * @param cfg       command and working directory for unoconv
    * @param chunkSize forwarded unchanged to `ExternConv.toPDFviaFile`
    * @param blocker   forwarded unchanged to `ExternConv.toPDFviaFile`
    * @param logger    forwarded unchanged to `ExternConv.toPDFviaFile`
    */
  def toPDF[F[_]: Sync: ContextShift](
      cfg: UnoconvConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    ExternConv.toPDFviaFile[F](
      name = "unoconv",
      cmdCfg = cfg.cmd,
      wd = cfg.workingDir,
      chunkSize = chunkSize,
      blocker = blocker,
      logger = logger
    )
}
|
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
/** Settings for running the `unoconv` program.
  *
  * @param cmd        the system command to execute
  * @param workingDir directory that `Unoconv.toPDF` passes on as the
  *                   working directory of the conversion
  */
case class UnoconvConfig(
    cmd: SystemCommand.Config,
    workingDir: Path
)
|
@ -1,9 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.{Pipe, Stream}
|
||||
import fs2.Pipe
|
||||
import docspell.common._
|
||||
|
||||
object WkHtmlPdf {
|
||||
@ -12,39 +10,9 @@ object WkHtmlPdf {
|
||||
cfg: WkHtmlPdfConfig,
|
||||
chunkSize: Int,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F]
|
||||
logger: Logger[F],
|
||||
): Pipe[F, Byte, Byte] =
|
||||
in =>
|
||||
Stream.resource(File.withTempDir[F](cfg.workingDir, "docspell-wkhtmltopdf")).flatMap { dir =>
|
||||
val out = dir.resolve("out.pdf")
|
||||
val sysCfg =
|
||||
cfg.cmd.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))
|
||||
ExternConv.toPDF[F]("wkhtmltopdf", cfg.cmd, cfg.workingDir, chunkSize, blocker, logger)
|
||||
|
||||
Stream.eval(logger.info(s"Running ${sysCfg.program}")).drain ++
|
||||
SystemCommand
|
||||
.execSuccess[F](sysCfg, blocker, Some(dir), in)
|
||||
.flatMap(result => readResult[F](out, result, blocker, chunkSize, logger))
|
||||
}
|
||||
|
||||
private def readResult[F[_]: Sync: ContextShift](
|
||||
out: Path,
|
||||
result: SystemCommand.Result,
|
||||
blocker: Blocker,
|
||||
chunkSize: Int,
|
||||
logger: Logger[F]
|
||||
): Stream[F, Byte] =
|
||||
Stream.eval(File.existsNonEmpty[F](out)).flatMap {
|
||||
case true =>
|
||||
if (result.rc == 0) File.readAll(out, blocker, chunkSize)
|
||||
else
|
||||
Stream
|
||||
.eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
|
||||
.drain ++
|
||||
File.readAll(out, blocker, chunkSize)
|
||||
|
||||
case false =>
|
||||
Stream.raiseError(
|
||||
new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -40,8 +40,10 @@ object Markdown {
|
||||
map(str => toHtml(str, cfg))
|
||||
|
||||
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
|
||||
s"""<html>
|
||||
s"""<!DOCTYPE html>
|
||||
|<html>
|
||||
|<head>
|
||||
|<meta charset="utf-8"/>
|
||||
|<style>
|
||||
|${cfg.internalCss}
|
||||
|</style>
|
||||
|
80
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
80
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.{Files, Path, Paths}
|
||||
|
||||
import fs2.Stream
|
||||
import cats.effect._
|
||||
import docspell.common._
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import fs2.Pipe
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Runs the external converters against example files.
  *
  * A test is ignored when its program cannot be located with `which`.
  */
object ExternConvTest extends SimpleTestSuite {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  val logger = Logger.log4s[IO](org.log4s.getLogger)
  val target = Paths.get("target")

  test("convert html to pdf") {
    val cfg = SystemCommand.Config(
      "wkhtmltopdf",
      Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else {
      val wkCfg = WkHtmlPdfConfig(cfg, target)
      convertAndCheck(
        "wkhtmltopdf",
        ExampleFiles.letter_de_html.readURL[IO](8192, blocker),
        WkHtmlPdf.toPDF[IO](wkCfg, 8192, blocker, logger)
      )
    }
  }

  test("convert office to pdf") {
    val cfg = SystemCommand.Config(
      "unoconv",
      Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else {
      val ucCfg = UnoconvConfig(cfg, target)
      convertAndCheck(
        "unoconv",
        ExampleFiles.examples_sample_docx.readURL[IO](8192, blocker),
        Unoconv.toPDF[IO](ucCfg, 8192, blocker, logger)
      )
    }
  }

  /** Sends `input` through `toPdf`, stores the result as `test.pdf` in a
    * temporary directory below `target` and asserts that the stored file
    * exists and is not empty.
    */
  private def convertAndCheck(
      prefix: String,
      input: Stream[IO, Byte],
      toPdf: Pipe[IO, Byte, Byte]
  ): Unit =
    File
      .withTempDir[IO](target, prefix)
      .use { dir =>
        input
          .through(toPdf)
          .through(storeFile(dir.resolve("test.pdf")))
          .compile
          .lastOrError
          .map(pdf => assert(Files.exists(pdf) && Files.size(pdf) > 0))
      }
      .unsafeRunSync()

  /** Collects all bytes of the stream, writes them to `file` and emits the
    * path returned by `Files.write`.
    */
  def storeFile(file: Path): Pipe[IO, Byte, Path] =
    bytes =>
      Stream.eval(
        for {
          data <- bytes.compile.to(Array)
          path <- IO(Files.write(file, data))
        } yield path
      )

  /** True if `which cmd` exits with status 0. */
  def commandExists(cmd: String): Boolean = {
    val lookup = Runtime.getRuntime().exec(Array("which", cmd))
    lookup.waitFor() == 0
  }
}
|
30
modules/files/src/test/resources/letter-de.html
Executable file
30
modules/files/src/test/resources/letter-de.html
Executable file
@ -0,0 +1,30 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<style>
|
||||
body {
|
||||
padding: 2em 5em;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<pre>
|
||||
<code>
|
||||
Max Mustermann
|
||||
Lilienweg 21
|
||||
12345 Nebendorf
|
||||
E-Mail: max.muster@gmail.com
|
||||
</code>
|
||||
</pre>
|
||||
<p>Max Mustermann, Lilienweg 21, 12345 Nebendorf</p>
|
||||
<p>EasyCare AG<br> Abteilung Buchhaltung<br> Ackerweg 12<br> 12346 Ulmen<br></p>
|
||||
<p>Nebendorf, 3. September 2019</p>
|
||||
<h2>Sehr geehrte Damen und Herren</h2>
|
||||
<p>hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum <em>nächstmöglichen</em> Termin.</p>
|
||||
<p>Bitte senden Sie mir innerhalb der gesetzlichen Frist von <strong>14 Tagen</strong> eine Kündigungsbestätigung zu.</p>
|
||||
<p>Vielen Dank im Vorraus!</p>
|
||||
<p>Mit freundlichen Grüßen</p>
|
||||
<p>Max Mustermann</p>
|
||||
</body>
|
||||
</html>
|
@ -51,11 +51,11 @@ object ConvertPdf {
|
||||
.map(_.mimetype)
|
||||
.getOrElse(Mimetype.`application/octet-stream`)
|
||||
|
||||
def convertSafe[F[_]: Sync](
|
||||
def convertSafe[F[_]: Sync: ContextShift](
|
||||
cfg: ConvertConfig,
|
||||
ctx: Context[F, ProcessItemArgs]
|
||||
)(ra: RAttachment, mime: Mimetype): F[RAttachment] =
|
||||
Conversion.create[F](cfg).use { conv =>
|
||||
Conversion.create[F](cfg, ctx.blocker,ctx.logger).use { conv =>
|
||||
ctx.logger
|
||||
.info(s"File ${ra.name} has mime ${mime.asString}. conv=$conv")
|
||||
.map(_ => ra)
|
||||
|
Loading…
x
Reference in New Issue
Block a user