mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-03-31 05:15:08 +00:00
Add first drafts for converting
This commit is contained in:
parent
c665c212a0
commit
bd605b8c94
@ -17,6 +17,9 @@ object File {
|
|||||||
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
|
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
|
||||||
mkDir(parent).map(p => Files.createTempDirectory(p, prefix))
|
mkDir(parent).map(p => Files.createTempDirectory(p, prefix))
|
||||||
|
|
||||||
|
def mkTempFile[F[_]: Sync](parent: Path, prefix: String, suffix: Option[String] = None): F[Path] =
|
||||||
|
mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull))
|
||||||
|
|
||||||
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay {
|
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay {
|
||||||
val count = new AtomicInteger(0)
|
val count = new AtomicInteger(0)
|
||||||
Files.walkFileTree(
|
Files.walkFileTree(
|
||||||
|
@ -1,12 +1,15 @@
|
|||||||
package docspell.common
|
package docspell.common
|
||||||
|
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
import java.lang.ProcessBuilder.Redirect
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import java.util.concurrent.TimeUnit
|
import java.util.concurrent.TimeUnit
|
||||||
|
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import cats.effect.{Blocker, ContextShift, Sync}
|
import cats.effect.{Blocker, ContextShift, Sync}
|
||||||
import fs2.{Stream, io, text}
|
import fs2.{Stream, io, text}
|
||||||
import org.log4s.getLogger
|
import org.log4s.getLogger
|
||||||
|
|
||||||
import scala.jdk.CollectionConverters._
|
import scala.jdk.CollectionConverters._
|
||||||
import docspell.common.syntax.all._
|
import docspell.common.syntax.all._
|
||||||
|
|
||||||
@ -33,7 +36,7 @@ object SystemCommand {
|
|||||||
wd: Option[Path] = None,
|
wd: Option[Path] = None,
|
||||||
stdin: Stream[F, Byte] = Stream.empty
|
stdin: Stream[F, Byte] = Stream.empty
|
||||||
): Stream[F, Result] =
|
): Stream[F, Result] =
|
||||||
startProcess(cmd, wd) { proc =>
|
startProcess(cmd, wd, stdin) { proc =>
|
||||||
Stream.eval {
|
Stream.eval {
|
||||||
for {
|
for {
|
||||||
_ <- writeToProcess(stdin, proc, blocker)
|
_ <- writeToProcess(stdin, proc, blocker)
|
||||||
@ -66,15 +69,20 @@ object SystemCommand {
|
|||||||
else Stream.emit(r)
|
else Stream.emit(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path])(
|
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path], stdin: Stream[F, Byte])(
|
||||||
f: Process => Stream[F, A]
|
f: Process => Stream[F, A]
|
||||||
): Stream[F, A] = {
|
): Stream[F, A] = {
|
||||||
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
|
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
|
||||||
val proc = log *> Sync[F].delay {
|
val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
|
||||||
|
val proc = log *> hasStdin.flatMap(flag => Sync[F].delay {
|
||||||
val pb = new ProcessBuilder(cmd.toCmd.asJava)
|
val pb = new ProcessBuilder(cmd.toCmd.asJava)
|
||||||
|
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
|
||||||
|
.redirectError(Redirect.PIPE)
|
||||||
|
.redirectOutput(Redirect.PIPE)
|
||||||
|
|
||||||
wd.map(_.toFile).foreach(pb.directory)
|
wd.map(_.toFile).foreach(pb.directory)
|
||||||
pb.start()
|
pb.start()
|
||||||
}
|
})
|
||||||
Stream
|
Stream
|
||||||
.bracket(proc)(p =>
|
.bracket(proc)(p =>
|
||||||
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
||||||
|
108
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
108
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
package docspell.convert.extern
|
||||||
|
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import fs2.{Pipe, Stream}
|
||||||
|
import docspell.common._
|
||||||
|
|
||||||
|
/** Shared plumbing for converting a stream of bytes into a PDF by running an
  * external command.
  *
  * Both entry points create a temporary directory below `wd` (prefixed with
  * `docspell-$name`), replace the `{{outfile}}` placeholder in the command
  * arguments with the absolute, normalized path of `out.pdf` inside that
  * directory, hand the directory to `SystemCommand.execSuccess` and finally
  * stream the contents of `out.pdf`.
  */
object ExternConv {

  /** Converts the input by passing it to the command as the `stdin` stream.
    *
    * @param name      label used for the temporary directory prefix and in log messages
    * @param cmdCfg    command to run; `{{outfile}}` in its arguments is replaced
    * @param wd        parent directory for the temporary directory
    * @param chunkSize chunk size used when reading the produced file
    * @param blocker   blocker used for running the command and reading the file
    * @param logger    receives the command's stdout/stderr at debug level
    * @return a pipe that emits the bytes of the produced `out.pdf`; the stream
    *         fails if that file is missing or empty after the command ran
    */
  def toPDF[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    in =>
      Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
        val out = dir.resolve("out.pdf")
        val sysCfg =
          cmdCfg.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))

        collectOutput(
          name,
          SystemCommand.execSuccess[F](sysCfg, blocker, Some(dir), in),
          out,
          chunkSize,
          blocker,
          logger
        )
      }

  /** Converts the input by first storing it in a file named `infile` inside the
    * temporary directory and then running the command without a stdin stream.
    *
    * In addition to `{{outfile}}`, the placeholder `{{infile}}` in the command
    * arguments is replaced with the absolute, normalized path of the stored
    * input file.
    *
    * @param name      label used for the temporary directory prefix and in log messages
    * @param cmdCfg    command to run; `{{outfile}}` and `{{infile}}` are replaced
    * @param wd        parent directory for the temporary directory
    * @param chunkSize chunk size used when reading the produced file
    * @param blocker   blocker used for file access and for running the command
    * @param logger    receives progress and command output at debug level
    * @return a pipe that emits the bytes of the produced `out.pdf`; the stream
    *         fails if that file is missing or empty after the command ran
    */
  def toPDFviaFile[F[_]: Sync: ContextShift](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] =
    in =>
      Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
        val inFile = dir.resolve("infile")
        val out    = dir.resolve("out.pdf")
        val sysCfg =
          cmdCfg.mapArgs(
            _.replace("{{outfile}}", out.toAbsolutePath.normalize.toString)
              .replace("{{infile}}", inFile.toAbsolutePath.normalize.toString)
          )

        // The input must be completely written before the command is started.
        (Stream.eval(logger.debug(s"Storing input to file ${inFile} for running $name")).drain ++
          Stream.eval(storeFile(in, inFile, blocker))).flatMap { _ =>
          collectOutput(
            name,
            SystemCommand.execSuccess[F](sysCfg, blocker, Some(dir)),
            out,
            chunkSize,
            blocker,
            logger
          )
        }
      }

  /** For every result emitted by `run`: logs the command output, then streams
    * the contents of `out`. Shared tail of both conversion variants.
    */
  private def collectOutput[F[_]: Sync: ContextShift](
      name: String,
      run: Stream[F, SystemCommand.Result],
      out: Path,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Stream[F, Byte] =
    run.flatMap(result =>
      logResult(name, result, logger) ++ readResult[F](
        out,
        result,
        blocker,
        chunkSize,
        logger
      )
    )

  /** Streams the file `out` if it exists and is non-empty.
    *
    * A non-zero return code is only reported as a warning as long as the
    * output file is usable. If the file is missing or empty, the stream fails
    * with an exception whose message carries the return code and the
    * command's stderr, so the cause is visible without debug logging.
    */
  private def readResult[F[_]: Sync: ContextShift](
      out: Path,
      result: SystemCommand.Result,
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  ): Stream[F, Byte] =
    Stream.eval(File.existsNonEmpty[F](out)).flatMap {
      case true =>
        if (result.rc == 0) File.readAll(out, blocker, chunkSize)
        else
          Stream
            .eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
            .drain ++
            File.readAll(out, blocker, chunkSize)

      case false =>
        Stream.raiseError[F](
          new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
        )
    }

  /** Logs stdout and stderr of the command at debug level; emits no elements. */
  private def logResult[F[_]: Sync](
      name: String,
      result: SystemCommand.Result,
      logger: Logger[F]
  ): Stream[F, Nothing] =
    Stream.eval(logger.debug(s"$name stdout: ${result.stdout}")).drain ++
      Stream.eval(logger.debug(s"$name stderr: ${result.stderr}")).drain

  /** Writes all bytes of `in` to the file `target`. */
  private def storeFile[F[_]: Sync: ContextShift](
      in: Stream[F, Byte],
      target: Path,
      blocker: Blocker
  ): F[Unit] =
    in.through(fs2.io.file.writeAll(target, blocker)).compile.drain
}
|
18
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
18
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
package docspell.convert.extern
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import fs2.Pipe
|
||||||
|
import docspell.common._
|
||||||
|
|
||||||
|
/** PDF conversion that delegates to `ExternConv.toPDFviaFile` under the
  * label `unoconv`.
  */
object Unoconv {

  /** Creates a pipe converting its input to PDF using the command in `cfg`.
    *
    * @param cfg       supplies the command and the parent directory for temporary files
    * @param chunkSize forwarded unchanged to `ExternConv.toPDFviaFile`
    * @param blocker   forwarded unchanged to `ExternConv.toPDFviaFile`
    * @param logger    forwarded unchanged to `ExternConv.toPDFviaFile`
    */
  def toPDF[F[_]: Sync: ContextShift](
      cfg: UnoconvConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  ): Pipe[F, Byte, Byte] = {
    val label = "unoconv"

    ExternConv.toPDFviaFile[F](
      name = label,
      cmdCfg = cfg.cmd,
      wd = cfg.workingDir,
      chunkSize = chunkSize,
      blocker = blocker,
      logger = logger
    )
  }
}
|
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
package docspell.convert.extern
|
||||||
|
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
import docspell.common.SystemCommand
|
||||||
|
|
||||||
|
/** Settings for the conversion offered by `Unoconv.toPDF`.
  *
  * @param cmd        the external command; `Unoconv.toPDF` passes it on as the
  *                   command configuration of `ExternConv.toPDFviaFile`
  * @param workingDir passed on as `wd`, the directory below which the
  *                   temporary conversion directory is created
  */
case class UnoconvConfig(
    cmd: SystemCommand.Config,
    workingDir: Path
)
|
@ -1,9 +1,7 @@
|
|||||||
package docspell.convert.extern
|
package docspell.convert.extern
|
||||||
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import fs2.{Pipe, Stream}
|
import fs2.Pipe
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
|
||||||
object WkHtmlPdf {
|
object WkHtmlPdf {
|
||||||
@ -12,39 +10,9 @@ object WkHtmlPdf {
|
|||||||
cfg: WkHtmlPdfConfig,
|
cfg: WkHtmlPdfConfig,
|
||||||
chunkSize: Int,
|
chunkSize: Int,
|
||||||
blocker: Blocker,
|
blocker: Blocker,
|
||||||
logger: Logger[F]
|
logger: Logger[F],
|
||||||
): Pipe[F, Byte, Byte] =
|
): Pipe[F, Byte, Byte] =
|
||||||
in =>
|
ExternConv.toPDF[F]("wkhtmltopdf", cfg.cmd, cfg.workingDir, chunkSize, blocker, logger)
|
||||||
Stream.resource(File.withTempDir[F](cfg.workingDir, "docspell-wkhtmltopdf")).flatMap { dir =>
|
|
||||||
val out = dir.resolve("out.pdf")
|
|
||||||
val sysCfg =
|
|
||||||
cfg.cmd.mapArgs(_.replace("{{outfile}}", out.toAbsolutePath.normalize.toString))
|
|
||||||
|
|
||||||
Stream.eval(logger.info(s"Running ${sysCfg.program}")).drain ++
|
|
||||||
SystemCommand
|
|
||||||
.execSuccess[F](sysCfg, blocker, Some(dir), in)
|
|
||||||
.flatMap(result => readResult[F](out, result, blocker, chunkSize, logger))
|
|
||||||
}
|
|
||||||
|
|
||||||
private def readResult[F[_]: Sync: ContextShift](
|
|
||||||
out: Path,
|
|
||||||
result: SystemCommand.Result,
|
|
||||||
blocker: Blocker,
|
|
||||||
chunkSize: Int,
|
|
||||||
logger: Logger[F]
|
|
||||||
): Stream[F, Byte] =
|
|
||||||
Stream.eval(File.existsNonEmpty[F](out)).flatMap {
|
|
||||||
case true =>
|
|
||||||
if (result.rc == 0) File.readAll(out, blocker, chunkSize)
|
|
||||||
else
|
|
||||||
Stream
|
|
||||||
.eval(logger.warn(s"Command not successful (rc=${result.rc}), but file exists."))
|
|
||||||
.drain ++
|
|
||||||
File.readAll(out, blocker, chunkSize)
|
|
||||||
|
|
||||||
case false =>
|
|
||||||
Stream.raiseError(
|
|
||||||
new Exception(s"Command result=${result.rc}. No output file found. ${result.stderr}")
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -40,8 +40,10 @@ object Markdown {
|
|||||||
map(str => toHtml(str, cfg))
|
map(str => toHtml(str, cfg))
|
||||||
|
|
||||||
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
|
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
|
||||||
s"""<html>
|
s"""<!DOCTYPE html>
|
||||||
|
|<html>
|
||||||
|<head>
|
|<head>
|
||||||
|
|<meta charset="utf-8"/>
|
||||||
|<style>
|
|<style>
|
||||||
|${cfg.internalCss}
|
|${cfg.internalCss}
|
||||||
|</style>
|
|</style>
|
||||||
|
80
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
80
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
package docspell.convert.extern
|
||||||
|
|
||||||
|
import java.nio.file.{Files, Path, Paths}
|
||||||
|
|
||||||
|
import fs2.Stream
|
||||||
|
import cats.effect._
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.files.{ExampleFiles, TestFiles}
|
||||||
|
import fs2.Pipe
|
||||||
|
import minitest.SimpleTestSuite
|
||||||
|
|
||||||
|
/** Tests for the external-command based PDF conversions.
  *
  * Each test is ignored when the required program cannot be located with
  * `which`, so the suite still passes on machines without these tools.
  */
object ExternConvTest extends SimpleTestSuite {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  val logger = Logger.log4s[IO](org.log4s.getLogger)
  val target = Paths.get("target")

  test("convert html to pdf") {
    val cfg = SystemCommand.Config(
      "wkhtmltopdf",
      Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else {
      File
        .withTempDir[IO](target, "wkhtmltopdf")
        .use { dir =>
          val wkCfg = WkHtmlPdfConfig(cfg, target)
          // Compose the conversion and the assertion into one effect instead of
          // running the stream with a nested unsafeRunSync inside IO { ... }.
          ExampleFiles.letter_de_html
            .readURL[IO](8192, blocker)
            .through(WkHtmlPdf.toPDF[IO](wkCfg, 8192, blocker, logger))
            .through(storeFile(dir.resolve("test.pdf")))
            .compile
            .lastOrError
            .flatMap(p => IO(assert(Files.exists(p) && Files.size(p) > 0)))
        }
        .unsafeRunSync()
    }
  }

  test("convert office to pdf") {
    val cfg = SystemCommand.Config(
      "unoconv",
      Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else {
      File
        .withTempDir[IO](target, "unoconv")
        .use { dir =>
          val ucCfg = UnoconvConfig(cfg, target)
          // Same structure as above: a single effect, run exactly once.
          ExampleFiles.examples_sample_docx
            .readURL[IO](8192, blocker)
            .through(Unoconv.toPDF[IO](ucCfg, 8192, blocker, logger))
            .through(storeFile(dir.resolve("test.pdf")))
            .compile
            .lastOrError
            .flatMap(p => IO(assert(Files.exists(p) && Files.size(p) > 0)))
        }
        .unsafeRunSync()
    }
  }

  /** Collects all incoming bytes, writes them to `file` and emits the path
    * returned by `Files.write`.
    */
  def storeFile(file: Path): Pipe[IO, Byte, Path] =
    in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes))))

  /** Returns true if `which cmd` exits with status 0.
    *
    * Returns false instead of throwing when the lookup itself cannot be
    * started (for example when `which` is not installed), so the calling test
    * is ignored rather than reported as an error.
    */
  def commandExists(cmd: String): Boolean = {
    import scala.util.Try

    Try(Runtime.getRuntime().exec(Array("which", cmd)).waitFor() == 0).getOrElse(false)
  }
}
|
30
modules/files/src/test/resources/letter-de.html
Executable file
30
modules/files/src/test/resources/letter-de.html
Executable file
@ -0,0 +1,30 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
padding: 2em 5em;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<pre>
|
||||||
|
<code>
|
||||||
|
Max Mustermann
|
||||||
|
Lilienweg 21
|
||||||
|
12345 Nebendorf
|
||||||
|
E-Mail: max.muster@gmail.com
|
||||||
|
</code>
|
||||||
|
</pre>
|
||||||
|
<p>Max Mustermann, Lilienweg 21, 12345 Nebendorf</p>
|
||||||
|
<p>EasyCare AG<br> Abteilung Buchhaltung<br> Ackerweg 12<br> 12346 Ulmen<br></p>
|
||||||
|
<p>Nebendorf, 3. September 2019</p>
|
||||||
|
<h2>Sehr geehrte Damen und Herren</h2>
|
||||||
|
<p>hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum <em>nächstmöglichen</em> Termin.</p>
|
||||||
|
<p>Bitte senden Sie mir innerhalb der gesetzlichen Frist von <strong>14 Tagen</strong> eine Kündigungsbestätigung zu.</p>
|
||||||
|
<p>Vielen Dank im Vorraus!</p>
|
||||||
|
<p>Mit freundlichen Grüßen</p>
|
||||||
|
<p>Max Mustermann</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -51,11 +51,11 @@ object ConvertPdf {
|
|||||||
.map(_.mimetype)
|
.map(_.mimetype)
|
||||||
.getOrElse(Mimetype.`application/octet-stream`)
|
.getOrElse(Mimetype.`application/octet-stream`)
|
||||||
|
|
||||||
def convertSafe[F[_]: Sync](
|
def convertSafe[F[_]: Sync: ContextShift](
|
||||||
cfg: ConvertConfig,
|
cfg: ConvertConfig,
|
||||||
ctx: Context[F, ProcessItemArgs]
|
ctx: Context[F, ProcessItemArgs]
|
||||||
)(ra: RAttachment, mime: Mimetype): F[RAttachment] =
|
)(ra: RAttachment, mime: Mimetype): F[RAttachment] =
|
||||||
Conversion.create[F](cfg).use { conv =>
|
Conversion.create[F](cfg, ctx.blocker,ctx.logger).use { conv =>
|
||||||
ctx.logger
|
ctx.logger
|
||||||
.info(s"File ${ra.name} has mime ${mime.asString}. conv=$conv")
|
.info(s"File ${ra.name} has mime ${mime.asString}. conv=$conv")
|
||||||
.map(_ => ra)
|
.map(_ => ra)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user