mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
sbt scalafmtAll
This commit is contained in:
@ -13,7 +13,9 @@ import docspell.files.{ImageSize, TikaMimetype}
|
||||
|
||||
trait Conversion[F[_]] {
|
||||
|
||||
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(in: Stream[F, Byte]): F[A]
|
||||
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
|
||||
in: Stream[F, Byte]
|
||||
): F[A]
|
||||
|
||||
}
|
||||
|
||||
@ -26,7 +28,9 @@ object Conversion {
|
||||
): Resource[F, Conversion[F]] =
|
||||
Resource.pure(new Conversion[F] {
|
||||
|
||||
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(in: Stream[F, Byte]): F[A] =
|
||||
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
|
||||
in: Stream[F, Byte]
|
||||
): F[A] =
|
||||
TikaMimetype.resolve(dataType, in).flatMap {
|
||||
case MimeType.pdf =>
|
||||
handler.run(ConversionResult.successPdf(in))
|
||||
@ -112,10 +116,10 @@ object Conversion {
|
||||
|
||||
def unapply(mt: MimeType): Option[MimeType] =
|
||||
mt match {
|
||||
case Office(_) => Some(mt)
|
||||
case Texts(_) => Some(mt)
|
||||
case Images(_) => Some(mt)
|
||||
case Office(_) => Some(mt)
|
||||
case Texts(_) => Some(mt)
|
||||
case Images(_) => Some(mt)
|
||||
case MimeType.html => Some(mt)
|
||||
case _ => None
|
||||
case _ => None
|
||||
}
|
||||
}
|
||||
|
@ -3,9 +3,11 @@ package docspell.convert
|
||||
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
||||
import docspell.convert.flexmark.MarkdownConfig
|
||||
|
||||
case class ConvertConfig(chunkSize: Int,
|
||||
maxImageSize: Int,
|
||||
markdown: MarkdownConfig,
|
||||
wkhtmlpdf: WkHtmlPdfConfig,
|
||||
tesseract: TesseractConfig,
|
||||
unoconv: UnoconvConfig)
|
||||
case class ConvertConfig(
|
||||
chunkSize: Int,
|
||||
maxImageSize: Int,
|
||||
markdown: MarkdownConfig,
|
||||
wkhtmlpdf: WkHtmlPdfConfig,
|
||||
tesseract: TesseractConfig,
|
||||
unoconv: UnoconvConfig
|
||||
)
|
||||
|
@ -20,7 +20,9 @@ private[extern] object ExternConv {
|
||||
logger: Logger[F],
|
||||
reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
|
||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
|
||||
Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
|
||||
Stream
|
||||
.resource(File.withTempDir[F](wd, s"docspell-$name"))
|
||||
.flatMap { dir =>
|
||||
val inFile = dir.resolve("infile").toAbsolutePath.normalize
|
||||
val out = dir.resolve("out.pdf").toAbsolutePath.normalize
|
||||
val sysCfg =
|
||||
@ -40,12 +42,12 @@ private[extern] object ExternConv {
|
||||
SystemCommand
|
||||
.execSuccess[F](sysCfg, blocker, logger, Some(dir), if (useStdin) in else Stream.empty)
|
||||
.evalMap(result =>
|
||||
logResult(name, result, logger).
|
||||
flatMap(_ => reader(out, result)).
|
||||
flatMap(handler.run)
|
||||
logResult(name, result, logger).flatMap(_ => reader(out, result)).flatMap(handler.run)
|
||||
)
|
||||
}
|
||||
}.compile.lastOrError
|
||||
}
|
||||
.compile
|
||||
.lastOrError
|
||||
|
||||
def readResult[F[_]: Sync: ContextShift](
|
||||
blocker: Blocker,
|
||||
@ -60,9 +62,11 @@ private[extern] object ExternConv {
|
||||
successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
|
||||
|
||||
case false =>
|
||||
ConversionResult.failure[F](
|
||||
new Exception(s"Command result=${result.rc}. No output file found.")
|
||||
).pure[F]
|
||||
ConversionResult
|
||||
.failure[F](
|
||||
new Exception(s"Command result=${result.rc}. No output file found.")
|
||||
)
|
||||
.pure[F]
|
||||
}
|
||||
|
||||
def readResultTesseract[F[_]: Sync: ContextShift](
|
||||
@ -75,7 +79,7 @@ private[extern] object ExternConv {
|
||||
File.existsNonEmpty[F](outPdf).flatMap {
|
||||
case true =>
|
||||
val outTxt = out.resolveSibling(s"$outPrefix.txt")
|
||||
File.exists(outTxt).flatMap(txtExists => {
|
||||
File.exists(outTxt).flatMap { txtExists =>
|
||||
val pdfData = File.readAll(out, blocker, chunkSize)
|
||||
if (result.rc == 0) {
|
||||
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt, blocker)).pure[F]
|
||||
@ -84,12 +88,14 @@ private[extern] object ExternConv {
|
||||
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
|
||||
successPdf(pdfData).pure[F]
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
case false =>
|
||||
ConversionResult.failure[F](
|
||||
new Exception(s"Command result=${result.rc}. No output file found.")
|
||||
).pure[F]
|
||||
ConversionResult
|
||||
.failure[F](
|
||||
new Exception(s"Command result=${result.rc}. No output file found.")
|
||||
)
|
||||
.pure[F]
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,15 @@ object Tesseract {
|
||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
|
||||
|
||||
ExternConv.toPDF[F, A]("tesseract", cfg.command.replace(Map("{{lang}}" -> lang.iso3)), cfg.workingDir, false, blocker, logger, reader)(in, handler)
|
||||
ExternConv.toPDF[F, A](
|
||||
"tesseract",
|
||||
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
|
||||
cfg.workingDir,
|
||||
false,
|
||||
blocker,
|
||||
logger,
|
||||
reader
|
||||
)(in, handler)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,4 +4,4 @@ import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
case class TesseractConfig (command: SystemCommand.Config, workingDir: Path)
|
||||
case class TesseractConfig(command: SystemCommand.Config, workingDir: Path)
|
||||
|
@ -19,7 +19,10 @@ object Unoconv {
|
||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
||||
|
||||
ExternConv.toPDF[F, A]("unoconv", cfg.command, cfg.workingDir, false, blocker, logger, reader)(in, handler)
|
||||
ExternConv.toPDF[F, A]("unoconv", cfg.command, cfg.workingDir, false, blocker, logger, reader)(
|
||||
in,
|
||||
handler
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,4 +4,4 @@ import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
case class UnoconvConfig (command: SystemCommand.Config, workingDir: Path)
|
||||
case class UnoconvConfig(command: SystemCommand.Config, workingDir: Path)
|
||||
|
@ -14,12 +14,16 @@ object WkHtmlPdf {
|
||||
cfg: WkHtmlPdfConfig,
|
||||
chunkSize: Int,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
logger: Logger[F]
|
||||
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
|
||||
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
|
||||
ExternConv.readResult[F](blocker, chunkSize, logger)
|
||||
|
||||
ExternConv.toPDF[F, A]("wkhtmltopdf", cfg.command, cfg.workingDir, true, blocker, logger, reader)(in, handler)
|
||||
ExternConv
|
||||
.toPDF[F, A]("wkhtmltopdf", cfg.command, cfg.workingDir, true, blocker, logger, reader)(
|
||||
in,
|
||||
handler
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,4 +4,4 @@ import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
case class WkHtmlPdfConfig (command: SystemCommand.Config, workingDir: Path)
|
||||
case class WkHtmlPdfConfig(command: SystemCommand.Config, workingDir: Path)
|
||||
|
@ -22,24 +22,22 @@ object Markdown {
|
||||
val r = createRenderer()
|
||||
Try {
|
||||
val reader = new InputStreamReader(is, StandardCharsets.UTF_8)
|
||||
val doc = p.parseReader(reader)
|
||||
val doc = p.parseReader(reader)
|
||||
wrapHtml(r.render(doc), cfg)
|
||||
}.toEither
|
||||
}
|
||||
|
||||
|
||||
def toHtml(md: String, cfg: MarkdownConfig): String = {
|
||||
val p = createParser()
|
||||
val r = createRenderer()
|
||||
val p = createParser()
|
||||
val r = createRenderer()
|
||||
val doc = p.parse(md)
|
||||
wrapHtml(r.render(doc), cfg)
|
||||
}
|
||||
|
||||
def toHtml[F[_]: Sync](data: Stream[F, Byte], cfg: MarkdownConfig): F[String] =
|
||||
data.through(fs2.text.utf8Decode).compile.foldMonoid.
|
||||
map(str => toHtml(str, cfg))
|
||||
data.through(fs2.text.utf8Decode).compile.foldMonoid.map(str => toHtml(str, cfg))
|
||||
|
||||
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
|
||||
private def wrapHtml(body: String, cfg: MarkdownConfig): String =
|
||||
s"""<!DOCTYPE html>
|
||||
|<html>
|
||||
|<head>
|
||||
@ -53,13 +51,13 @@ object Markdown {
|
||||
|</body>
|
||||
|</html>
|
||||
|""".stripMargin
|
||||
}
|
||||
|
||||
private def createParser(): Parser = {
|
||||
val opts = new MutableDataSet()
|
||||
opts.set(Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
|
||||
util.Arrays.asList(TablesExtension.create(),
|
||||
StrikethroughExtension.create()));
|
||||
opts.set(
|
||||
Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
|
||||
util.Arrays.asList(TablesExtension.create(), StrikethroughExtension.create())
|
||||
);
|
||||
|
||||
Parser.builder(opts).build()
|
||||
}
|
||||
|
@ -55,5 +55,4 @@ trait FileChecks {
|
||||
def commandExists(cmd: String): Boolean =
|
||||
Runtime.getRuntime.exec(Array("which", cmd)).waitFor() == 0
|
||||
|
||||
|
||||
}
|
||||
|
@ -103,5 +103,4 @@ object ExternConvTest extends SimpleTestSuite with FileChecks {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user