Merge branch 'master' into update/poi-4.1.2

This commit is contained in:
Scala Steward 2020-02-27 22:12:22 +01:00
commit ea4c06870d
No known key found for this signature in database
GPG Key ID: 96BDF10FFAB8B6A6
91 changed files with 630 additions and 541 deletions

View File

@ -23,4 +23,4 @@ before_script:
- export TZ=Europe/Berlin
script:
- sbt ++$TRAVIS_SCALA_VERSION ";project root ;make"
- sbt ++$TRAVIS_SCALA_VERSION ";project root ;make ;test"

View File

@ -12,13 +12,13 @@ object Contact {
def annotate(text: String): Vector[NerLabel] =
TextSplitter
.splitToken[Nothing](text, " \t\r\n".toSet)
.map({ token =>
.map { token =>
if (isEmailAddress(token.value))
NerLabel(token.value, NerTag.Email, token.begin, token.end).some
else if (isWebsite(token.value))
NerLabel(token.value, NerTag.Website, token.begin, token.end).some
else None
})
}
.flatMap(_.map(Stream.emit).getOrElse(Stream.empty))
.toVector

View File

@ -11,7 +11,14 @@ import docspell.store.queries.{QAttachment, QItem}
import OItem.{AttachmentData, AttachmentSourceData, ItemData, ListItem, Query}
import bitpeace.{FileMeta, RangeDef}
import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp}
import docspell.store.records.{RAttachment, RAttachmentMeta, RAttachmentSource, RItem, RSource, RTagItem}
import docspell.store.records.{
RAttachment,
RAttachmentMeta,
RAttachmentSource,
RItem,
RSource,
RTagItem
}
trait OItem[F[_]] {
@ -80,8 +87,11 @@ object OItem {
val fileId = ra.fileId
}
case class AttachmentSourceData[F[_]](rs: RAttachmentSource, meta: FileMeta, data: Stream[F, Byte])
extends BinaryData[F] {
case class AttachmentSourceData[F[_]](
rs: RAttachmentSource,
meta: FileMeta,
data: Stream[F, Byte]
) extends BinaryData[F] {
val name = rs.name
val fileId = rs.fileId
}
@ -131,7 +141,11 @@ object OItem {
private def makeBinaryData[A](fileId: Ident)(f: FileMeta => A): F[Option[A]] =
store.bitpeace
.get(fileId.id).unNoneTerminate.compile.last.map(
.get(fileId.id)
.unNoneTerminate
.compile
.last
.map(
_.map(m => f(m))
)

View File

@ -51,7 +51,8 @@ object OSignup {
res <- if (ok) addUser(data).map(SignupResult.fromAddResult)
else SignupResult.invalidInvitationKey.pure[F]
_ <- if (retryInvite(res))
logger.fdebug(s"Adding account failed ($res). Allow retry with invite.") *> store
logger
.fdebug(s"Adding account failed ($res). Allow retry with invite.") *> store
.transact(
RInvitation.insert(RInvitation(inv, now))
)

View File

@ -26,9 +26,7 @@ object AccountId {
invalid
}
val separated = sepearatorChars.foldRight(invalid) { (c, v) =>
v.orElse(parse0(c))
}
val separated = sepearatorChars.foldRight(invalid)((c, v) => v.orElse(parse0(c)))
separated.orElse(Ident.fromString(str).map(id => AccountId(id, id)))
}

View File

@ -1,8 +1,6 @@
package docspell.common
sealed trait DataType {
}
sealed trait DataType {}
object DataType {
@ -10,7 +8,6 @@ object DataType {
case class Hint(hint: MimeTypeHint) extends DataType
def apply(mt: MimeType): DataType =
Exact(mt)

View File

@ -65,11 +65,13 @@ object File {
javaList.asScala.toList.sortBy(_.getFileName.toString)
}
def readAll[F[_]: Sync: ContextShift](file: Path, blocker: Blocker, chunkSize: Int): Stream[F, Byte] =
def readAll[F[_]: Sync: ContextShift](
file: Path,
blocker: Blocker,
chunkSize: Int
): Stream[F, Byte] =
fs2.io.file.readAll(file, blocker, chunkSize)
def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] =
readAll[F](file, blocker, 8192).
through(fs2.text.utf8Decode).
compile.foldMonoid
readAll[F](file, blocker, 8192).through(fs2.text.utf8Decode).compile.foldMonoid
}

View File

@ -66,9 +66,7 @@ case class LenientUri(
)
def readText[F[_]: Sync: ContextShift](chunkSize: Int, blocker: Blocker): F[String] =
readURL[F](chunkSize, blocker).
through(fs2.text.utf8Decode).
compile.foldMonoid
readURL[F](chunkSize, blocker).through(fs2.text.utf8Decode).compile.foldMonoid
def host: Option[String] =
authority.map(a =>

View File

@ -17,7 +17,6 @@ trait Logger[F[_]] {
object Logger {
def log4s[F[_]: Sync](log: Log4sLogger): Logger[F] = new Logger[F] {
def trace(msg: => String): F[Unit] =
log.ftrace(msg)

View File

@ -66,9 +66,7 @@ object MetaProposalList {
case None => map.updated(mp.proposalType, mp)
}
val merged = ml.foldLeft(init) { (map, el) =>
el.proposals.foldLeft(map)(updateMap)
}
val merged = ml.foldLeft(init)((map, el) => el.proposals.foldLeft(map)(updateMap))
fromMap(merged)
}

View File

@ -23,7 +23,8 @@ object SystemCommand {
repl.foldLeft(s) {
case (res, (k, v)) =>
res.replace(k, v)
})
}
)
def toCmd: List[String] =
program :: args.toList
@ -75,12 +76,18 @@ object SystemCommand {
else Stream.emit(r)
}
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path], logger: Logger[F], stdin: Stream[F, Byte])(
private def startProcess[F[_]: Sync, A](
cmd: Config,
wd: Option[Path],
logger: Logger[F],
stdin: Stream[F, Byte]
)(
f: Process => Stream[F, A]
): Stream[F, A] = {
val log = logger.debug(s"Running external command: ${cmd.cmdString}")
val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
val proc = log *> hasStdin.flatMap(flag => Sync[F].delay {
val proc = log *> hasStdin.flatMap(flag =>
Sync[F].delay {
val pb = new ProcessBuilder(cmd.toCmd.asJava)
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
.redirectError(Redirect.PIPE)
@ -88,12 +95,11 @@ object SystemCommand {
wd.map(_.toFile).foreach(pb.directory)
pb.start()
})
}
)
Stream
.bracket(proc)(p =>
logger.debug(s"Closing process: `${cmd.cmdString}`").map { _ =>
p.destroy()
}
logger.debug(s"Closing process: `${cmd.cmdString}`").map(_ => p.destroy())
)
.flatMap(f)
}

View File

@ -13,7 +13,9 @@ import docspell.files.{ImageSize, TikaMimetype}
trait Conversion[F[_]] {
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(in: Stream[F, Byte]): F[A]
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
in: Stream[F, Byte]
): F[A]
}
@ -26,7 +28,9 @@ object Conversion {
): Resource[F, Conversion[F]] =
Resource.pure(new Conversion[F] {
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(in: Stream[F, Byte]): F[A] =
def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
in: Stream[F, Byte]
): F[A] =
TikaMimetype.resolve(dataType, in).flatMap {
case MimeType.pdf =>
handler.run(ConversionResult.successPdf(in))

View File

@ -3,9 +3,11 @@ package docspell.convert
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
import docspell.convert.flexmark.MarkdownConfig
case class ConvertConfig(chunkSize: Int,
case class ConvertConfig(
chunkSize: Int,
maxImageSize: Int,
markdown: MarkdownConfig,
wkhtmlpdf: WkHtmlPdfConfig,
tesseract: TesseractConfig,
unoconv: UnoconvConfig)
unoconv: UnoconvConfig
)

View File

@ -20,7 +20,9 @@ private[extern] object ExternConv {
logger: Logger[F],
reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
Stream.resource(File.withTempDir[F](wd, s"docspell-$name")).flatMap { dir =>
Stream
.resource(File.withTempDir[F](wd, s"docspell-$name"))
.flatMap { dir =>
val inFile = dir.resolve("infile").toAbsolutePath.normalize
val out = dir.resolve("out.pdf").toAbsolutePath.normalize
val sysCfg =
@ -40,12 +42,12 @@ private[extern] object ExternConv {
SystemCommand
.execSuccess[F](sysCfg, blocker, logger, Some(dir), if (useStdin) in else Stream.empty)
.evalMap(result =>
logResult(name, result, logger).
flatMap(_ => reader(out, result)).
flatMap(handler.run)
logResult(name, result, logger).flatMap(_ => reader(out, result)).flatMap(handler.run)
)
}
}.compile.lastOrError
}
.compile
.lastOrError
def readResult[F[_]: Sync: ContextShift](
blocker: Blocker,
@ -60,9 +62,11 @@ private[extern] object ExternConv {
successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
case false =>
ConversionResult.failure[F](
ConversionResult
.failure[F](
new Exception(s"Command result=${result.rc}. No output file found.")
).pure[F]
)
.pure[F]
}
def readResultTesseract[F[_]: Sync: ContextShift](
@ -75,7 +79,7 @@ private[extern] object ExternConv {
File.existsNonEmpty[F](outPdf).flatMap {
case true =>
val outTxt = out.resolveSibling(s"$outPrefix.txt")
File.exists(outTxt).flatMap(txtExists => {
File.exists(outTxt).flatMap { txtExists =>
val pdfData = File.readAll(out, blocker, chunkSize)
if (result.rc == 0) {
if (txtExists) successPdfTxt(pdfData, File.readText(outTxt, blocker)).pure[F]
@ -84,12 +88,14 @@ private[extern] object ExternConv {
logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
successPdf(pdfData).pure[F]
}
})
}
case false =>
ConversionResult.failure[F](
ConversionResult
.failure[F](
new Exception(s"Command result=${result.rc}. No output file found.")
).pure[F]
)
.pure[F]
}
}

View File

@ -21,7 +21,15 @@ object Tesseract {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)
ExternConv.toPDF[F, A]("tesseract", cfg.command.replace(Map("{{lang}}" -> lang.iso3)), cfg.workingDir, false, blocker, logger, reader)(in, handler)
ExternConv.toPDF[F, A](
"tesseract",
cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
cfg.workingDir,
false,
blocker,
logger,
reader
)(in, handler)
}
}

View File

@ -4,4 +4,4 @@ import java.nio.file.Path
import docspell.common.SystemCommand
case class TesseractConfig (command: SystemCommand.Config, workingDir: Path)
case class TesseractConfig(command: SystemCommand.Config, workingDir: Path)

View File

@ -19,7 +19,10 @@ object Unoconv {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger)
ExternConv.toPDF[F, A]("unoconv", cfg.command, cfg.workingDir, false, blocker, logger, reader)(in, handler)
ExternConv.toPDF[F, A]("unoconv", cfg.command, cfg.workingDir, false, blocker, logger, reader)(
in,
handler
)
}
}

View File

@ -4,4 +4,4 @@ import java.nio.file.Path
import docspell.common.SystemCommand
case class UnoconvConfig (command: SystemCommand.Config, workingDir: Path)
case class UnoconvConfig(command: SystemCommand.Config, workingDir: Path)

View File

@ -14,12 +14,16 @@ object WkHtmlPdf {
cfg: WkHtmlPdfConfig,
chunkSize: Int,
blocker: Blocker,
logger: Logger[F],
logger: Logger[F]
)(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
ExternConv.readResult[F](blocker, chunkSize, logger)
ExternConv.toPDF[F, A]("wkhtmltopdf", cfg.command, cfg.workingDir, true, blocker, logger, reader)(in, handler)
ExternConv
.toPDF[F, A]("wkhtmltopdf", cfg.command, cfg.workingDir, true, blocker, logger, reader)(
in,
handler
)
}
}

View File

@ -4,4 +4,4 @@ import java.nio.file.Path
import docspell.common.SystemCommand
case class WkHtmlPdfConfig (command: SystemCommand.Config, workingDir: Path)
case class WkHtmlPdfConfig(command: SystemCommand.Config, workingDir: Path)

View File

@ -27,7 +27,6 @@ object Markdown {
}.toEither
}
def toHtml(md: String, cfg: MarkdownConfig): String = {
val p = createParser()
val r = createRenderer()
@ -36,10 +35,9 @@ object Markdown {
}
def toHtml[F[_]: Sync](data: Stream[F, Byte], cfg: MarkdownConfig): F[String] =
data.through(fs2.text.utf8Decode).compile.foldMonoid.
map(str => toHtml(str, cfg))
data.through(fs2.text.utf8Decode).compile.foldMonoid.map(str => toHtml(str, cfg))
private def wrapHtml(body: String, cfg: MarkdownConfig): String = {
private def wrapHtml(body: String, cfg: MarkdownConfig): String =
s"""<!DOCTYPE html>
|<html>
|<head>
@ -53,13 +51,13 @@ object Markdown {
|</body>
|</html>
|""".stripMargin
}
private def createParser(): Parser = {
val opts = new MutableDataSet()
opts.set(Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
util.Arrays.asList(TablesExtension.create(),
StrikethroughExtension.create()));
opts.set(
Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
util.Arrays.asList(TablesExtension.create(), StrikethroughExtension.create())
);
Parser.builder(opts).build()
}

View File

@ -55,5 +55,4 @@ trait FileChecks {
def commandExists(cmd: String): Boolean =
Runtime.getRuntime.exec(Array("which", cmd)).waitFor() == 0
}

View File

@ -103,5 +103,4 @@ object ExternConvTest extends SimpleTestSuite with FileChecks {
}
}
}

View File

@ -29,7 +29,7 @@ object Extraction {
data: Stream[F, Byte],
dataType: DataType,
lang: Language
): F[ExtractResult] = {
): F[ExtractResult] =
TikaMimetype.resolve(dataType, data).flatMap {
case MimeType.pdf =>
PdfExtract
@ -50,16 +50,23 @@ object Extraction {
.extractOCR(data, blocker, logger, lang.iso3, cfg.ocr)
.compile
.lastOrError
.map(_.trim)
.attempt
.map(ExtractResult.fromEither)
ImageSize.get(data).flatMap {
case Some(dim) =>
if (dim.product > cfg.ocr.maxImageSize) {
logger.info(s"Image size (${dim.product}) is too large (max ${cfg.ocr.maxImageSize}).") *>
ExtractResult.failure(new Exception(
s"Image size (${dim.width}x${dim.height}) is too large (max ${cfg.ocr.maxImageSize}).")
).pure[F]
logger.info(
s"Image size (${dim.product}) is too large (max ${cfg.ocr.maxImageSize})."
) *>
ExtractResult
.failure(
new Exception(
s"Image size (${dim.width}x${dim.height}) is too large (max ${cfg.ocr.maxImageSize})."
)
)
.pure[F]
} else {
doExtract
}
@ -69,10 +76,11 @@ object Extraction {
}
case OdfType.container =>
logger.info(s"File detected as ${OdfType.container}. Try to read as OpenDocument file.") *>
logger
.info(s"File detected as ${OdfType.container}. Try to read as OpenDocument file.") *>
OdfExtract.get(data).map(ExtractResult.fromEither)
case mt@MimeType("text", sub) if !sub.contains("html") =>
case mt @ MimeType("text", sub) if !sub.contains("html") =>
logger.info(s"File detected as ${mt.asString}. Returning itself as text.") *>
data.through(fs2.text.utf8Decode).compile.last.map { txt =>
ExtractResult.success(txt.getOrElse("").trim)
@ -83,6 +91,5 @@ object Extraction {
}
}
}
}

View File

@ -1,3 +1,3 @@
package docspell.extract
case class PdfConfig (minTextLen: Int)
case class PdfConfig(minTextLen: Int)

View File

@ -33,7 +33,8 @@ object PdfExtract {
//maybe better: inspect the pdf and decide whether ocr or not
for {
pdfboxRes <- logger.debug("Trying to strip text from pdf using pdfbox.") *> PdfboxExtract.get[F](in)
pdfboxRes <- logger.debug("Trying to strip text from pdf using pdfbox.") *> PdfboxExtract
.get[F](in)
res <- pdfboxRes.fold(
ex =>
logger.info(

View File

@ -10,8 +10,7 @@ case class OcrConfig(
pageRange: OcrConfig.PageRange,
unpaper: OcrConfig.Unpaper,
tesseract: OcrConfig.Tesseract
) {
}
) {}
object OcrConfig {

View File

@ -17,8 +17,8 @@ object OdfExtract {
def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
def get(is: InputStream) = Try {
def get(is: InputStream) =
Try {
val handler = new BodyContentHandler()
val pctx = new ParseContext()
val meta = new Metadata()

View File

@ -14,9 +14,7 @@ import fs2.Stream
object PdfboxExtract {
def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
data.compile.to(Array).map { bytes =>
Using(PDDocument.load(bytes))(readText).toEither.flatten
}
data.compile.to(Array).map(bytes => Using(PDDocument.load(bytes))(readText).toEither.flatten)
def get(is: InputStream): Either[Throwable, String] =
Using(PDDocument.load(is))(readText).toEither.flatten

View File

@ -52,25 +52,25 @@ object PoiExtract {
def getDocx(is: InputStream): Either[Throwable, String] =
Try {
val xt = new XWPFWordExtractor(new XWPFDocument(is))
xt.getText.trim
Option(xt.getText).map(_.trim).getOrElse("")
}.toEither
def getDoc(is: InputStream): Either[Throwable, String] =
Try {
val xt = new WordExtractor(is)
xt.getText.trim
Option(xt.getText).map(_.trim).getOrElse("")
}.toEither
def getXlsx(is: InputStream): Either[Throwable, String] =
Try {
val xt = new XSSFExcelExtractor(new XSSFWorkbook(is))
xt.getText.trim
Option(xt.getText).map(_.trim).getOrElse("")
}.toEither
def getXls(is: InputStream): Either[Throwable, String] =
Try {
val xt = new ExcelExtractor(new HSSFWorkbook(is))
xt.getText.trim
Option(xt.getText).map(_.trim).getOrElse("")
}.toEither
def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =

View File

@ -14,7 +14,8 @@ object OdfExtractTest extends SimpleTestSuite {
)
test("test extract from odt") {
files.foreach { case (file, len) =>
files.foreach {
case (file, len) =>
val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
val str1 = OdfExtract.get(is).fold(throw _, identity)
assertEquals(str1.length, len)

View File

@ -29,12 +29,11 @@ object ImageSize {
/** Return the image size from its header without reading
* the whole image into memory.
*/
def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] = {
data.take(768).compile.to(Array).map(ar => {
def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
data.take(768).compile.to(Array).map { ar =>
val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar))
if (iis == null) sys.error("no reader given for the array")
else getDimension(iis)
})
}
private def getDimension(in: ImageInputStream): Option[Dimension] =

View File

@ -52,8 +52,8 @@ object TikaMimetype {
def detect[F[_]: Sync](file: Path): F[MimeType] =
Sync[F].delay {
val hint = MimeTypeHint.filename(file.getFileName.toString)
Using(new BufferedInputStream(Files.newInputStream(file), 64))({ in =>
Using(new BufferedInputStream(Files.newInputStream(file), 64)) { in =>
convert(tika.detect(in, makeMetadata(hint)))
}).toEither
}.toEither
}.rethrow
}

View File

@ -10,5 +10,4 @@ trait ExampleFilesSupport {
case None => sys.error(s"Resource '$resource' not found")
}
}

View File

@ -8,15 +8,14 @@ import scala.concurrent.ExecutionContext
object Playing extends IOApp {
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
def run(args: List[String]): IO[ExitCode] = IO {
//val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker)
//val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker)
val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192, blocker)
val x = for {
odsm1 <- TikaMimetype.detect(rtf,
MimeTypeHint.filename(ExampleFiles.examples_sample_rtf.path.segments.last))
odsm1 <- TikaMimetype
.detect(rtf, MimeTypeHint.filename(ExampleFiles.examples_sample_rtf.path.segments.last))
odsm2 <- TikaMimetype.detect(rtf, MimeTypeHint.none)
} yield (odsm1, odsm2)
println(x.unsafeRunSync())

View File

@ -68,7 +68,9 @@ object ConvertPdf {
.through(ctx.store.bitpeace.fetchData2(RangeDef.all))
val handler = conversionHandler[F](ctx, cfg, ra, item)
ctx.logger.info(s"Converting file ${ra.name} (${mime.asString}) into a PDF") *>
conv.toPDF(DataType(MimeType(mime.primary, mime.sub)), ctx.args.meta.language, handler)(data)
conv.toPDF(DataType(MimeType(mime.primary, mime.sub)), ctx.args.meta.language, handler)(
data
)
}
}
@ -119,7 +121,9 @@ object ConvertPdf {
.compile
.lastOrError
.map(fm => Ident.unsafe(fm.id))
.flatMap(fmId => ctx.store.transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName)).map(_ => fmId))
.flatMap(fmId =>
ctx.store.transact(RAttachment.updateFileIdAndName(ra.id, fmId, newName)).map(_ => fmId)
)
.map(fmId => ra.copy(fileId = fmId, name = newName))
}
}

View File

@ -95,10 +95,10 @@ object FindProposal {
labels => self.find(labels).map(f)
def next(f: Finder[F])(implicit F: FlatMap[F], F3: Applicative[F]): Finder[F] =
flatMap({ ml0 =>
flatMap { ml0 =>
if (ml0.hasResultsAll) Finder.unit[F](ml0)
else f.map(ml1 => ml0.fillEmptyFrom(ml1))
})
}
def nextWhenEmpty(f: Finder[F], mt0: MetaProposalType, mts: MetaProposalType*)(
implicit F: FlatMap[F],

View File

@ -19,9 +19,7 @@ object ItemHandler {
.map(_ => ())
def itemStateTask[F[_]: Sync, A](state: ItemState)(data: ItemData): Task[F, A, ItemData] =
Task { ctx =>
ctx.store.transact(RItem.updateState(data.item.id, state)).map(_ => data)
}
Task(ctx => ctx.store.transact(RItem.updateState(data.item.id, state)).map(_ => data))
def isLastRetry[F[_]: Sync, A](ctx: Context[F, A]): F[Boolean] =
for {

View File

@ -11,9 +11,7 @@ object TestTasks {
private[this] val logger = getLogger
def success[F[_]]: Task[F, ProcessItemArgs, Unit] =
Task { ctx =>
ctx.logger.info(s"Running task now: ${ctx.args}")
}
Task(ctx => ctx.logger.info(s"Running task now: ${ctx.args}"))
def failing[F[_]: Sync]: Task[F, ProcessItemArgs, Unit] =
Task { ctx =>

View File

@ -76,16 +76,15 @@ object TextExtraction {
.getOrElse(Mimetype.`application/octet-stream`)
findMime
.flatMap(mt =>
extr.extractText(data, DataType(MimeType(mt.primary, mt.sub)), lang))
.flatMap(mt => extr.extractText(data, DataType(MimeType(mt.primary, mt.sub)), lang))
}
private def extractTextFallback[F[_]: Sync: ContextShift](
ctx: Context[F, _],
cfg: ExtractConfig,
ra: RAttachment,
lang: Language,
)(fileIds: List[Ident]): F[Option[String]] = {
lang: Language
)(fileIds: List[Ident]): F[Option[String]] =
fileIds match {
case Nil =>
ctx.logger.error(s"Cannot extract text").map(_ => None)
@ -99,15 +98,18 @@ object TextExtraction {
txt.some.pure[F]
case ExtractResult.UnsupportedFormat(mt) =>
ctx.logger.warn(s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file.").
flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
ctx.logger
.warn(
s"Cannot extract text from file ${stripAttachmentName(ra)}: unsupported format ${mt.asString}. Try with converted file."
)
.flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
case ExtractResult.Failure(ex) =>
ctx.logger.warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file").
flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
ctx.logger
.warn(s"Cannot extract text: ${ex.getMessage}. Try with converted file")
.flatMap(_ => extractTextFallback[F](ctx, cfg, ra, lang)(rest))
})
}
}
/** Returns the fileIds to extract text from. First, the source file
* is tried. If that fails, the converted file is tried.

View File

@ -128,6 +128,9 @@ Please see the `nix/module-server.nix` and `nix/module-joex.nix` files
for the set of options. The nixos options are modelled after the
default configuration file.
The module files are only applicable to the newest version of
Docspell. If you really need an older version, check out the
appropriate commit.
## NixOs Example

View File

@ -9,7 +9,8 @@ title: Features and Limitations
- Multiple users per account
- Handle multiple documents as one unit
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
- Conversion to PDF: all files are converted into a PDF file
- Conversion to PDF: all files are converted into a PDF file, while
the original file is preserved
- Text is analysed to find and attach meta data automatically
- Manage document processing (cancel jobs, set priorities)
- Everything available via a documented [REST Api](api)

View File

@ -84,7 +84,7 @@ trait Conversions {
data.inReplyTo.map(mkIdName),
data.item.dueDate,
data.item.notes,
data.attachments.map((mkAttachment(data)_).tupled).toList,
data.attachments.map((mkAttachment(data) _).tupled).toList,
data.sources.map((mkAttachmentSource _).tupled).toList,
data.tags.map(mkTag).toList
)
@ -204,7 +204,8 @@ trait Conversions {
val files = mp.parts
.filter(p => p.name.forall(s => !s.equalsIgnoreCase("meta")))
.map(p => OUpload.File(p.filename, p.headers.get(`Content-Type`).map(fromContentType), p.body)
.map(p =>
OUpload.File(p.filename, p.headers.get(`Content-Type`).map(fromContentType), p.body)
)
for {
metaData <- meta

View File

@ -55,10 +55,10 @@ object AttachmentRoutes {
inm = req.headers.get(`If-None-Match`).flatMap(_.tags)
matches = matchETag(fileData.map(_.meta), inm)
resp <- fileData
.map({ data =>
.map { data =>
if (matches) withResponseHeaders(NotModified())(data)
else makeByteResp(data)
})
}
.getOrElse(NotFound(BasicResult(false, "Not found")))
} yield resp
@ -76,10 +76,10 @@ object AttachmentRoutes {
inm = req.headers.get(`If-None-Match`).flatMap(_.tags)
matches = matchETag(fileData.map(_.meta), inm)
resp <- fileData
.map({ data =>
.map { data =>
if (matches) withResponseHeaders(NotModified())(data)
else makeByteResp(data)
})
}
.getOrElse(NotFound(BasicResult(false, "Not found")))
} yield resp

View File

@ -14,10 +14,15 @@ object QAttachment {
def deleteById[F[_]: Sync](store: Store[F])(attachId: Ident, coll: Ident): F[Int] =
for {
raFile <- store.transact(RAttachment.findByIdAndCollective(attachId, coll)).map(_.map(_.fileId))
rsFile <- store.transact(RAttachmentSource.findByIdAndCollective(attachId, coll)).map(_.map(_.fileId))
raFile <- store
.transact(RAttachment.findByIdAndCollective(attachId, coll))
.map(_.map(_.fileId))
rsFile <- store
.transact(RAttachmentSource.findByIdAndCollective(attachId, coll))
.map(_.map(_.fileId))
n <- store.transact(RAttachment.delete(attachId))
f <- Stream.emits(raFile.toSeq ++ rsFile.toSeq)
f <- Stream
.emits(raFile.toSeq ++ rsFile.toSeq)
.map(_.id)
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
@ -29,10 +34,12 @@ object QAttachment {
for {
s <- store.transact(RAttachmentSource.findById(ra.id))
n <- store.transact(RAttachment.delete(ra.id))
f <- Stream.emits(ra.fileId.id +: s.map(_.fileId.id).toSeq).
flatMap(store.bitpeace.delete).
map(flag => if (flag) 1 else 0).
compile.foldMonoid
f <- Stream
.emits(ra.fileId.id +: s.map(_.fileId.id).toSeq)
.flatMap(store.bitpeace.delete)
.map(flag => if (flag) 1 else 0)
.compile
.foldMonoid
} yield n + f
def deleteItemAttachments[F[_]: Sync](store: Store[F])(itemId: Ident, coll: Ident): F[Int] =

View File

@ -27,7 +27,6 @@ object QCollective {
and(IC.cid.is(coll), IC.incoming.is(Direction.outgoing))
).query[Int].unique
val fileSize = sql"""
select sum(length) from (
with attachs as
@ -42,7 +41,6 @@ object QCollective {
inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs)
) as t""".query[Option[Long]].unique
val q3 = fr"SELECT" ++ commas(
TC.name.prefix("t").f,
fr"count(" ++ RC.itemId.prefix("r").f ++ fr")"

View File

@ -39,7 +39,8 @@ object QItem {
val EC = REquipment.Columns.all.map(_.prefix("e"))
val ICC = List(RItem.Columns.id, RItem.Columns.name).map(_.prefix("ref"))
val cq = selectSimple(IC ++ OC ++ P0C ++ P1C ++ EC ++ ICC, RItem.table ++ fr"i", Fragment.empty) ++
val cq =
selectSimple(IC ++ OC ++ P0C ++ P1C ++ EC ++ ICC, RItem.table ++ fr"i", Fragment.empty) ++
fr"LEFT JOIN" ++ ROrganization.table ++ fr"o ON" ++ RItem.Columns.corrOrg
.prefix("i")
.is(ROrganization.Columns.oid.prefix("o")) ++
@ -235,7 +236,8 @@ object QItem {
def findByFileIds(fileMetaIds: List[Ident]): ConnectionIO[Vector[RItem]] = {
val IC = RItem.Columns
val AC = RAttachment.Columns
val q = fr"SELECT DISTINCT" ++ commas(IC.all.map(_.prefix("i").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++
val q =
fr"SELECT DISTINCT" ++ commas(IC.all.map(_.prefix("i").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++
fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.itemId
.prefix("a")
.is(IC.id.prefix("i")) ++

View File

@ -21,11 +21,11 @@ object QJob {
Stream
.range(0, 10)
.evalMap(n => takeNextJob1(store)(priority, worker, retryPause, n))
.evalTap({ x =>
.evalTap { x =>
if (x.isLeft)
logger.fdebug[F]("Cannot mark job, probably due to concurrent updates. Will retry.")
else ().pure[F]
})
}
.find(_.isRight)
.flatMap({
case Right(job) =>
@ -97,7 +97,8 @@ object QJob {
val sql2 = fr"SELECT min(" ++ jgroup.f ++ fr") as g FROM" ++ RJob.table ++ fr"a" ++
fr"WHERE" ++ stateCond
val union = sql"SELECT g FROM ((" ++ sql1 ++ sql") UNION ALL (" ++ sql2 ++ sql")) as t0 WHERE g is not null"
val union =
sql"SELECT g FROM ((" ++ sql1 ++ sql") UNION ALL (" ++ sql2 ++ sql")) as t0 WHERE g is not null"
union
.query[Ident]

View File

@ -34,11 +34,11 @@ object JobQueue {
def insert(job: RJob): F[Unit] =
store
.transact(RJob.insert(job))
.flatMap({ n =>
.flatMap { n =>
if (n != 1)
Effect[F].raiseError(new Exception(s"Inserting job failed. Update count: $n"))
else ().pure[F]
})
}
def insertAll(jobs: Seq[RJob]): F[Unit] =
jobs.toList

View File

@ -104,7 +104,8 @@ object RAttachment {
def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachment, FileMeta)]] = {
import bitpeace.sql._
val q = fr"SELECT a.*,m.* FROM" ++ table ++ fr"a, filemeta m WHERE a.filemetaid = m.id AND a.itemid = $id ORDER BY a.position ASC"
val q =
fr"SELECT a.*,m.* FROM" ++ table ++ fr"a, filemeta m WHERE a.filemetaid = m.id AND a.itemid = $id ORDER BY a.position ASC"
q.query[(RAttachment, FileMeta)].to[Vector]
}

View File

@ -38,14 +38,16 @@ object RAttachmentSource {
def insert(v: RAttachmentSource): ConnectionIO[Int] =
insertRow(table, all, fr"${v.id},${v.fileId},${v.name},${v.created}").update.run
def findById(attachId: Ident): ConnectionIO[Option[RAttachmentSource]] =
selectSimple(all, table, id.is(attachId)).query[RAttachmentSource].option
def delete(attachId: Ident): ConnectionIO[Int] =
deleteFrom(table, id.is(attachId)).update.run
def findByIdAndCollective(attachId: Ident, collective: Ident): ConnectionIO[Option[RAttachmentSource]] = {
def findByIdAndCollective(
attachId: Ident,
collective: Ident
): ConnectionIO[Option[RAttachmentSource]] = {
val bId = RAttachment.Columns.id.prefix("b")
val aId = Columns.id.prefix("a")
val bItem = RAttachment.Columns.itemId.prefix("b")
@ -77,8 +79,9 @@ object RAttachmentSource {
RAttachment.table ++ fr"b ON" ++ aId.is(bId)
val where = bItem.is(id)
(selectSimple(cols, from, where) ++ orderBy(bPos.asc)).
query[(RAttachmentSource, FileMeta)].to[Vector]
(selectSimple(cols, from, where) ++ orderBy(bPos.asc))
.query[(RAttachmentSource, FileMeta)]
.to[Vector]
}
}

View File

@ -407,6 +407,20 @@ update key flags next msg model =
)
m4
( m6, c6 ) =
update key
flags
next
(ConcEquipMsg
(Comp.Dropdown.SetSelection
(item.concEquipment
|> Maybe.map List.singleton
|> Maybe.withDefault []
)
)
)
m5
proposalCmd =
if item.state == "created" then
Api.getItemProposals flags item.id GetProposalResp
@ -414,7 +428,7 @@ update key flags next msg model =
else
Cmd.none
in
( { m5
( { m6
| item = item
, nameModel = item.name
, notesModel = item.notes
@ -428,6 +442,7 @@ update key flags next msg model =
, c3
, c4
, c5
, c6
, getOptions flags
, proposalCmd
, Api.getSentMails flags item.id SentMailsResp

View File

@ -21,8 +21,8 @@ object Dependencies {
val LogbackVersion = "1.2.3"
val MariaDbVersion = "2.5.4"
val MiniTestVersion = "2.7.0"
val PdfboxVersion = "2.0.18"
val PoiVersion = "4.1.2"
val PdfboxVersion = "2.0.19"
val PoiVersion = "4.1.1"
val PostgresVersion = "42.2.10"
val PureConfigVersion = "0.12.2"
val Slf4jVersion = "1.7.30"

View File

@ -15,6 +15,9 @@
# url.2=...
#
# Lines starting with a `#' are ignored.
#
# The `-e|--exists' option skips uploading and only checks
# whether a given file exists in docspell.
# saner programming env: these switches turn some bugs into errors
set -o errexit -o pipefail -o noclobber -o nounset
@ -30,8 +33,8 @@ if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
exit 1
fi
OPTIONS=c:hsd
LONGOPTS=config:,help,skip,delete
OPTIONS=c:hsde
LONGOPTS=config:,help,skip,delete,exists
! PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTS --name "$0" -- "$@")
if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
@ -43,7 +46,7 @@ fi
# read getopts output this way to handle the quoting right:
eval set -- "$PARSED"
delete=n help=n config="${XDG_CONFIG_HOME:-$HOME/.config}/docspell/ds.conf"
exists=n delete=n help=n config="${XDG_CONFIG_HOME:-$HOME/.config}/docspell/ds.conf"
while true; do
case "$1" in
-h|--help)
@ -58,6 +61,10 @@ while true; do
delete="y"
shift
;;
-e|--exists)
exists=y
shift
;;
--)
shift
break
@ -121,9 +128,10 @@ showUsage() {
info " -c | --config Provide a config file. (value: $config)"
info " -d | --delete Delete the files when successfully uploaded (value: $delete)"
info " -h | --help Prints this help text. (value: $help)"
info " -e | --exists Checks for the existence of a file instead of uploading (value: $exists)"
info ""
info "Arguments:"
info " One or more PDF files to upload."
info " One or more files to check for existence or upload."
info ""
}
@ -153,6 +161,13 @@ done <<< $($GREP_CMD -v '^#.*' "$config")
IFS=$'\n'
for file in $*; do
for url in "${urls[@]}"; do
if [ "$exists" = "y" ]; then
if checkFile "$url" "$file"; then
info "$url $file: true"
else
info "$url $file: false"
fi
else
info "Uploading '$file' to '$url'"
set +e
upload "$file" "$url"
@ -161,5 +176,6 @@ for file in $*; do
info "Deleting file: $file"
rm -f "$file"
fi
fi
done
done