Updated the following dependencies, which require code changes to work properly:

- Scala
- fs2
- http4s
This commit is contained in:
Rehan Mahmood
2023-10-31 14:24:00 -04:00
parent c9ebd15b23
commit 2a39b2f6a6
64 changed files with 224 additions and 150 deletions

View File

@ -9,6 +9,7 @@ package docspell.extract
import cats.effect._
import cats.implicits._
import fs2.Stream
import fs2.io.file.Files
import docspell.common._
import docspell.extract.internal.Text
@ -32,7 +33,7 @@ trait Extraction[F[_]] {
object Extraction {
def create[F[_]: Async](
def create[F[_]: Async: Files](
logger: Logger[F],
cfg: ExtractConfig
): Extraction[F] =

View File

@ -9,6 +9,7 @@ package docspell.extract
import cats.effect._
import cats.implicits._
import fs2.Stream
import fs2.io.file.Files
import docspell.common.Language
import docspell.extract.internal.Text
@ -24,7 +25,7 @@ object PdfExtract {
Result(t._1, t._2)
}
def get[F[_]: Async](
def get[F[_]: Async: Files](
in: Stream[F, Byte],
lang: Language,
stripMinLen: Int,

View File

@ -8,7 +8,7 @@ package docspell.extract.ocr
import cats.effect._
import fs2.Stream
import fs2.io.file.Path
import fs2.io.file.{Files, Path}
import docspell.common._
import docspell.common.util.File
@ -17,7 +17,7 @@ import docspell.logging.Logger
object Ocr {
/** Extract the text of all pages in the given pdf file. */
def extractPdf[F[_]: Async](
def extractPdf[F[_]: Async: Files](
pdf: Stream[F, Byte],
logger: Logger[F],
lang: String,
@ -40,7 +40,7 @@ object Ocr {
): Stream[F, String] =
runTesseractStdin(img, logger, lang, config)
def extractPdFFile[F[_]: Async](
def extractPdFFile[F[_]: Async: Files](
pdf: Path,
logger: Logger[F],
lang: String,
@ -65,7 +65,7 @@ object Ocr {
/** Run ghostscript to extract all pdf pages into tiff files. The files are stored to a
* temporary location on disk and returned.
*/
private[extract] def runGhostscript[F[_]: Async](
private[extract] def runGhostscript[F[_]: Async: Files](
pdf: Stream[F, Byte],
cfg: OcrConfig,
wd: Path,
@ -91,7 +91,7 @@ object Ocr {
/** Run ghostscript to extract all pdf pages into tiff files. The files are stored to a
* temporary location on disk and returned.
*/
private[extract] def runGhostscriptFile[F[_]: Async](
private[extract] def runGhostscriptFile[F[_]: Async: Files](
pdf: Path,
ghostscript: SystemCommand.Config,
wd: Path,

View File

@ -8,6 +8,7 @@ package docspell.extract.ocr
import cats.effect._
import fs2.Stream
import fs2.io.file.Files
import docspell.common._
import docspell.extract.internal.Text
@ -16,7 +17,7 @@ import docspell.logging.Logger
object TextExtract {
def extract[F[_]: Async](
def extract[F[_]: Async: Files](
in: Stream[F, Byte],
logger: Logger[F],
lang: String,
@ -24,7 +25,7 @@ object TextExtract {
): Stream[F, Text] =
extractOCR(in, logger, lang, config)
def extractOCR[F[_]: Async](
def extractOCR[F[_]: Async: Files](
in: Stream[F, Byte],
logger: Logger[F],
lang: String,