Merge pull request #2354 from eikek/build-updates

Build updates
This commit is contained in:
mergify[bot] 2023-11-05 23:19:05 +00:00 committed by GitHub
commit 667cd3b920
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 83 additions and 92 deletions

View File

@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
java: [ 'openjdk@1.11' ]
java: [ 'openjdk@1.17' ]
steps:
- uses: actions/checkout@v4.1.1
with:

View File

@ -10,8 +10,7 @@ val elmCompileMode = settingKey[ElmCompileMode]("How to compile elm sources")
val scalafixSettings = Seq(
semanticdbEnabled := true, // enable SemanticDB
semanticdbVersion := scalafixSemanticdb.revision, // "4.4.0"
ThisBuild / scalafixDependencies ++= Dependencies.organizeImports
semanticdbVersion := scalafixSemanticdb.revision // "4.4.0"
)
val sharedSettings = Seq(

View File

@ -19,7 +19,7 @@ import docspell.common.syntax.file._
import docspell.common.util.Zip
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.yaml.{parser => YamlParser}
import io.circe.yaml.{Parser => YamlParser}
import io.circe.{Decoder, Encoder}
import io.circe.{parser => JsonParser}
@ -161,7 +161,7 @@ object AddonMeta {
.rethrow
def fromYamlString(str: String): Either[Throwable, AddonMeta] =
YamlParser.parse(str).flatMap(_.as[AddonMeta])
YamlParser.default.parse(str).flatMap(_.as[AddonMeta])
def fromYamlBytes[F[_]: Sync](bytes: Stream[F, Byte]): F[AddonMeta] =
bytes
@ -171,12 +171,16 @@ object AddonMeta {
.map(fromYamlString)
.rethrow
def fromYamlFile[F[_]: Sync](file: Path): F[AddonMeta] =
Sync[F]
.blocking(YamlParser.parse(java.nio.file.Files.newBufferedReader(file.toNioPath)))
.rethrow
.map(_.as[AddonMeta])
/** Reads and decodes an addon descriptor from the YAML file at `file`.
  *
  * The file is opened, decoded and closed inside a single blocking section, so the
  * reader is always released, whether decoding succeeds, returns a failure or throws.
  * A `Left` returned by the decoder is raised as an error in `F`.
  */
def fromYamlFile[F[_]: Sync](file: Path): F[AddonMeta] =
  Sync[F]
    .blocking {
      val reader = java.nio.file.Files.newBufferedReader(file.toNioPath)
      // `decode` hands back a fully materialised Either, so nothing reads from
      // the reader after this point and it can be closed right away.
      try YamlParser.default.decode[AddonMeta](reader)
      finally reader.close()
    }
    .rethrow
def findInDirectory[F[_]: Sync: Files](dir: Path): F[AddonMeta] = {
val logger = docspell.logging.getLogger[F]

View File

@ -16,6 +16,7 @@ import docspell.logging.Logger
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException
import org.apache.pdfbox.{Loader => PdfboxLoader}
/** Using PDFBox, the incoming pdf is loaded while trying the given passwords. */
object RemovePdfEncryption {
@ -76,7 +77,7 @@ object RemovePdfEncryption {
}
private def load(bytes: Array[Byte], pw: Password): Option[PDDocument] =
try Option(PDDocument.load(bytes, pw.pass))
try Option(PdfboxLoader.loadPDF(bytes, pw.pass))
catch {
case _: InvalidPasswordException =>
None

View File

@ -22,8 +22,8 @@ import docspell.common.util.File
import docspell.convert.ConversionResult.Handler
import docspell.files.TikaMimetype
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException
import org.apache.pdfbox.{Loader => PdfboxLoader}
trait FileChecks {
@ -42,7 +42,7 @@ trait FileChecks {
isType(MimeType.text("plain"))
def isUnencryptedPDF: Boolean =
Try(PDDocument.load(p.toNioPath.toFile)).map(_.close()).isSuccess
Try(PdfboxLoader.loadPDF(p.toNioPath.toFile)).map(_.close()).isSuccess
}
implicit class ByteStreamOps(delegate: Stream[IO, Byte]) {
@ -58,14 +58,14 @@ trait FileChecks {
def isUnencryptedPDF: IO[Boolean] =
delegate.compile
.to(Array)
.map(PDDocument.load(_))
.map(PdfboxLoader.loadPDF)
.map(_.close())
.map(_ => true)
def isEncryptedPDF: IO[Boolean] =
delegate.compile
.to(Array)
.map(PDDocument.load(_))
.map(PdfboxLoader.loadPDF)
.attempt
.map(e =>
e.fold(

View File

@ -11,11 +11,12 @@ import cats.implicits._
import fs2.Stream
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.{Loader => PdfboxLoader}
object PdfLoader {
private def readBytes1[F[_]: Sync](bytes: Array[Byte]): F[PDDocument] =
Sync[F].delay(PDDocument.load(bytes))
Sync[F].delay(PdfboxLoader.loadPDF(bytes))
/** Suspends closing of the given document in `F`; nothing is closed until the
  * returned effect is run.
  */
private def closePDDocument[F[_]: Sync](pd: PDDocument): F[Unit] = {
  val sync = implicitly[Sync[F]]
  sync.delay {
    pd.close()
  }
}

View File

@ -6,8 +6,6 @@
package docspell.extract.pdfbox
import java.io.InputStream
import scala.util.{Try, Using}
import cats.effect.Sync
@ -20,6 +18,7 @@ import docspell.extract.internal.Text
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.text.PDFTextStripper
import org.apache.pdfbox.{Loader => PdfboxLoader}
object PdfboxExtract {
@ -44,11 +43,8 @@ object PdfboxExtract {
.attempt
.map(_.flatten)
def getText(is: InputStream): Either[Throwable, Text] =
Using(PDDocument.load(is))(readText).toEither.flatten
def getText(inFile: Path): Either[Throwable, Text] =
Using(PDDocument.load(inFile.toNioPath.toFile))(readText).toEither.flatten
Using(PdfboxLoader.loadPDF(inFile.toNioPath.toFile))(readText).toEither.flatten
private def readText(doc: PDDocument): Either[Throwable, Text] =
Try {
@ -64,11 +60,8 @@ object PdfboxExtract {
.attempt
.map(_.flatten)
def getMetaData(is: InputStream): Either[Throwable, PdfMetaData] =
Using(PDDocument.load(is))(readMetaData).toEither.flatten
def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] =
Using(PDDocument.load(inFile.toNioPath.toFile))(readMetaData).toEither.flatten
Using(PdfboxLoader.loadPDF(inFile.toNioPath.toFile))(readMetaData).toEither.flatten
private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] =
Try {
@ -83,7 +76,7 @@ object PdfboxExtract {
mkValue(info.getKeywords),
mkValue(info.getCreator),
Option(info.getCreationDate).map(c => Timestamp(c.toInstant)),
doc.getNumberOfPages()
doc.getNumberOfPages
)
}.toEither
}

View File

@ -21,16 +21,6 @@ class PdfboxExtractTest extends FunSuite with TestLoggingConfig {
ExampleFiles.letter_en_pdf -> TestFiles.letterENText
)
test("extract text from text PDFs by inputstream") {
textPDFs.foreach { case (file, txt) =>
val url = file.toJavaUrl.fold(sys.error, identity)
val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity)
val received = removeFormatting(str.value)
val expect = removeFormatting(txt)
assertEquals(received, expect)
}
}
test("extract text from text PDFs via Stream") {
textPDFs.foreach { case (file, txt) =>
val data = file.readURL[IO](8192)
@ -42,18 +32,18 @@ class PdfboxExtractTest extends FunSuite with TestLoggingConfig {
}
test("extract text from image PDFs") {
val url = ExampleFiles.scanner_pdf13_pdf.toJavaUrl.fold(sys.error, identity)
val pdfData = ExampleFiles.scanner_pdf13_pdf.readURL[IO](8192)
val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity)
val str = PdfboxExtract.getText(pdfData).unsafeRunSync().fold(throw _, identity)
assertEquals(str.value, "")
}
test("extract metadata from pdf") {
val url = ExampleFiles.keywords_pdf.toJavaUrl.fold(sys.error, identity)
val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity)
val pdfData = ExampleFiles.keywords_pdf.readURL[IO](8192)
val str = PdfboxExtract.getText(pdfData).unsafeRunSync().fold(throw _, identity)
assert(str.value.startsWith("Keywords in PDF"))
val md = PdfboxExtract.getMetaData(url.openStream()).fold(throw _, identity)
val md = PdfboxExtract.getMetaData(pdfData).unsafeRunSync().fold(throw _, identity)
assertEquals(md.author, Some("E.K."))
assertEquals(md.title, Some("Keywords in PDF"))
assertEquals(md.subject, Some("This is a subject"))

View File

@ -12,7 +12,7 @@ import io.circe.syntax._
import io.circe.{Encoder, Json}
import perfolation._
import scribe.LogRecord
import scribe.data.MDC
import scribe.mdc.MDC
import scribe.message.Message
// From: https://github.com/outr/scribe/blob/8e99521e1ee1f0c421629764dd96e4eb193d84bd/json/shared/src/main/scala/scribe/json/JsonWriter.scala

View File

@ -12,7 +12,7 @@ import cats.effect.Sync
import docspell.logging.{Level, LogEvent, Logger}
import scribe.LoggerSupport
import scribe.data.{MDC, MDCMap}
import scribe.mdc.{MDC, MDCMap}
import scribe.message.LoggableMessage
private[logging] object ScribeWrapper {
@ -44,10 +44,13 @@ private[logging] object ScribeWrapper {
private[this] def convert(ev: LogEvent) = {
val level = convertLevel(ev.level)
val additional: List[LoggableMessage] = ev.additional.map {
case Right(ex) => LoggableMessage.throwable2Message(ex)
case Left(msg) => LoggableMessage.string2Message(msg)
}.toList
val additional: List[LoggableMessage] = ev.additional
.map {
case Right(ex) => LoggableMessage.throwableList2Messages(List(ex))
case Left(msg) => LoggableMessage.stringList2Messages(List(msg))
}
.toList
.flatten
LoggerSupport(
level,
ev.msg() :: additional,

View File

@ -32,7 +32,7 @@ object SignatureAlgo {
case object ES256 extends SignatureAlgo
case object ES384 extends SignatureAlgo
case object ES512 extends SignatureAlgo
case object Ed25519 extends SignatureAlgo
case object EdDSA extends SignatureAlgo
case object HMD5 extends SignatureAlgo
case object HS224 extends SignatureAlgo
@ -48,7 +48,7 @@ object SignatureAlgo {
ES256,
ES384,
ES512,
Ed25519,
EdDSA,
HMD5,
HS224,
HS256,
@ -58,19 +58,19 @@ object SignatureAlgo {
def fromString(str: String): Either[String, SignatureAlgo] =
str.toUpperCase() match {
case "RS256" => Right(RS256)
case "RS384" => Right(RS384)
case "RS512" => Right(RS512)
case "ES256" => Right(ES256)
case "ES384" => Right(ES384)
case "ES512" => Right(ES512)
case "ED25519" => Right(Ed25519)
case "HMD5" => Right(HMD5)
case "HS224" => Right(HS224)
case "HS256" => Right(HS256)
case "HS384" => Right(HS384)
case "HS512" => Right(HS512)
case _ => Left(s"Unknown signature algo: $str")
case "RS256" => Right(RS256)
case "RS384" => Right(RS384)
case "RS512" => Right(RS512)
case "ES256" => Right(ES256)
case "ES384" => Right(ES384)
case "ES512" => Right(ES512)
case "EDDSA" => Right(EdDSA)
case "HMD5" => Right(HMD5)
case "HS224" => Right(HS224)
case "HS256" => Right(HS256)
case "HS384" => Right(HS384)
case "HS512" => Right(HS512)
case _ => Left(s"Unknown signature algo: $str")
}
def unsafeFromString(str: String): SignatureAlgo =
@ -127,11 +127,11 @@ object SignatureAlgo {
.toEither
} yield Jwt.create(decoded)
case Ed25519 =>
case EdDSA =>
for {
pubKey <- createPublicKey(sigKey, "EdDSA")
decoded <- JwtCirce
.decodeJsonAll(token, pubKey, Seq(JwtAlgorithm.Ed25519))
.decodeJsonAll(token, pubKey, Seq(JwtAlgorithm.EdDSA))
.toEither
} yield Jwt.create(decoded)

View File

@ -11,6 +11,7 @@ import scala.concurrent.ExecutionContext
import cats.effect._
import cats.~>
import fs2._
import fs2.io.file.Files
import docspell.store.file.{FileRepository, FileRepositoryConfig}
import docspell.store.impl.StoreImpl
@ -42,7 +43,7 @@ trait Store[F[_]] {
object Store {
def create[F[_]: Async](
def create[F[_]: Async: Files](
jdbc: JdbcConfig,
schemaCfg: SchemaMigrateConfig,
fileRepoConfig: FileRepositoryConfig,

View File

@ -9,7 +9,7 @@ package docspell.store.file
import javax.sql.DataSource
import cats.effect._
import fs2.io.file.Path
import fs2.io.file.{Files, Path}
import docspell.common._
import docspell.files.TikaMimetype
@ -97,7 +97,7 @@ object BinnyUtils {
PathMapping(toPath)(toId)
}
def binaryStore[F[_]: Async](
def binaryStore[F[_]: Async: Files](
cfg: FileRepositoryConfig,
ds: DataSource,
logger: Logger[F]

View File

@ -10,6 +10,7 @@ import javax.sql.DataSource
import cats.effect._
import fs2._
import fs2.io.file.Files
import docspell.common._
@ -34,7 +35,7 @@ trait FileRepository[F[_]] {
object FileRepository {
def apply[F[_]: Async](
def apply[F[_]: Async: Files](
xa: Transactor[F],
ds: DataSource,
cfg: FileRepositoryConfig,

View File

@ -12,6 +12,7 @@ import cats.arrow.FunctionK
import cats.effect.Async
import cats.implicits._
import cats.~>
import fs2.io.file.Files
import docspell.store._
import docspell.store.file.{FileRepository, FileRepositoryConfig}
@ -20,7 +21,7 @@ import docspell.store.migrate.FlywayMigrate
import doobie._
import doobie.implicits._
final class StoreImpl[F[_]: Async](
final class StoreImpl[F[_]: Async: Files](
val fileRepo: FileRepository[F],
jdbc: JdbcConfig,
schemaCfg: SchemaMigrateConfig,

View File

@ -75,7 +75,7 @@ in
pkgs.jq
pkgs.inetutils
pkgs.htop
pkgs.openjdk
pkgs.jdk17
];

View File

@ -34,7 +34,7 @@
pname = "docspell-server";
src = fetchzip current_version.server;
buildInputs = [ jdk11 ];
buildInputs = [ jdk17 ];
buildPhase = "true";
installPhase = ''
@ -42,7 +42,7 @@
cp -R * $out/docspell-restserver-${version}/
cat > $out/bin/docspell-restserver <<-EOF
#!${bash}/bin/bash
$out/docspell-restserver-${version}/bin/docspell-restserver -java-home ${jdk11} "\$@"
$out/docspell-restserver-${version}/bin/docspell-restserver -java-home ${jdk17} "\$@"
EOF
chmod 755 $out/bin/docspell-restserver
'';
@ -54,7 +54,7 @@
src = fetchzip current_version.joex;
buildInputs = [ jdk11 ];
buildInputs = [ jdk17 ];
buildPhase = "true";
@ -63,7 +63,7 @@
cp -R * $out/docspell-joex-${version}/
cat > $out/bin/docspell-joex <<-EOF
#!${bash}/bin/bash
$out/docspell-joex-${version}/bin/docspell-joex -java-home ${jdk11} "\$@"
$out/docspell-joex-${version}/bin/docspell-joex -java-home ${jdk17} "\$@"
EOF
chmod 755 $out/bin/docspell-joex
'';

View File

@ -7,26 +7,26 @@ object Dependencies {
val BcryptVersion = "0.4"
val BetterMonadicForVersion = "0.3.1"
val BinnyVersion = "0.9.1"
val BinnyVersion = "0.10.0"
val CalevVersion = "0.7.0"
val CatsVersion = "2.7.0"
val CatsEffectVersion = "3.5.2"
val CatsParseVersion = "0.3.10"
val CirceVersion = "0.14.6"
val CirceVersion = "0.15.0-M1"
val CirceGenericExtrasVersion = "0.14.3"
val CirceYamlVersion = "0.14.2"
val CirceYamlVersion = "0.15.1"
val ClipboardJsVersion = "2.0.11"
val DoobieVersion = "1.0.0-RC2"
val EmilVersion = "0.13.0"
val FlexmarkVersion = "0.64.8"
val FlywayVersion = "9.22.3"
val FlywayVersion = "10.0.0"
val Fs2Version = "3.9.2"
val H2Version = "2.2.224"
val Http4sVersion = "0.23.23"
val Icu4jVersion = "74.1"
val JavaOtpVersion = "0.4.0"
val JsoupVersion = "1.16.2"
val JwtScalaVersion = "9.2.0"
val JwtScalaVersion = "9.4.4"
val KindProjectorVersion = "0.10.3"
val KittensVersion = "3.1.0"
val LevigoJbig2Version = "2.0"
@ -35,15 +35,14 @@ object Dependencies {
val MariaDbVersion = "3.2.0"
val MUnitVersion = "0.7.29"
val MUnitCatsEffectVersion = "1.0.7"
val OrganizeImportsVersion = "0.6.0"
val PdfboxVersion = "2.0.29"
val PdfboxVersion = "3.0.0"
val PdfjsViewerVersion = "2.12.313"
val PoiVersion = "4.1.2"
val PostgresVersion = "42.6.0"
val PureConfigVersion = "0.17.4"
val ScalaJavaTimeVersion = "2.5.0"
val ScodecBitsVersion = "1.1.38"
val ScribeVersion = "3.11.9"
val ScribeVersion = "3.12.2"
val Slf4jVersion = "2.0.9"
val SourcecodeVersion = "0.3.1"
val StanfordNlpVersion = "4.5.5"
@ -312,6 +311,7 @@ object Dependencies {
// ASL 2.0
// Every Flyway artifact shares the same organisation and version, so only the
// artifact names are listed and the coordinates are built in one place.
val flyway = Seq(
  "flyway-core",
  "flyway-database-postgresql",
  "flyway-mysql"
).map(artifact => "org.flywaydb" % artifact % FlywayVersion)
@ -342,7 +342,4 @@ object Dependencies {
"com.ibm.icu" % "icu4j" % Icu4jVersion
)
val organizeImports = Seq(
"com.github.liancheng" %% "organize-imports" % OrganizeImportsVersion
)
}

View File

@ -1,19 +1,19 @@
let
nixpkgs = builtins.fetchTarball {
#url = "https://github.com/NixOS/nixpkgs/archive/92e990a8d6bc35f1089c76dd8ba68b78da90da59.tar.gz";
url = "channel:nixos-21.11";
url = "channel:nixos-23.05";
};
pkgs = import nixpkgs { };
initScript = pkgs.writeScript "docspell-build-init" ''
export LD_LIBRARY_PATH=
${pkgs.bash}/bin/bash -c "sbt -mem 4096 -java-home ${pkgs.openjdk11}/lib/openjdk"
${pkgs.bash}/bin/bash -c "sbt -mem 4096 -java-home ${pkgs.openjdk17}/lib/openjdk"
'';
in with pkgs;
buildFHSUserEnv {
name = "docspell-sbt";
targetPkgs = pkgs: with pkgs; [
netcat jdk11 wget which dpkg sbt git elmPackages.elm ncurses fakeroot mc
netcat jdk17 wget which dpkg sbt git elmPackages.elm ncurses fakeroot mc
zola yarn
# haskells http client needs this (to download elm packages)

View File

@ -23,9 +23,9 @@ It is enough to install the JRE. The JDK is required, if you want to
build docspell from source. For newer versions, the JRE is not shipped
anymore; simply use the JDK then.
Docspell has been tested with Java 11 (or sometimes referred to as JDK
11). The provided packages are build using JDK 11. However, it also
works on newer java versions. The provided docker images use JDK11.
Docspell has been tested with Java 17 (sometimes referred to as JDK
17). The provided packages are built using JDK 17. However, it also
works on newer Java versions. The provided docker images use JDK 17.
The next tools are only required on machines running the *Joex*
component.