mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 09:58:26 +00:00
Initial version.
Features: - Upload PDF files and have them analyzed - Manage metadata and items - See processing in the web app
This commit is contained in:
@ -1,32 +1,66 @@
|
||||
package docspell.build
|
||||
|
||||
import sbt._
|
||||
|
||||
object Dependencies {
|
||||
|
||||
val BetterMonadicForVersion = "0.3.0"
|
||||
val BitpeaceVersion = "0.4.0-M2"
|
||||
val CirceVersion = "0.12.0-M4"
|
||||
val DoobieVersion = "0.8.0-M1"
|
||||
val BcryptVersion = "0.4"
|
||||
val BetterMonadicForVersion = "0.3.1"
|
||||
val BitpeaceVersion = "0.4.0-M2" // 0.4.0
|
||||
val CirceVersion = "0.12.1"
|
||||
val DoobieVersion = "0.8.0-RC1" // 0.8.2
|
||||
val FastparseVersion = "2.1.3"
|
||||
val FlywayVersion = "6.0.0-beta2"
|
||||
val Fs2Version = "1.1.0-M1"
|
||||
val FlywayVersion = "6.0.3"
|
||||
val Fs2Version = "1.1.0-M1" // 2.0.0
|
||||
val H2Version = "1.4.199"
|
||||
val Http4sVersion = "0.21.0-M2"
|
||||
val Http4sVersion = "0.21.0-M4" // waiting for new version supporting cats2/fs2-2
|
||||
val KindProjectorVersion = "0.10.3"
|
||||
val Log4sVersion = "1.8.2"
|
||||
val LogbackVersion = "1.2.3"
|
||||
val MariaDbVersion = "2.4.2"
|
||||
val MiniTestVersion = "2.5.0"
|
||||
val PostgresVersion = "42.2.6"
|
||||
val PureConfigVersion = "0.11.1"
|
||||
val MariaDbVersion = "2.4.4"
|
||||
val MiniTestVersion = "2.7.0"
|
||||
val PostgresVersion = "42.2.8"
|
||||
val PureConfigVersion = "0.12.0"
|
||||
val SqliteVersion = "3.28.0"
|
||||
val TikaVersion = "1.20"
|
||||
val StanfordNlpVersion = "3.9.2"
|
||||
val TikaVersion = "1.22"
|
||||
val javaxMailVersion = "1.6.2"
|
||||
val dnsJavaVersion = "2.1.9"
|
||||
val YamuscaVersion = "0.6.0-M2"
|
||||
val YamuscaVersion = "0.6.0"
|
||||
|
||||
val stanfordNlpCore = Seq(
|
||||
"edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion excludeAll(
|
||||
ExclusionRule("com.io7m.xom", "xom"),
|
||||
ExclusionRule("javax.servlet", "javax.servlet-api"),
|
||||
ExclusionRule("org.apache.lucene", "lucene-queryparser"),
|
||||
ExclusionRule("org.apache.lucene", "lucene-queries"),
|
||||
ExclusionRule("org.apache.lucene", "lucene-analyzers-common"),
|
||||
ExclusionRule("org.apache.lucene", "lucene-core"),
|
||||
ExclusionRule("com.sun.xml.bind", "jaxb-impl"),
|
||||
ExclusionRule("com.sun.xml.bind", "jaxb-core"),
|
||||
ExclusionRule("javax.xml.bind", "jaxb-api"),
|
||||
ExclusionRule("de.jollyday", "jollyday"),
|
||||
ExclusionRule("com.apple", "AppleJavaExtensions"),
|
||||
ExclusionRule("org.glassfish", "javax.json")
|
||||
)
|
||||
)
|
||||
|
||||
val stanfordNlpModels = Seq(
|
||||
"edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion classifier "models-german",
|
||||
"edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion classifier "models-english"
|
||||
)
|
||||
|
||||
val tika = Seq(
|
||||
"org.apache.tika" % "tika-core" % TikaVersion
|
||||
)
|
||||
|
||||
val bcrypt = Seq(
|
||||
"org.mindrot" % "jbcrypt" % BcryptVersion
|
||||
)
|
||||
|
||||
val fs2 = Seq(
|
||||
"co.fs2" %% "fs2-core" % Fs2Version
|
||||
"co.fs2" %% "fs2-core" % Fs2Version,
|
||||
"co.fs2" %% "fs2-io" % Fs2Version
|
||||
)
|
||||
|
||||
val http4s = Seq(
|
||||
@ -34,6 +68,10 @@ object Dependencies {
|
||||
"org.http4s" %% "http4s-circe" % Http4sVersion,
|
||||
"org.http4s" %% "http4s-dsl" % Http4sVersion,
|
||||
)
|
||||
|
||||
val http4sClient = Seq(
|
||||
"org.http4s" %% "http4s-blaze-client" % Http4sVersion
|
||||
)
|
||||
|
||||
val circe = Seq(
|
||||
"io.circe" %% "circe-generic" % CirceVersion,
|
||||
@ -46,7 +84,7 @@ object Dependencies {
|
||||
)
|
||||
|
||||
val logging = Seq(
|
||||
"ch.qos.logback" % "logback-classic" % LogbackVersion % Runtime
|
||||
"ch.qos.logback" % "logback-classic" % LogbackVersion
|
||||
)
|
||||
|
||||
// https://github.com/melrief/pureconfig
|
||||
@ -113,7 +151,7 @@ object Dependencies {
|
||||
val betterMonadicFor = "com.olegpy" %% "better-monadic-for" % BetterMonadicForVersion
|
||||
|
||||
val webjars = Seq(
|
||||
"swagger-ui" -> "3.22.2",
|
||||
"swagger-ui" -> "3.23.8",
|
||||
"Semantic-UI" -> "2.4.1",
|
||||
"jquery" -> "3.4.1"
|
||||
).map({case (a, v) => "org.webjars" % a % v })
|
||||
|
65
project/NerModelsPlugin.scala
Normal file
65
project/NerModelsPlugin.scala
Normal file
@ -0,0 +1,65 @@
|
||||
package docspell.build
|
||||
|
||||
import sbt.{Def, _}
|
||||
import sbt.Keys._
|
||||
|
||||
/** Copies selected files out of dependency archives into the managed
  * resources of a local sbt project.
  *
  * Background: the Stanford NER model jars are very large – about 1G
  * for the English models and about 170M for the German models – yet
  * only one file of roughly 60M is needed from each jar. Extracting
  * just those files saves around 1GB when packaging docspell, which is
  * the only reason this (admittedly ugly) plugin exists.
  *
  * The archives to filter must be added to `libraryDependencies` in
  * the `NerModels` configuration.
  */
object NerModelsPlugin extends AutoPlugin {

  object autoImport {
    // Configuration holding the archives that should be filtered.
    val NerModels = config("NerModels")

    val nerModelsFilter = settingKey[String => Boolean]("Which files to keep.")
    val nerModelsRunFilter = taskKey[Seq[File]]("Extract files from libraryDependencies")
  }

  import autoImport._

  // File name suffixes of the model files that are kept by
  // `nerClassifierSettings`.
  private val nerModels = List(
    "german.conll.germeval2014.hgc_175m_600.crf.ser.gz",
    "english.all.3class.distsim.crf.ser.gz"
  )

  /** Default settings: keep nothing, and register the extraction task
    * as a resource generator of the `Compile` configuration.
    */
  def nerModelSettings: Seq[Setting[_]] = Seq(
    nerModelsFilter := (_ => false),
    nerModelsRunFilter := {
      val log = streams.value.log
      val archives = Classpaths.managedJars(NerModels, Set("jar", "zip"), update.value)
      val keep = nerModelsFilter.value
      val targetDir = (Compile / resourceManaged).value
      filterArtifacts(log, archives, keep, targetDir)
    },
    Compile / resourceGenerators += nerModelsRunFilter.taskValue
  )

  /** Settings for projects that need the classifier models: adds the
    * Stanford model jars in the `NerModels` configuration and keeps
    * only entries whose name ends with one of the `nerModels` names.
    */
  def nerClassifierSettings: Seq[Setting[_]] = Seq(
    libraryDependencies ++= Dependencies.stanfordNlpModels.map(_ % NerModels),
    nerModelsFilter := { entryName =>
      nerModels.exists(suffix => entryName.endsWith(suffix))
    }
  )

  override def projectConfigurations: Seq[Configuration] =
    Seq(NerModels)

  override def projectSettings: Seq[Setting[_]] =
    nerModelSettings

  /** Unzips every file of `cp` into `out`, passing `nameFilter` to
    * `IO.unzip` to select the entries, and returns the files reported
    * by `IO.unzip` for all archives.
    */
  def filterArtifacts(logger: Logger, cp: Classpath, nameFilter: NameFilter, out: File): Seq[File] = {
    logger.info("NerModels: Filtering artifacts...")
    for {
      archive   <- cp.files
      extracted <- IO.unzip(archive, out, nameFilter)
    } yield extracted
  }
}
|
16
project/build.nix
Normal file
16
project/build.nix
Normal file
@ -0,0 +1,16 @@
|
||||
# Builds an FHS user environment named "docspell-sbt" containing the
# tools listed in `targetPkgs`; its run script starts sbt.
with import <nixpkgs> { };
let
  # Script used as `runScript` below: it sets LD_LIBRARY_PATH to the
  # empty string and then launches sbt through bash.
  launchSbt = writeScript "docspell-build-init" ''
    export LD_LIBRARY_PATH=
    ${bash}/bin/bash -c sbt
  '';
in
buildFHSUserEnv {
  name = "docspell-sbt";
  runScript = launchSbt;
  targetPkgs = pkgs: with pkgs; [
    netcat
    jdk8
    wget
    which
    zsh
    dpkg
    sbt
    git
    elmPackages.elm
    ncurses
    fakeroot
    mc
    jekyll
    # Haskell's http client needs this (to download elm packages)
    iana-etc
  ];
}
|
@ -1,8 +1,8 @@
|
||||
addSbtPlugin("io.get-coursier" % "sbt-coursier" % "2.0.0-RC2")
|
||||
addSbtPlugin("com.github.eikek" % "sbt-openapi-schema" % "0.5.0-SNAPSHOT")
|
||||
addSbtPlugin("com.github.eikek" % "sbt-openapi-schema" % "0.5.0")
|
||||
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.0")
|
||||
addSbtPlugin("io.spray" % "sbt-revolver" % "0.9.1")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.3.25")
|
||||
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.0-M2")
|
||||
addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.4.1")
|
||||
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.1-M3")
|
||||
addSbtPlugin("com.47deg" % "sbt-microsites" % "0.9.2")
|
||||
|
Reference in New Issue
Block a user