Initial module setup

2025-09-15 21:46:53 +00:00 · 2020-06-14 22:53:20 +02:00
parent 492f4d304f
commit c7f598e3b0
8 changed files with 154 additions and 1 deletions
--- a/build.sbt
+++ b/build.sbt
@@ -260,6 +260,26 @@ val analysis = project.in(file("modules/analysis")).
      Dependencies.stanfordNlpCore
  ).dependsOn(common, files % "test->test")
 // Build module for the fulltext search client interface
 // (modules/fts-client). It adds no library dependencies of its own
 // (`Seq.empty`) and builds on the `common` module.
 val ftsclient = project
  .in(file("modules/fts-client"))
  .disablePlugins(RevolverPlugin)
  .settings(sharedSettings)
  .settings(testSettings)
  .settings(
    name := "docspell-fts-client",
    libraryDependencies ++= Seq.empty
  )
  .dependsOn(common)
 // Build module for the SOLR-backed fulltext search implementation
 // (modules/fts-solr). Pulls in the http4s client and circe dependency
 // sets and builds on `common` and `ftsclient`.
 val ftssolr = project
  .in(file("modules/fts-solr"))
  .disablePlugins(RevolverPlugin)
  .settings(sharedSettings)
  .settings(testSettings)
  .settings(
    name := "docspell-fts-solr",
    libraryDependencies ++=
      Dependencies.http4sClient ++
      Dependencies.circe
  )
  .dependsOn(common, ftsclient)
 val restapi = project.in(file("modules/restapi")).
  disablePlugins(RevolverPlugin).
  enablePlugins(OpenApiSchema).
@@ -303,7 +323,7 @@ val backend = project.in(file("modules/backend")).
      Dependencies.bcrypt ++
      Dependencies.http4sClient ++
      Dependencies.emil
-  ).dependsOn(store, joexapi)
+  ).dependsOn(store, joexapi, ftsclient, ftssolr)
 val webapp = project.in(file("modules/webapp")).
  disablePlugins(RevolverPlugin).
@@ -472,6 +492,8 @@ val root = project.in(file(".")).
    , extract
    , convert
    , analysis
    , ftsclient
    , ftssolr
    , files
    , store
    , joexapi
--- a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsBasicResult.scala
+++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsBasicResult.scala
@@ -0,0 +1,19 @@
 package docspell.ftsclient
 import cats.data.NonEmptyList
 import cats.implicits._
 import docspell.common._
 import FtsBasicResult.AttachmentMatch
 /** Result element of a basic fulltext search: an item identifier
  * together with the attachment matches found for it.
  *
  * @param item identifier of the item
  * @param attachments the matching attachments; a `NonEmptyList`, so
  *   at least one match is always present
  */
 final case class FtsBasicResult(item: Ident, attachments: NonEmptyList[AttachmentMatch]) {
  /** The largest score among all attachment matches. */
  def score: Double = {
    val matchScores = attachments.toList.map(_.score)
    matchScores.max
  }
 }
 object FtsBasicResult {
  /** A single matching attachment and the score assigned to it.
    *
    * Declared `final`, like the other case classes of this module, so
    * the generated equality and `copy` cannot be broken by subclassing.
    *
    * @param id identifier of the attachment
    * @param score the score of this match
    */
  final case class AttachmentMatch(id: Ident, score: Double)
 }
--- a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala
+++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala
@@ -0,0 +1,18 @@
 package docspell.ftsclient
 import fs2.Stream
 /** The fts client is the interface for docspell to a fulltext search
  * engine.
  *
  * It defines all operations required for integration into docspell.
  * It uses data structures and terms of docspell. Implementation
  * modules need to translate it to the engine that provides the
  * features.
  *
  * @tparam F the effect type in which the operations are expressed
  */
 trait FtsClient[F[_]] {
  /** Runs the given query and emits the results as a stream of
    * [[FtsBasicResult]] values.
    */
  def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
  /** Hands the given text data to the search engine. Presumably this
    * adds or updates the corresponding index entry; the exact
    * semantics are up to the implementation (TODO confirm).
    */
  def indexData(data: TextData): F[Unit]
 }
--- a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala
+++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala
@@ -0,0 +1,10 @@
 package docspell.ftsclient
 import docspell.common._
 /** A fulltext query.
  *
  * The query itself is a raw string. Each implementation may
  * interpret it according to the system in use.
  *
  * @param q the raw query string
  * @param collective identifier of a collective; presumably the search
  *   is restricted to it (TODO confirm)
  * @param limit presumably the maximum number of results to return
  *   (TODO confirm)
  * @param offset presumably the number of results to skip, for paging
  *   (TODO confirm)
  */
 final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int)
--- a/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala
+++ b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala
@@ -0,0 +1,5 @@
 package docspell.ftsclient
 import docspell.common._
 /** A piece of text together with the identifiers it belongs to; this
  * is the argument type of `FtsClient.indexData`.
  *
  * @param item identifier of the item
  * @param attachment identifier of the attachment (presumably the one
  *   the text was extracted from; TODO confirm)
  * @param collective identifier of the collective
  * @param text the text content
  */
 final case class TextData(item: Ident, attachment: Ident, collective: Ident, text: String)
--- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala
+++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala
@@ -0,0 +1,12 @@
 package docspell.ftssolr
 import fs2.Stream
 import docspell.ftsclient._
 /** [[FtsClient]] implementation intended for Apache SOLR.
  *
  * This is a skeleton only: both operations are defined as `???` and
  * therefore throw `scala.NotImplementedError` as soon as they are
  * called.
  */
 final class SolrFtsClient[F[_]] extends FtsClient[F] {
  // Not implemented yet: calling this throws NotImplementedError.
  def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] =
    ???
  // Not implemented yet: calling this throws NotImplementedError.
  def indexData(data: TextData): F[Unit] =
    ???
 }
--- a/modules/microsite/docs/dev/adr/0014_fulltext_search_engine.md
+++ b/modules/microsite/docs/dev/adr/0014_fulltext_search_engine.md
@@ -0,0 +1,51 @@
 ---
 layout: docs
 title: Fulltext Search Engine
 ---
 # Choose a Fulltext Search Engine
 It should be possible to search the contents of all documents.
 ## Context and Problem Statement
 To allow searching the documents' contents efficiently, a separate
 index is necessary. The "de facto standard" for fulltext search on the
 JVM is something backed by [Lucene](https://lucene.apache.org).
 Another option is to use an RDBMS that supports fulltext search.
 This adds another component to the mix, which increases the complexity
 of the setup and the software. Since docspell works great without this
 feature, it shouldn't have a huge impact on the application, i.e. if
 the fulltext search component is down or broken, docspell should still
 work (only the fulltext search would then be unavailable).
 ## Considered Options
 * [Apache SOLR](https://lucene.apache.org/solr)
 * [ElasticSearch](https://www.elastic.co/elasticsearch/)
 * [PostgreSQL](https://www.postgresql.org/docs/12/textsearch.html)
 * All of them or a subset
 ## Decision Outcome
 If docspell is running on PostgreSQL, it would be the best option to
 also use it for fulltext search. But I don't want to lock the database
 to PostgreSQL *only* because of the fulltext search feature. This
 would have too large an impact on the whole application.
 ElasticSearch and Apache SOLR are quite similar in features. SOLR is
 part of Lucene and therefore lives in the Apache ecosystem. I would
 choose this over ElasticSearch, which is backed by a company (the OSS
 version is released under the Apache License, as far as I understand).
 Regarding features, both are great.
 The last option (supporting all) is interesting, since it would make
 it possible to use PostgreSQL for fulltext search when PostgreSQL is
 already the database for docspell.
 So in a first step, identify what docspell needs from a fulltext
 search component and create this interface and an implementation for
 Apache SOLR. This enables all users to use the fulltext search
 feature. As a later step, an implementation based on PostgreSQL could
 be provided, too.
--- a/modules/microsite/docs/dev/adr/0015_fulltext_search_design.md
+++ b/modules/microsite/docs/dev/adr/0015_fulltext_search_design.md
@@ -0,0 +1,16 @@
 ---
 layout: docs
 title: Fulltext Search Design
 ---
 # How to integrate Fulltext Search
 ## Context and Problem Statement
 ## Considered Options
 ## Decision Outcome