mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-02 13:32:51 +00:00
Initial module setup
This commit is contained in:
parent
492f4d304f
commit
c7f598e3b0
24
build.sbt
24
build.sbt
@ -259,6 +259,26 @@ val analysis = project.in(file("modules/analysis")).
|
||||
Dependencies.fs2 ++
|
||||
Dependencies.stanfordNlpCore
|
||||
).dependsOn(common, files % "test->test")
|
||||
|
||||
// Module `docspell-fts-client`: depends on `common` only and does not add
// any library dependencies of its own.
val ftsclient = project
  .in(file("modules/fts-client"))
  .disablePlugins(RevolverPlugin)
  .settings(sharedSettings)
  .settings(testSettings)
  .settings(
    name := "docspell-fts-client",
    libraryDependencies ++= Seq.empty
  )
  .dependsOn(common)
|
||||
|
||||
// Module `docspell-fts-solr`: builds on `common` and `ftsclient` and pulls in
// the http4s client and circe dependency sets.
val ftssolr = project
  .in(file("modules/fts-solr"))
  .disablePlugins(RevolverPlugin)
  .settings(sharedSettings)
  .settings(testSettings)
  .settings(
    name := "docspell-fts-solr",
    libraryDependencies ++=
      Dependencies.http4sClient ++
        Dependencies.circe
  )
  .dependsOn(common, ftsclient)
|
||||
|
||||
val restapi = project.in(file("modules/restapi")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
@ -303,7 +323,7 @@ val backend = project.in(file("modules/backend")).
|
||||
Dependencies.bcrypt ++
|
||||
Dependencies.http4sClient ++
|
||||
Dependencies.emil
|
||||
).dependsOn(store, joexapi)
|
||||
).dependsOn(store, joexapi, ftsclient, ftssolr)
|
||||
|
||||
val webapp = project.in(file("modules/webapp")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
@ -472,6 +492,8 @@ val root = project.in(file(".")).
|
||||
, extract
|
||||
, convert
|
||||
, analysis
|
||||
, ftsclient
|
||||
, ftssolr
|
||||
, files
|
||||
, store
|
||||
, joexapi
|
||||
|
@ -0,0 +1,19 @@
|
||||
package docspell.ftsclient
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
import docspell.common._
|
||||
|
||||
import FtsBasicResult.AttachmentMatch
|
||||
|
||||
/** A fulltext search result for one item together with its attachment
  * matches.
  *
  * @param item identifier of the item
  * @param attachments the attachment matches; a non-empty list
  */
final case class FtsBasicResult(
    item: Ident,
    attachments: NonEmptyList[AttachmentMatch]
) {

  /** The highest score among all attachment matches. */
  def score: Double = {
    val scores = attachments.toList.map(_.score)
    scores.max
  }
}
|
||||
|
||||
object FtsBasicResult {

  /** One attachment entry of a [[FtsBasicResult]].
    *
    * Declared `final` like the other case classes of this module, so it
    * cannot be subclassed.
    *
    * @param id identifier of the attachment
    * @param score the score of this match
    */
  final case class AttachmentMatch(id: Ident, score: Double)

}
|
@ -0,0 +1,18 @@
|
||||
package docspell.ftsclient
|
||||
|
||||
import fs2.Stream
|
||||
|
||||
/** Docspell's interface to a fulltext search engine.
  *
  * Every operation needed to integrate fulltext search into docspell
  * is declared here, expressed in docspell's own data structures and
  * terms. Implementation modules translate these to the engine that
  * provides the features.
  *
  * @tparam F the effect type the operations run in
  */
trait FtsClient[F[_]] {

  /** Hands the given text data to the search engine.
    *
    * NOTE(review): judging by the name this adds the text to the index;
    * no implementation is visible yet, so confirm the exact semantics.
    */
  def indexData(data: TextData): F[Unit]

  /** Runs the given query and returns its results as a stream. */
  def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
}
|
@ -0,0 +1,10 @@
|
||||
package docspell.ftsclient
|
||||
|
||||
import docspell.common._
|
||||
|
||||
/** A query against the fulltext search engine.
  *
  * The query text is passed on as a raw string; how it is interpreted
  * is up to each implementation and the system it uses.
  *
  * @param q the raw query string
  * @param collective identifier of a collective (presumably scopes the
  *   search to that collective; confirm against the implementation)
  * @param limit presumably the maximum number of results; confirm
  * @param offset presumably the number of results to skip; confirm
  */
final case class FtsQuery(
    q: String,
    collective: Ident,
    limit: Int,
    offset: Int
)
|
@ -0,0 +1,5 @@
|
||||
package docspell.ftsclient
|
||||
|
||||
import docspell.common._
|
||||
|
||||
/** A piece of text together with the identifiers it belongs to; this is
  * the argument type of `FtsClient.indexData`.
  *
  * @param item identifier of the item
  * @param attachment identifier of the attachment
  * @param collective identifier of the collective
  * @param text the text content
  */
final case class TextData(
    item: Ident,
    attachment: Ident,
    collective: Ident,
    text: String
)
|
@ -0,0 +1,12 @@
|
||||
package docspell.ftssolr
|
||||
|
||||
import fs2.Stream
|
||||
import docspell.ftsclient._
|
||||
|
||||
/** The [[FtsClient]] implementation of the `docspell.ftssolr` module.
  *
  * Placeholder only: both operations are still unimplemented (`???`)
  * and throw a `NotImplementedError` when called.
  */
final class SolrFtsClient[F[_]] extends FtsClient[F] {

  override def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] = ???

  override def indexData(data: TextData): F[Unit] = ???
}
|
@ -0,0 +1,51 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Fulltext Search Engine
|
||||
---
|
||||
|
||||
# Choose a Fulltext Search Engine
|
||||
|
||||
It should be possible to search the contents of all documents.
|
||||
|
||||
## Context and Problem Statement
|
||||
|
||||
To allow searching the documents' contents efficiently, a separate
|
||||
index is necessary. The "de facto standard" for fulltext search on the
|
||||
JVM is something backed by [Lucene](https://lucene.apache.org).
|
||||
Another option is to use an RDBMS that supports fulltext search.
|
||||
|
||||
This adds another component to the mix, which increases the complexity
|
||||
of the setup and the software. Since docspell works great without this
|
||||
feature, it shouldn't have a huge impact on the application, i.e. if
|
||||
the fulltext search component is down or broken, docspell should still
|
||||
work (just the fulltext search is then not working).
|
||||
|
||||
## Considered Options
|
||||
|
||||
* [Apache SOLR](https://lucene.apache.org/solr)
|
||||
* [ElasticSearch](https://www.elastic.co/elasticsearch/)
|
||||
* [PostgreSQL](https://www.postgresql.org/docs/12/textsearch.html)
|
||||
* All of them or a subset
|
||||
|
||||
## Decision Outcome
|
||||
|
||||
If docspell is running on PostgreSQL, it would be the best option to
|
||||
also use it for fulltext search. But I don't want to lock the database
|
||||
to PostgreSQL *only* because of the fulltext search feature. This
|
||||
would have too large an impact on the whole application.
|
||||
|
||||
ElasticSearch and Apache SOLR are quite similar in features. SOLR is
|
||||
part of Lucene and therefore lives in the Apache ecosystem. I would
|
||||
choose this over ElasticSearch, which is backed by a company (the oss
|
||||
version is released under the Apache License, as far as I understand). Regarding
|
||||
features, both are great.
|
||||
|
||||
The last option (supporting all) is interesting, since it would make it possible
|
||||
to use PostgreSQL for fulltext search, when already using PostgreSQL
|
||||
as the database for docspell.
|
||||
|
||||
So in a first step, identify what docspell needs from a fulltext
|
||||
search component and create this interface and an implementation for
|
||||
Apache SOLR. This enables all users to use the fulltext search
|
||||
feature. As a later step, an implementation based on PostgreSQL could
|
||||
be provided, too.
|
@ -0,0 +1,16 @@
|
||||
---
|
||||
layout: docs
|
||||
title: Fulltext Search Design
|
||||
---
|
||||
|
||||
# How to integrate Fulltext Search
|
||||
|
||||
|
||||
|
||||
## Context and Problem Statement
|
||||
|
||||
|
||||
## Considered Options
|
||||
|
||||
|
||||
## Decision Outcome
|
Loading…
x
Reference in New Issue
Block a user