mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 10:28:27 +00:00
Merge pull request #1462 from eikek/postgres-fulltext
Postgres fulltext
This commit is contained in:
44
build.sbt
44
build.sbt
@ -319,19 +319,6 @@ val common = project
|
|||||||
)
|
)
|
||||||
.dependsOn(loggingApi)
|
.dependsOn(loggingApi)
|
||||||
|
|
||||||
val config = project
|
|
||||||
.in(file("modules/config"))
|
|
||||||
.disablePlugins(RevolverPlugin)
|
|
||||||
.settings(sharedSettings)
|
|
||||||
.withTestSettings
|
|
||||||
.settings(
|
|
||||||
name := "docspell-config",
|
|
||||||
libraryDependencies ++=
|
|
||||||
Dependencies.fs2 ++
|
|
||||||
Dependencies.pureconfig
|
|
||||||
)
|
|
||||||
.dependsOn(common, loggingApi)
|
|
||||||
|
|
||||||
val loggingScribe = project
|
val loggingScribe = project
|
||||||
.in(file("modules/logging/scribe"))
|
.in(file("modules/logging/scribe"))
|
||||||
.disablePlugins(RevolverPlugin)
|
.disablePlugins(RevolverPlugin)
|
||||||
@ -619,6 +606,20 @@ val ftssolr = project
|
|||||||
)
|
)
|
||||||
.dependsOn(common, ftsclient)
|
.dependsOn(common, ftsclient)
|
||||||
|
|
||||||
|
val ftspsql = project
|
||||||
|
.in(file("modules/fts-psql"))
|
||||||
|
.disablePlugins(RevolverPlugin)
|
||||||
|
.settings(sharedSettings)
|
||||||
|
.withTestSettings
|
||||||
|
.settings(
|
||||||
|
name := "docspell-fts-psql",
|
||||||
|
libraryDependencies ++=
|
||||||
|
Dependencies.doobie ++
|
||||||
|
Dependencies.postgres ++
|
||||||
|
Dependencies.flyway
|
||||||
|
)
|
||||||
|
.dependsOn(common, ftsclient, store % "compile->test;test->test")
|
||||||
|
|
||||||
val restapi = project
|
val restapi = project
|
||||||
.in(file("modules/restapi"))
|
.in(file("modules/restapi"))
|
||||||
.disablePlugins(RevolverPlugin)
|
.disablePlugins(RevolverPlugin)
|
||||||
@ -715,6 +716,20 @@ val webapp = project
|
|||||||
)
|
)
|
||||||
.dependsOn(query.js)
|
.dependsOn(query.js)
|
||||||
|
|
||||||
|
// Config project shared among the two applications only
|
||||||
|
val config = project
|
||||||
|
.in(file("modules/config"))
|
||||||
|
.disablePlugins(RevolverPlugin)
|
||||||
|
.settings(sharedSettings)
|
||||||
|
.withTestSettings
|
||||||
|
.settings(
|
||||||
|
name := "docspell-config",
|
||||||
|
libraryDependencies ++=
|
||||||
|
Dependencies.fs2 ++
|
||||||
|
Dependencies.pureconfig
|
||||||
|
)
|
||||||
|
.dependsOn(common, loggingApi, ftspsql, store)
|
||||||
|
|
||||||
// --- Application(s)
|
// --- Application(s)
|
||||||
|
|
||||||
val joex = project
|
val joex = project
|
||||||
@ -769,6 +784,7 @@ val joex = project
|
|||||||
joexapi,
|
joexapi,
|
||||||
restapi,
|
restapi,
|
||||||
ftssolr,
|
ftssolr,
|
||||||
|
ftspsql,
|
||||||
pubsubNaive,
|
pubsubNaive,
|
||||||
notificationImpl,
|
notificationImpl,
|
||||||
schedulerImpl
|
schedulerImpl
|
||||||
@ -841,6 +857,7 @@ val restserver = project
|
|||||||
backend,
|
backend,
|
||||||
webapp,
|
webapp,
|
||||||
ftssolr,
|
ftssolr,
|
||||||
|
ftspsql,
|
||||||
oidc,
|
oidc,
|
||||||
pubsubNaive,
|
pubsubNaive,
|
||||||
notificationImpl,
|
notificationImpl,
|
||||||
@ -926,6 +943,7 @@ val root = project
|
|||||||
analysis,
|
analysis,
|
||||||
ftsclient,
|
ftsclient,
|
||||||
ftssolr,
|
ftssolr,
|
||||||
|
ftspsql,
|
||||||
files,
|
files,
|
||||||
store,
|
store,
|
||||||
joexapi,
|
joexapi,
|
||||||
|
@ -62,7 +62,14 @@ object CreateIndex {
|
|||||||
val items = store
|
val items = store
|
||||||
.transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
|
.transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
|
||||||
.map(nn =>
|
.map(nn =>
|
||||||
TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
|
TextData.item(
|
||||||
|
nn.id,
|
||||||
|
nn.collective,
|
||||||
|
nn.folder,
|
||||||
|
Option(nn.name),
|
||||||
|
nn.notes,
|
||||||
|
nn.language
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
fts.indexData(logger, attachs ++ items)
|
fts.indexData(logger, attachs ++ items)
|
||||||
|
@ -605,7 +605,14 @@ object OItem {
|
|||||||
.transact(RItem.updateNotes(item, collective, notes))
|
.transact(RItem.updateNotes(item, collective, notes))
|
||||||
)
|
)
|
||||||
.flatTap(
|
.flatTap(
|
||||||
onSuccessIgnoreError(fts.updateItemNotes(logger, item, collective, notes))
|
onSuccessIgnoreError {
|
||||||
|
store
|
||||||
|
.transact(RCollective.findLanguage(collective))
|
||||||
|
.map(_.getOrElse(Language.English))
|
||||||
|
.flatMap(lang =>
|
||||||
|
fts.updateItemNotes(logger, item, collective, lang, notes)
|
||||||
|
)
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def setName(item: Ident, name: String, collective: Ident): F[UpdateResult] =
|
def setName(item: Ident, name: String, collective: Ident): F[UpdateResult] =
|
||||||
@ -615,7 +622,14 @@ object OItem {
|
|||||||
.transact(RItem.updateName(item, collective, name))
|
.transact(RItem.updateName(item, collective, name))
|
||||||
)
|
)
|
||||||
.flatTap(
|
.flatTap(
|
||||||
onSuccessIgnoreError(fts.updateItemName(logger, item, collective, name))
|
onSuccessIgnoreError {
|
||||||
|
store
|
||||||
|
.transact(RCollective.findLanguage(collective))
|
||||||
|
.map(_.getOrElse(Language.English))
|
||||||
|
.flatMap(lang =>
|
||||||
|
fts.updateItemName(logger, item, collective, lang, name)
|
||||||
|
)
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
def setNameMultiple(
|
def setNameMultiple(
|
||||||
@ -733,10 +747,17 @@ object OItem {
|
|||||||
)
|
)
|
||||||
.flatTap(
|
.flatTap(
|
||||||
onSuccessIgnoreError(
|
onSuccessIgnoreError(
|
||||||
OptionT(store.transact(RAttachment.findItemId(attachId)))
|
OptionT(store.transact(RAttachment.findItemAndLanguage(attachId)))
|
||||||
.semiflatMap(itemId =>
|
.semiflatMap { case (itemId, lang) =>
|
||||||
fts.updateAttachmentName(logger, itemId, attachId, collective, name)
|
fts.updateAttachmentName(
|
||||||
|
logger,
|
||||||
|
itemId,
|
||||||
|
attachId,
|
||||||
|
collective,
|
||||||
|
lang.getOrElse(Language.English),
|
||||||
|
name
|
||||||
)
|
)
|
||||||
|
}
|
||||||
.fold(())(identity)
|
.fold(())(identity)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -14,7 +14,7 @@ case class Banner(
|
|||||||
configFile: Option[String],
|
configFile: Option[String],
|
||||||
appId: Ident,
|
appId: Ident,
|
||||||
baseUrl: LenientUri,
|
baseUrl: LenientUri,
|
||||||
ftsUrl: Option[LenientUri],
|
ftsInfo: Option[String],
|
||||||
fileStoreConfig: FileStoreConfig
|
fileStoreConfig: FileStoreConfig
|
||||||
) {
|
) {
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ case class Banner(
|
|||||||
s"Id: ${appId.id}",
|
s"Id: ${appId.id}",
|
||||||
s"Base-Url: ${baseUrl.asString}",
|
s"Base-Url: ${baseUrl.asString}",
|
||||||
s"Database: ${jdbcUrl.asString}",
|
s"Database: ${jdbcUrl.asString}",
|
||||||
s"Fts: ${ftsUrl.map(_.asString).getOrElse("-")}",
|
s"Fts: ${ftsInfo.getOrElse("-")}",
|
||||||
s"Config: ${configFile.getOrElse("")}",
|
s"Config: ${configFile.getOrElse("")}",
|
||||||
s"FileRepo: ${fileStoreConfig}",
|
s"FileRepo: ${fileStoreConfig}",
|
||||||
""
|
""
|
||||||
|
27
modules/config/src/main/scala/docspell/config/FtsType.scala
Normal file
27
modules/config/src/main/scala/docspell/config/FtsType.scala
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.config
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
|
||||||
|
/** The full-text search backends that can be selected by name. */
sealed trait FtsType {

  /** The name this backend is identified by, see [[FtsType.fromName]]. */
  def name: String
}

object FtsType {
  case object Solr extends FtsType {
    val name = "solr"
  }

  case object PostgreSQL extends FtsType {
    val name = "postgresql"
  }

  /** All backends known to this enumeration. */
  val all: NonEmptyList[FtsType] =
    NonEmptyList.of(Solr, PostgreSQL)

  /** Finds the backend with the given name, comparing case-insensitively.
    *
    * @return
    *   the matching backend or an error message naming the unknown input
    */
  def fromName(str: String): Either[String, FtsType] =
    all.find(candidate => candidate.name.equalsIgnoreCase(str)) match {
      case Some(ftsType) => Right(ftsType)
      case None          => Left(s"Unknown fts type: $str")
    }

  /** Like [[fromName]], but fails via `sys.error` when the name is unknown. */
  def unsafeFromName(str: String): FtsType =
    fromName(str) match {
      case Right(ftsType) => ftsType
      case Left(message)  => sys.error(message)
    }
}
|
@ -10,9 +10,11 @@ import java.nio.file.{Path => JPath}
|
|||||||
|
|
||||||
import scala.reflect.ClassTag
|
import scala.reflect.ClassTag
|
||||||
|
|
||||||
|
import cats.syntax.all._
|
||||||
import fs2.io.file.Path
|
import fs2.io.file.Path
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
import docspell.ftspsql.{PgQueryParser, RankNormalization}
|
||||||
import docspell.logging.{Level, LogConfig}
|
import docspell.logging.{Level, LogConfig}
|
||||||
|
|
||||||
import com.github.eikek.calev.CalEvent
|
import com.github.eikek.calev.CalEvent
|
||||||
@ -85,11 +87,28 @@ object Implicits {
|
|||||||
implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
|
implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
|
||||||
ConfigReader[String].emap(reason(FileStoreType.fromString))
|
ConfigReader[String].emap(reason(FileStoreType.fromString))
|
||||||
|
|
||||||
def reason[A: ClassTag](
|
implicit val pgQueryParserReader: ConfigReader[PgQueryParser] =
|
||||||
f: String => Either[String, A]
|
ConfigReader[String].emap(reason(PgQueryParser.fromName))
|
||||||
): String => Either[FailureReason, A] =
|
|
||||||
|
/** Reads a [[RankNormalization]] from a list of numbers.
  *
  * Each number is converted with `RankNormalization.byNumber` and the
  * results are combined with `&&`. An empty list is reported as a
  * conversion failure; reducing it directly would throw an
  * `UnsupportedOperationException` while the configuration is loaded.
  */
implicit val pgRankNormalizationReader: ConfigReader[RankNormalization] =
  ConfigReader[List[Int]].emap(
    reason(ints =>
      ints
        .traverse(RankNormalization.byNumber)
        .flatMap(
          _.reduceOption(_ && _)
            .toRight("At least one rank normalization number is required")
        )
    )
  )
|
||||||
|
|
||||||
|
implicit val languageReader: ConfigReader[Language] =
|
||||||
|
ConfigReader[String].emap(reason(Language.fromString))
|
||||||
|
|
||||||
|
implicit def languageMapReader[B: ConfigReader]: ConfigReader[Map[Language, B]] =
|
||||||
|
pureconfig.configurable.genericMapReader[Language, B](reason(Language.fromString))
|
||||||
|
|
||||||
|
implicit val ftsTypeReader: ConfigReader[FtsType] =
|
||||||
|
ConfigReader[String].emap(reason(FtsType.fromName))
|
||||||
|
|
||||||
|
def reason[T, A: ClassTag](
|
||||||
|
f: T => Either[String, A]
|
||||||
|
): T => Either[FailureReason, A] =
|
||||||
in =>
|
in =>
|
||||||
f(in).left.map(str =>
|
f(in).left.map(str =>
|
||||||
CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str)
|
CannotConvert(in.toString, implicitly[ClassTag[A]].runtimeClass.toString, str)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.config
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.ftspsql._
|
||||||
|
import docspell.store.JdbcConfig
|
||||||
|
|
||||||
|
/** Configuration values for the PostgreSQL based full-text search.
  *
  * @param useDefaultConnection
  *   whether the connection handed to [[toPsqlConfig]] is used in place of
  *   `jdbc`
  * @param jdbc
  *   the connection used when `useDefaultConnection` is false
  * @param pgQueryParser
  *   passed through to the resulting [[PsqlConfig]]
  * @param pgRankNormalization
  *   passed through to the resulting [[PsqlConfig]]
  * @param pgConfig
  *   per-language values, passed through as the `pgConfigSelect` function
  */
case class PgFtsConfig(
    useDefaultConnection: Boolean,
    jdbc: JdbcConfig,
    pgQueryParser: PgQueryParser,
    pgRankNormalization: RankNormalization,
    pgConfig: Map[Language, String]
) {

  /** Builds the [[PsqlConfig]], selecting either `stdConn` or the dedicated
    * `jdbc` connection depending on `useDefaultConnection`.
    */
  def toPsqlConfig(stdConn: JdbcConfig): PsqlConfig = {
    val connection = if (useDefaultConnection) stdConn else jdbc

    PsqlConfig(
      url = connection.url,
      user = connection.user,
      password = Password(connection.password),
      pgConfigSelect = pgConfig,
      pgQueryParser = pgQueryParser,
      rankNormalization = pgRankNormalization
    )
  }
}
|
||||||
|
|
||||||
|
object PgFtsConfig {}
|
@ -68,19 +68,24 @@ trait FtsClient[F[_]] {
|
|||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
itemId: Ident,
|
itemId: Ident,
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
|
language: Language,
|
||||||
name: String
|
name: String
|
||||||
): F[Unit] =
|
): F[Unit] =
|
||||||
updateIndex(logger, TextData.item(itemId, collective, None, Some(name), None))
|
updateIndex(
|
||||||
|
logger,
|
||||||
|
TextData.item(itemId, collective, None, Some(name), None, language)
|
||||||
|
)
|
||||||
|
|
||||||
def updateItemNotes(
|
def updateItemNotes(
|
||||||
logger: Logger[F],
|
logger: Logger[F],
|
||||||
itemId: Ident,
|
itemId: Ident,
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
|
language: Language,
|
||||||
notes: Option[String]
|
notes: Option[String]
|
||||||
): F[Unit] =
|
): F[Unit] =
|
||||||
updateIndex(
|
updateIndex(
|
||||||
logger,
|
logger,
|
||||||
TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")))
|
TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")), language)
|
||||||
)
|
)
|
||||||
|
|
||||||
def updateAttachmentName(
|
def updateAttachmentName(
|
||||||
@ -88,6 +93,7 @@ trait FtsClient[F[_]] {
|
|||||||
itemId: Ident,
|
itemId: Ident,
|
||||||
attachId: Ident,
|
attachId: Ident,
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
|
language: Language,
|
||||||
name: Option[String]
|
name: Option[String]
|
||||||
): F[Unit] =
|
): F[Unit] =
|
||||||
updateIndex(
|
updateIndex(
|
||||||
@ -97,7 +103,7 @@ trait FtsClient[F[_]] {
|
|||||||
attachId,
|
attachId,
|
||||||
collective,
|
collective,
|
||||||
None,
|
None,
|
||||||
Language.English,
|
language,
|
||||||
Some(name.getOrElse("")),
|
Some(name.getOrElse("")),
|
||||||
None
|
None
|
||||||
)
|
)
|
||||||
|
@ -18,6 +18,8 @@ sealed trait TextData {
|
|||||||
|
|
||||||
def folder: Option[Ident]
|
def folder: Option[Ident]
|
||||||
|
|
||||||
|
def language: Language
|
||||||
|
|
||||||
final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
|
final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
|
||||||
this match {
|
this match {
|
||||||
case a: TextData.Attachment => f(a)
|
case a: TextData.Attachment => f(a)
|
||||||
@ -32,7 +34,7 @@ object TextData {
|
|||||||
attachId: Ident,
|
attachId: Ident,
|
||||||
collective: Ident,
|
collective: Ident,
|
||||||
folder: Option[Ident],
|
folder: Option[Ident],
|
||||||
lang: Language,
|
language: Language,
|
||||||
name: Option[String],
|
name: Option[String],
|
||||||
text: Option[String]
|
text: Option[String]
|
||||||
) extends TextData {
|
) extends TextData {
|
||||||
@ -57,7 +59,8 @@ object TextData {
|
|||||||
collective: Ident,
|
collective: Ident,
|
||||||
folder: Option[Ident],
|
folder: Option[Ident],
|
||||||
name: Option[String],
|
name: Option[String],
|
||||||
notes: Option[String]
|
notes: Option[String],
|
||||||
|
language: Language
|
||||||
) extends TextData {
|
) extends TextData {
|
||||||
|
|
||||||
val id = Ident.unsafe("item") / item
|
val id = Ident.unsafe("item") / item
|
||||||
@ -69,8 +72,9 @@ object TextData {
|
|||||||
collective: Ident,
|
collective: Ident,
|
||||||
folder: Option[Ident],
|
folder: Option[Ident],
|
||||||
name: Option[String],
|
name: Option[String],
|
||||||
notes: Option[String]
|
notes: Option[String],
|
||||||
|
lang: Language
|
||||||
): TextData =
|
): TextData =
|
||||||
Item(item, collective, folder, name, notes)
|
Item(item, collective, folder, name, notes, lang)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,27 @@
|
|||||||
|
create table "ftspsql_search"(
|
||||||
|
"id" varchar(254) not null primary key,
|
||||||
|
"item_id" varchar(254) not null,
|
||||||
|
"collective" varchar(254) not null,
|
||||||
|
"lang" varchar(254) not null,
|
||||||
|
"attach_id" varchar(254),
|
||||||
|
"folder_id" varchar(254),
|
||||||
|
"updated_at" timestamptz not null default current_timestamp,
|
||||||
|
--- content columns
|
||||||
|
"attach_name" text,
|
||||||
|
"attach_content" text,
|
||||||
|
"item_name" text,
|
||||||
|
"item_notes" text,
|
||||||
|
--- index column
|
||||||
|
"fts_config" regconfig not null,
|
||||||
|
"text_index" tsvector
|
||||||
|
generated always as (
|
||||||
|
setweight(to_tsvector("fts_config", coalesce("attach_name", '')), 'B') ||
|
||||||
|
setweight(to_tsvector("fts_config", coalesce("item_name", '')), 'B') ||
|
||||||
|
setweight(to_tsvector("fts_config", coalesce("attach_content", '')), 'C') ||
|
||||||
|
setweight(to_tsvector("fts_config", coalesce("item_notes", '')), 'C')) stored
|
||||||
|
);
|
||||||
|
|
||||||
|
create index "ftspsql_search_ftsidx" on "ftspsql_search" using GIN ("text_index");
|
||||||
|
create index "ftpsql_search_item_idx" on "ftspsql_search"("item_id");
|
||||||
|
create index "ftpsql_search_attach_idx" on "ftspsql_search"("attach_id");
|
||||||
|
create index "ftpsql_search_folder_idx" on "ftspsql_search"("folder_id");
|
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
|
||||||
|
import org.flywaydb.core.Flyway
|
||||||
|
import org.flywaydb.core.api.output.MigrateResult
|
||||||
|
|
||||||
|
/** Runs the Flyway migrations found at `classpath:db/psqlfts` against the
  * database described by the given [[PsqlConfig]].
  */
final class DbMigration[F[_]: Sync](cfg: PsqlConfig) {
  private[this] val logger = docspell.logging.getLogger[F]
  private val location: String = "classpath:db/psqlfts"

  /** Creates the Flyway instance, logs, and then runs `migrate()` as a
    * blocking effect.
    */
  def run: F[MigrateResult] =
    createFlyway.flatMap { flyway =>
      logger.info(s"Running FTS migrations") *>
        Sync[F].blocking(flyway.migrate())
    }

  /** Logs the migration location and configures Flyway: history table
    * `flyway_fts_history`, clean disabled, baseline-on-migrate enabled.
    */
  def createFlyway: F[Flyway] =
    logger.info(s"Creating Flyway for: $location").map { _ =>
      Flyway
        .configure()
        .table("flyway_fts_history")
        .cleanDisabled(true)
        .dataSource(cfg.url.asString, cfg.user, cfg.password.pass)
        .locations(location)
        .baselineOnMigrate(true)
        .load()
    }
}

object DbMigration {

  /** Convenience constructor for [[DbMigration]]. */
  def apply[F[_]: Sync](cfg: PsqlConfig): DbMigration[F] =
    new DbMigration[F](cfg)
}
|
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
|
||||||
|
import doobie._
|
||||||
|
import doobie.util.log.Success
|
||||||
|
|
||||||
|
/** Doobie instances shared by the repository code of this module. */
trait DoobieMeta {

  /** Logs successful statements at debug level and every other log event
    * at error level.
    */
  implicit val sqlLogging: LogHandler = LogHandler {
    case ok: Success =>
      DoobieMeta.logger.debug(s"SQL $ok")
    case other =>
      DoobieMeta.logger.error(s"SQL Failure: $other")
  }

  /** Stores an [[Ident]] as its string `id`. */
  implicit val metaIdent: Meta[Ident] =
    Meta[String].timap(str => Ident.unsafe(str))(ident => ident.id)

  /** Stores a [[Language]] as its `iso3` string. */
  implicit val metaLanguage: Meta[Language] =
    Meta[String].timap(str => Language.unsafe(str))(lang => lang.iso3)
}

object DoobieMeta {
  private val logger = org.log4s.getLogger
}
|
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.syntax.all._
|
||||||
|
|
||||||
|
import docspell.common.{Ident, Language}
|
||||||
|
import docspell.ftsclient.TextData
|
||||||
|
|
||||||
|
/** One row of the search table, holding either attachment or item data. */
final case class FtsRecord(
    id: Ident,
    itemId: Ident,
    collective: Ident,
    language: Language,
    attachId: Option[Ident],
    folderId: Option[Ident],
    attachName: Option[String],
    attachContent: Option[String],
    itemName: Option[String],
    itemNotes: Option[String]
)

object FtsRecord {

  /** Converts a [[TextData]] into a record.
    *
    * Attachment data fills `attachId`, `attachName` and `attachContent` and
    * leaves the item columns empty; item data fills `itemName` and
    * `itemNotes` and leaves the attachment columns empty. The record id is
    * always `td.id`.
    */
  def fromTextData(td: TextData): FtsRecord =
    td match {
      case TextData.Attachment(
            ownerItem,
            attachmentId,
            coll,
            folderOpt,
            lang,
            nameOpt,
            textOpt
          ) =>
        FtsRecord(
          id = td.id,
          itemId = ownerItem,
          collective = coll,
          language = lang,
          attachId = attachmentId.some,
          folderId = folderOpt,
          attachName = nameOpt,
          attachContent = textOpt,
          itemName = None,
          itemNotes = None
        )

      case TextData.Item(itemIdent, coll, folderOpt, nameOpt, notesOpt, lang) =>
        FtsRecord(
          id = td.id,
          itemId = itemIdent,
          collective = coll,
          language = lang,
          attachId = None,
          folderId = folderOpt,
          attachName = None,
          attachContent = None,
          itemName = nameOpt,
          itemNotes = notesOpt
        )
    }
}
|
@ -0,0 +1,197 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
import fs2.Chunk
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.ftsclient.FtsQuery
|
||||||
|
|
||||||
|
import doobie._
|
||||||
|
import doobie.implicits._
|
||||||
|
|
||||||
|
/** SQL statements operating on the `ftspsql_search` table. */
object FtsRepository extends DoobieMeta {
  val table = fr"ftspsql_search"

  /** Counts the rows matching the query and selects their maximum rank
    * (0 if there is none).
    */
  def searchSummary(pq: PgQueryParser, rn: RankNormalization)(
      q: FtsQuery
  ): ConnectionIO[SearchSummary] = {
    val selectRank = mkSelectRank(rn)
    val query = mkQueryPart(pq, q)

    sql"""select count(id), coalesce(max($selectRank), 0)
         |from $table, $query
         |where ${mkCondition(q)} AND query @@ text_index
         |""".stripMargin
      .query[SearchResult.type#SearchSummaryAlias forSome { type SearchSummaryAlias = SearchSummary }]
      .unique
  }

  /** Selects the matching rows ordered by rank (descending), applying the
    * limit and offset of the query.
    *
    * @param withHighlighting
    *   when false, `null` is selected for the highlight column instead of
    *   calling `ts_headline`
    */
  def search(pq: PgQueryParser, rn: RankNormalization)(
      q: FtsQuery,
      withHighlighting: Boolean
  ): ConnectionIO[Vector[SearchResult]] = {
    val selectRank = mkSelectRank(rn)

    // passed to ts_headline as a bound parameter, not spliced into the SQL
    val hlOption =
      s"startsel=${q.highlight.pre},stopsel=${q.highlight.post}"

    val selectHl =
      if (!withHighlighting) fr"null as highlight"
      else
        fr"""ts_headline(
            | fts_config,
            | coalesce(attach_name, '') ||
            | ' ' || coalesce(attach_content, '') ||
            | ' ' || coalesce(item_name, '') ||
            | ' ' || coalesce(item_notes, ''), query, $hlOption) as highlight""".stripMargin

    val select =
      fr"id, item_id, collective, lang, attach_id, folder_id, attach_name, item_name, $selectRank as rank, $selectHl"

    val query = mkQueryPart(pq, q)

    sql"""select $select
         |from $table, $query
         |where ${mkCondition(q)} AND query @@ text_index
         |order by rank desc
         |limit ${q.limit}
         |offset ${q.offset}
         |""".stripMargin
      .query[SearchResult]
      .to[Vector]
  }

  /** Builds the row filter: always the collective, plus an `in (...)` clause
    * for item ids and folder ids when the query lists any.
    */
  private def mkCondition(q: FtsQuery): Fragment = {
    val coll = fr"collective = ${q.collective}"
    val items =
      NonEmptyList.fromList(q.items.toList).map { nel =>
        val ids = nel.map(id => fr"$id").reduceLeft(_ ++ fr"," ++ _)
        fr"item_id in ($ids)"
      }

    val folders =
      NonEmptyList.fromList(q.folders.toList).map { nel =>
        val ids = nel.map(id => fr"$id").reduceLeft(_ ++ fr"," ++ _)
        fr"folder_id in ($ids)"
      }

    List(items, folders).flatten.foldLeft(coll)(_ ++ fr"AND" ++ _)
  }

  /** Builds the `<parser>(fts_config, <text>) query` part of the from clause.
    * The function name comes from the fixed set of [[PgQueryParser]] values;
    * the query text is a bound parameter.
    */
  private def mkQueryPart(p: PgQueryParser, q: FtsQuery): Fragment = {
    val fname = Fragment.const(p.name)
    fr"$fname(fts_config, ${q.q}) query"
  }

  /** Builds the `ts_rank_cd` call; the normalization numbers are joined
    * with `|`.
    */
  private def mkSelectRank(rn: RankNormalization): Fragment = {
    val bits = rn.value.toNonEmptyList.map(n => sql"$n").reduceLeft(_ ++ sql"|" ++ _)
    fr"ts_rank_cd(text_index, query, $bits)"
  }

  /** Runs [[replace]] for every record and sums the affected row counts. */
  def replaceChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
    r.traverse(replace(pgConfig)).map(_.foldLeft(0)(_ + _))

  /** Inserts the record; if a row with the same id exists, its language,
    * folder, content columns and `fts_config` are overwritten with the
    * values of the record.
    */
  def replace(
      pgConfig: Language => String
  )(r: FtsRecord): ConnectionIO[Int] =
    (fr"INSERT INTO $table (id,item_id,collective,lang,attach_id,folder_id,attach_name,attach_content,item_name,item_notes,fts_config) VALUES (" ++
      commas(
        sql"${r.id}",
        sql"${r.itemId}",
        sql"${r.collective}",
        sql"${r.language}",
        sql"${r.attachId}",
        sql"${r.folderId}",
        sql"${r.attachName}",
        sql"${r.attachContent}",
        sql"${r.itemName}",
        sql"${r.itemNotes}",
        sql"${pgConfig(r.language)}::regconfig"
      ) ++ sql") on conflict (id) do update set " ++ commas(
        sql"lang = ${r.language}",
        sql"folder_id = ${r.folderId}",
        sql"attach_name = ${r.attachName}",
        sql"attach_content = ${r.attachContent}",
        sql"item_name = ${r.itemName}",
        sql"item_notes = ${r.itemNotes}",
        sql"fts_config = ${pgConfig(r.language)}::regconfig"
      )).update.run

  /** Partially updates the row with the id of the given record.
    *
    * Language and `fts_config` are always written. The folder and the
    * content columns are only written when the record carries a value for
    * them; a `None` keeps what is stored. The partial-update helpers of
    * `FtsClient` (e.g. renaming an item) create records that set a single
    * field and leave the others `None`, and they send an empty string to
    * clear a value, so writing `None` as NULL would wipe unrelated columns.
    * Use [[updateFolder]] to remove a folder.
    */
  def update(pgConfig: Language => String)(r: FtsRecord): ConnectionIO[Int] =
    (fr"UPDATE $table SET" ++ commas(
      sql"lang = ${r.language}",
      sql"folder_id = coalesce(${r.folderId}, folder_id)",
      sql"attach_name = coalesce(${r.attachName}, attach_name)",
      sql"attach_content = coalesce(${r.attachContent}, attach_content)",
      sql"item_name = coalesce(${r.itemName}, item_name)",
      sql"item_notes = coalesce(${r.itemNotes}, item_notes)",
      sql"fts_config = ${pgConfig(r.language)}::regconfig"
    ) ++ fr"WHERE id = ${r.id}").update.run

  /** Runs [[update]] for every record and sums the affected row counts. */
  def updateChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
    r.traverse(update(pgConfig)).map(_.foldLeft(0)(_ + _))

  /** Sets (or clears, when `folder` is `None`) the folder of all rows of the
    * given item in the given collective.
    */
  def updateFolder(
      itemId: Ident,
      collective: Ident,
      folder: Option[Ident]
  ): ConnectionIO[Int] =
    (sql"UPDATE $table" ++
      fr"SET folder_id = $folder" ++
      fr"WHERE item_id = $itemId AND collective = $collective").update.run

  /** Deletes all rows of the given item. */
  def deleteByItemId(itemId: Ident): ConnectionIO[Int] =
    sql"DELETE FROM $table WHERE item_id = $itemId".update.run

  /** Deletes the rows of the given attachment. */
  def deleteByAttachId(attachId: Ident): ConnectionIO[Int] =
    sql"DELETE FROM $table WHERE attach_id = $attachId".update.run

  /** Deletes every row of the table. */
  def deleteAll: ConnectionIO[Int] =
    sql"DELETE FROM $table".update.run

  /** Deletes all rows of the given collective. */
  def delete(collective: Ident): ConnectionIO[Int] =
    sql"DELETE FROM $table WHERE collective = $collective".update.run

  /** Drops the `flyway_fts_history` table and the search table, if they
    * exist.
    */
  def resetAll: ConnectionIO[Int] = {
    val dropFlyway = sql"DROP TABLE IF EXISTS flyway_fts_history".update.run
    val dropSearch = sql"DROP TABLE IF EXISTS $table".update.run
    for {
      a <- dropFlyway
      b <- dropSearch
    } yield a + b
  }

  /** Joins the fragments with commas. */
  private def commas(fr: Fragment, frn: Fragment*): Fragment =
    frn.foldLeft(fr)(_ ++ fr"," ++ _)

  /** Returns the value `select` defines for the language, falling back to
    * [[defaultPgConfig]].
    */
  def getPgConfig(select: PartialFunction[Language, String])(language: Language): String =
    select.applyOrElse(language, defaultPgConfig)

  /** The built-in mapping from a language to the value stored in the
    * `fts_config` column.
    */
  def defaultPgConfig(language: Language): String =
    language match {
      case Language.English    => "english"
      case Language.German     => "german"
      case Language.French     => "french"
      case Language.Italian    => "italian"
      case Language.Spanish    => "spanish"
      case Language.Hungarian  => "hungarian"
      case Language.Portuguese => "portuguese"
      case Language.Danish     => "danish"
      case Language.Finnish    => "finnish"
      case Language.Norwegian  => "norwegian"
      case Language.Swedish    => "swedish"
      case Language.Russian    => "russian"
      case Language.Romanian   => "romanian"
      case Language.Dutch      => "dutch"
      case Language.Czech      => "simple"
      case Language.Latvian    => "simple"
      case Language.Japanese   => "simple"
      case Language.Hebrew     => "simple"
    }
}
|
@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
|
||||||
|
/** The query parser functions that can be selected by name. */
sealed trait PgQueryParser {

  /** The function name; it is spliced verbatim into SQL by the repository. */
  def name: String
}

object PgQueryParser {

  case object ToTsQuery extends PgQueryParser {
    val name = "to_tsquery"
  }

  case object Plain extends PgQueryParser {
    val name = "plainto_tsquery"
  }

  case object Phrase extends PgQueryParser {
    val name = "phraseto_tsquery"
  }

  case object Websearch extends PgQueryParser {
    val name = "websearch_to_tsquery"
  }

  /** All parsers known to this enumeration. */
  val all: NonEmptyList[PgQueryParser] =
    NonEmptyList.of(ToTsQuery, Plain, Phrase, Websearch)

  /** Finds the parser with the given function name, comparing
    * case-insensitively.
    */
  def fromName(name: String): Either[String, PgQueryParser] =
    all.find(parser => parser.name.equalsIgnoreCase(name)) match {
      case Some(parser) => Right(parser)
      case None         => Left(s"Unknown pg query parser: $name")
    }

  /** Like [[fromName]], but fails via `sys.error` when the name is unknown. */
  def unsafeFromName(name: String): PgQueryParser =
    fromName(name) match {
      case Right(parser) => parser
      case Left(message) => sys.error(message)
    }
}
|
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
|
||||||
|
/** Settings of the PostgreSQL full-text search client.
  *
  * @param url
  *   the database url
  * @param user
  *   the database user
  * @param password
  *   the database password
  * @param pgConfigSelect
  *   per-language overrides; languages it is not defined for are left to
  *   the caller's fallback
  * @param pgQueryParser
  *   the parser function used to build queries
  * @param rankNormalization
  *   the normalization applied when ranking
  */
final case class PsqlConfig(
    url: LenientUri,
    user: String,
    password: Password,
    pgConfigSelect: PartialFunction[Language, String],
    pgQueryParser: PgQueryParser,
    rankNormalization: RankNormalization
)

object PsqlConfig {

  /** A config for the given connection with no language overrides, the
    * `Websearch` parser and `Mhd && Scale` rank normalization.
    */
  def defaults(url: LenientUri, user: String, password: Password): PsqlConfig =
    PsqlConfig(
      url = url,
      user = user,
      password = password,
      pgConfigSelect = PartialFunction.empty,
      pgQueryParser = PgQueryParser.Websearch,
      rankNormalization = RankNormalization.Mhd && RankNormalization.Scale
    )
}
|
@ -0,0 +1,155 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import scala.concurrent.ExecutionContext
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
import fs2.Stream
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.ftsclient._
|
||||||
|
import docspell.logging.Logger
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariDataSource
|
||||||
|
import doobie._
|
||||||
|
import doobie.hikari.HikariTransactor
|
||||||
|
import doobie.implicits._
|
||||||
|
|
||||||
|
/** [[FtsClient]] that executes every fulltext operation as a doobie program
  * through the given transactor.
  *
  * @param cfg
  *   supplies the query parser, the rank normalization and the per-language
  *   text search config selection passed on to `FtsRepository`
  * @param xa
  *   transactor used to run all database programs of this client
  */
final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
    extends FtsClient[F] {
  val engine = Ident.unsafe("postgres")

  val config = cfg
  private[ftspsql] val transactor = xa

  /** Number of records written per database transaction when indexing. */
  private[this] val chunkSize = 50

  private[this] val searchSummary =
    FtsRepository.searchSummary(cfg.pgQueryParser, cfg.rankNormalization) _
  private[this] val search =
    FtsRepository.search(cfg.pgQueryParser, cfg.rankNormalization) _

  private[this] val replaceChunk =
    FtsRepository.replaceChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
  private[this] val updateChunk =
    FtsRepository.updateChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _

  /** Returns the single migration (version 0) that runs [[DbMigration]]. */
  def initialize: F[List[FtsMigration[F]]] =
    Sync[F].pure(
      List(
        FtsMigration(
          0,
          engine,
          "initialize",
          DbMigration[F](cfg).run.as(FtsMigration.Result.workDone)
        )
      )
    )

  /** Migrations for a fresh start: reset everything, run the schema migration,
    * then request indexing of all data.
    *
    * NOTE(review): "schema" and "index all" both carry version 20; confirm
    * whether the consumer of this list requires distinct versions.
    */
  def initializeNew: List[FtsMigration[F]] =
    List(
      FtsMigration(
        10,
        engine,
        "reset",
        FtsRepository.resetAll.transact(xa).as(FtsMigration.Result.workDone)
      ),
      FtsMigration(
        20,
        engine,
        "schema",
        DbMigration[F](cfg).run.as(FtsMigration.Result.workDone)
      ),
      FtsMigration(20, engine, "index all", FtsMigration.Result.indexAll.pure[F])
    )

  /** Runs the summary query and the result query (each in its own transaction)
    * and sets `qtime` of the returned result to the elapsed time across both,
    * measured with `System.nanoTime`.
    */
  def search(q: FtsQuery): F[FtsResult] =
    for {
      startNanos <- Sync[F].delay(System.nanoTime())
      summary <- searchSummary(q).transact(xa)
      // the meaning of the `true` flag is defined by FtsRepository.search
      results <- search(q, true).transact(xa)
      endNanos <- Sync[F].delay(System.nanoTime())
      duration = Duration.nanos(endNanos - startNanos)
      res = SearchResult
        .toFtsResult(summary, results)
        .copy(qtime = duration)
    } yield res

  /** Adds the given data to the index via `FtsRepository.replaceChunk`, one
    * transaction per chunk.
    */
  def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
    storeChunked(logger, data, n => s"Add to fts index $n records")(replaceChunk)

  /** Applies the given data to the index via `FtsRepository.updateChunk`, one
    * transaction per chunk.
    */
  def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
    storeChunked(logger, data, n => s"Update fts index with $n records")(updateChunk)

  /** Sets the folder of an item in the index. */
  def updateFolder(
      logger: Logger[F],
      itemId: Ident,
      collective: Ident,
      folder: Option[Ident]
  ): F[Unit] =
    logger.debug(s"Update folder '${folder
        .map(_.id)}' in fts for collective ${collective.id} and item ${itemId.id}") *>
      FtsRepository.updateFolder(itemId, collective, folder).transact(xa).void

  /** Deletes index entries by item id. */
  def removeItem(logger: Logger[F], itemId: Ident): F[Unit] =
    logger.debug(s"Removing item from fts index: ${itemId.id}") *>
      FtsRepository.deleteByItemId(itemId).transact(xa).void

  /** Deletes index entries by attachment id. */
  def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] =
    logger.debug(s"Removing attachment from fts index: ${attachId.id}") *>
      FtsRepository.deleteByAttachId(attachId).transact(xa).void

  /** Deletes the whole index. */
  def clearAll(logger: Logger[F]): F[Unit] =
    logger.info("Deleting complete FTS index") *>
      FtsRepository.deleteAll.transact(xa).void

  /** Deletes the index entries of one collective. */
  def clear(logger: Logger[F], collective: Ident): F[Unit] =
    logger.info(s"Deleting index for collective ${collective.id}") *>
      FtsRepository.delete(collective).transact(xa).void

  /** Shared pipeline of `indexData` and `updateIndex`: converts the data to
    * records, groups them into chunks of `chunkSize`, logs a debug message per
    * chunk and runs `op` for each chunk in its own transaction.
    */
  private def storeChunked[A](
      logger: Logger[F],
      data: Stream[F, TextData],
      describe: Int => String
  )(op: fs2.Chunk[FtsRecord] => ConnectionIO[A]): F[Unit] =
    data
      .map(FtsRecord.fromTextData)
      .chunkN(chunkSize)
      .evalMap(chunk => logger.debug(describe(chunk.size)) *> op(chunk).transact(xa))
      .compile
      .drain
}
|
||||||
|
|
||||||
|
object PsqlFtsClient {

  /** Creates a client backed by its own Hikari data source.
    *
    * The data source is closed when the resource is released. It is configured
    * with url, user and password from `cfg` and the PostgreSQL driver class in
    * a suspended effect that runs after acquisition, so a failure while
    * configuring still goes through the resource's release.
    *
    * @param cfg
    *   connection details and fulltext settings
    * @param connectEC
    *   execution context handed to the `HikariTransactor`
    */
  def apply[F[_]: Async](
      cfg: PsqlConfig,
      connectEC: ExecutionContext
  ): Resource[F, PsqlFtsClient[F]] = {
    val acquire = Sync[F].delay(new HikariDataSource())
    val free: HikariDataSource => F[Unit] = ds => Sync[F].delay(ds.close())

    for {
      ds <- Resource.make(acquire)(free)
      // Mutating the data source is a side effect; suspend it instead of
      // running it while building a discarded pure value.
      _ <- Resource.eval(Sync[F].delay {
        ds.setJdbcUrl(cfg.url.asString)
        ds.setUsername(cfg.user)
        ds.setPassword(cfg.password.pass)
        ds.setDriverClassName("org.postgresql.Driver")
      })
      xa = HikariTransactor[F](ds, connectEC)

      pc = new PsqlFtsClient[F](cfg, xa)
    } yield pc
  }

  /** Creates a client on top of an existing transactor; nothing is acquired or
    * released here.
    */
  def fromTransactor[F[_]: Async](cfg: PsqlConfig, xa: Transactor[F]): PsqlFtsClient[F] =
    new PsqlFtsClient[F](cfg, xa)
}
|
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.Order
|
||||||
|
import cats.data.NonEmptySet
|
||||||
|
|
||||||
|
/** A non-empty set of rank normalization numbers. */
sealed trait RankNormalization { self =>

  /** The numbers that make up this normalization. */
  def value: NonEmptySet[Int]

  /** Combines both normalizations; the result holds the union of the numbers. */
  def &&(other: RankNormalization): RankNormalization = {
    val combined = self.value.union(other.value)
    new RankNormalization { val value = combined }
  }
}
|
||||||
|
|
||||||
|
object RankNormalization {
  // The numbers below are the normalization options described at
  // https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING

  case object IgnoreDocLength extends RankNormalization {
    val value = NonEmptySet.one(0)
  }
  case object LogDocLength extends RankNormalization {
    val value = NonEmptySet.one(1)
  }
  case object DocLength extends RankNormalization {
    val value = NonEmptySet.one(2)
  }
  case object Mhd extends RankNormalization {
    val value = NonEmptySet.one(4)
  }
  case object UniqueWords extends RankNormalization {
    val value = NonEmptySet.one(8)
  }
  case object LogUniqueWords extends RankNormalization {
    val value = NonEmptySet.one(16)
  }
  case object Scale extends RankNormalization {
    val value = NonEmptySet.one(32)
  }

  /** Looks up the predefined normalization whose value contains `n`; yields an
    * error message for unknown numbers.
    */
  def byNumber(n: Int): Either[String, RankNormalization] =
    all.find(_.value.contains(n)) match {
      case Some(found) => Right(found)
      case None        => Left(s"Unknown rank normalization number: $n")
    }

  /** Orders normalizations by the combined (reduced) value of their numbers. */
  implicit val order: Order[RankNormalization] =
    Order.by(rn => rn.value.reduce)

  // Must stay below `order`: building the set needs that instance at
  // initialization time.
  val all: NonEmptySet[RankNormalization] =
    NonEmptySet.of(
      IgnoreDocLength,
      LogDocLength,
      DocLength,
      Mhd,
      UniqueWords,
      LogUniqueWords,
      Scale
    )
}
|
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.ftsclient.FtsResult
|
||||||
|
import docspell.ftsclient.FtsResult.{ItemMatch, MatchData}
|
||||||
|
|
||||||
|
/** One row returned by a fulltext search.
  *
  * `attachId` distinguishes the two kinds of hits: when present the hit is
  * converted to attachment match data (using `attachName`), otherwise to item
  * match data. `highlight` optionally carries a text snippet for the hit.
  */
final case class SearchResult(
    id: Ident,
    itemId: Ident,
    collective: Ident,
    language: Language,
    attachId: Option[Ident],
    folderId: Option[Ident],
    attachName: Option[String],
    itemName: Option[String],
    rank: Double,
    highlight: Option[String]
)
|
||||||
|
|
||||||
|
object SearchResult {

  /** Converts the summary and the result rows into an [[FtsResult]].
    *
    * The query time is set to `Duration.zero`; callers fill it in afterwards.
    * The count is taken from the summary (narrowed to `Int`), and highlights
    * are keyed by the id of each row.
    */
  def toFtsResult(summary: SearchSummary, results: Vector[SearchResult]): FtsResult = {
    // Rows with an attachment id are attachment hits, all others item hits.
    def matchDataOf(row: SearchResult): MatchData =
      row.attachId.fold[MatchData](FtsResult.ItemData)(aId =>
        FtsResult.AttachmentData(aId, row.attachName.getOrElse(""))
      )

    val matches = results.map(row =>
      ItemMatch(row.id, row.itemId, row.collective, row.rank, matchDataOf(row))
    )
    val highlights = results.map(row => row.id -> row.highlight.toList).toMap

    FtsResult(
      Duration.zero,
      summary.count.toInt,
      summary.maxScore,
      highlights,
      matches.toList
    )
  }
}
|
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
/** Aggregate figures of a search: the number of matches and the highest score. */
final case class SearchSummary(count: Long, maxScore: Double)
|
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.logging.TestLoggingConfig
|
||||||
|
import docspell.logging.{Level, LogConfig}
|
||||||
|
|
||||||
|
import com.dimafeng.testcontainers.PostgreSQLContainer
|
||||||
|
import com.dimafeng.testcontainers.munit.TestContainerForAll
|
||||||
|
import doobie.implicits._
|
||||||
|
import munit.CatsEffectSuite
|
||||||
|
import org.testcontainers.utility.DockerImageName
|
||||||
|
|
||||||
|
/** Runs the schema migration against a PostgreSQL 14 container and checks that
  * the fulltext table exists and is empty afterwards.
  */
class MigrationTest
    extends CatsEffectSuite
    with PgFixtures
    with TestContainerForAll
    with TestLoggingConfig {
  override val containerDef: PostgreSQLContainer.Def =
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

  override def docspellLogConfig: LogConfig =
    LogConfig(Level.Debug, LogConfig.Format.Fancy)

  override def rootMinimumLevel = Level.Warn

  test("create schema") {
    withContainers { cnt =>
      val cfg = PsqlConfig.defaults(
        LenientUri.unsafe(cnt.jdbcUrl),
        cnt.username,
        Password(cnt.password)
      )
      val countRows =
        sql"SELECT count(*) FROM ${FtsRepository.table}".query[Int].unique

      DbMigration[IO](cfg).run
        .flatMap(_ => runQuery(cnt)(countRows))
        .map(n => assertEquals(n, 0))
    }
  }
}
|
@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import javax.sql.DataSource
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.syntax.all._
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
import docspell.ftsclient.TextData
|
||||||
|
import docspell.store.{JdbcConfig, StoreFixture}
|
||||||
|
|
||||||
|
import com.dimafeng.testcontainers.PostgreSQLContainer
|
||||||
|
import doobie._
|
||||||
|
import doobie.implicits._
|
||||||
|
|
||||||
|
/** Helpers and sample data for tests that run against a PostgreSQL container. */
trait PgFixtures {
  def ident(n: String): Ident = Ident.unsafe(n)

  /** Default fulltext config pointing at the container. */
  def psqlConfig(cnt: PostgreSQLContainer): PsqlConfig = {
    val url = LenientUri.unsafe(cnt.jdbcUrl)
    PsqlConfig.defaults(url, cnt.username, Password(cnt.password))
  }

  def jdbcConfig(cnt: PostgreSQLContainer): JdbcConfig = {
    val url = LenientUri.unsafe(cnt.jdbcUrl)
    JdbcConfig(url, cnt.username, cnt.password)
  }

  def dataSource(cnt: PostgreSQLContainer): Resource[IO, DataSource] =
    StoreFixture.dataSource(jdbcConfig(cnt))

  def transactor(cnt: PostgreSQLContainer): Resource[IO, Transactor[IO]] =
    dataSource(cnt).flatMap(ds => StoreFixture.makeXA(ds))

  /** A client on a fresh transactor; the schema migration is run before the
    * client is handed out.
    */
  def psqlFtsClient(cnt: PostgreSQLContainer): Resource[IO, PsqlFtsClient[IO]] =
    for {
      xa <- transactor(cnt)
      client = PsqlFtsClient.fromTransactor(psqlConfig(cnt), xa)
      _ <- Resource.eval(DbMigration[IO](client.config).run)
    } yield client

  /** Runs a single query on a transactor that lives only for this call. */
  def runQuery[A](cnt: PostgreSQLContainer)(q: ConnectionIO[A]): IO[A] =
    transactor(cnt).use(xa => q.transact(xa))

  /** Adds `exec` to run a query on the transactor of the implicit client. */
  implicit class QueryOps[A](self: ConnectionIO[A]) {
    def exec(implicit client: PsqlFtsClient[IO]): IO[A] =
      self.transact(client.transactor)
  }

  val collective1 = ident("coll1")
  val collective2 = ident("coll2")

  val itemData: TextData.Item =
    TextData.Item(
      item = ident("item-id-1"),
      collective = collective1,
      folder = None,
      name = "mydoc.pdf".some,
      notes = "my notes are these".some,
      language = Language.English
    )

  val attachData: TextData.Attachment =
    TextData.Attachment(
      item = ident("item-id-1"),
      attachId = ident("attach-id-1"),
      collective = collective1,
      folder = None,
      language = Language.English,
      name = "mydoc.pdf".some,
      text = "lorem ipsum dolores est".some
    )
}
|
@ -0,0 +1,151 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 Eike K. & Contributors
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
package docspell.ftspsql
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.syntax.all._
|
||||||
|
|
||||||
|
import docspell.ftsclient.{FtsQuery, TextData}
|
||||||
|
import docspell.logging.{Level, LogConfig, TestLoggingConfig}
|
||||||
|
|
||||||
|
import com.dimafeng.testcontainers.PostgreSQLContainer
|
||||||
|
import com.dimafeng.testcontainers.munit.TestContainerForAll
|
||||||
|
import doobie.implicits._
|
||||||
|
import munit.CatsEffectSuite
|
||||||
|
import org.testcontainers.utility.DockerImageName
|
||||||
|
|
||||||
|
/** Exercises [[PsqlFtsClient]] against a PostgreSQL 14 container: indexing,
  * clearing (all and per collective) and searching.
  */
class PsqlFtsClientTest
    extends CatsEffectSuite
    with PgFixtures
    with TestContainerForAll
    with TestLoggingConfig {
  override val containerDef: PostgreSQLContainer.Def =
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

  val logger = docspell.logging.getLogger[IO]

  private val table = FtsRepository.table

  override def docspellLogConfig: LogConfig =
    LogConfig(Level.Debug, LogConfig.Format.Fancy)

  override def rootMinimumLevel = Level.Warn

  test("insert data into index") {
    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        // Expects exactly two rows whose stored columns equal the given data.
        def verifyStored(item: TextData.Item, attach: TextData.Attachment) =
          for {
            rows <- sql"SELECT count(*) from $table".query[Int].unique.exec
            _ = assertEquals(rows, 2)
            storedItem <-
              sql"select item_name, item_notes from $table WHERE id = ${item.id}"
                .query[(Option[String], Option[String])]
                .unique
                .exec
            _ = assertEquals(storedItem, (item.name, item.notes))
            storedAttach <-
              sql"select attach_name, attach_content from $table where id = ${attach.id}"
                .query[(Option[String], Option[String])]
                .unique
                .exec
            _ = assertEquals(storedAttach, (attach.name, attach.text))
          } yield ()

        val itemWithoutNotes = itemData.copy(notes = None)
        val attachRenamed = attachData.copy(name = "ha.pdf".some)

        for {
          // indexing the same data twice must not add rows
          _ <- client.indexData(logger, itemData, attachData)
          _ <- verifyStored(itemData, attachData)
          _ <- client.indexData(logger, itemData, attachData)
          _ <- verifyStored(itemData, attachData)

          // indexing changed data must replace the stored values
          _ <- client.indexData(logger, itemWithoutNotes, attachRenamed)
          _ <- verifyStored(itemWithoutNotes, attachRenamed)
        } yield ()
      }
    }
  }

  test("clear index") {
    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        for {
          _ <- client.indexData(logger, itemData, attachData)
          _ <- client.clearAll(logger)
          remaining <- sql"select count(*) from $table".query[Int].unique.exec
          _ = assertEquals(remaining, 0)
        } yield ()
      }
    }
  }

  test("clear index by collective") {
    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        val otherItem =
          itemData.copy(collective = collective2, item = ident("item-id-2"))
        val otherAttach =
          attachData.copy(collective = collective2, item = ident("item-id-2"))

        for {
          _ <- client.indexData(logger, itemData, attachData, otherItem, otherAttach)
          before <- sql"select count(*) from $table".query[Int].unique.exec
          _ = assertEquals(before, 4)

          _ <- client.clear(logger, collective1)
          after <- sql"select count(*) from $table".query[Int].unique.exec
          _ = assertEquals(after, 2)
        } yield ()
      }
    }
  }

  test("search by query") {
    def query(s: String): FtsQuery =
      FtsQuery(
        q = s,
        collective = collective1,
        items = Set.empty,
        folders = Set.empty,
        limit = 10,
        offset = 0,
        highlight = FtsQuery.HighlightSetting.default
      )

    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        val otherItem =
          itemData.copy(collective = collective2, item = ident("item-id-2"))
        val otherAttach =
          attachData.copy(collective = collective2, item = ident("item-id-2"))

        for {
          _ <- client.indexData(logger, itemData, attachData, otherItem, otherAttach)

          noHit <- client.search(query("lorem uiaeduiae"))
          _ = assertEquals(noHit.count, 0)

          attachHit <- client.search(query("lorem"))
          _ = assertEquals(attachHit.count, 1)
          _ = assertEquals(attachHit.results.head.id, attachData.id)

          itemHit <- client.search(query("note"))
          _ = assertEquals(itemHit.count, 1)
          _ = assertEquals(itemHit.results.head.id, itemData.id)
        } yield ()
      }
    }
  }
}
|
@ -22,7 +22,7 @@ trait JsonCodec {
|
|||||||
new Encoder[TextData.Attachment] {
|
new Encoder[TextData.Attachment] {
|
||||||
final def apply(td: TextData.Attachment): Json = {
|
final def apply(td: TextData.Attachment): Json = {
|
||||||
val cnt =
|
val cnt =
|
||||||
(Field.contentField(td.lang).name, Json.fromString(td.text.getOrElse("")))
|
(Field.contentField(td.language).name, Json.fromString(td.text.getOrElse("")))
|
||||||
|
|
||||||
Json.fromFields(
|
Json.fromFields(
|
||||||
cnt :: List(
|
cnt :: List(
|
||||||
@ -165,7 +165,7 @@ trait JsonCodec {
|
|||||||
val setter = List(
|
val setter = List(
|
||||||
td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)),
|
td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)),
|
||||||
td.text.map(txt =>
|
td.text.map(txt =>
|
||||||
(Field.contentField(td.lang).name, Map("set" -> txt.asJson).asJson)
|
(Field.contentField(td.language).name, Map("set" -> txt.asJson).asJson)
|
||||||
)
|
)
|
||||||
).flatten
|
).flatten
|
||||||
Json.fromFields(
|
Json.fromFields(
|
||||||
|
@ -697,6 +697,9 @@ Docpell Update Check
|
|||||||
# Currently the SOLR search platform is supported.
|
# Currently the SOLR search platform is supported.
|
||||||
enabled = false
|
enabled = false
|
||||||
|
|
||||||
|
# Which backend to use, either solr or postgresql
|
||||||
|
backend = "solr"
|
||||||
|
|
||||||
# Configuration for the SOLR backend.
|
# Configuration for the SOLR backend.
|
||||||
solr = {
|
solr = {
|
||||||
# The URL to solr
|
# The URL to solr
|
||||||
@ -713,6 +716,43 @@ Docpell Update Check
|
|||||||
q-op = "OR"
|
q-op = "OR"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Configuration for PostgreSQL backend
|
||||||
|
postgresql = {
|
||||||
|
# Whether to reuse the default database connection for fulltext
|
||||||
|
# search. This only works if that database is PostgreSQL.
|
||||||
|
use-default-connection = false
|
||||||
|
|
||||||
|
# The database connection.
|
||||||
|
jdbc {
|
||||||
|
url = "jdbc:postgresql://server:5432/db"
|
||||||
|
user = "pguser"
|
||||||
|
password = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
# A mapping from a language to a postgres text search config. By
|
||||||
|
# default a language is mapped to a predefined config.
|
||||||
|
# PostgreSQL has predefined configs for some languages. This
|
||||||
|
# setting allows to create a custom text search config and
|
||||||
|
# define it here for some or all languages.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# { german = "my-german" }
|
||||||
|
#
|
||||||
|
# See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
|
||||||
|
pg-config = {
|
||||||
|
}
|
||||||
|
|
||||||
|
# Define which query parser to use.
|
||||||
|
#
|
||||||
|
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
|
||||||
|
pg-query-parser = "websearch_to_tsquery"
|
||||||
|
|
||||||
|
# Defines the normalization options applied to the ranking.
|
||||||
|
#
|
||||||
|
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
|
||||||
|
pg-rank-normalization = [ 4 ]
|
||||||
|
}
|
||||||
|
|
||||||
# Settings for running the index migration tasks
|
# Settings for running the index migration tasks
|
||||||
migration = {
|
migration = {
|
||||||
# Chunk size to use when indexing data from the database. This
|
# Chunk size to use when indexing data from the database. This
|
||||||
|
@ -13,6 +13,7 @@ import docspell.analysis.TextAnalysisConfig
|
|||||||
import docspell.analysis.classifier.TextClassifierConfig
|
import docspell.analysis.classifier.TextClassifierConfig
|
||||||
import docspell.backend.Config.Files
|
import docspell.backend.Config.Files
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
import docspell.config.{FtsType, PgFtsConfig}
|
||||||
import docspell.convert.ConvertConfig
|
import docspell.convert.ConvertConfig
|
||||||
import docspell.extract.ExtractConfig
|
import docspell.extract.ExtractConfig
|
||||||
import docspell.ftssolr.SolrConfig
|
import docspell.ftssolr.SolrConfig
|
||||||
@ -65,9 +66,25 @@ object Config {
|
|||||||
|
|
||||||
case class FullTextSearch(
|
case class FullTextSearch(
|
||||||
enabled: Boolean,
|
enabled: Boolean,
|
||||||
|
backend: FtsType,
|
||||||
migration: FullTextSearch.Migration,
|
migration: FullTextSearch.Migration,
|
||||||
solr: SolrConfig
|
solr: SolrConfig,
|
||||||
)
|
postgresql: PgFtsConfig
|
||||||
|
) {
|
||||||
|
|
||||||
|
def info: String =
|
||||||
|
if (!enabled) "Disabled."
|
||||||
|
else
|
||||||
|
backend match {
|
||||||
|
case FtsType.Solr =>
|
||||||
|
s"Solr(${solr.url.asString})"
|
||||||
|
case FtsType.PostgreSQL =>
|
||||||
|
if (postgresql.useDefaultConnection)
|
||||||
|
"PostgreSQL(default)"
|
||||||
|
else
|
||||||
|
s"PostgreSQL(${postgresql.jdbc.url.asString})"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
object FullTextSearch {
|
object FullTextSearch {
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ package docspell.joex
|
|||||||
import cats.effect.Async
|
import cats.effect.Async
|
||||||
|
|
||||||
import docspell.config.Implicits._
|
import docspell.config.Implicits._
|
||||||
import docspell.config.{ConfigFactory, Validation}
|
import docspell.config.{ConfigFactory, FtsType, Validation}
|
||||||
import docspell.scheduler.CountingScheme
|
import docspell.scheduler.CountingScheme
|
||||||
|
|
||||||
import emil.MailAddress
|
import emil.MailAddress
|
||||||
@ -53,6 +53,14 @@ object ConfigFile {
|
|||||||
cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
|
cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
|
||||||
"No subject given for enabled update check!"
|
"No subject given for enabled update check!"
|
||||||
),
|
),
|
||||||
Validation(cfg => cfg.files.validate.map(_ => cfg))
|
Validation(cfg => cfg.files.validate.map(_ => cfg)),
|
||||||
|
Validation.failWhen(
|
||||||
|
cfg =>
|
||||||
|
cfg.fullTextSearch.enabled &&
|
||||||
|
cfg.fullTextSearch.backend == FtsType.PostgreSQL &&
|
||||||
|
cfg.fullTextSearch.postgresql.useDefaultConnection &&
|
||||||
|
!cfg.jdbc.dbmsName.contains("postgresql"),
|
||||||
|
s"PostgreSQL defined fulltext search backend with default-connection, which is not a PostgreSQL connection!"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,8 @@ object JoexAppImpl extends MailAddressCodec {
|
|||||||
termSignal: SignallingRef[F, Boolean],
|
termSignal: SignallingRef[F, Boolean],
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
httpClient: Client[F],
|
httpClient: Client[F],
|
||||||
pubSub: PubSub[F]
|
pubSub: PubSub[F],
|
||||||
|
pools: Pools
|
||||||
): Resource[F, JoexApp[F]] =
|
): Resource[F, JoexApp[F]] =
|
||||||
for {
|
for {
|
||||||
joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
|
joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
|
||||||
@ -120,6 +121,7 @@ object JoexAppImpl extends MailAddressCodec {
|
|||||||
|
|
||||||
tasks <- JoexTasks.resource(
|
tasks <- JoexTasks.resource(
|
||||||
cfg,
|
cfg,
|
||||||
|
pools,
|
||||||
jobStoreModule,
|
jobStoreModule,
|
||||||
httpClient,
|
httpClient,
|
||||||
pubSubT,
|
pubSubT,
|
||||||
|
@ -52,7 +52,7 @@ object JoexServer {
|
|||||||
httpClient
|
httpClient
|
||||||
)(Topics.all.map(_.topic))
|
)(Topics.all.map(_.topic))
|
||||||
|
|
||||||
joexApp <- JoexAppImpl.create[F](cfg, signal, store, httpClient, pubSub)
|
joexApp <- JoexAppImpl.create[F](cfg, signal, store, httpClient, pubSub, pools)
|
||||||
|
|
||||||
httpApp = Router(
|
httpApp = Router(
|
||||||
"/internal" -> InternalHeader(settings.internalRouteKey) {
|
"/internal" -> InternalHeader(settings.internalRouteKey) {
|
||||||
|
@ -12,7 +12,9 @@ import docspell.analysis.TextAnalyser
|
|||||||
import docspell.backend.fulltext.CreateIndex
|
import docspell.backend.fulltext.CreateIndex
|
||||||
import docspell.backend.ops._
|
import docspell.backend.ops._
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
import docspell.config.FtsType
|
||||||
import docspell.ftsclient.FtsClient
|
import docspell.ftsclient.FtsClient
|
||||||
|
import docspell.ftspsql.PsqlFtsClient
|
||||||
import docspell.ftssolr.SolrFtsClient
|
import docspell.ftssolr.SolrFtsClient
|
||||||
import docspell.joex.analysis.RegexNerFile
|
import docspell.joex.analysis.RegexNerFile
|
||||||
import docspell.joex.emptytrash.EmptyTrashTask
|
import docspell.joex.emptytrash.EmptyTrashTask
|
||||||
@ -211,6 +213,7 @@ object JoexTasks {
|
|||||||
|
|
||||||
def resource[F[_]: Async](
|
def resource[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
|
pools: Pools,
|
||||||
jobStoreModule: JobStoreModuleBuilder.Module[F],
|
jobStoreModule: JobStoreModuleBuilder.Module[F],
|
||||||
httpClient: Client[F],
|
httpClient: Client[F],
|
||||||
pubSub: PubSubT[F],
|
pubSub: PubSubT[F],
|
||||||
@ -221,7 +224,7 @@ object JoexTasks {
|
|||||||
joex <- OJoex(pubSub)
|
joex <- OJoex(pubSub)
|
||||||
store = jobStoreModule.store
|
store = jobStoreModule.store
|
||||||
upload <- OUpload(store, jobStoreModule.jobs)
|
upload <- OUpload(store, jobStoreModule.jobs)
|
||||||
fts <- createFtsClient(cfg)(httpClient)
|
fts <- createFtsClient(cfg, pools, store, httpClient)
|
||||||
createIndex <- CreateIndex.resource(fts, store)
|
createIndex <- CreateIndex.resource(fts, store)
|
||||||
itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs)
|
itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs)
|
||||||
itemSearchOps <- OItemSearch(store)
|
itemSearchOps <- OItemSearch(store)
|
||||||
@ -249,8 +252,24 @@ object JoexTasks {
|
|||||||
)
|
)
|
||||||
|
|
||||||
private def createFtsClient[F[_]: Async](
|
private def createFtsClient[F[_]: Async](
|
||||||
cfg: Config
|
cfg: Config,
|
||||||
)(client: Client[F]): Resource[F, FtsClient[F]] =
|
pools: Pools,
|
||||||
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
|
store: Store[F],
|
||||||
|
client: Client[F]
|
||||||
|
): Resource[F, FtsClient[F]] =
|
||||||
|
if (cfg.fullTextSearch.enabled)
|
||||||
|
cfg.fullTextSearch.backend match {
|
||||||
|
case FtsType.Solr =>
|
||||||
|
SolrFtsClient(cfg.fullTextSearch.solr, client)
|
||||||
|
|
||||||
|
case FtsType.PostgreSQL =>
|
||||||
|
val psqlCfg = cfg.fullTextSearch.postgresql.toPsqlConfig(cfg.jdbc)
|
||||||
|
if (cfg.fullTextSearch.postgresql.useDefaultConnection)
|
||||||
|
Resource.pure[F, FtsClient[F]](
|
||||||
|
new PsqlFtsClient[F](psqlCfg, store.transactor)
|
||||||
|
)
|
||||||
|
else
|
||||||
|
PsqlFtsClient(psqlCfg, pools.connectEC)
|
||||||
|
}
|
||||||
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
|
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
|
||||||
}
|
}
|
||||||
|
@ -31,7 +31,7 @@ object Main extends IOApp {
|
|||||||
Option(System.getProperty("config.file")),
|
Option(System.getProperty("config.file")),
|
||||||
cfg.appId,
|
cfg.appId,
|
||||||
cfg.baseUrl,
|
cfg.baseUrl,
|
||||||
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
|
Some(cfg.fullTextSearch.info).filter(_ => cfg.fullTextSearch.enabled),
|
||||||
cfg.files.defaultStoreConfig
|
cfg.files.defaultStoreConfig
|
||||||
)
|
)
|
||||||
_ <- logger.info(s"\n${banner.render("***>")}")
|
_ <- logger.info(s"\n${banner.render("***>")}")
|
||||||
|
@ -47,9 +47,10 @@ object TextExtraction {
|
|||||||
ctx.args.meta.collective,
|
ctx.args.meta.collective,
|
||||||
ctx.args.meta.folderId,
|
ctx.args.meta.folderId,
|
||||||
item.item.name.some,
|
item.item.name.some,
|
||||||
None
|
None,
|
||||||
|
ctx.args.meta.language
|
||||||
)
|
)
|
||||||
_ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)).toSeq: _*)
|
_ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)): _*)
|
||||||
dur <- start
|
dur <- start
|
||||||
extractedTags = txt.flatMap(_.tags).distinct.toList
|
extractedTags = txt.flatMap(_.tags).distinct.toList
|
||||||
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}.")
|
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}.")
|
||||||
|
@ -289,6 +289,9 @@ docspell.server {
|
|||||||
# Currently the SOLR search platform is supported.
|
# Currently the SOLR search platform is supported.
|
||||||
enabled = false
|
enabled = false
|
||||||
|
|
||||||
|
# Which backend to use, either solr or postgresql
|
||||||
|
backend = "solr"
|
||||||
|
|
||||||
# Configuration for the SOLR backend.
|
# Configuration for the SOLR backend.
|
||||||
solr = {
|
solr = {
|
||||||
# The URL to solr
|
# The URL to solr
|
||||||
@ -304,6 +307,43 @@ docspell.server {
|
|||||||
# The default combiner for tokens. One of {AND, OR}.
|
# The default combiner for tokens. One of {AND, OR}.
|
||||||
q-op = "OR"
|
q-op = "OR"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Configuration for PostgreSQL backend
|
||||||
|
postgresql = {
|
||||||
|
# Whether to reuse the default database connection for fulltext
|
||||||
|
# search. This only works if that database is PostgreSQL.
|
||||||
|
use-default-connection = false
|
||||||
|
|
||||||
|
# The database connection.
|
||||||
|
jdbc {
|
||||||
|
url = "jdbc:postgresql://server:5432/db"
|
||||||
|
user = "pguser"
|
||||||
|
password = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
# A mapping from a language to a postgres text search config. By
|
||||||
|
# default a language is mapped to a predefined config.
|
||||||
|
# PostgreSQL has predefined configs for some languages. This
|
||||||
|
# setting allows to create a custom text search config and
|
||||||
|
# define it here for some or all languages.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
# { german = "my-german" }
|
||||||
|
#
|
||||||
|
# See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
|
||||||
|
pg-config = {
|
||||||
|
}
|
||||||
|
|
||||||
|
# Define which query parser to use.
|
||||||
|
#
|
||||||
|
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
|
||||||
|
pg-query-parser = "websearch_to_tsquery"
|
||||||
|
|
||||||
|
# Defines the normalization options applied to the ranking.
|
||||||
|
#
|
||||||
|
# https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
|
||||||
|
pg-rank-normalization = [ 4 ]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Configuration for the backend.
|
# Configuration for the backend.
|
||||||
|
@ -9,6 +9,7 @@ package docspell.restserver
|
|||||||
import docspell.backend.auth.Login
|
import docspell.backend.auth.Login
|
||||||
import docspell.backend.{Config => BackendConfig}
|
import docspell.backend.{Config => BackendConfig}
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
|
import docspell.config.{FtsType, PgFtsConfig}
|
||||||
import docspell.ftssolr.SolrConfig
|
import docspell.ftssolr.SolrConfig
|
||||||
import docspell.logging.LogConfig
|
import docspell.logging.LogConfig
|
||||||
import docspell.oidc.ProviderConfig
|
import docspell.oidc.ProviderConfig
|
||||||
@ -92,7 +93,26 @@ object Config {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
|
case class FullTextSearch(
|
||||||
|
enabled: Boolean,
|
||||||
|
backend: FtsType,
|
||||||
|
solr: SolrConfig,
|
||||||
|
postgresql: PgFtsConfig
|
||||||
|
) {
|
||||||
|
|
||||||
|
def info: String =
|
||||||
|
if (!enabled) "Disabled."
|
||||||
|
else
|
||||||
|
backend match {
|
||||||
|
case FtsType.Solr =>
|
||||||
|
s"Solr(${solr.url.asString})"
|
||||||
|
case FtsType.PostgreSQL =>
|
||||||
|
if (postgresql.useDefaultConnection)
|
||||||
|
"PostgreSQL(default)"
|
||||||
|
else
|
||||||
|
s"PostgreSQL(${postgresql.jdbc.url.asString})"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
object FullTextSearch {}
|
object FullTextSearch {}
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ import cats.effect.Async
|
|||||||
|
|
||||||
import docspell.backend.signup.{Config => SignupConfig}
|
import docspell.backend.signup.{Config => SignupConfig}
|
||||||
import docspell.config.Implicits._
|
import docspell.config.Implicits._
|
||||||
import docspell.config.{ConfigFactory, Validation}
|
import docspell.config.{ConfigFactory, FtsType, Validation}
|
||||||
import docspell.oidc.{ProviderConfig, SignatureAlgo}
|
import docspell.oidc.{ProviderConfig, SignatureAlgo}
|
||||||
import docspell.restserver.auth.OpenId
|
import docspell.restserver.auth.OpenId
|
||||||
|
|
||||||
@ -106,4 +106,15 @@ object ConfigFile {
|
|||||||
|
|
||||||
def filesValidate: Validation[Config] =
|
def filesValidate: Validation[Config] =
|
||||||
Validation(cfg => cfg.backend.files.validate.map(_ => cfg))
|
Validation(cfg => cfg.backend.files.validate.map(_ => cfg))
|
||||||
|
|
||||||
|
def postgresFtsValidate: Validation[Config] =
|
||||||
|
Validation.failWhen(
|
||||||
|
cfg =>
|
||||||
|
cfg.fullTextSearch.enabled &&
|
||||||
|
cfg.fullTextSearch.backend == FtsType.PostgreSQL &&
|
||||||
|
cfg.fullTextSearch.postgresql.useDefaultConnection &&
|
||||||
|
!cfg.backend.jdbc.dbmsName.contains("postgresql"),
|
||||||
|
s"PostgreSQL defined fulltext search backend with default-connection, which is not a PostgreSQL connection!"
|
||||||
|
)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ object Main extends IOApp {
|
|||||||
Option(System.getProperty("config.file")),
|
Option(System.getProperty("config.file")),
|
||||||
cfg.appId,
|
cfg.appId,
|
||||||
cfg.baseUrl,
|
cfg.baseUrl,
|
||||||
Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
|
Some(cfg.fullTextSearch.info).filter(_ => cfg.fullTextSearch.enabled),
|
||||||
cfg.backend.files.defaultStoreConfig
|
cfg.backend.files.defaultStoreConfig
|
||||||
)
|
)
|
||||||
_ <- logger.info(s"\n${banner.render("***>")}")
|
_ <- logger.info(s"\n${banner.render("***>")}")
|
||||||
|
@ -12,7 +12,10 @@ import fs2.concurrent.Topic
|
|||||||
|
|
||||||
import docspell.backend.BackendApp
|
import docspell.backend.BackendApp
|
||||||
import docspell.backend.auth.{AuthToken, ShareToken}
|
import docspell.backend.auth.{AuthToken, ShareToken}
|
||||||
|
import docspell.common.Pools
|
||||||
|
import docspell.config.FtsType
|
||||||
import docspell.ftsclient.FtsClient
|
import docspell.ftsclient.FtsClient
|
||||||
|
import docspell.ftspsql.PsqlFtsClient
|
||||||
import docspell.ftssolr.SolrFtsClient
|
import docspell.ftssolr.SolrFtsClient
|
||||||
import docspell.notification.api.NotificationModule
|
import docspell.notification.api.NotificationModule
|
||||||
import docspell.notification.impl.NotificationModuleImpl
|
import docspell.notification.impl.NotificationModuleImpl
|
||||||
@ -155,6 +158,7 @@ object RestAppImpl {
|
|||||||
|
|
||||||
def create[F[_]: Async](
|
def create[F[_]: Async](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
|
pools: Pools,
|
||||||
store: Store[F],
|
store: Store[F],
|
||||||
httpClient: Client[F],
|
httpClient: Client[F],
|
||||||
pubSub: PubSub[F],
|
pubSub: PubSub[F],
|
||||||
@ -163,7 +167,7 @@ object RestAppImpl {
|
|||||||
val logger = docspell.logging.getLogger[F](s"restserver-${cfg.appId.id}")
|
val logger = docspell.logging.getLogger[F](s"restserver-${cfg.appId.id}")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
ftsClient <- createFtsClient(cfg)(httpClient)
|
ftsClient <- createFtsClient(cfg, pools, store, httpClient)
|
||||||
pubSubT = PubSubT(pubSub, logger)
|
pubSubT = PubSubT(pubSub, logger)
|
||||||
javaEmil = JavaMailEmil(cfg.backend.mailSettings)
|
javaEmil = JavaMailEmil(cfg.backend.mailSettings)
|
||||||
notificationMod <- Resource.eval(
|
notificationMod <- Resource.eval(
|
||||||
@ -188,8 +192,25 @@ object RestAppImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private def createFtsClient[F[_]: Async](
|
private def createFtsClient[F[_]: Async](
|
||||||
cfg: Config
|
cfg: Config,
|
||||||
)(client: Client[F]): Resource[F, FtsClient[F]] =
|
pools: Pools,
|
||||||
if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
|
store: Store[F],
|
||||||
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
|
client: Client[F]
|
||||||
|
): Resource[F, FtsClient[F]] =
|
||||||
|
if (cfg.fullTextSearch.enabled)
|
||||||
|
cfg.fullTextSearch.backend match {
|
||||||
|
case FtsType.Solr =>
|
||||||
|
SolrFtsClient(cfg.fullTextSearch.solr, client)
|
||||||
|
|
||||||
|
case FtsType.PostgreSQL =>
|
||||||
|
val psqlCfg = cfg.fullTextSearch.postgresql.toPsqlConfig(cfg.backend.jdbc)
|
||||||
|
if (cfg.fullTextSearch.postgresql.useDefaultConnection)
|
||||||
|
Resource.pure[F, FtsClient[F]](
|
||||||
|
new PsqlFtsClient[F](psqlCfg, store.transactor)
|
||||||
|
)
|
||||||
|
else
|
||||||
|
PsqlFtsClient(psqlCfg, pools.connectEC)
|
||||||
|
}
|
||||||
|
else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -88,7 +88,7 @@ object RestServer {
|
|||||||
store,
|
store,
|
||||||
httpClient
|
httpClient
|
||||||
)(Topics.all.map(_.topic))
|
)(Topics.all.map(_.topic))
|
||||||
restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic)
|
restApp <- RestAppImpl.create[F](cfg, pools, store, httpClient, pubSub, wsTopic)
|
||||||
} yield (restApp, pubSub, setting)
|
} yield (restApp, pubSub, setting)
|
||||||
|
|
||||||
def createHttpApp[F[_]: Async](
|
def createHttpApp[F[_]: Async](
|
||||||
|
@ -34,6 +34,8 @@ trait Store[F[_]] {
|
|||||||
): FileRepository[F]
|
): FileRepository[F]
|
||||||
|
|
||||||
def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult]
|
def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult]
|
||||||
|
|
||||||
|
def transactor: Transactor[F]
|
||||||
}
|
}
|
||||||
|
|
||||||
object Store {
|
object Store {
|
||||||
|
@ -24,8 +24,9 @@ final class StoreImpl[F[_]: Async](
|
|||||||
val fileRepo: FileRepository[F],
|
val fileRepo: FileRepository[F],
|
||||||
jdbc: JdbcConfig,
|
jdbc: JdbcConfig,
|
||||||
ds: DataSource,
|
ds: DataSource,
|
||||||
xa: Transactor[F]
|
val transactor: Transactor[F]
|
||||||
) extends Store[F] {
|
) extends Store[F] {
|
||||||
|
private[this] val xa = transactor
|
||||||
|
|
||||||
def createFileRepository(
|
def createFileRepository(
|
||||||
cfg: FileRepositoryConfig,
|
cfg: FileRepositoryConfig,
|
||||||
|
@ -576,7 +576,8 @@ object QItem {
|
|||||||
collective: Ident,
|
collective: Ident,
|
||||||
folder: Option[Ident],
|
folder: Option[Ident],
|
||||||
name: String,
|
name: String,
|
||||||
notes: Option[String]
|
notes: Option[String],
|
||||||
|
language: Language
|
||||||
)
|
)
|
||||||
def allNameAndNotes(
|
def allNameAndNotes(
|
||||||
coll: Option[Ident],
|
coll: Option[Ident],
|
||||||
@ -584,10 +585,11 @@ object QItem {
|
|||||||
chunkSize: Int
|
chunkSize: Int
|
||||||
): Stream[ConnectionIO, NameAndNotes] = {
|
): Stream[ConnectionIO, NameAndNotes] = {
|
||||||
val i = RItem.as("i")
|
val i = RItem.as("i")
|
||||||
|
val c = RCollective.as("c")
|
||||||
|
|
||||||
Select(
|
Select(
|
||||||
select(i.id, i.cid, i.folder, i.name, i.notes),
|
select(i.id, i.cid, i.folder, i.name, i.notes, c.language),
|
||||||
from(i)
|
from(i).innerJoin(c, c.id === i.cid)
|
||||||
).where(
|
).where(
|
||||||
i.state.in(ItemState.validStates) &&?
|
i.state.in(ItemState.validStates) &&?
|
||||||
itemIds.map(ids => i.id.in(ids)) &&?
|
itemIds.map(ids => i.id.in(ids)) &&?
|
||||||
|
@ -236,8 +236,18 @@ object RAttachment {
|
|||||||
n3 <- DML.delete(T, T.id === attachId)
|
n3 <- DML.delete(T, T.id === attachId)
|
||||||
} yield n0 + n1 + n2 + n3
|
} yield n0 + n1 + n2 + n3
|
||||||
|
|
||||||
def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] =
|
def findItemAndLanguage(
|
||||||
Select(T.itemId.s, from(T), T.id === attachId).build.query[Ident].option
|
attachId: Ident
|
||||||
|
): ConnectionIO[Option[(Ident, Option[Language])]] = {
|
||||||
|
val a = RAttachment.as("a")
|
||||||
|
val m = RAttachmentMeta.as("m")
|
||||||
|
|
||||||
|
Select(
|
||||||
|
select(a.itemId, m.language),
|
||||||
|
from(a).leftJoin(m, m.id === a.id),
|
||||||
|
a.id === attachId
|
||||||
|
).build.query[(Ident, Option[Language])].option
|
||||||
|
}
|
||||||
|
|
||||||
def findAll(
|
def findAll(
|
||||||
coll: Option[Ident],
|
coll: Option[Ident],
|
||||||
|
@ -23,7 +23,7 @@ class PostgresqlMigrateTest
|
|||||||
with TestContainerForAll
|
with TestContainerForAll
|
||||||
with TestLoggingConfig {
|
with TestLoggingConfig {
|
||||||
override val containerDef: PostgreSQLContainer.Def =
|
override val containerDef: PostgreSQLContainer.Def =
|
||||||
PostgreSQLContainer.Def(DockerImageName.parse("postgres:13"))
|
PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))
|
||||||
|
|
||||||
test("postgres empty schema migration") {
|
test("postgres empty schema migration") {
|
||||||
assume(Docker.existsUnsafe, "docker doesn't exist!")
|
assume(Docker.existsUnsafe, "docker doesn't exist!")
|
||||||
|
@ -4,6 +4,11 @@ let
|
|||||||
full-text-search = {
|
full-text-search = {
|
||||||
enabled = true;
|
enabled = true;
|
||||||
solr.url = "http://localhost:${toString config.services.solr.port}/solr/docspell";
|
solr.url = "http://localhost:${toString config.services.solr.port}/solr/docspell";
|
||||||
|
postgresql = {
|
||||||
|
pg-config = {
|
||||||
|
"german" = "my-germam";
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
|
@ -213,6 +213,7 @@ Docpell Update Check
|
|||||||
};
|
};
|
||||||
full-text-search = {
|
full-text-search = {
|
||||||
enabled = false;
|
enabled = false;
|
||||||
|
backend = "solr";
|
||||||
solr = {
|
solr = {
|
||||||
url = "http://localhost:8983/solr/docspell";
|
url = "http://localhost:8983/solr/docspell";
|
||||||
commit-within = 1000;
|
commit-within = 1000;
|
||||||
@ -220,6 +221,17 @@ Docpell Update Check
|
|||||||
def-type = "lucene";
|
def-type = "lucene";
|
||||||
q-op = "OR";
|
q-op = "OR";
|
||||||
};
|
};
|
||||||
|
postgresql = {
|
||||||
|
use-default-connection = false;
|
||||||
|
jdbc = {
|
||||||
|
url = "jdbc:postgresql://server:5432/db";
|
||||||
|
user = "pguser";
|
||||||
|
password = "";
|
||||||
|
};
|
||||||
|
pg-config = {};
|
||||||
|
pg-query-parser = "websearch_to_tsquery";
|
||||||
|
pg-rank-normalization = [ 4 ];
|
||||||
|
};
|
||||||
migration = {
|
migration = {
|
||||||
index-all-chunk = 10;
|
index-all-chunk = 10;
|
||||||
};
|
};
|
||||||
@ -1371,6 +1383,12 @@ in {
|
|||||||
Currently the SOLR search platform is supported.
|
Currently the SOLR search platform is supported.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
backend = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.full-text-search.backend;
|
||||||
|
description = "The backend to use, either solr or postgresql";
|
||||||
|
};
|
||||||
|
|
||||||
solr = mkOption {
|
solr = mkOption {
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
@ -1408,6 +1426,61 @@ in {
|
|||||||
default = defaults.full-text-search.solr;
|
default = defaults.full-text-search.solr;
|
||||||
description = "Configuration for the SOLR backend.";
|
description = "Configuration for the SOLR backend.";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
postgresql = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
use-default-connection = mkOption {
|
||||||
|
type = types.bool;
|
||||||
|
default = defaults.full-text-search.postgresql.use-default-connection;
|
||||||
|
description = "Whether to use the primary db connection.";
|
||||||
|
};
|
||||||
|
jdbc = mkOption {
|
||||||
|
type = types.submodule ({
|
||||||
|
options = {
|
||||||
|
url = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.jdbc.url;
|
||||||
|
description = ''
|
||||||
|
The URL to the database.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
user = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.jdbc.user;
|
||||||
|
description = "The user name to connect to the database.";
|
||||||
|
};
|
||||||
|
password = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.jdbc.password;
|
||||||
|
description = "The password to connect to the database.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.full-text-search.postgresql.jdbc;
|
||||||
|
description = "Database connection settings";
|
||||||
|
};
|
||||||
|
pg-config = mkOption {
|
||||||
|
type = types.attrs;
|
||||||
|
default = defaults.full-text-search.postgresql.pg-config;
|
||||||
|
description = "";
|
||||||
|
};
|
||||||
|
pg-query-parser = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.full-text-search.postgresql.pg-query-parser;
|
||||||
|
description = "";
|
||||||
|
};
|
||||||
|
pg-rank-normalization = mkOption {
|
||||||
|
type = types.listOf types.int;
|
||||||
|
default = defaults.full-text-search.postgresql.pg-rank-normalization;
|
||||||
|
description = "";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.full-text-search.postgresql;
|
||||||
|
description = "PostgreSQL for fulltext search";
|
||||||
|
};
|
||||||
|
|
||||||
migration = mkOption {
|
migration = mkOption {
|
||||||
type = types.submodule({
|
type = types.submodule({
|
||||||
options = {
|
options = {
|
||||||
|
@ -62,6 +62,17 @@ let
|
|||||||
def-type = "lucene";
|
def-type = "lucene";
|
||||||
q-op = "OR";
|
q-op = "OR";
|
||||||
};
|
};
|
||||||
|
postgresql = {
|
||||||
|
use-default-connection = false;
|
||||||
|
jdbc = {
|
||||||
|
url = "jdbc:postgresql://server:5432/db";
|
||||||
|
user = "pguser";
|
||||||
|
password = "";
|
||||||
|
};
|
||||||
|
pg-config = {};
|
||||||
|
pg-query-parser = "websearch_to_tsquery";
|
||||||
|
pg-rank-normalization = [ 4 ];
|
||||||
|
};
|
||||||
};
|
};
|
||||||
auth = {
|
auth = {
|
||||||
server-secret = "hex:caffee";
|
server-secret = "hex:caffee";
|
||||||
@ -575,6 +586,60 @@ in {
|
|||||||
default = defaults.full-text-search.solr;
|
default = defaults.full-text-search.solr;
|
||||||
description = "Configuration for the SOLR backend.";
|
description = "Configuration for the SOLR backend.";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
postgresql = mkOption {
|
||||||
|
type = types.submodule({
|
||||||
|
options = {
|
||||||
|
use-default-connection = mkOption {
|
||||||
|
type = types.bool;
|
||||||
|
default = defaults.full-text-search.postgresql.use-default-connection;
|
||||||
|
description = "Whether to use the primary db connection.";
|
||||||
|
};
|
||||||
|
jdbc = mkOption {
|
||||||
|
type = types.submodule ({
|
||||||
|
options = {
|
||||||
|
url = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.jdbc.url;
|
||||||
|
description = ''
|
||||||
|
The URL to the database.
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
user = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.jdbc.user;
|
||||||
|
description = "The user name to connect to the database.";
|
||||||
|
};
|
||||||
|
password = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.jdbc.password;
|
||||||
|
description = "The password to connect to the database.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.full-text-search.postgresql.jdbc;
|
||||||
|
description = "Database connection settings";
|
||||||
|
};
|
||||||
|
pg-config = mkOption {
|
||||||
|
type = types.attrs;
|
||||||
|
default = defaults.full-text-search.postgresql.pg-config;
|
||||||
|
description = "";
|
||||||
|
};
|
||||||
|
pg-query-parser = mkOption {
|
||||||
|
type = types.str;
|
||||||
|
default = defaults.full-text-search.postgresql.pg-query-parser;
|
||||||
|
description = "";
|
||||||
|
};
|
||||||
|
pg-rank-normalization = mkOption {
|
||||||
|
type = types.listOf types.int;
|
||||||
|
default = defaults.full-text-search.postgresql.pg-rank-normalization;
|
||||||
|
description = "";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
});
|
||||||
|
default = defaults.full-text-search.postgresql;
|
||||||
|
description = "PostgreSQL for fulltext search";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
default = defaults.full-text-search;
|
default = defaults.full-text-search;
|
||||||
|
@ -59,7 +59,7 @@ via the header `Docspell-Share-Auth`.
|
|||||||
|
|
||||||
Docspell can be configured to be a relying party for OpenID Connect.
|
Docspell can be configured to be a relying party for OpenID Connect.
|
||||||
Please see [the config
|
Please see [the config
|
||||||
section](@/docs/configure/_index.md#openid-connect-oauth2) for
|
section](@/docs/configure/authentication.md#openid-connect-oauth2) for
|
||||||
details.
|
details.
|
||||||
|
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ $ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/a
|
|||||||
```
|
```
|
||||||
|
|
||||||
To enable these endpoints, you must provide a secret in the
|
To enable these endpoints, you must provide a secret in the
|
||||||
[configuration](@/docs/configure/_index.md#admin-endpoint).
|
[configuration](@/docs/configure/admin-endpoint.md).
|
||||||
|
|
||||||
## Live Api
|
## Live Api
|
||||||
|
|
||||||
|
@ -163,7 +163,7 @@ on the same host or network).
|
|||||||
|
|
||||||
The endpoint is disabled by default, an admin must change the
|
The endpoint is disabled by default, an admin must change the
|
||||||
`docspell.server.integration-endpoint.enabled` flag to `true` in the
|
`docspell.server.integration-endpoint.enabled` flag to `true` in the
|
||||||
[configuration file](@/docs/configure/_index.md#rest-server).
|
[configuration file](@/docs/configure/main.md#rest-server).
|
||||||
|
|
||||||
If queried by a `GET` request, it returns whether it is enabled and
|
If queried by a `GET` request, it returns whether it is enabled and
|
||||||
the collective exists.
|
the collective exists.
|
||||||
|
@ -3,803 +3,9 @@ title = "Configuration"
|
|||||||
insert_anchor_links = "right"
|
insert_anchor_links = "right"
|
||||||
description = "Describes the configuration file and shows all default settings."
|
description = "Describes the configuration file and shows all default settings."
|
||||||
weight = 40
|
weight = 40
|
||||||
template = "docs.html"
|
template = "pages.html"
|
||||||
|
sort_by = "weight"
|
||||||
|
redirect_to = "docs/configure/main"
|
||||||
+++
|
+++
|
||||||
|
|
||||||
# Configuration
|
No content here.
|
||||||
|
|
||||||
Docspell's executables (restserver and joex) can take one argument – a
|
|
||||||
configuration file. If that is not given, the defaults are used,
|
|
||||||
overridden by environment variables. A config file overrides default
|
|
||||||
values, so only values that differ from the defaults are necessary.
|
|
||||||
The complete default options and their documentation are at the end of
|
|
||||||
this page.
|
|
||||||
|
|
||||||
Besides the config file, another way is to provide individual settings
|
|
||||||
via key-value pairs to the executable by the `-D` option. For example
|
|
||||||
to override only `base-url` you could add the argument
|
|
||||||
`-Ddocspell.server.base-url=…` to the command. Multiple options are
|
|
||||||
possible. For more than few values this is very tedious, obviously, so
|
|
||||||
the recommended way is to maintain a config file. If these options
|
|
||||||
*and* a file is provided, then any setting given via the `-D…` option
|
|
||||||
overrides the same setting from the config file.
|
|
||||||
|
|
||||||
At last, it is possible to configure docspell via environment
|
|
||||||
variables if there is no config file supplied (if a config file *is*
|
|
||||||
supplied, it is always preferred). Note that this approach is limited,
|
|
||||||
as arrays are not supported. A list of environment variables can be
|
|
||||||
found at the [end of this page](#environment-variables). The
|
|
||||||
environment variable name follows the corresponding config key - where
|
|
||||||
dots are replaced by underscores and dashes are replaced by two
|
|
||||||
underscores. For example, the config key `docspell.server.app-name`
|
|
||||||
can be defined as env variable `DOCSPELL_SERVER_APP__NAME`.
|
|
||||||
|
|
||||||
It is also possible to specify environment variables inside a config
|
|
||||||
file (to get a mix of both) - please see the [documentation of the
|
|
||||||
config library](https://github.com/lightbend/config#standard-behavior)
|
|
||||||
for more on this.
|
|
||||||
|
|
||||||
# File Format
|
|
||||||
|
|
||||||
The format of the configuration files can be
|
|
||||||
[HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation),
|
|
||||||
JSON or what this [config
|
|
||||||
library](https://github.com/lightbend/config) understands. The default
|
|
||||||
values below are in HOCON format, which is recommended, since it
|
|
||||||
allows comments and has some [advanced
|
|
||||||
features](https://github.com/lightbend/config#features-of-hocon).
|
|
||||||
Please also see their documentation for more details.
|
|
||||||
|
|
||||||
A short description (please check the links for better understanding):
|
|
||||||
The config consists of key-value pairs and can be written in a
|
|
||||||
JSON-like format (called HOCON). Keys are organized in trees, and a
|
|
||||||
key defines a full path into the tree. There are two ways:
|
|
||||||
|
|
||||||
```
|
|
||||||
a.b.c.d=15
|
|
||||||
```
|
|
||||||
|
|
||||||
or
|
|
||||||
|
|
||||||
```
|
|
||||||
a {
|
|
||||||
b {
|
|
||||||
c {
|
|
||||||
d = 15
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Both are exactly the same and these forms are both used at the same
|
|
||||||
time. Usually the braces approach is used to group some more settings,
|
|
||||||
for better readability.
|
|
||||||
|
|
||||||
Strings that contain "not-so-common" characters should be enclosed in
|
|
||||||
quotes. It is possible to define values at the top of the file and
|
|
||||||
reuse them on different locations via the `${full.path.to.key}`
|
|
||||||
syntax. When using these variables, they *must not* be enclosed in
|
|
||||||
quotes.
|
|
||||||
|
|
||||||
|
|
||||||
# Important Config Options
|
|
||||||
|
|
||||||
The configuration of both components uses separate namespaces. The
|
|
||||||
configuration for the REST server is below `docspell.server`, while
|
|
||||||
the one for joex is below `docspell.joex`.
|
|
||||||
|
|
||||||
You can therefore use two separate config files or one single file
|
|
||||||
containing both namespaces.
|
|
||||||
|
|
||||||
## JDBC
|
|
||||||
|
|
||||||
This configures the connection to the database. This has to be
|
|
||||||
specified for the rest server and joex. By default, a H2 database in
|
|
||||||
the current `/tmp` directory is configured.
|
|
||||||
|
|
||||||
The config looks like this (both components):
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
docspell.joex.jdbc {
|
|
||||||
url = ...
|
|
||||||
user = ...
|
|
||||||
password = ...
|
|
||||||
}
|
|
||||||
|
|
||||||
docspell.server.backend.jdbc {
|
|
||||||
url = ...
|
|
||||||
user = ...
|
|
||||||
password = ...
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The `url` is the connection to the database. It must start with
|
|
||||||
`jdbc`, followed by name of the database. The rest is specific to the
|
|
||||||
database used: it is either a path to a file for H2 or a host/database
|
|
||||||
url for MariaDB and PostgreSQL.
|
|
||||||
|
|
||||||
When using H2, the user and password can be chosen freely on first
|
|
||||||
start, but must stay the same on subsequent starts. Usually, the user
|
|
||||||
is `sa` and the password is left empty. Additionally, the url must
|
|
||||||
include these options:
|
|
||||||
|
|
||||||
```
|
|
||||||
;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
|
|
||||||
```
|
|
||||||
|
|
||||||
### Examples
|
|
||||||
|
|
||||||
PostgreSQL:
|
|
||||||
```
|
|
||||||
url = "jdbc:postgresql://localhost:5432/docspelldb"
|
|
||||||
```
|
|
||||||
|
|
||||||
MariaDB:
|
|
||||||
```
|
|
||||||
url = "jdbc:mariadb://localhost:3306/docspelldb"
|
|
||||||
```
|
|
||||||
|
|
||||||
H2
|
|
||||||
```
|
|
||||||
url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Admin Endpoint
|
|
||||||
|
|
||||||
The admin endpoint defines some [routes](@/docs/api/intro.md#admin)
|
|
||||||
for administration tasks. This is disabled by default and can be
|
|
||||||
enabled by providing a secret:
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
...
|
|
||||||
admin-endpoint {
|
|
||||||
secret = "123"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
This secret must be provided to all requests to a `/api/v1/admin/`
|
|
||||||
endpoint.
|
|
||||||
|
|
||||||
The most convenient way to execute admin tasks is to use the
|
|
||||||
[cli](@/docs/tools/cli.md). You get a list of possible admin commands
|
|
||||||
via `dsc admin help`.
|
|
||||||
|
|
||||||
To see the output of the commands, there are these ways:
|
|
||||||
|
|
||||||
1. looking at the joex logs, which gives most details.
|
|
||||||
2. Use the job-queue page when logged in as `docspell-system`
|
|
||||||
3. setup a [webhook](@/docs/webapp/notification.md) to be notified
|
|
||||||
when a job finishes. This way you get a small message.
|
|
||||||
|
|
||||||
All admin tasks (and also some other system tasks) are run under the
|
|
||||||
account `docspell-system` (collective and user). You need to create
|
|
||||||
this account and setup the notification hooks in there - not in your
|
|
||||||
normal account.
|
|
||||||
|
|
||||||
|
|
||||||
## Full-Text Search: SOLR
|
|
||||||
|
|
||||||
[Apache SOLR](https://solr.apache.org) is used to provide the
|
|
||||||
full-text search. Both docspell components must provide the same
|
|
||||||
connection setup. This is defined in the `full-text-search.solr`
|
|
||||||
subsection:
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
...
|
|
||||||
full-text-search {
|
|
||||||
enabled = true
|
|
||||||
...
|
|
||||||
solr = {
|
|
||||||
url = "http://localhost:8983/solr/docspell"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The default configuration at the end of this page contains more
|
|
||||||
information about each setting.
|
|
||||||
|
|
||||||
The `solr.url` is the mandatory setting that you need to change to
|
|
||||||
point to your SOLR instance. Then you need to set the `enabled` flag
|
|
||||||
to `true`.
|
|
||||||
|
|
||||||
When installing docspell manually, just install solr and create a core
|
|
||||||
as described in the [solr
|
|
||||||
documentation](https://solr.apache.org/guide/8_4/installing-solr.html).
|
|
||||||
That will provide you with the connection url (the last part is the
|
|
||||||
core name). If Docspell detects an empty core it will run a schema
|
|
||||||
setup on start automatically.
|
|
||||||
|
|
||||||
The `full-text-search.solr` options are the same for joex and the
|
|
||||||
restserver.
|
|
||||||
|
|
||||||
There is an [admin route](@/docs/api/intro.md#admin) that allows to
|
|
||||||
re-create the entire index (for all collectives). This is possible via
|
|
||||||
a call:
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
$ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll
|
|
||||||
```
|
|
||||||
|
|
||||||
or use the [cli](@/docs/tools/cli.md):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
dsc admin -a test123 recreate-index
|
|
||||||
```
|
|
||||||
|
|
||||||
Here the `test123` is the key defined with `admin-endpoint.secret`. If
|
|
||||||
it is empty (the default), this call is disabled (all admin routes).
|
|
||||||
Otherwise, the POST request will submit a system task that is executed
|
|
||||||
by a joex instance eventually.
|
|
||||||
|
|
||||||
Using this endpoint, the entire index (including the schema) will be
|
|
||||||
re-created. This is sometimes necessary, for example if you upgrade
|
|
||||||
SOLR or delete the core to provide a new one (see
|
|
||||||
[here](https://solr.apache.org/guide/8_4/reindexing.html) for
|
|
||||||
details). Another way is to restart docspell (while clearing the
|
|
||||||
index). If docspell detects an empty index at startup, it will submit
|
|
||||||
a task to build the index automatically.
|
|
||||||
|
|
||||||
Note that a collective can also re-index their data using a similar
|
|
||||||
endpoint; but this is only deleting their data and doesn't do a full
|
|
||||||
re-index.
|
|
||||||
|
|
||||||
The solr index doesn't contain any new information, it can be
|
|
||||||
regenerated any time using the above REST call. Thus it doesn't need
|
|
||||||
to be backed up.
|
|
||||||
|
|
||||||
## Bind
|
|
||||||
|
|
||||||
The host and port the http server binds to. This applies to both
|
|
||||||
components. The joex component also exposes a small REST api to
|
|
||||||
inspect its state and notify the scheduler.
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
docspell.server.bind {
|
|
||||||
address = localhost
|
|
||||||
port = 7880
|
|
||||||
}
|
|
||||||
docspell.joex.bind {
|
|
||||||
address = localhost
|
|
||||||
port = 7878
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
By default, it binds to `localhost` and some predefined port. This
|
|
||||||
must be changed, if components are on different machines.
|
|
||||||
|
|
||||||
## Baseurl
|
|
||||||
|
|
||||||
The base url is an important setting that defines the http URL where
|
|
||||||
the corresponding component can be reached. It applies to both
|
|
||||||
components. For a joex component, the url must be resolvable from a
|
|
||||||
REST server component. The REST server also uses this url to create
|
|
||||||
absolute urls and to configure the authentication cookie.
|
|
||||||
|
|
||||||
By default it is built using the information from the `bind` setting,
|
|
||||||
which is `http://localhost:7880`.
|
|
||||||
|
|
||||||
If the default is not changed, docspell will use the request to
|
|
||||||
determine the base-url. It first inspects the `X-Forwarded-For` header
|
|
||||||
that is often used with reverse proxies. If that is not present, the
|
|
||||||
`Host` header of the request is used. However, if the `base-url`
|
|
||||||
setting is changed, then only this setting is used.
|
|
||||||
|
|
||||||
```
|
|
||||||
docspell.server.base-url = ...
|
|
||||||
docspell.joex.base-url = ...
|
|
||||||
```
|
|
||||||
|
|
||||||
If you are unsure, leave it at its default.
|
|
||||||
|
|
||||||
### Examples
|
|
||||||
|
|
||||||
```
|
|
||||||
docspell.server.base-url = "https://docspell.example.com"
|
|
||||||
docspell.joex.base-url = "http://192.168.101.10"
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## App-id
|
|
||||||
|
|
||||||
The `app-id` is the identifier of the corresponding instance. It *must
|
|
||||||
be unique* for all instances. By default the REST server uses `rest1`
|
|
||||||
and joex `joex1`. It is recommended to overwrite this setting to have
|
|
||||||
an explicit and stable identifier should multiple instances be
|
|
||||||
intended.
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
docspell.server.app-id = "rest1"
|
|
||||||
docspell.joex.app-id = "joex1"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Registration Options
|
|
||||||
|
|
||||||
This defines if and how new users can create accounts. There are 3
|
|
||||||
options:
|
|
||||||
|
|
||||||
- *closed* no new user can sign up
|
|
||||||
- *open* new users can sign up
|
|
||||||
- *invite* new users can sign up but require an invitation key
|
|
||||||
|
|
||||||
This applies only to the REST server component.
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
docspell.server.backend.signup {
|
|
||||||
mode = "open"
|
|
||||||
|
|
||||||
# If mode == 'invite', a password must be provided to generate
|
|
||||||
# invitation keys. It must not be empty.
|
|
||||||
new-invite-password = ""
|
|
||||||
|
|
||||||
# If mode == 'invite', this is the period an invitation token is
|
|
||||||
# considered valid.
|
|
||||||
invite-time = "3 days"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The mode `invite` is intended to open the application only to some
|
|
||||||
users. The admin can create these invitation keys and distribute them
|
|
||||||
to the desired people. For this, the `new-invite-password` must be
|
|
||||||
given. The idea is that only the person who installs docspell knows
|
|
||||||
this. If it is not set, then invitation won't work. New invitation
|
|
||||||
keys can be generated from within the web application or via REST
|
|
||||||
calls (using `curl`, for example).
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Authentication
|
|
||||||
|
|
||||||
Authentication works in two ways:
|
|
||||||
|
|
||||||
- with an account-name / password pair
|
|
||||||
- with an authentication token
|
|
||||||
|
|
||||||
The initial authentication must occur with an accountname/password
|
|
||||||
pair. This will generate an authentication token which is valid for
|
|
||||||
some time. Subsequent calls to secured routes can use this token. The
|
|
||||||
token can be given as a normal http header or via a cookie header.
|
|
||||||
|
|
||||||
These settings apply only to the REST server.
|
|
||||||
|
|
||||||
``` bash
|
|
||||||
docspell.server.auth {
|
|
||||||
server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
|
|
||||||
session-valid = "5 minutes"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The `server-secret` is used to sign the token. If multiple REST
|
|
||||||
servers are deployed, all must share the same server secret. Otherwise
|
|
||||||
tokens from one instance are not valid on another instance. The secret
|
|
||||||
can be given as Base64 encoded string or in hex form. Use the prefix
|
|
||||||
`hex:` and `b64:`, respectively. If no prefix is given, the UTF8 bytes
|
|
||||||
of the string are used.
|
|
||||||
|
|
||||||
The `session-valid` determines how long a token is valid. This can be
|
|
||||||
just some minutes, the web application obtains new ones
|
|
||||||
periodically. So a rather short time is recommended.
|
|
||||||
|
|
||||||
## OpenID Connect / OAuth2
|
|
||||||
|
|
||||||
You can integrate Docspell into your SSO solution via [OpenID
|
|
||||||
Connect](https://openid.net/connect/) (OIDC). This requires to set up
|
|
||||||
an OpenID Provider (OP) somewhere and to configure Docspell
|
|
||||||
accordingly to act as the relying party.
|
|
||||||
|
|
||||||
You can define multiple OPs to use. For some examples, please see the
|
|
||||||
default configuration file [below](#rest-server).
|
|
||||||
|
|
||||||
The configuration of a provider highly depends on how it is setup.
|
|
||||||
Here is an example for a setup using
|
|
||||||
[keycloak](https://www.keycloak.org):
|
|
||||||
|
|
||||||
``` conf
|
|
||||||
provider = {
|
|
||||||
provider-id = "keycloak",
|
|
||||||
client-id = "docspell",
|
|
||||||
client-secret = "example-secret-439e-bf06-911e4cdd56a6",
|
|
||||||
scope = "profile", # scope is required for OIDC
|
|
||||||
authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
|
|
||||||
token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
|
|
||||||
#User URL is not used when signature key is set.
|
|
||||||
#user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
|
|
||||||
sign-key = "b64:MII…ZYL09vAwLn8EAcSkCAwEAAQ==",
|
|
||||||
sig-algo = "RS512"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The `provider-id` is some identifier that is used in the URL to
|
|
||||||
distinguish between possibly multiple providers. The `client-id` and
|
|
||||||
`client-secret` define the two parameters required for a "confidential
|
|
||||||
client". The different URLs are best explained at the [keycloak
|
|
||||||
docs](https://www.keycloak.org/docs/latest/server_admin/).
|
|
||||||
They are available for all OPs in some way. The `user-url` is not
|
|
||||||
required, if the access token is already containing the necessary
|
|
||||||
data. If not, then docspell performs another request to the
|
|
||||||
`user-url`, which must be the user-info endpoint, to obtain the
|
|
||||||
required user data.
|
|
||||||
|
|
||||||
If the data is taken from the token directly and not via a request to
|
|
||||||
the user-info endpoint, then the token must be validated using the
|
|
||||||
given `sign-key` and `sig-algo`. These two values are then required to
|
|
||||||
specify! However, if the user-info endpoint should be used, then leave
|
|
||||||
the `sign-key` empty and specify the correct url in `user-url`. When
|
|
||||||
specifying the `sign-key` use a prefix of `b64:` if it is Base64
|
|
||||||
encoded or `hex:` if it is hex encoded. Otherwise the unicode bytes
|
|
||||||
are used, which is most probably not wanted for this setting.
|
|
||||||
|
|
||||||
Once the user is authenticated, docspell tries to setup an account and
|
|
||||||
does some checks. For this it must get to the username and collective
|
|
||||||
name somehow. How it does this, can be specified by the `user-key` and
|
|
||||||
`collective-key` settings:
|
|
||||||
|
|
||||||
``` conf
|
|
||||||
# The collective of the user is given in the access token as
|
|
||||||
# property `docspell_collective`.
|
|
||||||
collective-key = "lookup:docspell_collective",
|
|
||||||
# The username to use for the docspell account
|
|
||||||
user-key = "preferred_username"
|
|
||||||
```
|
|
||||||
|
|
||||||
The `user-key` is some string that is used to search the JSON response
|
|
||||||
from the OP for an object with that key. The search happens
|
|
||||||
recursively, so the field can be in a nested object. The found value
|
|
||||||
is used as the user name. Keycloak transmits the `preferred_username`
|
|
||||||
when asking for the `profile` scope. This can be used as the user
|
|
||||||
name.
|
|
||||||
|
|
||||||
The collective name can be obtained by different ways. For example,
|
|
||||||
you can instruct your OP (like keycloak) to provide a collective name
|
|
||||||
in the token and/or user-info responses. If you do this, then use the
|
|
||||||
`lookup:` prefix as in the example above. This instructs docspell to
|
|
||||||
search for a value the same way as the `user-key`. You can also set a
|
|
||||||
fixed collective, using `fixed:` prefix; in this case all users are in
|
|
||||||
the same collective! A third option is to prefix it with `account:` -
|
|
||||||
then the value that is looked up is interpreted as the full account
|
|
||||||
name, like `collective/user` and the `user-key` setting is ignored. If
|
|
||||||
you want to put each user in its own collective, you can just use the
|
|
||||||
same value as in `user-key`, only prefixed with `lookup:`. In the
|
|
||||||
example it would be `lookup:preferred_username`.
|
|
||||||
|
|
||||||
If you find that these methods do not suffice for your case, please
|
|
||||||
open an issue.
|
|
||||||
|
|
||||||
|
|
||||||
## File Backends
|
|
||||||
|
|
||||||
Docspell allows to choose from different storage backends for binary
|
|
||||||
files. You can choose between:
|
|
||||||
|
|
||||||
1. *Database (the recommended default)*
|
|
||||||
|
|
||||||
The database can be used to store the files as well. It is the
|
|
||||||
default. It doesn't require any other configuration and works well
|
|
||||||
with multiple instances of restservers and joex nodes.
|
|
||||||
2. *S3*
|
|
||||||
|
|
||||||
The S3 backend allows to store files in an S3 compatible storage.
|
|
||||||
It was tested with MinIO, which is possible to self host.
|
|
||||||
|
|
||||||
3. *Filesystem*
|
|
||||||
|
|
||||||
The filesystem can also be used directly, by specifying a
|
|
||||||
directory. Be aware that _all_ nodes must have read and write
|
|
||||||
access into this directory! When running multiple nodes over a
|
|
||||||
network, consider using one of the above instead. Docspell uses a
|
|
||||||
fixed structure for storing the files below the given directory, it
|
|
||||||
cannot be configured.
|
|
||||||
|
|
||||||
When using S3 or filesystem, remember to backup the database *and* the
|
|
||||||
files!
|
|
||||||
|
|
||||||
Note that Docspell not only stores the files that are uploaded, but
|
|
||||||
also some other files for internal use.
|
|
||||||
|
|
||||||
### Configuring
|
|
||||||
|
|
||||||
{% warningbubble(title="Note") %}
|
|
||||||
|
|
||||||
Each node must have the same config for its file backend! When using
|
|
||||||
the filesystem, make sure all processes can access the directory with
|
|
||||||
read and write permissions.
|
|
||||||
|
|
||||||
{% end %}
|
|
||||||
|
|
||||||
The file storage backend can be configured inside the `files` section
|
|
||||||
(see the default configs below):
|
|
||||||
|
|
||||||
```conf
|
|
||||||
files {
|
|
||||||
…
|
|
||||||
default-store = "database"
|
|
||||||
|
|
||||||
stores = {
|
|
||||||
database =
|
|
||||||
{ enabled = true
|
|
||||||
type = "default-database"
|
|
||||||
}
|
|
||||||
|
|
||||||
filesystem =
|
|
||||||
{ enabled = false
|
|
||||||
type = "file-system"
|
|
||||||
directory = "/some/directory"
|
|
||||||
}
|
|
||||||
|
|
||||||
minio =
|
|
||||||
{ enabled = false
|
|
||||||
type = "s3"
|
|
||||||
endpoint = "http://localhost:9000"
|
|
||||||
access-key = "username"
|
|
||||||
secret-key = "password"
|
|
||||||
bucket = "docspell"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The `stores` object defines a set of stores and the `default-store`
|
|
||||||
selects the one that should be used. All disabled store configurations
|
|
||||||
are removed from the list. Thus the `default-store` must be enabled.
|
|
||||||
Other enabled stores can be used as the target when copying files (see
|
|
||||||
below).
|
|
||||||
|
|
||||||
A store configuration requires an `enabled` and `type` property.
|
|
||||||
Depending on the `type` property, other properties are required, they
|
|
||||||
are presented above. The available storage types are
|
|
||||||
`default-database`, `file-system` and `s3`.
|
|
||||||
|
|
||||||
If you use the docker setup, you can find the corresponding
|
|
||||||
environment variables to the above config snippet
|
|
||||||
[below](#environment-variables).
|
|
||||||
|
|
||||||
### Change Backends
|
|
||||||
|
|
||||||
It is possible to change backends with a bit of manual effort. When
|
|
||||||
doing this, please make sure that the application is not used. It is
|
|
||||||
important that no file is uploaded during the following steps.
|
|
||||||
|
|
||||||
The [cli](@/docs/tools/cli.md) will be used, please set it up first
|
|
||||||
and you need to enable the [admin endpoint](#admin-endpoint). Config
|
|
||||||
changes mentioned here must be applied to all nodes - joex and
|
|
||||||
restserver!
|
|
||||||
|
|
||||||
1. In the config, enable a second file backend (besides the default)
|
|
||||||
you want to change to and start docspell as normal. Don't change
|
|
||||||
`default-store` yet.
|
|
||||||
2. Run the file integrity check in order to see whether all files are
|
|
||||||
ok as they are in the current store. This can be done using the
|
|
||||||
[cli](@/docs/tools/cli.md) by running:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
dsc admin file-integrity-check
|
|
||||||
```
|
|
||||||
3. Run the copy files admin command which will copy all files from the
|
|
||||||
current `default-store` to all other enabled stores.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
dsc admin clone-file-repository
|
|
||||||
```
|
|
||||||
|
|
||||||
And wait until it's done :-). You can see the progress in the jobs
|
|
||||||
page when logged in as `docspell-system` or just look at the logs.
|
|
||||||
4. In the config, change the `default-store` to the one you just
|
|
||||||
copied all the files to and restart docspell.
|
|
||||||
5. Login and do some smoke tests. Then run the file integrity check
|
|
||||||
again:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
dsc admin file-integrity-check
|
|
||||||
```
|
|
||||||
|
|
||||||
If all is fine, then you are done and are now using the new file
|
|
||||||
backend. If the second integrity check fails, please open an issue.
|
|
||||||
You need then to revert the config change of step 4 to use the
|
|
||||||
previous `default-store` again.
|
|
||||||
|
|
||||||
If you want to delete the files from the database, you can do so by
|
|
||||||
running the following SQL against the database:
|
|
||||||
|
|
||||||
```sql
|
|
||||||
DELETE FROM filechunk
|
|
||||||
```
|
|
||||||
|
|
||||||
You can copy them back into the database using the steps above.
|
|
||||||
|
|
||||||
|
|
||||||
## File Processing
|
|
||||||
|
|
||||||
Files are being processed by the joex component. So all the respective
|
|
||||||
configuration is in this config only.
|
|
||||||
|
|
||||||
File processing involves several stages, detailed information can be
|
|
||||||
found [here](@/docs/joex/file-processing.md#text-analysis) and in the
|
|
||||||
corresponding sections in [joex default config](#joex).
|
|
||||||
|
|
||||||
Configuration allows to define the external tools and set some
|
|
||||||
limitations to control memory usage. The sections are:
|
|
||||||
|
|
||||||
- `docspell.joex.extraction`
|
|
||||||
- `docspell.joex.text-analysis`
|
|
||||||
- `docspell.joex.convert`
|
|
||||||
|
|
||||||
Options to external commands can use variables that are replaced by
|
|
||||||
values at runtime. Variables are enclosed in double braces `{{…}}`.
|
|
||||||
Please see the default configuration for what variables exist per
|
|
||||||
command.
|
|
||||||
|
|
||||||
### Classification
|
|
||||||
|
|
||||||
In `text-analysis.classification` you can define how many documents at
|
|
||||||
most should be used for learning. The default settings should work
|
|
||||||
well for most cases. However, it always depends on the amount of data
|
|
||||||
and the machine that runs joex. For example, by default the documents
|
|
||||||
to learn from are limited to 600 (`classification.item-count`) and
|
|
||||||
every text is cut after 5000 characters (`text-analysis.max-length`).
|
|
||||||
This is fine if *most* of your documents are small and only a few are
|
|
||||||
near 5000 characters. But if *all* your documents are very large, you
|
|
||||||
probably need to either assign more heap memory or go down with the
|
|
||||||
limits.
|
|
||||||
|
|
||||||
Classification can be disabled, too, for when it's not needed.
|
|
||||||
|
|
||||||
### NLP
|
|
||||||
|
|
||||||
This setting defines which NLP mode to use. It defaults to `full`,
|
|
||||||
which requires more memory for certain languages (with the advantage
|
|
||||||
of better results). Other values are `basic`, `regexonly` and
|
|
||||||
`disabled`. The modes `full` and `basic` use pre-defined language
|
|
||||||
models for processing documents of languages German, English, French and
|
|
||||||
Spanish. These require some amount of memory (see below).
|
|
||||||
|
|
||||||
The mode `basic` is like the "light" variant to `full`. It doesn't use
|
|
||||||
all NLP features, which makes memory consumption much lower, but comes
|
|
||||||
with the compromise of less accurate results.
|
|
||||||
|
|
||||||
The mode `regexonly` doesn't use pre-defined language models, even if
|
|
||||||
available. It checks your address book against a document to find
|
|
||||||
metadata. That means, it is language independent. Also, when using
|
|
||||||
`full` or `basic` with languages where no pre-defined models exist, it
|
|
||||||
will degrade to `regexonly` for these.
|
|
||||||
|
|
||||||
The mode `disabled` skips NLP processing completely. This has least
|
|
||||||
impact in memory consumption, obviously, but then only the classifier
|
|
||||||
is used to find metadata (unless it is disabled, too).
|
|
||||||
|
|
||||||
You might want to try different modes and see what combination suits
|
|
||||||
best your usage pattern and machine running joex. If a powerful
|
|
||||||
machine is used, simply leave the defaults. When running on a
|
|
||||||
raspberry pi, for example, you might need to adjust things.
|
|
||||||
|
|
||||||
### Memory Usage
|
|
||||||
|
|
||||||
The memory requirements for the joex component depend on the document
|
|
||||||
language and the enabled features for text-analysis. The `nlp.mode`
|
|
||||||
setting has significant impact, especially when your documents are in
|
|
||||||
German. Here are some rough numbers on jvm heap usage (the same file
|
|
||||||
was used for all tries):
|
|
||||||
|
|
||||||
<table class="striped-basic">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>nlp.mode</th>
|
|
||||||
<th>English</th>
|
|
||||||
<th>German</th>
|
|
||||||
<th>French</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tfoot>
|
|
||||||
</tfoot>
|
|
||||||
<tbody>
|
|
||||||
<tr><td>full</td><td>420M</td><td>950M</td><td>490M</td></tr>
|
|
||||||
<tr><td>basic</td><td>170M</td><td>380M</td><td>390M</td></tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
Note that these are only rough numbers and they show the maximum used
|
|
||||||
heap memory while processing a file.
|
|
||||||
|
|
||||||
When using `mode=full`, a heap setting of at least `-Xmx1400M` is
|
|
||||||
recommended. For `mode=basic` a heap setting of at least `-Xmx500M` is
|
|
||||||
recommended.
|
|
||||||
|
|
||||||
Other languages can't use these two modes, and so don't require this
|
|
||||||
amount of memory (but don't have as good results). Then you can go
|
|
||||||
with less heap. For these languages, the nlp mode is the same as
|
|
||||||
`regexonly`.
|
|
||||||
|
|
||||||
Training the classifier is also memory intensive, which solely depends
|
|
||||||
on the size and number of documents that are being trained. However,
|
|
||||||
training the classifier is done periodically and can happen maybe
|
|
||||||
every two weeks. When classifying new documents, memory requirements
|
|
||||||
are lower, since the model already exists.
|
|
||||||
|
|
||||||
More details about these modes can be found
|
|
||||||
[here](@/docs/joex/file-processing.md#text-analysis).
|
|
||||||
|
|
||||||
|
|
||||||
The restserver component is very lightweight, here you can use
|
|
||||||
defaults.
|
|
||||||
|
|
||||||
|
|
||||||
# JVM Options
|
|
||||||
|
|
||||||
The start scripts support some options to configure the JVM. One often
|
|
||||||
used setting is the maximum heap size of the JVM. By default, java
|
|
||||||
determines it based on properties of the current machine. You can
|
|
||||||
specify it by giving java startup options to the command:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
|
|
||||||
```
|
|
||||||
|
|
||||||
This would limit the maximum heap to 1GB. The double dash separates
|
|
||||||
internal options and the arguments to the program. Another frequently
|
|
||||||
used option is to change the default temp directory. Usually it is
|
|
||||||
`/tmp`, but it may be desired to have a dedicated temp directory,
|
|
||||||
which can be configured:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
|
|
||||||
```
|
|
||||||
|
|
||||||
The command:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ ./docspell-restserver*/bin/docspell-restserver -h
|
|
||||||
```
|
|
||||||
|
|
||||||
gives an overview of supported options.
|
|
||||||
|
|
||||||
It is recommended to run joex with the G1GC enabled. If you use java8,
|
|
||||||
you need to add an option to use G1GC (`-XX:+UseG1GC`), for java11
|
|
||||||
this is not necessary (but doesn't hurt either). This could look like
|
|
||||||
this:
|
|
||||||
|
|
||||||
```
|
|
||||||
./docspell-joex-{{version()}}/bin/docspell-joex -J-Xmx1596M -J-XX:+UseG1GC -- /path/to/joex.conf
|
|
||||||
```
|
|
||||||
|
|
||||||
Using these options you can define how much memory the JVM process is
|
|
||||||
able to use. This might be necessary to adapt depending on the usage
|
|
||||||
scenario and configured text analysis features.
|
|
||||||
|
|
||||||
Please have a look at the corresponding [section](@/docs/configure/_index.md#memory-usage).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Logging
|
|
||||||
|
|
||||||
By default, docspell logs to stdout. This works well, when managed by
|
|
||||||
systemd or other inits. Logging can be configured in the configuration
|
|
||||||
file or via environment variables. There are only two settings:
|
|
||||||
|
|
||||||
- `minimum-level` specifies the log level to control the verbosity.
|
|
||||||
Levels are ordered from: *Trace*, *Debug*, *Info*, *Warn* and
|
|
||||||
*Error*
|
|
||||||
- `format` this defines how the logs are formatted. There are two
|
|
||||||
formats for humans: *Plain* and *Fancy*. And two more suited for
|
|
||||||
machine consumption: *Json* and *Logfmt*. The *Json* format contains
|
|
||||||
all details, while the others may omit some for readability
|
|
||||||
|
|
||||||
These settings are the same for joex and the restserver component.
|
|
||||||
|
|
||||||
# Default Config
|
|
||||||
## Rest Server
|
|
||||||
|
|
||||||
{{ incl_conf(path="templates/shortcodes/server.conf") }}
|
|
||||||
|
|
||||||
|
|
||||||
## Joex
|
|
||||||
|
|
||||||
|
|
||||||
{{ incl_conf(path="templates/shortcodes/joex.conf") }}
|
|
||||||
|
|
||||||
## Environment Variables
|
|
||||||
|
|
||||||
Environment variables can be used when there is no config file
|
|
||||||
supplied. The listing below shows all possible variables and their
|
|
||||||
default values.
|
|
||||||
|
|
||||||
{{ incl_conf(path="templates/shortcodes/config.env.txt") }}
|
|
||||||
|
39
website/site/content/docs/configure/admin-endpoint.md
Normal file
39
website/site/content/docs/configure/admin-endpoint.md
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
+++
|
||||||
|
title = "Admin Endpoint"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 60
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Admin Endpoint
|
||||||
|
|
||||||
|
The admin endpoint defines some [routes](@/docs/api/intro.md#admin)
|
||||||
|
for administration tasks. This is disabled by default and can be
|
||||||
|
enabled by providing a secret:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
...
|
||||||
|
admin-endpoint {
|
||||||
|
secret = "123"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This secret must be provided to all requests to a `/api/v1/admin/`
|
||||||
|
endpoint.
|
||||||
|
|
||||||
|
The most convenient way to execute admin tasks is to use the
|
||||||
|
[cli](@/docs/tools/cli.md). You get a list of possible admin commands
|
||||||
|
via `dsc admin help`.
|
||||||
|
|
||||||
|
To see the output of the commands, there are these ways:
|
||||||
|
|
||||||
|
1. looking at the joex logs, which gives most details.
|
||||||
|
2. Use the job-queue page when logged in as `docspell-system`
|
||||||
|
3. setup a [webhook](@/docs/webapp/notification.md) to be notified
|
||||||
|
when a job finishes. This way you get a small message.
|
||||||
|
|
||||||
|
All admin tasks (and also some other system tasks) are run under the
|
||||||
|
account `docspell-system` (collective and user). You need to create
|
||||||
|
this account and setup the notification hooks in there - not in your
|
||||||
|
normal account.
|
124
website/site/content/docs/configure/authentication.md
Normal file
124
website/site/content/docs/configure/authentication.md
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
+++
|
||||||
|
title = "Authentication"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 70
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
## Authentication
|
||||||
|
|
||||||
|
Authentication works in two ways:
|
||||||
|
|
||||||
|
- with an account-name / password pair
|
||||||
|
- with an authentication token
|
||||||
|
|
||||||
|
The initial authentication must occur with an accountname/password
|
||||||
|
pair. This will generate an authentication token which is valid for
|
||||||
|
some time. Subsequent calls to secured routes can use this token. The
|
||||||
|
token can be given as a normal http header or via a cookie header.
|
||||||
|
|
||||||
|
These settings apply only to the REST server.
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
docspell.server.auth {
|
||||||
|
server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
|
||||||
|
session-valid = "5 minutes"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `server-secret` is used to sign the token. If multiple REST
|
||||||
|
servers are deployed, all must share the same server secret. Otherwise
|
||||||
|
tokens from one instance are not valid on another instance. The secret
|
||||||
|
can be given as Base64 encoded string or in hex form. Use the prefix
|
||||||
|
`hex:` and `b64:`, respectively. If no prefix is given, the UTF8 bytes
|
||||||
|
of the string are used.
|
||||||
|
|
||||||
|
The `session-valid` determines how long a token is valid. This can be
|
||||||
|
just some minutes, the web application obtains new ones
|
||||||
|
periodically. So a rather short time is recommended.
|
||||||
|
|
||||||
|
## OpenID Connect / OAuth2
|
||||||
|
|
||||||
|
You can integrate Docspell into your SSO solution via [OpenID
|
||||||
|
Connect](https://openid.net/connect/) (OIDC). This requires to set up
|
||||||
|
an OpenID Provider (OP) somewhere and to configure Docspell
|
||||||
|
accordingly to act as the relying party.
|
||||||
|
|
||||||
|
You can define multiple OPs to use. For some examples, please see the
|
||||||
|
[default configuration](@/docs/configure/main.md#default-config).
|
||||||
|
|
||||||
|
The configuration of a provider highly depends on how it is setup.
|
||||||
|
Here is an example for a setup using
|
||||||
|
[keycloak](https://www.keycloak.org):
|
||||||
|
|
||||||
|
``` conf
|
||||||
|
provider = {
|
||||||
|
provider-id = "keycloak",
|
||||||
|
client-id = "docspell",
|
||||||
|
client-secret = "example-secret-439e-bf06-911e4cdd56a6",
|
||||||
|
scope = "profile", # scope is required for OIDC
|
||||||
|
authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
|
||||||
|
token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
|
||||||
|
#User URL is not used when signature key is set.
|
||||||
|
#user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
|
||||||
|
sign-key = "b64:MII…ZYL09vAwLn8EAcSkCAwEAAQ==",
|
||||||
|
sig-algo = "RS512"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `provider-id` is some identifier that is used in the URL to
|
||||||
|
distinguish between possibly multiple providers. The `client-id` and
|
||||||
|
`client-secret` define the two parameters required for a "confidential
|
||||||
|
client". The different URLs are best explained at the [keycloak
|
||||||
|
docs](https://www.keycloak.org/docs/latest/server_admin/).
|
||||||
|
They are available for all OPs in some way. The `user-url` is not
|
||||||
|
required, if the access token is already containing the necessary
|
||||||
|
data. If not, then docspell performs another request to the
|
||||||
|
`user-url`, which must be the user-info endpoint, to obtain the
|
||||||
|
required user data.
|
||||||
|
|
||||||
|
If the data is taken from the token directly and not via a request to
|
||||||
|
the user-info endpoint, then the token must be validated using the
|
||||||
|
given `sign-key` and `sig-algo`. These two values are then required to
|
||||||
|
specify! However, if the user-info endpoint should be used, then leave
|
||||||
|
the `sign-key` empty and specify the correct url in `user-url`. When
|
||||||
|
specifying the `sign-key` use a prefix of `b64:` if it is Base64
|
||||||
|
encoded or `hex:` if it is hex encoded. Otherwise the unicode bytes
|
||||||
|
are used, which is most probably not wanted for this setting.
|
||||||
|
|
||||||
|
Once the user is authenticated, docspell tries to setup an account and
|
||||||
|
does some checks. For this it must get to the username and collective
|
||||||
|
name somehow. How it does this, can be specified by the `user-key` and
|
||||||
|
`collective-key` settings:
|
||||||
|
|
||||||
|
``` conf
|
||||||
|
# The collective of the user is given in the access token as
|
||||||
|
# property `docspell_collective`.
|
||||||
|
collective-key = "lookup:docspell_collective",
|
||||||
|
# The username to use for the docspell account
|
||||||
|
user-key = "preferred_username"
|
||||||
|
```
|
||||||
|
|
||||||
|
The `user-key` is some string that is used to search the JSON response
|
||||||
|
from the OP for an object with that key. The search happens
|
||||||
|
recursively, so the field can be in a nested object. The found value
|
||||||
|
is used as the user name. Keycloak transmits the `preferred_username`
|
||||||
|
when asking for the `profile` scope. This can be used as the user
|
||||||
|
name.
|
||||||
|
|
||||||
|
The collective name can be obtained by different ways. For example,
|
||||||
|
you can instruct your OP (like keycloak) to provide a collective name
|
||||||
|
in the token and/or user-info responses. If you do this, then use the
|
||||||
|
`lookup:` prefix as in the example above. This instructs docspell to
|
||||||
|
search for a value the same way as the `user-key`. You can also set a
|
||||||
|
fixed collective, using `fixed:` prefix; in this case all users are in
|
||||||
|
the same collective! A third option is to prefix it with `account:` -
|
||||||
|
then the value that is looked up is interpreted as the full account
|
||||||
|
name, like `collective/user` and the `user-key` setting is ignored. If
|
||||||
|
you want to put each user in its own collective, you can just use the
|
||||||
|
same value as in `user-key`, only prefixed with `lookup:`. In the
|
||||||
|
example it would be `lookup:preferred_username`.
|
||||||
|
|
||||||
|
If you find that these methods do not suffice for your case, please
|
||||||
|
open an issue.
|
38
website/site/content/docs/configure/baseurl.md
Normal file
38
website/site/content/docs/configure/baseurl.md
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
+++
|
||||||
|
title = "Base URL"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 90
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
## Baseurl
|
||||||
|
|
||||||
|
The base url is an important setting that defines the http URL where
|
||||||
|
the corresponding component can be reached. It applies to both
|
||||||
|
components. For a joex component, the url must be resolvable from a
|
||||||
|
REST server component. The REST server also uses this url to create
|
||||||
|
absolute urls and to configure the authentication cookie.
|
||||||
|
|
||||||
|
By default it is built using the information from the `bind` setting,
|
||||||
|
which is `http://localhost:7880`.
|
||||||
|
|
||||||
|
If the default is not changed, docspell will use the request to
|
||||||
|
determine the base-url. It first inspects the `X-Forwarded-For` header
|
||||||
|
that is often used with reverse proxies. If that is not present, the
|
||||||
|
`Host` header of the request is used. However, if the `base-url`
|
||||||
|
setting is changed, then only this setting is used.
|
||||||
|
|
||||||
|
```
|
||||||
|
docspell.server.base-url = ...
|
||||||
|
docspell.joex.base-url = ...
|
||||||
|
```
|
||||||
|
|
||||||
|
If you are unsure, leave it at its default.
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
docspell.server.base-url = "https://docspell.example.com"
|
||||||
|
docspell.joex.base-url = "http://192.168.101.10"
|
||||||
|
```
|
27
website/site/content/docs/configure/bind.md
Normal file
27
website/site/content/docs/configure/bind.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
+++
|
||||||
|
title = "Bind"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 12
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
## Bind
|
||||||
|
|
||||||
|
The host and port the http server binds to. This applies to both
|
||||||
|
components. The joex component also exposes a small REST api to
|
||||||
|
inspect its state and notify the scheduler.
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
docspell.server.bind {
|
||||||
|
address = localhost
|
||||||
|
port = 7880
|
||||||
|
}
|
||||||
|
docspell.joex.bind {
|
||||||
|
address = localhost
|
||||||
|
port = 7878
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, it binds to `localhost` and some predefined port. This
|
||||||
|
must be changed, if components are on different machines.
|
71
website/site/content/docs/configure/database.md
Normal file
71
website/site/content/docs/configure/database.md
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
+++
|
||||||
|
title = "Database"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Details about configuring the database."
|
||||||
|
weight = 20
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
|
||||||
|
# Database
|
||||||
|
|
||||||
|
The database holds by default all the data and must be configured
|
||||||
|
exactly the same on all nodes.
|
||||||
|
|
||||||
|
The following are supported DBs:
|
||||||
|
|
||||||
|
- PostgreSQL (recommended)
|
||||||
|
- MariaDB
|
||||||
|
- H2
|
||||||
|
|
||||||
|
This has to be specified for the rest server and joex. By default, an
|
||||||
|
H2 database in the current `/tmp` directory is configured.
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
The config looks like this (both components):
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
docspell.joex.jdbc {
|
||||||
|
url = ...
|
||||||
|
user = ...
|
||||||
|
password = ...
|
||||||
|
}
|
||||||
|
|
||||||
|
docspell.server.backend.jdbc {
|
||||||
|
url = ...
|
||||||
|
user = ...
|
||||||
|
password = ...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `url` is the connection to the database. It must start with
|
||||||
|
`jdbc`, followed by the name of the database. The rest is specific to the
|
||||||
|
database used: it is either a path to a file for H2 or a host/database
|
||||||
|
url for MariaDB and PostgreSQL.
|
||||||
|
|
||||||
|
When using H2, the user and password can be chosen freely on first
|
||||||
|
start, but must stay the same on subsequent starts. Usually, the user
|
||||||
|
is `sa` and the password is left empty. Additionally, the url must
|
||||||
|
include these options:
|
||||||
|
|
||||||
|
```
|
||||||
|
;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
|
||||||
|
```
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
PostgreSQL:
|
||||||
|
```
|
||||||
|
url = "jdbc:postgresql://localhost:5432/docspelldb"
|
||||||
|
```
|
||||||
|
|
||||||
|
MariaDB:
|
||||||
|
```
|
||||||
|
url = "jdbc:mariadb://localhost:3306/docspelldb"
|
||||||
|
```
|
||||||
|
|
||||||
|
H2
|
||||||
|
```
|
||||||
|
url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
|
||||||
|
```
|
147
website/site/content/docs/configure/file-backends.md
Normal file
147
website/site/content/docs/configure/file-backends.md
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
+++
|
||||||
|
title = "File Backends"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 30
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
## File Backends
|
||||||
|
|
||||||
|
Docspell allows to choose from different storage backends for binary
|
||||||
|
files. You can choose between:
|
||||||
|
|
||||||
|
1. *Database (the recommended default)*
|
||||||
|
|
||||||
|
The database can be used to store the files as well. It is the
|
||||||
|
default. It doesn't require any other configuration and works well
|
||||||
|
with multiple instances of restservers and joex nodes.
|
||||||
|
2. *S3*
|
||||||
|
|
||||||
|
The S3 backend allows to store files in an S3 compatible storage.
|
||||||
|
It was tested with MinIO, which is possible to self host.
|
||||||
|
|
||||||
|
3. *Filesystem*
|
||||||
|
|
||||||
|
The filesystem can also be used directly, by specifying a
|
||||||
|
directory. Be aware that _all_ nodes must have read and write
|
||||||
|
access into this directory! When running multiple nodes over a
|
||||||
|
network, consider using one of the above instead. Docspell uses a
|
||||||
|
fixed structure for storing the files below the given directory, it
|
||||||
|
cannot be configured.
|
||||||
|
|
||||||
|
When using S3 or filesystem, remember to backup the database *and* the
|
||||||
|
files!
|
||||||
|
|
||||||
|
Note that Docspell not only stores the files that are uploaded, but
|
||||||
|
also some other files for internal use.
|
||||||
|
|
||||||
|
### Configuring
|
||||||
|
|
||||||
|
{% warningbubble(title="Note") %}
|
||||||
|
|
||||||
|
Each node must have the same config for its file backend! When using
|
||||||
|
the filesystem, make sure all processes can access the directory with
|
||||||
|
read and write permissions.
|
||||||
|
|
||||||
|
{% end %}
|
||||||
|
|
||||||
|
The file storage backend can be configured inside the `files` section
|
||||||
|
(see the [default configs](@/docs/configure/main.md#default-config)):
|
||||||
|
|
||||||
|
```conf
|
||||||
|
files {
|
||||||
|
…
|
||||||
|
default-store = "database"
|
||||||
|
|
||||||
|
stores = {
|
||||||
|
database =
|
||||||
|
{ enabled = true
|
||||||
|
type = "default-database"
|
||||||
|
}
|
||||||
|
|
||||||
|
filesystem =
|
||||||
|
{ enabled = false
|
||||||
|
type = "file-system"
|
||||||
|
directory = "/some/directory"
|
||||||
|
}
|
||||||
|
|
||||||
|
minio =
|
||||||
|
{ enabled = false
|
||||||
|
type = "s3"
|
||||||
|
endpoint = "http://localhost:9000"
|
||||||
|
access-key = "username"
|
||||||
|
secret-key = "password"
|
||||||
|
bucket = "docspell"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `stores` object defines a set of stores and the `default-store`
|
||||||
|
selects the one that should be used. All disabled store configurations
|
||||||
|
are removed from the list. Thus the `default-store` must be enabled.
|
||||||
|
Other enabled stores can be used as the target when copying files (see
|
||||||
|
below).
|
||||||
|
|
||||||
|
A store configuration requires an `enabled` and `type` property.
|
||||||
|
Depending on the `type` property, other properties are required, they
|
||||||
|
are presented above. The available storage types are
|
||||||
|
`default-database`, `file-system` and `s3`.
|
||||||
|
|
||||||
|
If you use the docker setup, you can find the corresponding
|
||||||
|
environment variables to the above config snippet
|
||||||
|
[below](#environment-variables).
|
||||||
|
|
||||||
|
### Change Backends
|
||||||
|
|
||||||
|
It is possible to change backends with a bit of manual effort. When
|
||||||
|
doing this, please make sure that the application is not used. It is
|
||||||
|
important that no file is uploaded during the following steps.
|
||||||
|
|
||||||
|
The [cli](@/docs/tools/cli.md) will be used, please set it up first
|
||||||
|
and you need to enable the [admin endpoint](#admin-endpoint). Config
|
||||||
|
changes mentioned here must be applied to all nodes - joex and
|
||||||
|
restserver!
|
||||||
|
|
||||||
|
1. In the config, enable a second file backend (besides the default)
|
||||||
|
you want to change to and start docspell as normal. Don't change
|
||||||
|
`default-store` yet.
|
||||||
|
2. Run the file integrity check in order to see whether all files are
|
||||||
|
ok as they are in the current store. This can be done using the
|
||||||
|
[cli](@/docs/tools/cli.md) by running:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dsc admin file-integrity-check
|
||||||
|
```
|
||||||
|
3. Run the copy files admin command which will copy all files from the
|
||||||
|
current `default-store` to all other enabled stores.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dsc admin clone-file-repository
|
||||||
|
```
|
||||||
|
|
||||||
|
And wait until it's done :-). You can see the progress in the jobs
|
||||||
|
page when logged in as `docspell-system` or just look at the logs.
|
||||||
|
4. In the config, change the `default-store` to the one you just
|
||||||
|
copied all the files to and restart docspell.
|
||||||
|
5. Login and do some smoke tests. Then run the file integrity check
|
||||||
|
again:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dsc admin file-integrity-check
|
||||||
|
```
|
||||||
|
|
||||||
|
If all is fine, then you are done and are now using the new file
|
||||||
|
backend. If the second integrity check fails, please open an issue.
|
||||||
|
You need then to revert the config change of step 4 to use the
|
||||||
|
previous `default-store` again.
|
||||||
|
|
||||||
|
If you want to delete the files from the database, you can do so by
|
||||||
|
running the following SQL against the database:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
DELETE FROM filechunk
|
||||||
|
```
|
||||||
|
|
||||||
|
You can copy them back into the database using the steps above.
|
122
website/site/content/docs/configure/file-processing.md
Normal file
122
website/site/content/docs/configure/file-processing.md
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
+++
|
||||||
|
title = "File Processing"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 40
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
## File Processing
|
||||||
|
|
||||||
|
Files are being processed by the joex component. So all the respective
|
||||||
|
configuration is in this config only.
|
||||||
|
|
||||||
|
File processing involves several stages, detailed information can be
|
||||||
|
found [here](@/docs/joex/file-processing.md#text-analysis) and in the
|
||||||
|
corresponding sections in [joex default
|
||||||
|
config](@/docs/configure/main.md#joex).
|
||||||
|
|
||||||
|
Configuration allows to define the external tools and set some
|
||||||
|
limitations to control memory usage. The sections are:
|
||||||
|
|
||||||
|
- `docspell.joex.extraction`
|
||||||
|
- `docspell.joex.text-analysis`
|
||||||
|
- `docspell.joex.convert`
|
||||||
|
|
||||||
|
Options to external commands can use variables that are replaced by
|
||||||
|
values at runtime. Variables are enclosed in double braces `{{…}}`.
|
||||||
|
Please see the default configuration for what variables exist per
|
||||||
|
command.
|
||||||
|
|
||||||
|
### Classification
|
||||||
|
|
||||||
|
In `text-analysis.classification` you can define how many documents at
|
||||||
|
most should be used for learning. The default settings should work
|
||||||
|
well for most cases. However, it always depends on the amount of data
|
||||||
|
and the machine that runs joex. For example, by default the documents
|
||||||
|
to learn from are limited to 600 (`classification.item-count`) and
|
||||||
|
every text is cut after 5000 characters (`text-analysis.max-length`).
|
||||||
|
This is fine if *most* of your documents are small and only a few are
|
||||||
|
near 5000 characters. But if *all* your documents are very large, you
|
||||||
|
probably need to either assign more heap memory or go down with the
|
||||||
|
limits.
|
||||||
|
|
||||||
|
Classification can be disabled, too, for when it's not needed.
|
||||||
|
|
||||||
|
### NLP
|
||||||
|
|
||||||
|
This setting defines which NLP mode to use. It defaults to `full`,
|
||||||
|
which requires more memory for certain languages (with the advantage
|
||||||
|
of better results). Other values are `basic`, `regexonly` and
|
||||||
|
`disabled`. The modes `full` and `basic` use pre-defined language
|
||||||
|
models for processing documents of languages German, English, French and
|
||||||
|
Spanish. These require some amount of memory (see below).
|
||||||
|
|
||||||
|
The mode `basic` is like the "light" variant to `full`. It doesn't use
|
||||||
|
all NLP features, which makes memory consumption much lower, but comes
|
||||||
|
with the compromise of less accurate results.
|
||||||
|
|
||||||
|
The mode `regexonly` doesn't use pre-defined language models, even if
|
||||||
|
available. It checks your address book against a document to find
|
||||||
|
metadata. That means, it is language independent. Also, when using
|
||||||
|
`full` or `basic` with languages where no pre-defined models exist, it
|
||||||
|
will degrade to `regexonly` for these.
|
||||||
|
|
||||||
|
The mode `disabled` skips NLP processing completely. This has the least
|
||||||
|
impact on memory consumption, obviously, but then only the classifier
|
||||||
|
is used to find metadata (unless it is disabled, too).
|
||||||
|
|
||||||
|
You might want to try different modes and see what combination suits
|
||||||
|
best your usage pattern and machine running joex. If a powerful
|
||||||
|
machine is used, simply leave the defaults. When running on a
|
||||||
|
raspberry pi, for example, you might need to adjust things.
|
||||||
|
|
||||||
|
### Memory Usage
|
||||||
|
|
||||||
|
The memory requirements for the joex component depend on the document
|
||||||
|
language and the enabled features for text-analysis. The `nlp.mode`
|
||||||
|
setting has significant impact, especially when your documents are in
|
||||||
|
German. Here are some rough numbers on jvm heap usage (the same file
|
||||||
|
was used for all tries):
|
||||||
|
|
||||||
|
<table class="striped-basic">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>nlp.mode</th>
|
||||||
|
<th>English</th>
|
||||||
|
<th>German</th>
|
||||||
|
<th>French</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tfoot>
|
||||||
|
</tfoot>
|
||||||
|
<tbody>
|
||||||
|
<tr><td>full</td><td>420M</td><td>950M</td><td>490M</td></tr>
|
||||||
|
<tr><td>basic</td><td>170M</td><td>380M</td><td>390M</td></tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
Note that these are only rough numbers and they show the maximum used
|
||||||
|
heap memory while processing a file.
|
||||||
|
|
||||||
|
When using `mode=full`, a heap setting of at least `-Xmx1400M` is
|
||||||
|
recommended. For `mode=basic` a heap setting of at least `-Xmx500M` is
|
||||||
|
recommended.
|
||||||
|
|
||||||
|
Other languages can't use these two modes, and so don't require this
|
||||||
|
amount of memory (but don't have as good results). Then you can go
|
||||||
|
with less heap. For these languages, the nlp mode is the same as
|
||||||
|
`regexonly`.
|
||||||
|
|
||||||
|
Training the classifier is also memory intensive, which solely depends
|
||||||
|
on the size and number of documents that are being trained. However,
|
||||||
|
training the classifier is done periodically and can happen maybe
|
||||||
|
every two weeks. When classifying new documents, memory requirements
|
||||||
|
are lower, since the model already exists.
|
||||||
|
|
||||||
|
More details about these modes can be found
|
||||||
|
[here](@/docs/joex/file-processing.md#text-analysis).
|
||||||
|
|
||||||
|
|
||||||
|
The restserver component is very lightweight, here you can use
|
||||||
|
defaults.
|
176
website/site/content/docs/configure/fulltext-search.md
Normal file
176
website/site/content/docs/configure/fulltext-search.md
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
+++
|
||||||
|
title = "Full-Text Search"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Details about configuring the fulltext search."
|
||||||
|
weight = 50
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
|
||||||
|
# Full-Text Search
|
||||||
|
|
||||||
|
Fulltext search is optional and provided by external systems. There
|
||||||
|
are currently [Apache SOLR](https://solr.apache.org) and [PostgreSQL's
|
||||||
|
text search](https://www.postgresql.org/docs/14/textsearch.html)
|
||||||
|
available.
|
||||||
|
|
||||||
|
You can enable and configure the fulltext search backends as described
|
||||||
|
below and then choose the backend:
|
||||||
|
|
||||||
|
```conf
|
||||||
|
full-text-search {
|
||||||
|
enabled = true
|
||||||
|
# Which backend to use, either solr or postgresql
|
||||||
|
backend = "solr"
|
||||||
|
…
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
All docspell components must provide the same fulltext search
|
||||||
|
configuration.
|
||||||
|
|
||||||
|
|
||||||
|
## SOLR
|
||||||
|
|
||||||
|
[Apache SOLR](https://solr.apache.org) can be used to provide the
|
||||||
|
full-text search. This is defined in the `full-text-search.solr`
|
||||||
|
subsection:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
...
|
||||||
|
full-text-search {
|
||||||
|
...
|
||||||
|
solr = {
|
||||||
|
url = "http://localhost:8983/solr/docspell"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The default configuration at the end of this page contains more
|
||||||
|
information about each setting.
|
||||||
|
|
||||||
|
The `solr.url` is the mandatory setting that you need to change to
|
||||||
|
point to your SOLR instance. Then you need to set the `enabled` flag
|
||||||
|
to `true`.
|
||||||
|
|
||||||
|
When installing docspell manually, just install solr and create a core
|
||||||
|
as described in the [solr
|
||||||
|
documentation](https://solr.apache.org/guide/8_4/installing-solr.html).
|
||||||
|
That will provide you with the connection url (the last part is the
|
||||||
|
core name). If Docspell detects an empty core it will run a schema
|
||||||
|
setup on start automatically.
|
||||||
|
|
||||||
|
The `full-text-search.solr` options are the same for joex and the
|
||||||
|
restserver.
|
||||||
|
|
||||||
|
Sometimes it is necessary to re-create the entire index, for example
|
||||||
|
if you upgrade SOLR or delete the core to provide a new one (see
|
||||||
|
[here](https://solr.apache.org/guide/8_4/reindexing.html) for
|
||||||
|
details). Another way is to restart docspell (while clearing the
|
||||||
|
index). If docspell detects an empty index at startup, it will submit
|
||||||
|
a task to build the index automatically.
|
||||||
|
|
||||||
|
Note that a collective can also re-index their data using a similar
|
||||||
|
endpoint; but this is only deleting their data and doesn't do a full
|
||||||
|
re-index.
|
||||||
|
|
||||||
|
The solr index doesn't contain any new information, it can be
|
||||||
|
regenerated any time using the REST call described below. Thus it doesn't need
|
||||||
|
to be backed up.
|
||||||
|
|
||||||
|
|
||||||
|
## PostgreSQL
|
||||||
|
|
||||||
|
PostgreSQL provides many additional features, one of them is [text
|
||||||
|
search](https://www.postgresql.org/docs/14/textsearch.html). Docspell
|
||||||
|
can utilize this to provide the fulltext search feature. This is
|
||||||
|
especially useful, if PostgreSQL is used as the primary database for
|
||||||
|
docspell.
|
||||||
|
|
||||||
|
You can choose to use the same database or a separate connection. The
|
||||||
|
fulltext search will create a single table `ftspsql_search` that holds
|
||||||
|
all necessary data. When doing backups, you can exclude this table as
|
||||||
|
it can be recreated from the primary data any time.
|
||||||
|
|
||||||
|
The configuration is placed inside `full-text-search`:
|
||||||
|
|
||||||
|
```conf
|
||||||
|
full-text-search {
|
||||||
|
…
|
||||||
|
postgresql = {
|
||||||
|
use-default-connection = false
|
||||||
|
|
||||||
|
jdbc {
|
||||||
|
url = "jdbc:postgresql://server:5432/db"
|
||||||
|
user = "pguser"
|
||||||
|
password = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
pg-config = {
|
||||||
|
}
|
||||||
|
pg-query-parser = "websearch_to_tsquery"
|
||||||
|
pg-rank-normalization = [ 4 ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The flag `use-default-connection` can be set to `true` if you use
|
||||||
|
PostgreSQL as the primary db to have it also used for the fulltext
|
||||||
|
search. If set to `false`, the subsequent `jdbc` block defines the
|
||||||
|
connection to the postgres database to use.
|
||||||
|
|
||||||
|
It follows some settings to tune PostgreSQL's text search feature.
|
||||||
|
Please visit [their
|
||||||
|
documentation](https://www.postgresql.org/docs/14/textsearch.html) for
|
||||||
|
all the details.
|
||||||
|
|
||||||
|
- `pg-config`: this is an optional mapping from document languages as
|
||||||
|
used in Docspell to a PostgreSQL text search configuration. Not all
|
||||||
|
languages are equally well supported out of the box. You can create
|
||||||
|
your own text search config in PostgreSQL and then define it in this
|
||||||
|
map for your language. For example:
|
||||||
|
|
||||||
|
```conf
|
||||||
|
pg-config = {
|
||||||
|
english = "my-english"
|
||||||
|
german = "my-german"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, the predefined configs are used for some languages and
|
||||||
|
otherwise fallback to `simple`.
|
||||||
|
|
||||||
|
*If you change this setting, you must re-index everything.*
|
||||||
|
- `pg-query-parser`: the parser applied to the fulltext query. By
|
||||||
|
default it is `websearch_to_tsquery`. (relevant [doc
|
||||||
|
link](https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES))
|
||||||
|
- `pg-rank-normalization`: this is used to tweak rank calculation that
|
||||||
|
affects the order of the elements returned from a query. It is an
|
||||||
|
array of numbers out of `1`, `2`, `4`, `8`, `16` or `32`. (relevant
|
||||||
|
[doc
|
||||||
|
link](https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING))
|
||||||
|
|
||||||
|
|
||||||
|
# Re-create the index
|
||||||
|
|
||||||
|
There is an [admin route](@/docs/api/intro.md#admin) that allows to
|
||||||
|
re-create the entire index (for all collectives). This is possible via
|
||||||
|
a call:
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
$ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll
|
||||||
|
```
|
||||||
|
|
||||||
|
or use the [cli](@/docs/tools/cli.md):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dsc admin -a test123 recreate-index
|
||||||
|
```
|
||||||
|
|
||||||
|
Here the `test123` is the key defined with `admin-endpoint.secret`. If
|
||||||
|
it is empty (the default), this call is disabled (all admin routes).
|
||||||
|
Otherwise, the POST request will submit a system task that is executed
|
||||||
|
by a joex instance eventually.
|
||||||
|
|
||||||
|
Using this endpoint, the entire index (including the schema) will be
|
||||||
|
re-created.
|
192
website/site/content/docs/configure/main.md
Normal file
192
website/site/content/docs/configure/main.md
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
+++
|
||||||
|
title = "Main"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes the configuration file and shows all default settings."
|
||||||
|
weight = 10
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
|
||||||
|
Docspell's executables (restserver and joex) can take one argument – a
|
||||||
|
configuration file. If that is not given, the defaults are used,
|
||||||
|
overridden by environment variables. A config file overrides default
|
||||||
|
values, so only values that differ from the defaults are necessary.
|
||||||
|
The complete default options and their documentation are at the end of
|
||||||
|
this page.
|
||||||
|
|
||||||
|
Besides the config file, another way is to provide individual settings
|
||||||
|
via key-value pairs to the executable by the `-D` option. For example
|
||||||
|
to override only `base-url` you could add the argument
|
||||||
|
`-Ddocspell.server.base-url=…` to the command. Multiple options are
|
||||||
|
possible. For more than a few values this is very tedious, obviously, so
|
||||||
|
the recommended way is to maintain a config file. If these options
|
||||||
|
*and* a file are provided, then any setting given via the `-D…` option
|
||||||
|
overrides the same setting from the config file.
|
||||||
|
|
||||||
|
At last, it is possible to configure docspell via environment
|
||||||
|
variables if there is no config file supplied (if a config file *is*
|
||||||
|
supplied, it is always preferred). Note that this approach is limited,
|
||||||
|
as arrays are not supported. A list of environment variables can be
|
||||||
|
found at the [end of this page](#environment-variables). The
|
||||||
|
environment variable name follows the corresponding config key - where
|
||||||
|
dots are replaced by underscores and dashes are replaced by two
|
||||||
|
underscores. For example, the config key `docspell.server.app-name`
|
||||||
|
can be defined as env variable `DOCSPELL_SERVER_APP__NAME`.
|
||||||
|
|
||||||
|
It is also possible to specify environment variables inside a config
|
||||||
|
file (to get a mix of both) - please see the [documentation of the
|
||||||
|
config library](https://github.com/lightbend/config#standard-behavior)
|
||||||
|
for more on this.
|
||||||
|
|
||||||
|
# File Format
|
||||||
|
|
||||||
|
The format of the configuration files can be
|
||||||
|
[HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation),
|
||||||
|
JSON or what this [config
|
||||||
|
library](https://github.com/lightbend/config) understands. The default
|
||||||
|
values below are in HOCON format, which is recommended, since it
|
||||||
|
allows comments and has some [advanced
|
||||||
|
features](https://github.com/lightbend/config#features-of-hocon).
|
||||||
|
Please also see their documentation for more details.
|
||||||
|
|
||||||
|
A short description (please check the links for better understanding):
|
||||||
|
The config consists of key-value pairs and can be written in a
|
||||||
|
JSON-like format (called HOCON). Keys are organized in trees, and a
|
||||||
|
key defines a full path into the tree. There are two ways:
|
||||||
|
|
||||||
|
```
|
||||||
|
a.b.c.d=15
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```
|
||||||
|
a {
|
||||||
|
b {
|
||||||
|
c {
|
||||||
|
d = 15
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Both are exactly the same and these forms are both used at the same
|
||||||
|
time. Usually the braces approach is used to group some more settings,
|
||||||
|
for better readability.
|
||||||
|
|
||||||
|
Strings that contain "not-so-common" characters should be enclosed in
|
||||||
|
quotes. It is possible to define values at the top of the file and
|
||||||
|
reuse them on different locations via the `${full.path.to.key}`
|
||||||
|
syntax. When using these variables, they *must not* be enclosed in
|
||||||
|
quotes.
|
||||||
|
|
||||||
|
|
||||||
|
# Config Options
|
||||||
|
|
||||||
|
The configuration of both components uses separate namespaces. The
|
||||||
|
configuration for the REST server is below `docspell.server`, while
|
||||||
|
the one for joex is below `docspell.joex`.
|
||||||
|
|
||||||
|
You can therefore use two separate config files or one single file
|
||||||
|
containing both namespaces.
|
||||||
|
|
||||||
|
## App-id
|
||||||
|
|
||||||
|
The `app-id` is the identifier of the corresponding instance. It *must
|
||||||
|
be unique* for all instances. By default the REST server uses `rest1`
|
||||||
|
and joex `joex1`. It is recommended to overwrite this setting to have
|
||||||
|
an explicit and stable identifier if multiple instances are
|
||||||
|
intended.
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
docspell.server.app-id = "rest1"
|
||||||
|
docspell.joex.app-id = "joex1"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Other options
|
||||||
|
|
||||||
|
Please see the menu on the left for details about specific
|
||||||
|
configuration options.
|
||||||
|
|
||||||
|
# JVM Options
|
||||||
|
|
||||||
|
The start scripts support some options to configure the JVM. One often
|
||||||
|
used setting is the maximum heap size of the JVM. By default, java
|
||||||
|
determines it based on properties of the current machine. You can
|
||||||
|
specify it by giving java startup options to the command:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
This would limit the maximum heap to 1GB. The double dash separates
|
||||||
|
internal options and the arguments to the program. Another frequently
|
||||||
|
used option is to change the default temp directory. Usually it is
|
||||||
|
`/tmp`, but it may be desired to have a dedicated temp directory,
|
||||||
|
which can be configured:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
The command:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ ./docspell-restserver*/bin/docspell-restserver -h
|
||||||
|
```
|
||||||
|
|
||||||
|
gives an overview of supported options.
|
||||||
|
|
||||||
|
It is recommended to run joex with the G1GC enabled. If you use java8,
|
||||||
|
you need to add an option to use G1GC (`-XX:+UseG1GC`), for java11
|
||||||
|
this is not necessary (but doesn't hurt either). This could look like
|
||||||
|
this:
|
||||||
|
|
||||||
|
```
|
||||||
|
./docspell-joex-{{version()}}/bin/docspell-joex -J-Xmx1596M -J-XX:+UseG1GC -- /path/to/joex.conf
|
||||||
|
```
|
||||||
|
|
||||||
|
Using these options you can define how much memory the JVM process is
|
||||||
|
able to use. This might be necessary to adapt depending on the usage
|
||||||
|
scenario and configured text analysis features.
|
||||||
|
|
||||||
|
Please have a look at the corresponding
|
||||||
|
[section](@/docs/configure/file-processing.md#memory-usage).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
|
||||||
|
By default, docspell logs to stdout. This works well, when managed by
|
||||||
|
systemd or other inits. Logging can be configured in the configuration
|
||||||
|
file or via environment variables. There are only two settings:
|
||||||
|
|
||||||
|
- `minimum-level` specifies the log level to control the verbosity.
|
||||||
|
Levels are ordered from: *Trace*, *Debug*, *Info*, *Warn* and
|
||||||
|
*Error*
|
||||||
|
- `format` this defines how the logs are formatted. There are two
|
||||||
|
formats for humans: *Plain* and *Fancy*. And two more suited for
|
||||||
|
machine consumption: *Json* and *Logfmt*. The *Json* format contains
|
||||||
|
all details, while the others may omit some for readability
|
||||||
|
|
||||||
|
These settings are the same for joex and the restserver component.
|
||||||
|
|
||||||
|
# Default Config
|
||||||
|
## Rest Server
|
||||||
|
|
||||||
|
{{ incl_conf(path="templates/shortcodes/server.conf") }}
|
||||||
|
|
||||||
|
|
||||||
|
## Joex
|
||||||
|
|
||||||
|
|
||||||
|
{{ incl_conf(path="templates/shortcodes/joex.conf") }}
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Environment variables can be used when there is no config file
|
||||||
|
supplied. The listing below shows all possible variables and their
|
||||||
|
default values.
|
||||||
|
|
||||||
|
{{ incl_conf(path="templates/shortcodes/config.env.txt") }}
|
44
website/site/content/docs/configure/registration.md
Normal file
44
website/site/content/docs/configure/registration.md
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
+++
|
||||||
|
title = "Registration"
|
||||||
|
insert_anchor_links = "right"
|
||||||
|
description = "Describes how to configure the registration of new users."
|
||||||
|
weight = 80
|
||||||
|
template = "docs.html"
|
||||||
|
+++
|
||||||
|
|
||||||
|
# Registration Options
|
||||||
|
|
||||||
|
This defines if and how new users can create accounts. There are 3
|
||||||
|
options:
|
||||||
|
|
||||||
|
- *closed* no new user can sign up
|
||||||
|
- *open* new users can sign up
|
||||||
|
- *invite* new users can sign up but require an invitation key
|
||||||
|
|
||||||
|
This applies only to the REST server component.
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
docspell.server.backend.signup {
|
||||||
|
mode = "open"
|
||||||
|
|
||||||
|
# If mode == 'invite', a password must be provided to generate
|
||||||
|
# invitation keys. It must not be empty.
|
||||||
|
new-invite-password = ""
|
||||||
|
|
||||||
|
# If mode == 'invite', this is the period an invitation token is
|
||||||
|
# considered valid.
|
||||||
|
invite-time = "3 days"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The mode `invite` is intended to open the application only to some
|
||||||
|
users. The admin can create these invitation keys and distribute them
|
||||||
|
to the desired people. For this, the `new-invite-password` must be
|
||||||
|
given. The idea is that only the person who installs docspell knows
|
||||||
|
this. If it is not set, then invitation won't work. New invitation
|
||||||
|
keys can be generated from within the web application or via REST
|
||||||
|
calls (using `curl`, for example).
|
||||||
|
|
||||||
|
``` bash
|
||||||
|
curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
|
||||||
|
```
|
@ -14,7 +14,8 @@ template = "docs.html"
|
|||||||
- Handle multiple documents as one unit
|
- Handle multiple documents as one unit
|
||||||
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
|
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
|
||||||
- [Full-Text Search](@/docs/webapp/finding.md#full-text-search) based
|
- [Full-Text Search](@/docs/webapp/finding.md#full-text-search) based
|
||||||
on [Apache SOLR](https://solr.apache.org)
|
on [Apache SOLR](https://solr.apache.org) or [PostgreSQL's text
|
||||||
|
search](https://www.postgresql.org/docs/14/textsearch.html)
|
||||||
- Conversion to PDF: all files are converted into a PDF file. PDFs
|
- Conversion to PDF: all files are converted into a PDF file. PDFs
|
||||||
with only images (as often returned from scanners) are converted
|
with only images (as often returned from scanners) are converted
|
||||||
into searchable PDF/A pdfs.
|
into searchable PDF/A pdfs.
|
||||||
@ -36,7 +37,8 @@ template = "docs.html"
|
|||||||
[REST Api](@/docs/api/_index.md); allows to [generate
|
[REST Api](@/docs/api/_index.md); allows to [generate
|
||||||
clients](https://openapi-generator.tech/docs/generators) for many
|
clients](https://openapi-generator.tech/docs/generators) for many
|
||||||
languages
|
languages
|
||||||
- [OpenID Connect](@/docs/configure/_index.md#openid-connect-oauth2)
|
- [OpenID
|
||||||
|
Connect](@/docs/configure/authentication.md#openid-connect-oauth2)
|
||||||
support allows Docspell to integrate into your SSO setup, for
|
support allows Docspell to integrate into your SSO setup, for
|
||||||
example with keycloak.
|
example with keycloak.
|
||||||
- Two-Factor Authentication using [TOTP](@/docs/webapp/totp.md) built
|
- Two-Factor Authentication using [TOTP](@/docs/webapp/totp.md) built
|
||||||
|
@ -74,9 +74,10 @@ $ ./docspell-joex*/bin/docspell-joex
|
|||||||
```
|
```
|
||||||
|
|
||||||
This will start up both components using the default configuration.
|
This will start up both components using the default configuration.
|
||||||
Please refer to the [configuration page](@/docs/configure/_index.md)
|
Please refer to the [configuration
|
||||||
for how to create a custom config file. Once you have your config
|
page](@/docs/configure/main.md) for how to create a custom
|
||||||
file, simply pass it as argument to the command:
|
config file. Once you have your config file, simply pass it as
|
||||||
|
argument to the command:
|
||||||
|
|
||||||
```
|
```
|
||||||
$ ./docspell-restserver*/bin/docspell-restserver /path/to/server-config.conf
|
$ ./docspell-restserver*/bin/docspell-restserver /path/to/server-config.conf
|
||||||
@ -110,7 +111,7 @@ Fulltext search is powered by [SOLR](https://solr.apache.org). You
|
|||||||
need to install solr and create a core for docspell. Then change the
|
need to install solr and create a core for docspell. Then change the
|
||||||
solr url for both components (restserver and joex) accordingly. See
|
solr url for both components (restserver and joex) accordingly. See
|
||||||
the relevant section in the [config
|
the relevant section in the [config
|
||||||
page](@/docs/configure/_index.md#full-text-search-solr).
|
page](@/docs/configure/fulltext-search.md).
|
||||||
|
|
||||||
|
|
||||||
### Watching a directory
|
### Watching a directory
|
||||||
|
@ -102,7 +102,7 @@ When using H2, make sure that all components access the same database
|
|||||||
– the jdbc url must point to the same file. Then, it is important to
|
– the jdbc url must point to the same file. Then, it is important to
|
||||||
add the options
|
add the options
|
||||||
`;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE` at the end
|
`;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE` at the end
|
||||||
of the url. See the [config page](@/docs/configure/_index.md#jdbc) for
|
of the url. See the [config page](@/docs/configure/database.md) for
|
||||||
an example.
|
an example.
|
||||||
|
|
||||||
For large installations, PostgreSQL or MariaDB is recommended. Create
|
For large installations, PostgreSQL or MariaDB is recommended. Create
|
||||||
|
@ -30,10 +30,10 @@ result in long processing times for OCR and text analysis. The board
|
|||||||
should provide 4G of RAM (like the current RPi4), especially if also a
|
should provide 4G of RAM (like the current RPi4), especially if also a
|
||||||
database and solr are running next to it. The memory required by joex
|
database and solr are running next to it. The memory required by joex
|
||||||
depends on the config and document language. Please pick a value that
|
depends on the config and document language. Please pick a value that
|
||||||
suits your setup from [here](@/docs/configure/_index.md#memory-usage).
|
suits your setup from
|
||||||
For boards like the RPi, it might be necessary to use
|
[here](@/docs/configure/file-processing.md#memory-usage). For boards
|
||||||
`nlp.mode=basic`, rather than `nlp.mode=full`. You should also set the
|
like the RPi, it might be necessary to use `nlp.mode=basic`, rather
|
||||||
joex pool size to 1.
|
than `nlp.mode=full`. You should also set the joex pool size to 1.
|
||||||
|
|
||||||
An example: on this [UP
|
An example: on this [UP
|
||||||
board](https://up-board.org/up/specifications/) with an Intel Atom
|
board](https://up-board.org/up/specifications/) with an Intel Atom
|
||||||
|
@ -80,7 +80,7 @@ line are required. As you see for `wkhtmltopdf` the page size is fixed
|
|||||||
to DIN A4. Other commands are configured like this as well.
|
to DIN A4. Other commands are configured like this as well.
|
||||||
|
|
||||||
For the default values, please see the [configuration
|
For the default values, please see the [configuration
|
||||||
page](@/docs/configure/_index.md#joex).
|
page](@/docs/configure/main.md#joex).
|
||||||
|
|
||||||
## Duplicate Check
|
## Duplicate Check
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@ For larger installations, it is probably better to run several joex
|
|||||||
components on different machines. That works out of the box, as long
|
components on different machines. That works out of the box, as long
|
||||||
as all components point to the same database and use different
|
as all components point to the same database and use different
|
||||||
`app-id`s (see [configuring
|
`app-id`s (see [configuring
|
||||||
docspell](@/docs/configure/_index.md#app-id)).
|
docspell](@/docs/configure/main.md#app-id)).
|
||||||
|
|
||||||
When files are submitted to docspell, they are stored in the database
|
When files are submitted to docspell, they are stored in the database
|
||||||
and all known joex components are notified about new work. Then they
|
and all known joex components are notified about new work. Then they
|
||||||
|
@ -323,8 +323,8 @@ full detail.
|
|||||||
These are a set of commands that simply call a route at the server to
|
These are a set of commands that simply call a route at the server to
|
||||||
submit a maintenance task or to reset the password of some user. These
|
submit a maintenance task or to reset the password of some user. These
|
||||||
commands require the [admin
|
commands require the [admin
|
||||||
secret](@/docs/configure/_index.md#admin-endpoint) either in the
|
secret](@/docs/configure/admin-endpoint.md) either in the config file
|
||||||
config file or as an argument.
|
or as an argument.
|
||||||
|
|
||||||
### Reset user password
|
### Reset user password
|
||||||
|
|
||||||
|
@ -19,15 +19,15 @@ _UI Settings_. Among other things, there is a _Item Cards_ section:
|
|||||||
This defines how many of the item notes to display in the card. You
|
This defines how many of the item notes to display in the card. You
|
||||||
can set it to `0` to not show any notes at all. This is only a "soft
|
can set it to `0` to not show any notes at all. This is only a "soft
|
||||||
limit", there is also a "hard limit" in [docspell's
|
limit", there is also a "hard limit" in [docspell's
|
||||||
configuration](@/docs/configure/_index.md#rest-server) (see `max-note-length`),
|
configuration](@/docs/configure/main.md#rest-server) (see
|
||||||
that is an upper limit to this value.
|
`max-note-length`), that is an upper limit to this value.
|
||||||
|
|
||||||
### Size of item preview
|
### Size of item preview
|
||||||
|
|
||||||
The item preview is an image of the first page of the first
|
The item preview is an image of the first page of the first
|
||||||
attachment. You can change the order of attachments in the item detail
|
attachment. You can change the order of attachments in the item detail
|
||||||
view. This image has a predefined size, which is specified in [docspell's
|
view. This image has a predefined size, which is specified in [docspell's
|
||||||
configuration](@/docs/configure/_index.md#joex) (see
|
configuration](@/docs/configure/main.md#joex) (see
|
||||||
`extraction.preview.dpi`). The size for displaying it, can be
|
`extraction.preview.dpi`). The size for displaying it, can be
|
||||||
specified via this setting. A _small_ preview uses about 80px width, a
|
specified via this setting. A _small_ preview uses about 80px width, a
|
||||||
_medium_ one 160px and _large_ means to use the available space in the
|
_medium_ one 160px and _large_ means to use the available space in the
|
||||||
|
@ -191,9 +191,9 @@ file to look for duplicates, too.
|
|||||||
|
|
||||||
Docspell will go through all folders and download mails in “batches”.
|
Docspell will go through all folders and download mails in “batches”.
|
||||||
This size can be set by the admin in the [configuration
|
This size can be set by the admin in the [configuration
|
||||||
file](@/docs/configure/_index.md#joex) and applies to all these tasks
|
file](@/docs/configure/main.md#joex) and applies to all these
|
||||||
(same for all users). This batch only contains the mail headers and
|
tasks (same for all users). This batch only contains the mail headers
|
||||||
not the complete mail.
|
and not the complete mail.
|
||||||
|
|
||||||
Then each mail is downloaded completely one by one and converted into
|
Then each mail is downloaded completely one by one and converted into
|
||||||
an [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions) file
|
an [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions) file
|
||||||
|
@ -11,7 +11,8 @@ Docspell has built-in support for two-factor (2FA) authentication
|
|||||||
using
|
using
|
||||||
[TOTP](https://en.wikipedia.org/wiki/Time-based_One-Time_Password)s.
|
[TOTP](https://en.wikipedia.org/wiki/Time-based_One-Time_Password)s.
|
||||||
For anything more, consider a dedicated account management tool and
|
For anything more, consider a dedicated account management tool and
|
||||||
[OpenID Connect](@/docs/configure/_index.md#openid-connect-oauth2).
|
[OpenID
|
||||||
|
Connect](@/docs/configure/authentication.md#openid-connect-oauth2).
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
|
|
||||||
@ -65,7 +66,7 @@ client](@/docs/tools/cli.md) to execute an admin command that removes
|
|||||||
2FA for a given user.
|
2FA for a given user.
|
||||||
|
|
||||||
For this to work, you need to [enable the admin
|
For this to work, you need to [enable the admin
|
||||||
endpoint](@/docs/configure/_index.md#admin-endpoint). Then execute the
|
endpoint](@/docs/configure/admin-endpoint.md). Then execute the
|
||||||
`disable-2fa` admin command and specify the complete account.
|
`disable-2fa` admin command and specify the complete account.
|
||||||
|
|
||||||
```
|
```
|
||||||
|
Reference in New Issue
Block a user