mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-04 12:30:12 +00:00 
			
		
		
		
	Merge pull request #1462 from eikek/postgres-fulltext
Postgres fulltext
This commit is contained in:
		
							
								
								
									
										44
									
								
								build.sbt
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								build.sbt
									
									
									
									
									
								
							@@ -319,19 +319,6 @@ val common = project
 | 
			
		||||
  )
 | 
			
		||||
  .dependsOn(loggingApi)
 | 
			
		||||
 | 
			
		||||
val config = project
 | 
			
		||||
  .in(file("modules/config"))
 | 
			
		||||
  .disablePlugins(RevolverPlugin)
 | 
			
		||||
  .settings(sharedSettings)
 | 
			
		||||
  .withTestSettings
 | 
			
		||||
  .settings(
 | 
			
		||||
    name := "docspell-config",
 | 
			
		||||
    libraryDependencies ++=
 | 
			
		||||
      Dependencies.fs2 ++
 | 
			
		||||
        Dependencies.pureconfig
 | 
			
		||||
  )
 | 
			
		||||
  .dependsOn(common, loggingApi)
 | 
			
		||||
 | 
			
		||||
val loggingScribe = project
 | 
			
		||||
  .in(file("modules/logging/scribe"))
 | 
			
		||||
  .disablePlugins(RevolverPlugin)
 | 
			
		||||
@@ -619,6 +606,20 @@ val ftssolr = project
 | 
			
		||||
  )
 | 
			
		||||
  .dependsOn(common, ftsclient)
 | 
			
		||||
 | 
			
		||||
val ftspsql = project
 | 
			
		||||
  .in(file("modules/fts-psql"))
 | 
			
		||||
  .disablePlugins(RevolverPlugin)
 | 
			
		||||
  .settings(sharedSettings)
 | 
			
		||||
  .withTestSettings
 | 
			
		||||
  .settings(
 | 
			
		||||
    name := "docspell-fts-psql",
 | 
			
		||||
    libraryDependencies ++=
 | 
			
		||||
      Dependencies.doobie ++
 | 
			
		||||
        Dependencies.postgres ++
 | 
			
		||||
        Dependencies.flyway
 | 
			
		||||
  )
 | 
			
		||||
  .dependsOn(common, ftsclient, store % "compile->test;test->test")
 | 
			
		||||
 | 
			
		||||
val restapi = project
 | 
			
		||||
  .in(file("modules/restapi"))
 | 
			
		||||
  .disablePlugins(RevolverPlugin)
 | 
			
		||||
@@ -715,6 +716,20 @@ val webapp = project
 | 
			
		||||
  )
 | 
			
		||||
  .dependsOn(query.js)
 | 
			
		||||
 | 
			
		||||
// Config project shared among the two applications only
 | 
			
		||||
val config = project
 | 
			
		||||
  .in(file("modules/config"))
 | 
			
		||||
  .disablePlugins(RevolverPlugin)
 | 
			
		||||
  .settings(sharedSettings)
 | 
			
		||||
  .withTestSettings
 | 
			
		||||
  .settings(
 | 
			
		||||
    name := "docspell-config",
 | 
			
		||||
    libraryDependencies ++=
 | 
			
		||||
      Dependencies.fs2 ++
 | 
			
		||||
        Dependencies.pureconfig
 | 
			
		||||
  )
 | 
			
		||||
  .dependsOn(common, loggingApi, ftspsql, store)
 | 
			
		||||
 | 
			
		||||
// --- Application(s)
 | 
			
		||||
 | 
			
		||||
val joex = project
 | 
			
		||||
@@ -769,6 +784,7 @@ val joex = project
 | 
			
		||||
    joexapi,
 | 
			
		||||
    restapi,
 | 
			
		||||
    ftssolr,
 | 
			
		||||
    ftspsql,
 | 
			
		||||
    pubsubNaive,
 | 
			
		||||
    notificationImpl,
 | 
			
		||||
    schedulerImpl
 | 
			
		||||
@@ -841,6 +857,7 @@ val restserver = project
 | 
			
		||||
    backend,
 | 
			
		||||
    webapp,
 | 
			
		||||
    ftssolr,
 | 
			
		||||
    ftspsql,
 | 
			
		||||
    oidc,
 | 
			
		||||
    pubsubNaive,
 | 
			
		||||
    notificationImpl,
 | 
			
		||||
@@ -926,6 +943,7 @@ val root = project
 | 
			
		||||
    analysis,
 | 
			
		||||
    ftsclient,
 | 
			
		||||
    ftssolr,
 | 
			
		||||
    ftspsql,
 | 
			
		||||
    files,
 | 
			
		||||
    store,
 | 
			
		||||
    joexapi,
 | 
			
		||||
 
 | 
			
		||||
@@ -62,7 +62,14 @@ object CreateIndex {
 | 
			
		||||
        val items = store
 | 
			
		||||
          .transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
 | 
			
		||||
          .map(nn =>
 | 
			
		||||
            TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
 | 
			
		||||
            TextData.item(
 | 
			
		||||
              nn.id,
 | 
			
		||||
              nn.collective,
 | 
			
		||||
              nn.folder,
 | 
			
		||||
              Option(nn.name),
 | 
			
		||||
              nn.notes,
 | 
			
		||||
              nn.language
 | 
			
		||||
            )
 | 
			
		||||
          )
 | 
			
		||||
 | 
			
		||||
        fts.indexData(logger, attachs ++ items)
 | 
			
		||||
 
 | 
			
		||||
@@ -605,7 +605,14 @@ object OItem {
 | 
			
		||||
                .transact(RItem.updateNotes(item, collective, notes))
 | 
			
		||||
            )
 | 
			
		||||
            .flatTap(
 | 
			
		||||
              onSuccessIgnoreError(fts.updateItemNotes(logger, item, collective, notes))
 | 
			
		||||
              onSuccessIgnoreError {
 | 
			
		||||
                store
 | 
			
		||||
                  .transact(RCollective.findLanguage(collective))
 | 
			
		||||
                  .map(_.getOrElse(Language.English))
 | 
			
		||||
                  .flatMap(lang =>
 | 
			
		||||
                    fts.updateItemNotes(logger, item, collective, lang, notes)
 | 
			
		||||
                  )
 | 
			
		||||
              }
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        def setName(item: Ident, name: String, collective: Ident): F[UpdateResult] =
 | 
			
		||||
@@ -615,7 +622,14 @@ object OItem {
 | 
			
		||||
                .transact(RItem.updateName(item, collective, name))
 | 
			
		||||
            )
 | 
			
		||||
            .flatTap(
 | 
			
		||||
              onSuccessIgnoreError(fts.updateItemName(logger, item, collective, name))
 | 
			
		||||
              onSuccessIgnoreError {
 | 
			
		||||
                store
 | 
			
		||||
                  .transact(RCollective.findLanguage(collective))
 | 
			
		||||
                  .map(_.getOrElse(Language.English))
 | 
			
		||||
                  .flatMap(lang =>
 | 
			
		||||
                    fts.updateItemName(logger, item, collective, lang, name)
 | 
			
		||||
                  )
 | 
			
		||||
              }
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        def setNameMultiple(
 | 
			
		||||
@@ -733,10 +747,17 @@ object OItem {
 | 
			
		||||
            )
 | 
			
		||||
            .flatTap(
 | 
			
		||||
              onSuccessIgnoreError(
 | 
			
		||||
                OptionT(store.transact(RAttachment.findItemId(attachId)))
 | 
			
		||||
                  .semiflatMap(itemId =>
 | 
			
		||||
                    fts.updateAttachmentName(logger, itemId, attachId, collective, name)
 | 
			
		||||
                  )
 | 
			
		||||
                OptionT(store.transact(RAttachment.findItemAndLanguage(attachId)))
 | 
			
		||||
                  .semiflatMap { case (itemId, lang) =>
 | 
			
		||||
                    fts.updateAttachmentName(
 | 
			
		||||
                      logger,
 | 
			
		||||
                      itemId,
 | 
			
		||||
                      attachId,
 | 
			
		||||
                      collective,
 | 
			
		||||
                      lang.getOrElse(Language.English),
 | 
			
		||||
                      name
 | 
			
		||||
                    )
 | 
			
		||||
                  }
 | 
			
		||||
                  .fold(())(identity)
 | 
			
		||||
              )
 | 
			
		||||
            )
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@ case class Banner(
 | 
			
		||||
    configFile: Option[String],
 | 
			
		||||
    appId: Ident,
 | 
			
		||||
    baseUrl: LenientUri,
 | 
			
		||||
    ftsUrl: Option[LenientUri],
 | 
			
		||||
    ftsInfo: Option[String],
 | 
			
		||||
    fileStoreConfig: FileStoreConfig
 | 
			
		||||
) {
 | 
			
		||||
 | 
			
		||||
@@ -35,7 +35,7 @@ case class Banner(
 | 
			
		||||
      s"Id:       ${appId.id}",
 | 
			
		||||
      s"Base-Url: ${baseUrl.asString}",
 | 
			
		||||
      s"Database: ${jdbcUrl.asString}",
 | 
			
		||||
      s"Fts:      ${ftsUrl.map(_.asString).getOrElse("-")}",
 | 
			
		||||
      s"Fts:      ${ftsInfo.getOrElse("-")}",
 | 
			
		||||
      s"Config:   ${configFile.getOrElse("")}",
 | 
			
		||||
      s"FileRepo: ${fileStoreConfig}",
 | 
			
		||||
      ""
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										27
									
								
								modules/config/src/main/scala/docspell/config/FtsType.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								modules/config/src/main/scala/docspell/config/FtsType.scala
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.config
 | 
			
		||||
 | 
			
		||||
import cats.data.NonEmptyList
 | 
			
		||||
 | 
			
		||||
sealed trait FtsType {
 | 
			
		||||
  def name: String
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object FtsType {
 | 
			
		||||
  case object Solr extends FtsType { val name = "solr" }
 | 
			
		||||
  case object PostgreSQL extends FtsType { val name = "postgresql" }
 | 
			
		||||
 | 
			
		||||
  val all: NonEmptyList[FtsType] =
 | 
			
		||||
    NonEmptyList.of(Solr, PostgreSQL)
 | 
			
		||||
 | 
			
		||||
  def fromName(str: String): Either[String, FtsType] =
 | 
			
		||||
    all.find(_.name.equalsIgnoreCase(str)).toRight(s"Unknown fts type: $str")
 | 
			
		||||
 | 
			
		||||
  def unsafeFromName(str: String): FtsType =
 | 
			
		||||
    fromName(str).fold(sys.error, identity)
 | 
			
		||||
}
 | 
			
		||||
@@ -10,9 +10,11 @@ import java.nio.file.{Path => JPath}
 | 
			
		||||
 | 
			
		||||
import scala.reflect.ClassTag
 | 
			
		||||
 | 
			
		||||
import cats.syntax.all._
 | 
			
		||||
import fs2.io.file.Path
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.ftspsql.{PgQueryParser, RankNormalization}
 | 
			
		||||
import docspell.logging.{Level, LogConfig}
 | 
			
		||||
 | 
			
		||||
import com.github.eikek.calev.CalEvent
 | 
			
		||||
@@ -85,11 +87,28 @@ object Implicits {
 | 
			
		||||
  implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
 | 
			
		||||
    ConfigReader[String].emap(reason(FileStoreType.fromString))
 | 
			
		||||
 | 
			
		||||
  def reason[A: ClassTag](
 | 
			
		||||
      f: String => Either[String, A]
 | 
			
		||||
  ): String => Either[FailureReason, A] =
 | 
			
		||||
  implicit val pgQueryParserReader: ConfigReader[PgQueryParser] =
 | 
			
		||||
    ConfigReader[String].emap(reason(PgQueryParser.fromName))
 | 
			
		||||
 | 
			
		||||
  implicit val pgRankNormalizationReader: ConfigReader[RankNormalization] =
 | 
			
		||||
    ConfigReader[List[Int]].emap(
 | 
			
		||||
      reason(ints => ints.traverse(RankNormalization.byNumber).map(_.reduce(_ && _)))
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  implicit val languageReader: ConfigReader[Language] =
 | 
			
		||||
    ConfigReader[String].emap(reason(Language.fromString))
 | 
			
		||||
 | 
			
		||||
  implicit def languageMapReader[B: ConfigReader]: ConfigReader[Map[Language, B]] =
 | 
			
		||||
    pureconfig.configurable.genericMapReader[Language, B](reason(Language.fromString))
 | 
			
		||||
 | 
			
		||||
  implicit val ftsTypeReader: ConfigReader[FtsType] =
 | 
			
		||||
    ConfigReader[String].emap(reason(FtsType.fromName))
 | 
			
		||||
 | 
			
		||||
  def reason[T, A: ClassTag](
 | 
			
		||||
      f: T => Either[String, A]
 | 
			
		||||
  ): T => Either[FailureReason, A] =
 | 
			
		||||
    in =>
 | 
			
		||||
      f(in).left.map(str =>
 | 
			
		||||
        CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str)
 | 
			
		||||
        CannotConvert(in.toString, implicitly[ClassTag[A]].runtimeClass.toString, str)
 | 
			
		||||
      )
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,37 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.config
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.ftspsql._
 | 
			
		||||
import docspell.store.JdbcConfig
 | 
			
		||||
 | 
			
		||||
case class PgFtsConfig(
 | 
			
		||||
    useDefaultConnection: Boolean,
 | 
			
		||||
    jdbc: JdbcConfig,
 | 
			
		||||
    pgQueryParser: PgQueryParser,
 | 
			
		||||
    pgRankNormalization: RankNormalization,
 | 
			
		||||
    pgConfig: Map[Language, String]
 | 
			
		||||
) {
 | 
			
		||||
 | 
			
		||||
  def toPsqlConfig(stdConn: JdbcConfig): PsqlConfig = {
 | 
			
		||||
    val db =
 | 
			
		||||
      if (useDefaultConnection) stdConn
 | 
			
		||||
      else jdbc
 | 
			
		||||
 | 
			
		||||
    PsqlConfig(
 | 
			
		||||
      db.url,
 | 
			
		||||
      db.user,
 | 
			
		||||
      Password(db.password),
 | 
			
		||||
      pgConfig,
 | 
			
		||||
      pgQueryParser,
 | 
			
		||||
      pgRankNormalization
 | 
			
		||||
    )
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object PgFtsConfig {}
 | 
			
		||||
@@ -68,19 +68,24 @@ trait FtsClient[F[_]] {
 | 
			
		||||
      logger: Logger[F],
 | 
			
		||||
      itemId: Ident,
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      language: Language,
 | 
			
		||||
      name: String
 | 
			
		||||
  ): F[Unit] =
 | 
			
		||||
    updateIndex(logger, TextData.item(itemId, collective, None, Some(name), None))
 | 
			
		||||
    updateIndex(
 | 
			
		||||
      logger,
 | 
			
		||||
      TextData.item(itemId, collective, None, Some(name), None, language)
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  def updateItemNotes(
 | 
			
		||||
      logger: Logger[F],
 | 
			
		||||
      itemId: Ident,
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      language: Language,
 | 
			
		||||
      notes: Option[String]
 | 
			
		||||
  ): F[Unit] =
 | 
			
		||||
    updateIndex(
 | 
			
		||||
      logger,
 | 
			
		||||
      TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")))
 | 
			
		||||
      TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")), language)
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  def updateAttachmentName(
 | 
			
		||||
@@ -88,6 +93,7 @@ trait FtsClient[F[_]] {
 | 
			
		||||
      itemId: Ident,
 | 
			
		||||
      attachId: Ident,
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      language: Language,
 | 
			
		||||
      name: Option[String]
 | 
			
		||||
  ): F[Unit] =
 | 
			
		||||
    updateIndex(
 | 
			
		||||
@@ -97,7 +103,7 @@ trait FtsClient[F[_]] {
 | 
			
		||||
        attachId,
 | 
			
		||||
        collective,
 | 
			
		||||
        None,
 | 
			
		||||
        Language.English,
 | 
			
		||||
        language,
 | 
			
		||||
        Some(name.getOrElse("")),
 | 
			
		||||
        None
 | 
			
		||||
      )
 | 
			
		||||
 
 | 
			
		||||
@@ -18,6 +18,8 @@ sealed trait TextData {
 | 
			
		||||
 | 
			
		||||
  def folder: Option[Ident]
 | 
			
		||||
 | 
			
		||||
  def language: Language
 | 
			
		||||
 | 
			
		||||
  final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
 | 
			
		||||
    this match {
 | 
			
		||||
      case a: TextData.Attachment => f(a)
 | 
			
		||||
@@ -32,7 +34,7 @@ object TextData {
 | 
			
		||||
      attachId: Ident,
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      folder: Option[Ident],
 | 
			
		||||
      lang: Language,
 | 
			
		||||
      language: Language,
 | 
			
		||||
      name: Option[String],
 | 
			
		||||
      text: Option[String]
 | 
			
		||||
  ) extends TextData {
 | 
			
		||||
@@ -57,7 +59,8 @@ object TextData {
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      folder: Option[Ident],
 | 
			
		||||
      name: Option[String],
 | 
			
		||||
      notes: Option[String]
 | 
			
		||||
      notes: Option[String],
 | 
			
		||||
      language: Language
 | 
			
		||||
  ) extends TextData {
 | 
			
		||||
 | 
			
		||||
    val id = Ident.unsafe("item") / item
 | 
			
		||||
@@ -69,8 +72,9 @@ object TextData {
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      folder: Option[Ident],
 | 
			
		||||
      name: Option[String],
 | 
			
		||||
      notes: Option[String]
 | 
			
		||||
      notes: Option[String],
 | 
			
		||||
      lang: Language
 | 
			
		||||
  ): TextData =
 | 
			
		||||
    Item(item, collective, folder, name, notes)
 | 
			
		||||
    Item(item, collective, folder, name, notes, lang)
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,27 @@
 | 
			
		||||
create table "ftspsql_search"(
 | 
			
		||||
  "id" varchar(254) not null primary key,
 | 
			
		||||
  "item_id" varchar(254) not null,
 | 
			
		||||
  "collective" varchar(254) not null,
 | 
			
		||||
  "lang" varchar(254) not null,
 | 
			
		||||
  "attach_id" varchar(254),
 | 
			
		||||
  "folder_id" varchar(254),
 | 
			
		||||
  "updated_at" timestamptz not null default current_timestamp,
 | 
			
		||||
  --- content columns
 | 
			
		||||
  "attach_name" text,
 | 
			
		||||
  "attach_content" text,
 | 
			
		||||
  "item_name" text,
 | 
			
		||||
  "item_notes" text,
 | 
			
		||||
  --- index column
 | 
			
		||||
  "fts_config" regconfig not null,
 | 
			
		||||
  "text_index" tsvector
 | 
			
		||||
    generated always as (
 | 
			
		||||
     setweight(to_tsvector("fts_config", coalesce("attach_name", '')), 'B') ||
 | 
			
		||||
     setweight(to_tsvector("fts_config", coalesce("item_name", '')), 'B') ||
 | 
			
		||||
     setweight(to_tsvector("fts_config", coalesce("attach_content", '')), 'C') ||
 | 
			
		||||
     setweight(to_tsvector("fts_config", coalesce("item_notes", '')), 'C')) stored
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
create index "ftspsql_search_ftsidx" on "ftspsql_search" using GIN ("text_index");
 | 
			
		||||
create index "ftpsql_search_item_idx" on "ftspsql_search"("item_id");
 | 
			
		||||
create index "ftpsql_search_attach_idx" on "ftspsql_search"("attach_id");
 | 
			
		||||
create index "ftpsql_search_folder_idx" on "ftspsql_search"("folder_id");
 | 
			
		||||
@@ -0,0 +1,43 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
 | 
			
		||||
import org.flywaydb.core.Flyway
 | 
			
		||||
import org.flywaydb.core.api.output.MigrateResult
 | 
			
		||||
 | 
			
		||||
final class DbMigration[F[_]: Sync](cfg: PsqlConfig) {
 | 
			
		||||
  private[this] val logger = docspell.logging.getLogger[F]
 | 
			
		||||
  private val location: String = "classpath:db/psqlfts"
 | 
			
		||||
 | 
			
		||||
  def run: F[MigrateResult] =
 | 
			
		||||
    for {
 | 
			
		||||
      fw <- createFlyway
 | 
			
		||||
      _ <- logger.info(s"Running FTS migrations")
 | 
			
		||||
      result <- Sync[F].blocking(fw.migrate())
 | 
			
		||||
    } yield result
 | 
			
		||||
 | 
			
		||||
  def createFlyway: F[Flyway] =
 | 
			
		||||
    for {
 | 
			
		||||
      _ <- logger.info(s"Creating Flyway for: $location")
 | 
			
		||||
      fw = Flyway
 | 
			
		||||
        .configure()
 | 
			
		||||
        .table("flyway_fts_history")
 | 
			
		||||
        .cleanDisabled(true)
 | 
			
		||||
        .dataSource(cfg.url.asString, cfg.user, cfg.password.pass)
 | 
			
		||||
        .locations(location)
 | 
			
		||||
        .baselineOnMigrate(true)
 | 
			
		||||
        .load()
 | 
			
		||||
    } yield fw
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object DbMigration {
 | 
			
		||||
  def apply[F[_]: Sync](cfg: PsqlConfig): DbMigration[F] =
 | 
			
		||||
    new DbMigration[F](cfg)
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,33 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
 | 
			
		||||
import doobie._
 | 
			
		||||
import doobie.util.log.Success
 | 
			
		||||
 | 
			
		||||
trait DoobieMeta {
 | 
			
		||||
 | 
			
		||||
  implicit val sqlLogging: LogHandler = LogHandler {
 | 
			
		||||
    case e @ Success(_, _, _, _) =>
 | 
			
		||||
      DoobieMeta.logger.debug("SQL " + e)
 | 
			
		||||
    case e =>
 | 
			
		||||
      DoobieMeta.logger.error(s"SQL Failure: $e")
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  implicit val metaIdent: Meta[Ident] =
 | 
			
		||||
    Meta[String].timap(Ident.unsafe)(_.id)
 | 
			
		||||
 | 
			
		||||
  implicit val metaLanguage: Meta[Language] =
 | 
			
		||||
    Meta[String].timap(Language.unsafe)(_.iso3)
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object DoobieMeta {
 | 
			
		||||
  private val logger = org.log4s.getLogger
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,65 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.syntax.all._
 | 
			
		||||
 | 
			
		||||
import docspell.common.{Ident, Language}
 | 
			
		||||
import docspell.ftsclient.TextData
 | 
			
		||||
 | 
			
		||||
final case class FtsRecord(
 | 
			
		||||
    id: Ident,
 | 
			
		||||
    itemId: Ident,
 | 
			
		||||
    collective: Ident,
 | 
			
		||||
    language: Language,
 | 
			
		||||
    attachId: Option[Ident],
 | 
			
		||||
    folderId: Option[Ident],
 | 
			
		||||
    attachName: Option[String],
 | 
			
		||||
    attachContent: Option[String],
 | 
			
		||||
    itemName: Option[String],
 | 
			
		||||
    itemNotes: Option[String]
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
object FtsRecord {
 | 
			
		||||
  def fromTextData(td: TextData): FtsRecord =
 | 
			
		||||
    td match {
 | 
			
		||||
      case TextData.Attachment(
 | 
			
		||||
            item,
 | 
			
		||||
            attachId,
 | 
			
		||||
            collective,
 | 
			
		||||
            folder,
 | 
			
		||||
            language,
 | 
			
		||||
            name,
 | 
			
		||||
            text
 | 
			
		||||
          ) =>
 | 
			
		||||
        FtsRecord(
 | 
			
		||||
          td.id,
 | 
			
		||||
          item,
 | 
			
		||||
          collective,
 | 
			
		||||
          language,
 | 
			
		||||
          attachId.some,
 | 
			
		||||
          folder,
 | 
			
		||||
          name,
 | 
			
		||||
          text,
 | 
			
		||||
          None,
 | 
			
		||||
          None
 | 
			
		||||
        )
 | 
			
		||||
      case TextData.Item(item, collective, folder, name, notes, language) =>
 | 
			
		||||
        FtsRecord(
 | 
			
		||||
          td.id,
 | 
			
		||||
          item,
 | 
			
		||||
          collective,
 | 
			
		||||
          language,
 | 
			
		||||
          None,
 | 
			
		||||
          folder,
 | 
			
		||||
          None,
 | 
			
		||||
          None,
 | 
			
		||||
          name,
 | 
			
		||||
          notes
 | 
			
		||||
        )
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,197 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.data.NonEmptyList
 | 
			
		||||
import fs2.Chunk
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.ftsclient.FtsQuery
 | 
			
		||||
 | 
			
		||||
import doobie._
 | 
			
		||||
import doobie.implicits._
 | 
			
		||||
 | 
			
		||||
object FtsRepository extends DoobieMeta {
 | 
			
		||||
  val table = fr"ftspsql_search"
 | 
			
		||||
 | 
			
		||||
  def searchSummary(pq: PgQueryParser, rn: RankNormalization)(
 | 
			
		||||
      q: FtsQuery
 | 
			
		||||
  ): ConnectionIO[SearchSummary] = {
 | 
			
		||||
    val selectRank = mkSelectRank(rn)
 | 
			
		||||
    val query = mkQueryPart(pq, q)
 | 
			
		||||
 | 
			
		||||
    sql"""select count(id), coalesce(max($selectRank), 0)
 | 
			
		||||
         |from $table, $query
 | 
			
		||||
         |where ${mkCondition(q)} AND query @@ text_index 
 | 
			
		||||
         |""".stripMargin
 | 
			
		||||
      .query[SearchSummary]
 | 
			
		||||
      .unique
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  def search(pq: PgQueryParser, rn: RankNormalization)(
 | 
			
		||||
      q: FtsQuery,
 | 
			
		||||
      withHighlighting: Boolean
 | 
			
		||||
  ): ConnectionIO[Vector[SearchResult]] = {
 | 
			
		||||
    val selectRank = mkSelectRank(rn)
 | 
			
		||||
 | 
			
		||||
    val hlOption =
 | 
			
		||||
      s"startsel=${q.highlight.pre},stopsel=${q.highlight.post}"
 | 
			
		||||
 | 
			
		||||
    val selectHl =
 | 
			
		||||
      if (!withHighlighting) fr"null as highlight"
 | 
			
		||||
      else
 | 
			
		||||
        fr"""ts_headline(
 | 
			
		||||
            |    fts_config,
 | 
			
		||||
            |    coalesce(attach_name, '') ||
 | 
			
		||||
            |    ' ' || coalesce(attach_content, '') ||
 | 
			
		||||
            |    ' ' || coalesce(item_name, '') ||
 | 
			
		||||
            |    ' ' || coalesce(item_notes, ''), query, $hlOption) as highlight""".stripMargin
 | 
			
		||||
 | 
			
		||||
    val select =
 | 
			
		||||
      fr"id, item_id, collective, lang, attach_id, folder_id, attach_name, item_name, $selectRank as rank, $selectHl"
 | 
			
		||||
 | 
			
		||||
    val query = mkQueryPart(pq, q)
 | 
			
		||||
 | 
			
		||||
    sql"""select $select 
 | 
			
		||||
         |from $table, $query
 | 
			
		||||
         |where ${mkCondition(q)} AND query @@ text_index 
 | 
			
		||||
         |order by rank desc
 | 
			
		||||
         |limit ${q.limit}
 | 
			
		||||
         |offset ${q.offset}
 | 
			
		||||
         |""".stripMargin
 | 
			
		||||
      .query[SearchResult]
 | 
			
		||||
      .to[Vector]
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private def mkCondition(q: FtsQuery): Fragment = {
 | 
			
		||||
    val coll = fr"collective = ${q.collective}"
 | 
			
		||||
    val items =
 | 
			
		||||
      NonEmptyList.fromList(q.items.toList).map { nel =>
 | 
			
		||||
        val ids = nel.map(id => fr"$id").reduceLeft(_ ++ fr"," ++ _)
 | 
			
		||||
        fr"item_id in ($ids)"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    val folders =
 | 
			
		||||
      NonEmptyList.fromList(q.folders.toList).map { nel =>
 | 
			
		||||
        val ids = nel.map(id => fr"$id").reduceLeft(_ ++ fr"," ++ _)
 | 
			
		||||
        fr"folder_id in ($ids)"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    List(items, folders).flatten.foldLeft(coll)(_ ++ fr"AND" ++ _)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private def mkQueryPart(p: PgQueryParser, q: FtsQuery): Fragment = {
 | 
			
		||||
    val fname = Fragment.const(p.name)
 | 
			
		||||
    fr"$fname(fts_config, ${q.q}) query"
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private def mkSelectRank(rn: RankNormalization): Fragment = {
 | 
			
		||||
    val bits = rn.value.toNonEmptyList.map(n => sql"$n").reduceLeft(_ ++ sql"|" ++ _)
 | 
			
		||||
    fr"ts_rank_cd(text_index, query, $bits)"
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  def replaceChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
 | 
			
		||||
    r.traverse(replace(pgConfig)).map(_.foldLeft(0)(_ + _))
 | 
			
		||||
 | 
			
		||||
  def replace(
 | 
			
		||||
      pgConfig: Language => String
 | 
			
		||||
  )(r: FtsRecord): ConnectionIO[Int] =
 | 
			
		||||
    (fr"INSERT INTO $table (id,item_id,collective,lang,attach_id,folder_id,attach_name,attach_content,item_name,item_notes,fts_config) VALUES (" ++
 | 
			
		||||
      commas(
 | 
			
		||||
        sql"${r.id}",
 | 
			
		||||
        sql"${r.itemId}",
 | 
			
		||||
        sql"${r.collective}",
 | 
			
		||||
        sql"${r.language}",
 | 
			
		||||
        sql"${r.attachId}",
 | 
			
		||||
        sql"${r.folderId}",
 | 
			
		||||
        sql"${r.attachName}",
 | 
			
		||||
        sql"${r.attachContent}",
 | 
			
		||||
        sql"${r.itemName}",
 | 
			
		||||
        sql"${r.itemNotes}",
 | 
			
		||||
        sql"${pgConfig(r.language)}::regconfig"
 | 
			
		||||
      ) ++ sql") on conflict (id) do update set " ++ commas(
 | 
			
		||||
        sql"lang = ${r.language}",
 | 
			
		||||
        sql"folder_id = ${r.folderId}",
 | 
			
		||||
        sql"attach_name = ${r.attachName}",
 | 
			
		||||
        sql"attach_content = ${r.attachContent}",
 | 
			
		||||
        sql"item_name = ${r.itemName}",
 | 
			
		||||
        sql"item_notes = ${r.itemNotes}",
 | 
			
		||||
        sql"fts_config = ${pgConfig(r.language)}::regconfig"
 | 
			
		||||
      )).update.run
 | 
			
		||||
 | 
			
		||||
  def update(pgConfig: Language => String)(r: FtsRecord): ConnectionIO[Int] =
 | 
			
		||||
    (fr"UPDATE $table SET" ++ commas(
 | 
			
		||||
      sql"lang = ${r.language}",
 | 
			
		||||
      sql"folder_id = ${r.folderId}",
 | 
			
		||||
      sql"attach_name = ${r.attachName}",
 | 
			
		||||
      sql"attach_content = ${r.attachContent}",
 | 
			
		||||
      sql"item_name = ${r.itemName}",
 | 
			
		||||
      sql"item_notes = ${r.itemNotes}",
 | 
			
		||||
      sql"fts_config = ${pgConfig(r.language)}::regconfig"
 | 
			
		||||
    ) ++ fr"WHERE id = ${r.id}").update.run
 | 
			
		||||
 | 
			
		||||
  def updateChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
 | 
			
		||||
    r.traverse(update(pgConfig)).map(_.foldLeft(0)(_ + _))
 | 
			
		||||
 | 
			
		||||
  def updateFolder(
 | 
			
		||||
      itemId: Ident,
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      folder: Option[Ident]
 | 
			
		||||
  ): ConnectionIO[Int] =
 | 
			
		||||
    (sql"UPDATE $table" ++
 | 
			
		||||
      fr"SET folder_id = $folder" ++
 | 
			
		||||
      fr"WHERE item_id = $itemId AND collective = $collective").update.run
 | 
			
		||||
 | 
			
		||||
  def deleteByItemId(itemId: Ident): ConnectionIO[Int] =
 | 
			
		||||
    sql"DELETE FROM $table WHERE item_id = $itemId".update.run
 | 
			
		||||
 | 
			
		||||
  def deleteByAttachId(attachId: Ident): ConnectionIO[Int] =
 | 
			
		||||
    sql"DELETE FROM $table WHERE attach_id = $attachId".update.run
 | 
			
		||||
 | 
			
		||||
  def deleteAll: ConnectionIO[Int] =
 | 
			
		||||
    sql"DELETE FROM $table".update.run
 | 
			
		||||
 | 
			
		||||
  def delete(collective: Ident): ConnectionIO[Int] =
 | 
			
		||||
    sql"DELETE FROM $table WHERE collective = $collective".update.run
 | 
			
		||||
 | 
			
		||||
  def resetAll: ConnectionIO[Int] = {
 | 
			
		||||
    val dropFlyway = sql"DROP TABLE IF EXISTS flyway_fts_history".update.run
 | 
			
		||||
    val dropSearch = sql"DROP TABLE IF EXISTS $table".update.run
 | 
			
		||||
    for {
 | 
			
		||||
      a <- dropFlyway
 | 
			
		||||
      b <- dropSearch
 | 
			
		||||
    } yield a + b
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private def commas(fr: Fragment, frn: Fragment*): Fragment =
 | 
			
		||||
    frn.foldLeft(fr)(_ ++ fr"," ++ _)
 | 
			
		||||
 | 
			
		||||
  def getPgConfig(select: PartialFunction[Language, String])(language: Language): String =
 | 
			
		||||
    select.applyOrElse(language, defaultPgConfig)
 | 
			
		||||
 | 
			
		||||
  def defaultPgConfig(language: Language): String =
 | 
			
		||||
    language match {
 | 
			
		||||
      case Language.English    => "english"
 | 
			
		||||
      case Language.German     => "german"
 | 
			
		||||
      case Language.French     => "french"
 | 
			
		||||
      case Language.Italian    => "italian"
 | 
			
		||||
      case Language.Spanish    => "spanish"
 | 
			
		||||
      case Language.Hungarian  => "hungarian"
 | 
			
		||||
      case Language.Portuguese => "portuguese"
 | 
			
		||||
      case Language.Danish     => "danish"
 | 
			
		||||
      case Language.Finnish    => "finnish"
 | 
			
		||||
      case Language.Norwegian  => "norwegian"
 | 
			
		||||
      case Language.Swedish    => "swedish"
 | 
			
		||||
      case Language.Russian    => "russian"
 | 
			
		||||
      case Language.Romanian   => "romanian"
 | 
			
		||||
      case Language.Dutch      => "dutch"
 | 
			
		||||
      case Language.Czech      => "simple"
 | 
			
		||||
      case Language.Latvian    => "simple"
 | 
			
		||||
      case Language.Japanese   => "simple"
 | 
			
		||||
      case Language.Hebrew     => "simple"
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,38 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.data.NonEmptyList
 | 
			
		||||
 | 
			
		||||
sealed trait PgQueryParser {
 | 
			
		||||
  def name: String
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object PgQueryParser {
 | 
			
		||||
 | 
			
		||||
  case object ToTsQuery extends PgQueryParser {
 | 
			
		||||
    val name = "to_tsquery"
 | 
			
		||||
  }
 | 
			
		||||
  case object Plain extends PgQueryParser {
 | 
			
		||||
    val name = "plainto_tsquery"
 | 
			
		||||
  }
 | 
			
		||||
  case object Phrase extends PgQueryParser {
 | 
			
		||||
    val name = "phraseto_tsquery"
 | 
			
		||||
  }
 | 
			
		||||
  case object Websearch extends PgQueryParser {
 | 
			
		||||
    val name = "websearch_to_tsquery"
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  val all: NonEmptyList[PgQueryParser] =
 | 
			
		||||
    NonEmptyList.of(ToTsQuery, Plain, Phrase, Websearch)
 | 
			
		||||
 | 
			
		||||
  def fromName(name: String): Either[String, PgQueryParser] =
 | 
			
		||||
    all.find(_.name.equalsIgnoreCase(name)).toRight(s"Unknown pg query parser: $name")
 | 
			
		||||
 | 
			
		||||
  def unsafeFromName(name: String): PgQueryParser =
 | 
			
		||||
    fromName(name).fold(sys.error, identity)
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,31 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
 | 
			
		||||
final case class PsqlConfig(
 | 
			
		||||
    url: LenientUri,
 | 
			
		||||
    user: String,
 | 
			
		||||
    password: Password,
 | 
			
		||||
    pgConfigSelect: PartialFunction[Language, String],
 | 
			
		||||
    pgQueryParser: PgQueryParser,
 | 
			
		||||
    rankNormalization: RankNormalization
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
object PsqlConfig {
 | 
			
		||||
 | 
			
		||||
  def defaults(url: LenientUri, user: String, password: Password): PsqlConfig =
 | 
			
		||||
    PsqlConfig(
 | 
			
		||||
      url,
 | 
			
		||||
      user,
 | 
			
		||||
      password,
 | 
			
		||||
      PartialFunction.empty,
 | 
			
		||||
      PgQueryParser.Websearch,
 | 
			
		||||
      RankNormalization.Mhd && RankNormalization.Scale
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,155 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import scala.concurrent.ExecutionContext
 | 
			
		||||
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.implicits._
 | 
			
		||||
import fs2.Stream
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.ftsclient._
 | 
			
		||||
import docspell.logging.Logger
 | 
			
		||||
 | 
			
		||||
import com.zaxxer.hikari.HikariDataSource
 | 
			
		||||
import doobie._
 | 
			
		||||
import doobie.hikari.HikariTransactor
 | 
			
		||||
import doobie.implicits._
 | 
			
		||||
 | 
			
		||||
final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
 | 
			
		||||
    extends FtsClient[F] {
 | 
			
		||||
  val engine = Ident.unsafe("postgres")
 | 
			
		||||
 | 
			
		||||
  val config = cfg
 | 
			
		||||
  private[ftspsql] val transactor = xa
 | 
			
		||||
 | 
			
		||||
  private[this] val searchSummary =
 | 
			
		||||
    FtsRepository.searchSummary(cfg.pgQueryParser, cfg.rankNormalization) _
 | 
			
		||||
  private[this] val search =
 | 
			
		||||
    FtsRepository.search(cfg.pgQueryParser, cfg.rankNormalization) _
 | 
			
		||||
 | 
			
		||||
  private[this] val replaceChunk =
 | 
			
		||||
    FtsRepository.replaceChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
 | 
			
		||||
  private[this] val updateChunk =
 | 
			
		||||
    FtsRepository.updateChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
 | 
			
		||||
 | 
			
		||||
  def initialize: F[List[FtsMigration[F]]] =
 | 
			
		||||
    Sync[F].pure(
 | 
			
		||||
      List(
 | 
			
		||||
        FtsMigration(
 | 
			
		||||
          0,
 | 
			
		||||
          engine,
 | 
			
		||||
          "initialize",
 | 
			
		||||
          DbMigration[F](cfg).run.as(FtsMigration.Result.WorkDone)
 | 
			
		||||
        )
 | 
			
		||||
      )
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  def initializeNew: List[FtsMigration[F]] =
 | 
			
		||||
    List(
 | 
			
		||||
      FtsMigration(
 | 
			
		||||
        10,
 | 
			
		||||
        engine,
 | 
			
		||||
        "reset",
 | 
			
		||||
        FtsRepository.resetAll.transact(xa).as(FtsMigration.Result.workDone)
 | 
			
		||||
      ),
 | 
			
		||||
      FtsMigration(
 | 
			
		||||
        20,
 | 
			
		||||
        engine,
 | 
			
		||||
        "schema",
 | 
			
		||||
        DbMigration[F](cfg).run.as(FtsMigration.Result.workDone)
 | 
			
		||||
      ),
 | 
			
		||||
      FtsMigration(20, engine, "index all", FtsMigration.Result.indexAll.pure[F])
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  def search(q: FtsQuery): F[FtsResult] =
 | 
			
		||||
    for {
 | 
			
		||||
      startNanos <- Sync[F].delay(System.nanoTime())
 | 
			
		||||
      summary <- searchSummary(q).transact(xa)
 | 
			
		||||
      results <- search(q, true).transact(xa)
 | 
			
		||||
      endNanos <- Sync[F].delay(System.nanoTime())
 | 
			
		||||
      duration = Duration.nanos(endNanos - startNanos)
 | 
			
		||||
      res = SearchResult
 | 
			
		||||
        .toFtsResult(summary, results)
 | 
			
		||||
        .copy(qtime = duration)
 | 
			
		||||
    } yield res
 | 
			
		||||
 | 
			
		||||
  def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
 | 
			
		||||
    data
 | 
			
		||||
      .map(FtsRecord.fromTextData)
 | 
			
		||||
      .chunkN(50)
 | 
			
		||||
      .evalMap(chunk =>
 | 
			
		||||
        logger.debug(s"Add to fts index ${chunk.size} records") *>
 | 
			
		||||
          replaceChunk(chunk).transact(xa)
 | 
			
		||||
      )
 | 
			
		||||
      .compile
 | 
			
		||||
      .drain
 | 
			
		||||
 | 
			
		||||
  def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
 | 
			
		||||
    data
 | 
			
		||||
      .map(FtsRecord.fromTextData)
 | 
			
		||||
      .chunkN(50)
 | 
			
		||||
      .evalMap(chunk =>
 | 
			
		||||
        logger.debug(s"Update fts index with ${chunk.size} records") *>
 | 
			
		||||
          updateChunk(chunk).transact(xa)
 | 
			
		||||
      )
 | 
			
		||||
      .compile
 | 
			
		||||
      .drain
 | 
			
		||||
 | 
			
		||||
  def updateFolder(
 | 
			
		||||
      logger: Logger[F],
 | 
			
		||||
      itemId: Ident,
 | 
			
		||||
      collective: Ident,
 | 
			
		||||
      folder: Option[Ident]
 | 
			
		||||
  ): F[Unit] =
 | 
			
		||||
    logger.debug(s"Update folder '${folder
 | 
			
		||||
        .map(_.id)}' in fts for collective ${collective.id} and item ${itemId.id}") *>
 | 
			
		||||
      FtsRepository.updateFolder(itemId, collective, folder).transact(xa).void
 | 
			
		||||
 | 
			
		||||
  def removeItem(logger: Logger[F], itemId: Ident): F[Unit] =
 | 
			
		||||
    logger.debug(s"Removing item from fts index: ${itemId.id}") *>
 | 
			
		||||
      FtsRepository.deleteByItemId(itemId).transact(xa).void
 | 
			
		||||
 | 
			
		||||
  def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] =
 | 
			
		||||
    logger.debug(s"Removing attachment from fts index: ${attachId.id}") *>
 | 
			
		||||
      FtsRepository.deleteByAttachId(attachId).transact(xa).void
 | 
			
		||||
 | 
			
		||||
  def clearAll(logger: Logger[F]): F[Unit] =
 | 
			
		||||
    logger.info(s"Deleting complete FTS index") *>
 | 
			
		||||
      FtsRepository.deleteAll.transact(xa).void
 | 
			
		||||
 | 
			
		||||
  def clear(logger: Logger[F], collective: Ident): F[Unit] =
 | 
			
		||||
    logger.info(s"Deleting index for collective ${collective.id}") *>
 | 
			
		||||
      FtsRepository.delete(collective).transact(xa).void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object PsqlFtsClient {
 | 
			
		||||
  def apply[F[_]: Async](
 | 
			
		||||
      cfg: PsqlConfig,
 | 
			
		||||
      connectEC: ExecutionContext
 | 
			
		||||
  ): Resource[F, PsqlFtsClient[F]] = {
 | 
			
		||||
    val acquire = Sync[F].delay(new HikariDataSource())
 | 
			
		||||
    val free: HikariDataSource => F[Unit] = ds => Sync[F].delay(ds.close())
 | 
			
		||||
 | 
			
		||||
    for {
 | 
			
		||||
      ds <- Resource.make(acquire)(free)
 | 
			
		||||
      _ = Resource.pure {
 | 
			
		||||
        ds.setJdbcUrl(cfg.url.asString)
 | 
			
		||||
        ds.setUsername(cfg.user)
 | 
			
		||||
        ds.setPassword(cfg.password.pass)
 | 
			
		||||
        ds.setDriverClassName("org.postgresql.Driver")
 | 
			
		||||
      }
 | 
			
		||||
      xa = HikariTransactor[F](ds, connectEC)
 | 
			
		||||
 | 
			
		||||
      pc = new PsqlFtsClient[F](cfg, xa)
 | 
			
		||||
    } yield pc
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  def fromTransactor[F[_]: Async](cfg: PsqlConfig, xa: Transactor[F]): PsqlFtsClient[F] =
 | 
			
		||||
    new PsqlFtsClient[F](cfg, xa)
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,46 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.Order
 | 
			
		||||
import cats.data.NonEmptySet
 | 
			
		||||
 | 
			
		||||
sealed trait RankNormalization { self =>
 | 
			
		||||
  def value: NonEmptySet[Int]
 | 
			
		||||
 | 
			
		||||
  def &&(other: RankNormalization): RankNormalization =
 | 
			
		||||
    new RankNormalization { val value = self.value ++ other.value }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
object RankNormalization {
 | 
			
		||||
// see https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
 | 
			
		||||
 | 
			
		||||
  case object IgnoreDocLength extends RankNormalization { val value = NonEmptySet.one(0) }
 | 
			
		||||
  case object LogDocLength extends RankNormalization { val value = NonEmptySet.one(1) }
 | 
			
		||||
  case object DocLength extends RankNormalization { val value = NonEmptySet.one(2) }
 | 
			
		||||
  case object Mhd extends RankNormalization { val value = NonEmptySet.one(4) }
 | 
			
		||||
  case object UniqueWords extends RankNormalization { val value = NonEmptySet.one(8) }
 | 
			
		||||
  case object LogUniqueWords extends RankNormalization { val value = NonEmptySet.one(16) }
 | 
			
		||||
  case object Scale extends RankNormalization { val value = NonEmptySet.one(32) }
 | 
			
		||||
 | 
			
		||||
  def byNumber(n: Int): Either[String, RankNormalization] =
 | 
			
		||||
    all.find(_.value.contains(n)).toRight(s"Unknown rank normalization number: $n")
 | 
			
		||||
 | 
			
		||||
  implicit val order: Order[RankNormalization] =
 | 
			
		||||
    Order.by(_.value.reduce)
 | 
			
		||||
 | 
			
		||||
  val all: NonEmptySet[RankNormalization] =
 | 
			
		||||
    NonEmptySet.of(
 | 
			
		||||
      IgnoreDocLength,
 | 
			
		||||
      LogDocLength,
 | 
			
		||||
      DocLength,
 | 
			
		||||
      Mhd,
 | 
			
		||||
      UniqueWords,
 | 
			
		||||
      LogUniqueWords,
 | 
			
		||||
      Scale
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,53 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.ftsclient.FtsResult
 | 
			
		||||
import docspell.ftsclient.FtsResult.{ItemMatch, MatchData}
 | 
			
		||||
 | 
			
		||||
final case class SearchResult(
 | 
			
		||||
    id: Ident,
 | 
			
		||||
    itemId: Ident,
 | 
			
		||||
    collective: Ident,
 | 
			
		||||
    language: Language,
 | 
			
		||||
    attachId: Option[Ident],
 | 
			
		||||
    folderId: Option[Ident],
 | 
			
		||||
    attachName: Option[String],
 | 
			
		||||
    itemName: Option[String],
 | 
			
		||||
    rank: Double,
 | 
			
		||||
    highlight: Option[String]
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
object SearchResult {
 | 
			
		||||
 | 
			
		||||
  def toFtsResult(summary: SearchSummary, results: Vector[SearchResult]): FtsResult = {
 | 
			
		||||
    def mkEntry(r: SearchResult): (ItemMatch, (Ident, List[String])) = {
 | 
			
		||||
      def create(md: MatchData) = ItemMatch(r.id, r.itemId, r.collective, r.rank, md)
 | 
			
		||||
 | 
			
		||||
      val itemMatch =
 | 
			
		||||
        r.attachId match {
 | 
			
		||||
          case Some(aId) =>
 | 
			
		||||
            create(FtsResult.AttachmentData(aId, r.attachName.getOrElse("")))
 | 
			
		||||
          case None =>
 | 
			
		||||
            create(FtsResult.ItemData)
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
      (itemMatch, r.id -> r.highlight.toList)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    val (items, hl) = results.map(mkEntry).unzip
 | 
			
		||||
 | 
			
		||||
    FtsResult(
 | 
			
		||||
      Duration.zero,
 | 
			
		||||
      summary.count.toInt,
 | 
			
		||||
      summary.maxScore,
 | 
			
		||||
      hl.toMap,
 | 
			
		||||
      items.toList
 | 
			
		||||
    )
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,9 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
case class SearchSummary(count: Long, maxScore: Double)
 | 
			
		||||
@@ -0,0 +1,52 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.effect._
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.logging.TestLoggingConfig
 | 
			
		||||
import docspell.logging.{Level, LogConfig}
 | 
			
		||||
 | 
			
		||||
import com.dimafeng.testcontainers.PostgreSQLContainer
 | 
			
		||||
import com.dimafeng.testcontainers.munit.TestContainerForAll
 | 
			
		||||
import doobie.implicits._
 | 
			
		||||
import munit.CatsEffectSuite
 | 
			
		||||
import org.testcontainers.utility.DockerImageName
 | 
			
		||||
 | 
			
		||||
class MigrationTest
 | 
			
		||||
    extends CatsEffectSuite
 | 
			
		||||
    with PgFixtures
 | 
			
		||||
    with TestContainerForAll
 | 
			
		||||
    with TestLoggingConfig {
 | 
			
		||||
  override val containerDef: PostgreSQLContainer.Def =
 | 
			
		||||
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))
 | 
			
		||||
 | 
			
		||||
  override def docspellLogConfig: LogConfig =
 | 
			
		||||
    LogConfig(Level.Debug, LogConfig.Format.Fancy)
 | 
			
		||||
 | 
			
		||||
  override def rootMinimumLevel = Level.Warn
 | 
			
		||||
 | 
			
		||||
  test("create schema") {
 | 
			
		||||
    withContainers { cnt =>
 | 
			
		||||
      val jdbc =
 | 
			
		||||
        PsqlConfig.defaults(
 | 
			
		||||
          LenientUri.unsafe(cnt.jdbcUrl),
 | 
			
		||||
          cnt.username,
 | 
			
		||||
          Password(cnt.password)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
      for {
 | 
			
		||||
        _ <- DbMigration[IO](jdbc).run
 | 
			
		||||
        n <- runQuery(cnt)(
 | 
			
		||||
          sql"SELECT count(*) FROM ${FtsRepository.table}".query[Int].unique
 | 
			
		||||
        )
 | 
			
		||||
        _ = assertEquals(n, 0)
 | 
			
		||||
      } yield ()
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,77 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import javax.sql.DataSource
 | 
			
		||||
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.syntax.all._
 | 
			
		||||
 | 
			
		||||
import docspell.common._
 | 
			
		||||
import docspell.ftsclient.TextData
 | 
			
		||||
import docspell.store.{JdbcConfig, StoreFixture}
 | 
			
		||||
 | 
			
		||||
import com.dimafeng.testcontainers.PostgreSQLContainer
 | 
			
		||||
import doobie._
 | 
			
		||||
import doobie.implicits._
 | 
			
		||||
 | 
			
		||||
trait PgFixtures {
 | 
			
		||||
  def ident(n: String): Ident = Ident.unsafe(n)
 | 
			
		||||
 | 
			
		||||
  def psqlConfig(cnt: PostgreSQLContainer): PsqlConfig =
 | 
			
		||||
    PsqlConfig.defaults(
 | 
			
		||||
      LenientUri.unsafe(cnt.jdbcUrl),
 | 
			
		||||
      cnt.username,
 | 
			
		||||
      Password(cnt.password)
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  def jdbcConfig(cnt: PostgreSQLContainer): JdbcConfig =
 | 
			
		||||
    JdbcConfig(LenientUri.unsafe(cnt.jdbcUrl), cnt.username, cnt.password)
 | 
			
		||||
 | 
			
		||||
  def dataSource(cnt: PostgreSQLContainer): Resource[IO, DataSource] =
 | 
			
		||||
    StoreFixture.dataSource(jdbcConfig(cnt))
 | 
			
		||||
 | 
			
		||||
  def transactor(cnt: PostgreSQLContainer): Resource[IO, Transactor[IO]] =
 | 
			
		||||
    dataSource(cnt).flatMap(StoreFixture.makeXA)
 | 
			
		||||
 | 
			
		||||
  def psqlFtsClient(cnt: PostgreSQLContainer): Resource[IO, PsqlFtsClient[IO]] =
 | 
			
		||||
    transactor(cnt)
 | 
			
		||||
      .map(xa => PsqlFtsClient.fromTransactor(psqlConfig(cnt), xa))
 | 
			
		||||
      .evalTap(client => DbMigration[IO](client.config).run)
 | 
			
		||||
 | 
			
		||||
  def runQuery[A](cnt: PostgreSQLContainer)(q: ConnectionIO[A]): IO[A] =
 | 
			
		||||
    transactor(cnt).use(q.transact(_))
 | 
			
		||||
 | 
			
		||||
  implicit class QueryOps[A](self: ConnectionIO[A]) {
 | 
			
		||||
    def exec(implicit client: PsqlFtsClient[IO]): IO[A] =
 | 
			
		||||
      self.transact(client.transactor)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  val collective1 = ident("coll1")
 | 
			
		||||
  val collective2 = ident("coll2")
 | 
			
		||||
 | 
			
		||||
  val itemData: TextData.Item =
 | 
			
		||||
    TextData.Item(
 | 
			
		||||
      item = ident("item-id-1"),
 | 
			
		||||
      collective = collective1,
 | 
			
		||||
      folder = None,
 | 
			
		||||
      name = "mydoc.pdf".some,
 | 
			
		||||
      notes = Some("my notes are these"),
 | 
			
		||||
      language = Language.English
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
  val attachData: TextData.Attachment =
 | 
			
		||||
    TextData.Attachment(
 | 
			
		||||
      item = ident("item-id-1"),
 | 
			
		||||
      attachId = ident("attach-id-1"),
 | 
			
		||||
      collective = collective1,
 | 
			
		||||
      folder = None,
 | 
			
		||||
      language = Language.English,
 | 
			
		||||
      name = "mydoc.pdf".some,
 | 
			
		||||
      text = "lorem ipsum dolores est".some
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
@@ -0,0 +1,151 @@
 | 
			
		||||
/*
 | 
			
		||||
 * Copyright 2020 Eike K. & Contributors
 | 
			
		||||
 *
 | 
			
		||||
 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
package docspell.ftspsql
 | 
			
		||||
 | 
			
		||||
import cats.effect._
 | 
			
		||||
import cats.syntax.all._
 | 
			
		||||
 | 
			
		||||
import docspell.ftsclient.{FtsQuery, TextData}
import docspell.logging.{Level, LogConfig, TestLoggingConfig}

import com.dimafeng.testcontainers.PostgreSQLContainer
import com.dimafeng.testcontainers.munit.TestContainerForAll
import doobie.implicits._
import munit.CatsEffectSuite
import org.testcontainers.utility.DockerImageName

class PsqlFtsClientTest
    extends CatsEffectSuite
    with PgFixtures
    with TestContainerForAll
    with TestLoggingConfig {
  override val containerDef: PostgreSQLContainer.Def =
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

  val logger = docspell.logging.getLogger[IO]

  private val table = FtsRepository.table

  override def docspellLogConfig: LogConfig =
    LogConfig(Level.Debug, LogConfig.Format.Fancy)

  override def rootMinimumLevel = Level.Warn

  test("insert data into index") {
    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        def assertions(id: TextData.Item, ad: TextData.Attachment) =
          for {
            n <- sql"SELECT count(*) from $table".query[Int].unique.exec
            _ = assertEquals(n, 2)
            itemStored <-
              sql"select item_name, item_notes from $table WHERE id = ${id.id}"
                .query[(Option[String], Option[String])]
                .unique
                .exec
            _ = assertEquals(itemStored, (id.name, id.notes))
            attachStored <-
              sql"select attach_name, attach_content from $table where id = ${ad.id}"
                .query[(Option[String], Option[String])]
                .unique
                .exec
            _ = assertEquals(attachStored, (ad.name, ad.text))
          } yield ()

        for {
          _ <- client.indexData(logger, itemData, attachData)
          _ <- assertions(itemData, attachData)
          _ <- client.indexData(logger, itemData, attachData)
          _ <- assertions(itemData, attachData)

          _ <- client.indexData(
            logger,
            itemData.copy(notes = None),
            attachData.copy(name = "ha.pdf".some)
          )
          _ <- assertions(
            itemData.copy(notes = None),
            attachData.copy(name = "ha.pdf".some)
          )
        } yield ()
      }
    }
  }

  test("clear index") {
    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        for {
          _ <- client.indexData(logger, itemData, attachData)
          _ <- client.clearAll(logger)
          n <- sql"select count(*) from $table".query[Int].unique.exec
          _ = assertEquals(n, 0)
        } yield ()
      }
    }
  }

  test("clear index by collective") {
    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        for {
          _ <- client.indexData(
            logger,
            itemData,
            attachData,
            itemData.copy(collective = collective2, item = ident("item-id-2")),
            attachData.copy(collective = collective2, item = ident("item-id-2"))
          )
          n <- sql"select count(*) from $table".query[Int].unique.exec
          _ = assertEquals(n, 4)

          _ <- client.clear(logger, collective1)
          n <- sql"select count(*) from $table".query[Int].unique.exec
          _ = assertEquals(n, 2)
        } yield ()
      }
    }
  }

  test("search by query") {
    def query(s: String): FtsQuery =
      FtsQuery(
        q = s,
        collective = collective1,
        items = Set.empty,
        folders = Set.empty,
        limit = 10,
        offset = 0,
        highlight = FtsQuery.HighlightSetting.default
      )

    withContainers { cnt =>
      psqlFtsClient(cnt).use { implicit client =>
        for {
          _ <- client.indexData(
            logger,
            itemData,
            attachData,
            itemData.copy(collective = collective2, item = ident("item-id-2")),
            attachData.copy(collective = collective2, item = ident("item-id-2"))
          )

          res0 <- client.search(query("lorem uiaeduiae"))
          _ = assertEquals(res0.count, 0)

          res1 <- client.search(query("lorem"))
          _ = assertEquals(res1.count, 1)
          _ = assertEquals(res1.results.head.id, attachData.id)

          res2 <- client.search(query("note"))
          _ = assertEquals(res2.count, 1)
          _ = assertEquals(res2.results.head.id, itemData.id)
        } yield ()
      }
    }
  }
}
@@ -22,7 +22,7 @@ trait JsonCodec {
    new Encoder[TextData.Attachment] {
      final def apply(td: TextData.Attachment): Json = {
        val cnt =
          (Field.contentField(td.lang).name, Json.fromString(td.text.getOrElse("")))
          (Field.contentField(td.language).name, Json.fromString(td.text.getOrElse("")))

        Json.fromFields(
          cnt :: List(
@@ -165,7 +165,7 @@ trait JsonCodec {
        val setter = List(
          td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)),
          td.text.map(txt =>
            (Field.contentField(td.lang).name, Map("set" -> txt.asJson).asJson)
            (Field.contentField(td.language).name, Map("set" -> txt.asJson).asJson)
          )
        ).flatten
        Json.fromFields(
@@ -697,6 +697,9 @@ Docpell Update Check
    # Currently the SOLR search platform is supported.
    enabled = false

    # Which backend to use, either solr or postgresql
    backend = "solr"

    # Configuration for the SOLR backend.
    solr = {
      # The URL to solr
@@ -713,6 +716,43 @@ Docpell Update Check
      q-op = "OR"
    }

    # Configuration for PostgreSQL backend
    postgresql = {
      # Whether to use the default database, only works if it is
      # postgresql
      use-default-connection = false

      # The database connection.
      jdbc {
        url = "jdbc:postgresql://server:5432/db"
        user = "pguser"
        password = ""
      }

      # A mapping from a language to a postgres text search config. By
      # default a language is mapped to a predefined config.
      # PostgreSQL has predefined configs for some languages. This
      # setting allows to create a custom text search config and
      # define it here for some or all languages.
      #
      # Example:
      #  { german = "my-german" }
      #
      # See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
      pg-config = {
      }

      # Define which query parser to use.
      #
      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
      pg-query-parser = "websearch_to_tsquery"

      # Allows to define a normalization for the ranking.
      #
      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
      pg-rank-normalization = [ 4 ]
    }

    # Settings for running the index migration tasks
    migration = {
      # Chunk size to use when indexing data from the database. This
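The `pg-query-parser` and `pg-rank-normalization` settings above map directly onto PostgreSQL's text-search functions. As a rough illustration of what this means on the database side (this is not the actual fts-psql implementation; the table name `ftspsql_search` and column `fts_vector` are assumptions made only for this sketch):

```scala
import doobie._
import doobie.implicits._

// Minimal sketch: parse the user query with websearch_to_tsquery (the default
// pg-query-parser above) and rank matches with ts_rank_cd using normalization 4
// (divide the rank by the mean harmonic distance between extents), matching the
// default pg-rank-normalization = [ 4 ].
def searchSketch(q: String, limit: Int): Query0[(String, Float)] =
  sql"""SELECT id, ts_rank_cd(fts_vector, websearch_to_tsquery('english', $q), 4) AS rank
        FROM ftspsql_search
        WHERE fts_vector @@ websearch_to_tsquery('english', $q)
        ORDER BY rank DESC
        LIMIT $limit""".query[(String, Float)]
```

Choosing a different `pg-query-parser` (for example `plainto_tsquery`) would change only the parsing function used in such a query.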
@@ -13,6 +13,7 @@ import docspell.analysis.TextAnalysisConfig
import docspell.analysis.classifier.TextClassifierConfig
import docspell.backend.Config.Files
import docspell.common._
import docspell.config.{FtsType, PgFtsConfig}
import docspell.convert.ConvertConfig
import docspell.extract.ExtractConfig
import docspell.ftssolr.SolrConfig
@@ -65,9 +66,25 @@ object Config {

  case class FullTextSearch(
      enabled: Boolean,
      backend: FtsType,
      migration: FullTextSearch.Migration,
      solr: SolrConfig
  )
      solr: SolrConfig,
      postgresql: PgFtsConfig
  ) {

    def info: String =
      if (!enabled) "Disabled."
      else
        backend match {
          case FtsType.Solr =>
            s"Solr(${solr.url.asString})"
          case FtsType.PostgreSQL =>
            if (postgresql.useDefaultConnection)
              "PostgreSQL(default)"
            else
              s"PostgreSQL(${postgresql.jdbc.url.asString})"
        }
  }

  object FullTextSearch {

@@ -9,7 +9,7 @@ package docspell.joex
import cats.effect.Async

import docspell.config.Implicits._
import docspell.config.{ConfigFactory, Validation}
import docspell.config.{ConfigFactory, FtsType, Validation}
import docspell.scheduler.CountingScheme

import emil.MailAddress
@@ -53,6 +53,14 @@ object ConfigFile {
        cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
        "No subject given for enabled update check!"
      ),
      Validation(cfg => cfg.files.validate.map(_ => cfg))
      Validation(cfg => cfg.files.validate.map(_ => cfg)),
      Validation.failWhen(
        cfg =>
          cfg.fullTextSearch.enabled &&
            cfg.fullTextSearch.backend == FtsType.PostgreSQL &&
            cfg.fullTextSearch.postgresql.useDefaultConnection &&
            !cfg.jdbc.dbmsName.contains("postgresql"),
        s"PostgreSQL defined fulltext search backend with default-connection, which is not a PostgreSQL connection!"
      )
    )
}
@@ -102,7 +102,8 @@ object JoexAppImpl extends MailAddressCodec {
      termSignal: SignallingRef[F, Boolean],
      store: Store[F],
      httpClient: Client[F],
      pubSub: PubSub[F]
      pubSub: PubSub[F],
      pools: Pools
  ): Resource[F, JoexApp[F]] =
    for {
      joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
@@ -120,6 +121,7 @@ object JoexAppImpl extends MailAddressCodec {

      tasks <- JoexTasks.resource(
        cfg,
        pools,
        jobStoreModule,
        httpClient,
        pubSubT,

@@ -52,7 +52,7 @@ object JoexServer {
        httpClient
      )(Topics.all.map(_.topic))

      joexApp <- JoexAppImpl.create[F](cfg, signal, store, httpClient, pubSub)
      joexApp <- JoexAppImpl.create[F](cfg, signal, store, httpClient, pubSub, pools)

      httpApp = Router(
        "/internal" -> InternalHeader(settings.internalRouteKey) {

@@ -12,7 +12,9 @@ import docspell.analysis.TextAnalyser
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._
import docspell.common._
import docspell.config.FtsType
import docspell.ftsclient.FtsClient
import docspell.ftspsql.PsqlFtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.joex.analysis.RegexNerFile
import docspell.joex.emptytrash.EmptyTrashTask
@@ -211,6 +213,7 @@ object JoexTasks {

  def resource[F[_]: Async](
      cfg: Config,
      pools: Pools,
      jobStoreModule: JobStoreModuleBuilder.Module[F],
      httpClient: Client[F],
      pubSub: PubSubT[F],
@@ -221,7 +224,7 @@ object JoexTasks {
      joex <- OJoex(pubSub)
      store = jobStoreModule.store
      upload <- OUpload(store, jobStoreModule.jobs)
      fts <- createFtsClient(cfg)(httpClient)
      fts <- createFtsClient(cfg, pools, store, httpClient)
      createIndex <- CreateIndex.resource(fts, store)
      itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs)
      itemSearchOps <- OItemSearch(store)
@@ -249,8 +252,24 @@ object JoexTasks {
    )

  private def createFtsClient[F[_]: Async](
      cfg: Config
  )(client: Client[F]): Resource[F, FtsClient[F]] =
    if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
      cfg: Config,
      pools: Pools,
      store: Store[F],
      client: Client[F]
  ): Resource[F, FtsClient[F]] =
    if (cfg.fullTextSearch.enabled)
      cfg.fullTextSearch.backend match {
        case FtsType.Solr =>
          SolrFtsClient(cfg.fullTextSearch.solr, client)

        case FtsType.PostgreSQL =>
          val psqlCfg = cfg.fullTextSearch.postgresql.toPsqlConfig(cfg.jdbc)
          if (cfg.fullTextSearch.postgresql.useDefaultConnection)
            Resource.pure[F, FtsClient[F]](
              new PsqlFtsClient[F](psqlCfg, store.transactor)
            )
          else
            PsqlFtsClient(psqlCfg, pools.connectEC)
      }
    else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
}
@@ -31,7 +31,7 @@ object Main extends IOApp {
        Option(System.getProperty("config.file")),
        cfg.appId,
        cfg.baseUrl,
        Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
        Some(cfg.fullTextSearch.info).filter(_ => cfg.fullTextSearch.enabled),
        cfg.files.defaultStoreConfig
      )
      _ <- logger.info(s"\n${banner.render("***>")}")

@@ -47,9 +47,10 @@ object TextExtraction {
          ctx.args.meta.collective,
          ctx.args.meta.folderId,
          item.item.name.some,
          None
          None,
          ctx.args.meta.language
        )
        _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)).toSeq: _*)
        _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)): _*)
        dur <- start
        extractedTags = txt.flatMap(_.tags).distinct.toList
        _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}.")
@@ -289,6 +289,9 @@ docspell.server {
    # Currently the SOLR search platform is supported.
    enabled = false

    # Which backend to use, either solr or postgresql
    backend = "solr"

    # Configuration for the SOLR backend.
    solr = {
      # The URL to solr
@@ -304,6 +307,43 @@ docspell.server {
      # The default combiner for tokens. One of {AND, OR}.
      q-op = "OR"
    }

    # Configuration for PostgreSQL backend
    postgresql = {
      # Whether to use the default database, only works if it is
      # postgresql
      use-default-connection = false

      # The database connection.
      jdbc {
        url = "jdbc:postgresql://server:5432/db"
        user = "pguser"
        password = ""
      }

      # A mapping from a language to a postgres text search config. By
      # default a language is mapped to a predefined config.
      # PostgreSQL has predefined configs for some languages. This
      # setting allows to create a custom text search config and
      # define it here for some or all languages.
      #
      # Example:
      #  { german = "my-german" }
      #
      # See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
      pg-config = {
      }

      # Define which query parser to use.
      #
      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
      pg-query-parser = "websearch_to_tsquery"

      # Allows to define a normalization for the ranking.
      #
      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
      pg-rank-normalization = [ 4 ]
    }
  }

  # Configuration for the backend.
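The `pg-config` mapping above decides which PostgreSQL text search configuration is used per language when documents are indexed; that configuration name ends up as the first argument of `to_tsvector`. A minimal sketch under assumptions (hypothetical table and column names, and `my-german` being a custom config created beforehand with `CREATE TEXT SEARCH CONFIGURATION`), not the actual implementation:

```scala
import doobie._
import doobie.implicits._

// Index one document, using either a predefined config ("german") or a custom
// one ("my-german") as it would be selected via the pg-config mapping.
def indexSketch(cfgName: String, id: String, text: String): Update0 =
  sql"""INSERT INTO ftspsql_search (id, fts_vector)
        VALUES ($id, to_tsvector($cfgName::regconfig, $text))
        ON CONFLICT (id) DO UPDATE SET fts_vector = EXCLUDED.fts_vector""".update
```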
@@ -9,6 +9,7 @@ package docspell.restserver
import docspell.backend.auth.Login
import docspell.backend.{Config => BackendConfig}
import docspell.common._
import docspell.config.{FtsType, PgFtsConfig}
import docspell.ftssolr.SolrConfig
import docspell.logging.LogConfig
import docspell.oidc.ProviderConfig
@@ -92,7 +93,26 @@ object Config {
    }
  }

  case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
  case class FullTextSearch(
      enabled: Boolean,
      backend: FtsType,
      solr: SolrConfig,
      postgresql: PgFtsConfig
  ) {

    def info: String =
      if (!enabled) "Disabled."
      else
        backend match {
          case FtsType.Solr =>
            s"Solr(${solr.url.asString})"
          case FtsType.PostgreSQL =>
            if (postgresql.useDefaultConnection)
              "PostgreSQL(default)"
            else
              s"PostgreSQL(${postgresql.jdbc.url.asString})"
        }
  }

  object FullTextSearch {}

@@ -13,7 +13,7 @@ import cats.effect.Async

import docspell.backend.signup.{Config => SignupConfig}
import docspell.config.Implicits._
import docspell.config.{ConfigFactory, Validation}
import docspell.config.{ConfigFactory, FtsType, Validation}
import docspell.oidc.{ProviderConfig, SignatureAlgo}
import docspell.restserver.auth.OpenId

@@ -106,4 +106,15 @@ object ConfigFile {

  def filesValidate: Validation[Config] =
    Validation(cfg => cfg.backend.files.validate.map(_ => cfg))

  def postgresFtsValidate: Validation[Config] =
    Validation.failWhen(
      cfg =>
        cfg.fullTextSearch.enabled &&
          cfg.fullTextSearch.backend == FtsType.PostgreSQL &&
          cfg.fullTextSearch.postgresql.useDefaultConnection &&
          !cfg.backend.jdbc.dbmsName.contains("postgresql"),
      s"PostgreSQL defined fulltext search backend with default-connection, which is not a PostgreSQL connection!"
    )

}

@@ -28,7 +28,7 @@ object Main extends IOApp {
      Option(System.getProperty("config.file")),
      cfg.appId,
      cfg.baseUrl,
      Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
      Some(cfg.fullTextSearch.info).filter(_ => cfg.fullTextSearch.enabled),
      cfg.backend.files.defaultStoreConfig
    )
    _ <- logger.info(s"\n${banner.render("***>")}")
@@ -12,7 +12,10 @@ import fs2.concurrent.Topic

import docspell.backend.BackendApp
import docspell.backend.auth.{AuthToken, ShareToken}
import docspell.common.Pools
import docspell.config.FtsType
import docspell.ftsclient.FtsClient
import docspell.ftspsql.PsqlFtsClient
import docspell.ftssolr.SolrFtsClient
import docspell.notification.api.NotificationModule
import docspell.notification.impl.NotificationModuleImpl
@@ -155,6 +158,7 @@ object RestAppImpl {

  def create[F[_]: Async](
      cfg: Config,
      pools: Pools,
      store: Store[F],
      httpClient: Client[F],
      pubSub: PubSub[F],
@@ -163,7 +167,7 @@ object RestAppImpl {
    val logger = docspell.logging.getLogger[F](s"restserver-${cfg.appId.id}")

    for {
      ftsClient <- createFtsClient(cfg)(httpClient)
      ftsClient <- createFtsClient(cfg, pools, store, httpClient)
      pubSubT = PubSubT(pubSub, logger)
      javaEmil = JavaMailEmil(cfg.backend.mailSettings)
      notificationMod <- Resource.eval(
@@ -188,8 +192,25 @@ object RestAppImpl {
  }

  private def createFtsClient[F[_]: Async](
      cfg: Config
  )(client: Client[F]): Resource[F, FtsClient[F]] =
    if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
      cfg: Config,
      pools: Pools,
      store: Store[F],
      client: Client[F]
  ): Resource[F, FtsClient[F]] =
    if (cfg.fullTextSearch.enabled)
      cfg.fullTextSearch.backend match {
        case FtsType.Solr =>
          SolrFtsClient(cfg.fullTextSearch.solr, client)

        case FtsType.PostgreSQL =>
          val psqlCfg = cfg.fullTextSearch.postgresql.toPsqlConfig(cfg.backend.jdbc)
          if (cfg.fullTextSearch.postgresql.useDefaultConnection)
            Resource.pure[F, FtsClient[F]](
              new PsqlFtsClient[F](psqlCfg, store.transactor)
            )
          else
            PsqlFtsClient(psqlCfg, pools.connectEC)
      }
    else Resource.pure[F, FtsClient[F]](FtsClient.none[F])

}

@@ -88,7 +88,7 @@ object RestServer {
        store,
        httpClient
      )(Topics.all.map(_.topic))
      restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic)
      restApp <- RestAppImpl.create[F](cfg, pools, store, httpClient, pubSub, wsTopic)
    } yield (restApp, pubSub, setting)

  def createHttpApp[F[_]: Async](
@@ -34,6 +34,8 @@ trait Store[F[_]] {
  ): FileRepository[F]

  def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult]

  def transactor: Transactor[F]
}

object Store {

@@ -24,8 +24,9 @@ final class StoreImpl[F[_]: Async](
    val fileRepo: FileRepository[F],
    jdbc: JdbcConfig,
    ds: DataSource,
    xa: Transactor[F]
    val transactor: Transactor[F]
) extends Store[F] {
  private[this] val xa = transactor

  def createFileRepository(
      cfg: FileRepositoryConfig,

@@ -576,7 +576,8 @@ object QItem {
      collective: Ident,
      folder: Option[Ident],
      name: String,
      notes: Option[String]
      notes: Option[String],
      language: Language
  )
  def allNameAndNotes(
      coll: Option[Ident],
@@ -584,10 +585,11 @@ object QItem {
      chunkSize: Int
  ): Stream[ConnectionIO, NameAndNotes] = {
    val i = RItem.as("i")
    val c = RCollective.as("c")

    Select(
      select(i.id, i.cid, i.folder, i.name, i.notes),
      from(i)
      select(i.id, i.cid, i.folder, i.name, i.notes, c.language),
      from(i).innerJoin(c, c.id === i.cid)
    ).where(
      i.state.in(ItemState.validStates) &&?
        itemIds.map(ids => i.id.in(ids)) &&?
@@ -236,8 +236,18 @@ object RAttachment {
      n3 <- DML.delete(T, T.id === attachId)
    } yield n0 + n1 + n2 + n3

  def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] =
    Select(T.itemId.s, from(T), T.id === attachId).build.query[Ident].option
  def findItemAndLanguage(
      attachId: Ident
  ): ConnectionIO[Option[(Ident, Option[Language])]] = {
    val a = RAttachment.as("a")
    val m = RAttachmentMeta.as("m")

    Select(
      select(a.itemId, m.language),
      from(a).leftJoin(m, m.id === a.id),
      a.id === attachId
    ).build.query[(Ident, Option[Language])].option
  }

  def findAll(
      coll: Option[Ident],

@@ -23,7 +23,7 @@ class PostgresqlMigrateTest
    with TestContainerForAll
    with TestLoggingConfig {
  override val containerDef: PostgreSQLContainer.Def =
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:13"))
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

  test("postgres empty schema migration") {
    assume(Docker.existsUnsafe, "docker doesn't exist!")

@@ -4,6 +4,11 @@ let
  full-text-search = {
    enabled = true;
    solr.url = "http://localhost:${toString config.services.solr.port}/solr/docspell";
    postgresql = {
      pg-config = {
        "german" = "my-germam";
      };
    };
  };
in
{
@@ -213,6 +213,7 @@ Docpell Update Check
    };
    full-text-search = {
      enabled = false;
      backend = "solr";
      solr = {
        url = "http://localhost:8983/solr/docspell";
        commit-within = 1000;
@@ -220,6 +221,17 @@ Docpell Update Check
        def-type = "lucene";
        q-op = "OR";
      };
      postgresql = {
        use-default-connection = false;
        jdbc = {
          url = "jdbc:postgresql://server:5432/db";
          user = "pguser";
          password = "";
        };
        pg-config = {};
        pg-query-parser = "websearch_to_tsquery";
        pg-rank-normalization = [ 4 ];
      };
      migration = {
        index-all-chunk = 10;
      };
@@ -1371,6 +1383,12 @@ in {
                Currently the SOLR search platform is supported.
              '';
            };
            backend = mkOption {
              type = types.str;
              default = defaults.full-text-search.backend;
              description = "The backend to use, either solr or postgresql";
            };

            solr = mkOption {
              type = types.submodule({
                options = {
@@ -1408,6 +1426,61 @@ in {
              default = defaults.full-text-search.solr;
              description = "Configuration for the SOLR backend.";
            };

            postgresql = mkOption {
              type = types.submodule({
                options = {
                  use-default-connection = mkOption {
                    type = types.bool;
                    default = defaults.full-text-search.postgresql.use-default-connection;
                    description = "Whether to use the primary db connection.";
                  };
                  jdbc = mkOption {
                    type = types.submodule ({
                      options = {
                        url = mkOption {
                          type = types.str;
                          default = defaults.jdbc.url;
                          description = ''
                            The URL to the database.
                          '';
                        };
                        user = mkOption {
                          type = types.str;
                          default = defaults.jdbc.user;
                          description = "The user name to connect to the database.";
                        };
                        password = mkOption {
                          type = types.str;
                          default = defaults.jdbc.password;
                          description = "The password to connect to the database.";
                        };
                      };
                    });
                    default = defaults.full-text-search.postgresql.jdbc;
                    description = "Database connection settings";
                  };
                  pg-config = mkOption {
                    type = types.attrs;
                    default = defaults.full-text-search.postgresql.pg-config;
                    description = "";
                  };
                  pg-query-parser = mkOption {
                    type = types.str;
                    default = defaults.full-text-search.postgresql.pg-query-parser;
                    description = "";
                  };
                  pg-rank-normalization = mkOption {
                    type = types.listOf types.int;
                    default = defaults.full-text-search.postgresql.pg-rank-normalization;
                    description = "";
                  };
                };
              });
              default = defaults.full-text-search.postgresql;
              description = "PostgreSQL for fulltext search";
            };

            migration = mkOption {
              type = types.submodule({
                options = {

@@ -62,6 +62,17 @@ let
        def-type = "lucene";
        q-op = "OR";
      };
      postgresql = {
        use-default-connection = false;
        jdbc = {
          url = "jdbc:postgresql://server:5432/db";
          user = "pguser";
          password = "";
        };
        pg-config = {};
        pg-query-parser = "websearch_to_tsquery";
        pg-rank-normalization = [ 4 ];
      };
    };
    auth = {
      server-secret = "hex:caffee";
@@ -575,6 +586,60 @@ in {
              default = defaults.full-text-search.solr;
              description = "Configuration for the SOLR backend.";
            };

            postgresql = mkOption {
              type = types.submodule({
                options = {
                  use-default-connection = mkOption {
                    type = types.bool;
                    default = defaults.full-text-search.postgresql.use-default-connection;
                    description = "Whether to use the primary db connection.";
                  };
                  jdbc = mkOption {
                    type = types.submodule ({
                      options = {
                        url = mkOption {
                          type = types.str;
                          default = defaults.jdbc.url;
                          description = ''
                            The URL to the database.
                          '';
                        };
                        user = mkOption {
                          type = types.str;
                          default = defaults.jdbc.user;
                          description = "The user name to connect to the database.";
                        };
                        password = mkOption {
                          type = types.str;
                          default = defaults.jdbc.password;
                          description = "The password to connect to the database.";
                        };
                      };
                    });
                    default = defaults.full-text-search.postgresql.jdbc;
                    description = "Database connection settings";
                  };
                  pg-config = mkOption {
                    type = types.attrs;
                    default = defaults.full-text-search.postgresql.pg-config;
                    description = "";
                  };
                  pg-query-parser = mkOption {
                    type = types.str;
                    default = defaults.full-text-search.postgresql.pg-query-parser;
                    description = "";
                  };
                  pg-rank-normalization = mkOption {
                    type = types.listOf types.int;
                    default = defaults.full-text-search.postgresql.pg-rank-normalization;
                    description = "";
                  };
                };
              });
              default = defaults.full-text-search.postgresql;
              description = "PostgreSQL for fulltext search";
            };
          };
        });
        default = defaults.full-text-search;
@@ -59,7 +59,7 @@ via the header `Docspell-Share-Auth`.

Docspell can be configured to be a relying party for OpenID Connect.
Please see [the config
section](@/docs/configure/_index.md#openid-connect-oauth2) for
section](@/docs/configure/authentication.md#openid-connect-oauth2) for
details.


@@ -80,7 +80,7 @@ $ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/a
```

To enable these endpoints, you must provide a secret in the
[configuration](@/docs/configure/_index.md#admin-endpoint).
[configuration](@/docs/configure/admin-endpoint.md).

## Live Api


@@ -163,7 +163,7 @@ on the same host or network).

The endpoint is disabled by default, an admin must change the
`docspell.server.integration-endpoint.enabled` flag to `true` in the
[configuration file](@/docs/configure/_index.md#rest-server).
[configuration file](@/docs/configure/main.md#rest-server).

If queried by a `GET` request, it returns whether it is enabled and
the collective exists.
@@ -3,803 +3,9 @@ title = "Configuration"
insert_anchor_links = "right"
description = "Describes the configuration file and shows all default settings."
weight = 40
template = "docs.html"
template = "pages.html"
sort_by = "weight"
redirect_to = "docs/configure/main"
+++

# Configuration

Docspell's executables (restserver and joex) can take one argument – a configuration file. If that is not given, the defaults are used, overridden by environment variables. A config file overrides default values, so only values that differ from the defaults are necessary. The complete default options and their documentation are at the end of this page.

Besides the config file, another way is to provide individual settings via key-value pairs to the executable with the `-D` option. For example, to override only `base-url` you could add the argument `-Ddocspell.server.base-url=…` to the command. Multiple options are possible. For more than a few values this is very tedious, so the recommended way is to maintain a config file. If these options *and* a file are provided, then any setting given via the `-D…` option overrides the same setting from the config file.

Finally, it is possible to configure docspell via environment variables if there is no config file supplied (if a config file *is* supplied, it is always preferred). Note that this approach is limited, as arrays are not supported. A list of environment variables can be found at the [end of this page](#environment-variables). The environment variable name follows the corresponding config key - where dots are replaced by underscores and dashes are replaced by two underscores. For example, the config key `docspell.server.app-name` can be defined as the env variable `DOCSPELL_SERVER_APP__NAME`.

It is also possible to specify environment variables inside a config file (to get a mix of both) - please see the [documentation of the config library](https://github.com/lightbend/config#standard-behavior) for more on this.

# File Format

The format of the configuration files can be [HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation), JSON or whatever this [config library](https://github.com/lightbend/config) understands. The default values below are in HOCON format, which is recommended, since it allows comments and has some [advanced features](https://github.com/lightbend/config#features-of-hocon). Please also see their documentation for more details.

A short description (please check the links for better understanding): the config consists of key-value pairs and can be written in a JSON-like format (called HOCON). Keys are organized in trees, and a key defines a full path into the tree. There are two ways:

```
a.b.c.d=15
```

or

```
a {
  b {
    c {
      d = 15
    }
  }
}
```

Both forms are equivalent and can be used at the same time. Usually the braces form is used to group related settings for better readability.

Strings that contain "not-so-common" characters should be enclosed in quotes. It is possible to define values at the top of the file and reuse them in different locations via the `${full.path.to.key}` syntax. When using these variables, they *must not* be enclosed in quotes.


# Important Config Options

The configuration of both components uses separate namespaces. The configuration for the REST server is below `docspell.server`, while the one for joex is below `docspell.joex`.

You can therefore use two separate config files or one single file containing both namespaces.

## JDBC

This configures the connection to the database. It has to be specified for both the rest server and joex. By default, an H2 database in the `/tmp` directory is configured.

The config looks like this (both components):

``` bash
docspell.joex.jdbc {
  url = ...
  user = ...
  password = ...
}

docspell.server.backend.jdbc {
  url = ...
  user = ...
  password = ...
}
```

The `url` is the connection to the database. It must start with `jdbc`, followed by the name of the database. The rest is specific to the database used: it is either a path to a file for H2 or a host/database url for MariaDB and PostgreSQL.

When using H2, the user and password can be chosen freely on first start, but must stay the same on subsequent starts. Usually, the user is `sa` and the password is left empty. Additionally, the url must include these options:

```
;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
```

### Examples

PostgreSQL:
```
url = "jdbc:postgresql://localhost:5432/docspelldb"
```

MariaDB:
```
url = "jdbc:mariadb://localhost:3306/docspelldb"
```

H2:
```
url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
```

## Admin Endpoint

The admin endpoint defines some [routes](@/docs/api/intro.md#admin) for administration tasks. This is disabled by default and can be enabled by providing a secret:

``` bash
...
  admin-endpoint {
    secret = "123"
  }
```

This secret must be provided with all requests to a `/api/v1/admin/` endpoint.

The most convenient way to execute admin tasks is to use the [cli](@/docs/tools/cli.md). You get a list of possible admin commands via `dsc admin help`.

To see the output of the commands, there are these ways:

1. Looking at the joex logs, which gives most details.
2. Using the job-queue page when logged in as `docspell-system`.
3. Setting up a [webhook](@/docs/webapp/notification.md) to be notified when a job finishes. This way you get a small message.

All admin tasks (and also some other system tasks) are run under the account `docspell-system` (collective and user). You need to create this account and set up the notification hooks there - not in your normal account.

## Full-Text Search: SOLR
 | 
			
		||||
 | 
			
		||||
[Apache SOLR](https://solr.apache.org) is used to provide the
 | 
			
		||||
full-text search. Both docspell components must provide the same
 | 
			
		||||
connection setup. This is defined in the `full-text-search.solr`
 | 
			
		||||
subsection:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
...
 | 
			
		||||
  full-text-search {
 | 
			
		||||
    enabled = true
 | 
			
		||||
    ...
 | 
			
		||||
    solr = {
 | 
			
		||||
      url = "http://localhost:8983/solr/docspell"
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The default configuration at the end of this page contains more
 | 
			
		||||
information about each setting.
 | 
			
		||||
 | 
			
		||||
The `solr.url` is the mandatory setting that you need to change to
 | 
			
		||||
point to your SOLR instance. Then you need to set the `enabled` flag
 | 
			
		||||
to `true`.
 | 
			
		||||
 | 
			
		||||
When installing docspell manually, just install solr and create a core
 | 
			
		||||
as described in the [solr
 | 
			
		||||
documentation](https://solr.apache.org/guide/8_4/installing-solr.html).
 | 
			
		||||
That will provide you with the connection url (the last part is the
 | 
			
		||||
core name). If Docspell detects an empty core it will run a schema
 | 
			
		||||
setup on start automatically.
 | 
			
		||||
 | 
			
		||||
The `full-text-search.solr` options are the same for joex and the
 | 
			
		||||
restserver.
 | 
			
		||||
 | 
			
		||||
There is an [admin route](@/docs/api/intro.md#admin) that allows to
 | 
			
		||||
re-create the entire index (for all collectives). This is possible via
 | 
			
		||||
a call:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
$ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
or use the [cli](@/docs/tools/cli.md):
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
dsc admin -a test123 recreate-index
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Here the `test123` is the key defined with `admin-endpoint.secret`. If
 | 
			
		||||
it is empty (the default), this call is disabled (all admin routes).
 | 
			
		||||
Otherwise, the POST request will submit a system task that is executed
 | 
			
		||||
by a joex instance eventually.
 | 
			
		||||
 | 
			
		||||
Using this endpoint, the entire index (including the schema) will be
 | 
			
		||||
re-created. This is sometimes necessary, for example if you upgrade
 | 
			
		||||
SOLR or delete the core to provide a new one (see
 | 
			
		||||
[here](https://solr.apache.org/guide/8_4/reindexing.html) for
 | 
			
		||||
details). Another way is to restart docspell (while clearing the
 | 
			
		||||
index). If docspell detects an empty index at startup, it will submit
 | 
			
		||||
a task to build the index automatically.
 | 
			
		||||
 | 
			
		||||
Note that a collective can also re-index their data using a similiar
 | 
			
		||||
endpoint; but this is only deleting their data and doesn't do a full
 | 
			
		||||
re-index.
 | 
			
		||||
 | 
			
		||||
The solr index doesn't contain any new information, it can be
 | 
			
		||||
regenerated any time using the above REST call. Thus it doesn't need
 | 
			
		||||
to be backed up.
 | 
			
		||||
 | 
			
		||||
## Bind
 | 
			
		||||
 | 
			
		||||
The host and port the http server binds to. This applies to both
 | 
			
		||||
components. The joex component also exposes a small REST api to
 | 
			
		||||
inspect its state and notify the scheduler.
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.server.bind {
 | 
			
		||||
  address = localhost
 | 
			
		||||
  port = 7880
 | 
			
		||||
}
 | 
			
		||||
docspell.joex.bind {
 | 
			
		||||
  address = localhost
 | 
			
		||||
  port = 7878
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
By default, it binds to `localhost` and some predefined port. This
 | 
			
		||||
must be changed, if components are on different machines.
 | 
			
		||||
 | 
			
		||||
## Baseurl
 | 
			
		||||
 | 
			
		||||
The base url is an important setting that defines the http URL where
 | 
			
		||||
the corresponding component can be reached. It applies to both
 | 
			
		||||
components. For a joex component, the url must be resolvable from a
 | 
			
		||||
REST server component. The REST server also uses this url to create
 | 
			
		||||
absolute urls and to configure the authenication cookie.
 | 
			
		||||
 | 
			
		||||
By default it is build using the information from the `bind` setting,
 | 
			
		||||
which is `http://localhost:7880`.
 | 
			
		||||
 | 
			
		||||
If the default is not changed, docspell will use the request to
 | 
			
		||||
determine the base-url. It first inspects the `X-Forwarded-For` header
 | 
			
		||||
that is often used with reverse proxies. If that is not present, the
 | 
			
		||||
`Host` header of the request is used. However, if the `base-url`
 | 
			
		||||
setting is changed, then only this setting is used.
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
docspell.server.base-url = ...
 | 
			
		||||
docspell.joex.base-url = ...
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
If you are unsure, leave it at its default.
 | 
			
		||||
 | 
			
		||||
### Examples
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
docspell.server.baseurl = "https://docspell.example.com"
 | 
			
		||||
docspell.joex.baseurl = "http://192.168.101.10"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## App-id
 | 
			
		||||
 | 
			
		||||
The `app-id` is the identifier of the corresponding instance. It *must
 | 
			
		||||
be unique* for all instances. By default the REST server uses `rest1`
 | 
			
		||||
and joex `joex1`. It is recommended to overwrite this setting to have
 | 
			
		||||
an explicit and stable identifier should multiple instances are
 | 
			
		||||
intended.
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.server.app-id = "rest1"
 | 
			
		||||
docspell.joex.app-id = "joex1"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Registration Options
 | 
			
		||||
 | 
			
		||||
This defines if and how new users can create accounts. There are 3
 | 
			
		||||
options:
 | 
			
		||||
 | 
			
		||||
- *closed* no new user can sign up
 | 
			
		||||
- *open* new users can sign up
 | 
			
		||||
- *invite* new users can sign up but require an invitation key
 | 
			
		||||
 | 
			
		||||
This applies only to the REST sevrer component.
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.server.backend.signup {
 | 
			
		||||
  mode = "open"
 | 
			
		||||
 | 
			
		||||
  # If mode == 'invite', a password must be provided to generate
 | 
			
		||||
  # invitation keys. It must not be empty.
 | 
			
		||||
  new-invite-password = ""
 | 
			
		||||
 | 
			
		||||
  # If mode == 'invite', this is the period an invitation token is
 | 
			
		||||
  # considered valid.
 | 
			
		||||
  invite-time = "3 days"
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The mode `invite` is intended to open the application only to some
 | 
			
		||||
users. The admin can create these invitation keys and distribute them
 | 
			
		||||
to the desired people. For this, the `new-invite-password` must be
 | 
			
		||||
given. The idea is that only the person who installs docspell knows
 | 
			
		||||
this. If it is not set, then invitation won't work. New invitation
 | 
			
		||||
keys can be generated from within the web application or via REST
 | 
			
		||||
calls (using `curl`, for example).
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Authentication
 | 
			
		||||
 | 
			
		||||
Authentication works in two ways:
 | 
			
		||||
 | 
			
		||||
- with an account-name / password pair
 | 
			
		||||
- with an authentication token
 | 
			
		||||
 | 
			
		||||
The initial authentication must occur with an accountname/password
 | 
			
		||||
pair. This will generate an authentication token which is valid for a
 | 
			
		||||
some time. Subsequent calls to secured routes can use this token. The
 | 
			
		||||
token can be given as a normal http header or via a cookie header.
 | 
			
		||||
 | 
			
		||||
These settings apply only to the REST server.
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.server.auth {
 | 
			
		||||
  server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
 | 
			
		||||
  session-valid = "5 minutes"
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `server-secret` is used to sign the token. If multiple REST
 | 
			
		||||
servers are deployed, all must share the same server secret. Otherwise
 | 
			
		||||
tokens from one instance are not valid on another instance. The secret
 | 
			
		||||
can be given as Base64 encoded string or in hex form. Use the prefix
 | 
			
		||||
`hex:` and `b64:`, respectively. If no prefix is given, the UTF8 bytes
 | 
			
		||||
of the string are used.
 | 
			
		||||
 | 
			
		||||
The `session-valid` determines how long a token is valid. This can be
 | 
			
		||||
just some minutes, the web application obtains new ones
 | 
			
		||||
periodically. So a rather short time is recommended.
 | 
			
		||||
 | 
			
		||||
## OpenID Connect / OAuth2
 | 
			
		||||
 | 
			
		||||
You can integrate Docspell into your SSO solution via [OpenID
 | 
			
		||||
Connect](https://openid.net/connect/) (OIDC). This requires to set up
 | 
			
		||||
an OpenID Provider (OP) somewhere and to configure Docspell
 | 
			
		||||
accordingly to act as the relying party.
 | 
			
		||||
 | 
			
		||||
You can define multiple OPs to use. For some examples, please see the
 | 
			
		||||
default configuration file [below](#rest-server).
 | 
			
		||||
 | 
			
		||||
The configuration of a provider highly depends on how it is set up.
 | 
			
		||||
Here is an example for a setup using
 | 
			
		||||
[keycloak](https://www.keycloak.org):
 | 
			
		||||
 | 
			
		||||
``` conf
 | 
			
		||||
provider = {
 | 
			
		||||
  provider-id = "keycloak",
 | 
			
		||||
  client-id = "docspell",
 | 
			
		||||
  client-secret = "example-secret-439e-bf06-911e4cdd56a6",
 | 
			
		||||
  scope = "profile", # scope is required for OIDC
 | 
			
		||||
  authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
 | 
			
		||||
  token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
 | 
			
		||||
  #User URL is not used when signature key is set.
 | 
			
		||||
  #user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
 | 
			
		||||
  sign-key = "b64:MII…ZYL09vAwLn8EAcSkCAwEAAQ==",
 | 
			
		||||
  sig-algo = "RS512"
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `provider-id` is some identifier that is used in the URL to
 | 
			
		||||
distinguish between possibly multiple providers. The `client-id` and
 | 
			
		||||
`client-secret` define the two parameters required for a "confidential
 | 
			
		||||
client". The different URLs are best explained at the [keycloak
 | 
			
		||||
docs](https://www.keycloak.org/docs/latest/server_admin/).
 | 
			
		||||
They are available for all OPs in some way. The `user-url` is not
 | 
			
		||||
required, if the access token is already containing the necessary
 | 
			
		||||
data. If not, then docspell performs another request to the
 | 
			
		||||
`user-url`, which must be the user-info endpoint, to obtain the
 | 
			
		||||
required user data.
 | 
			
		||||
 | 
			
		||||
If the data is taken from the token directly and not via a request to
 | 
			
		||||
the user-info endpoint, then the token must be validated using the
 | 
			
		||||
given `sign-key` and `sig-algo`. These two values must then be
specified! However, if the user-info endpoint should be used, then leave
 | 
			
		||||
the `sign-key` empty and specify the correct url in `user-url`. When
 | 
			
		||||
specifying the `sign-key` use a prefix of `b64:` if it is Base64
 | 
			
		||||
encoded or `hex:` if it is hex encoded. Otherwise the unicode bytes
 | 
			
		||||
are used, which is most probably not wanted for this setting.
 | 
			
		||||
 | 
			
		||||
Once the user is authenticated, docspell tries to set up an account and
 | 
			
		||||
does some checks. For this it must get to the username and collective
 | 
			
		||||
name somehow. How it does this, can be specified by the `user-key` and
 | 
			
		||||
`collective-key` settings:
 | 
			
		||||
 | 
			
		||||
``` conf
 | 
			
		||||
# The collective of the user is given in the access token as
 | 
			
		||||
# property `docspell_collective`.
 | 
			
		||||
collective-key = "lookup:docspell_collective",
 | 
			
		||||
# The username to use for the docspell account
 | 
			
		||||
user-key = "preferred_username"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `user-key` is some string that is used to search the JSON response
 | 
			
		||||
from the OP for an object with that key. The search happens
 | 
			
		||||
recursively, so the field can be in a nested object. The found value
 | 
			
		||||
is used as the user name. Keycloak transmits the `preferred_username`
 | 
			
		||||
when asking for the `profile` scope. This can be used as the user
 | 
			
		||||
name.
 | 
			
		||||
 | 
			
		||||
The collective name can be obtained in different ways. For example,
 | 
			
		||||
you can instruct your OP (like keycloak) to provide a collective name
 | 
			
		||||
in the token and/or user-info responses. If you do this, then use the
 | 
			
		||||
`lookup:` prefix as in the example above. This instructs docspell to
 | 
			
		||||
search for a value the same way as the `user-key`. You can also set a
 | 
			
		||||
fixed collective, using `fixed:` prefix; in this case all users are in
 | 
			
		||||
the same collective! A third option is to prefix it with `account:` -
 | 
			
		||||
then the value that is looked up is interpreted as the full account
 | 
			
		||||
name, like `collective/user` and the `user-key` setting is ignored. If
 | 
			
		||||
you want to put each user in its own collective, you can just use the
 | 
			
		||||
same value as in `user-key`, only prefixed with `lookup:`. In the
 | 
			
		||||
example it would be `lookup:preferred_username`.
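As a small sketch of the three prefixes (the collective name `family` and the key `docspell_account` below are made-up placeholders, not defaults):

```conf
# all users share one fixed collective (name is a placeholder)
collective-key = "fixed:family"

# the looked-up value is a full account name like `collective/user`;
# the `user-key` setting is then ignored
collective-key = "account:docspell_account"

# one collective per user, reusing the user name from the token
collective-key = "lookup:preferred_username"
```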
 | 
			
		||||
 | 
			
		||||
If you find that these methods do not suffice for your case, please
 | 
			
		||||
open an issue.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## File Backends
 | 
			
		||||
 | 
			
		||||
Docspell allows to choose from different storage backends for binary
 | 
			
		||||
files. You can choose between:
 | 
			
		||||
 | 
			
		||||
1. *Database (the recommended default)*
 | 
			
		||||
 | 
			
		||||
   The database can be used to store the files as well. It is the
 | 
			
		||||
   default. It doesn't require any other configuration and works well
 | 
			
		||||
   with multiple instances of restservers and joex nodes.
 | 
			
		||||
2. *S3*
 | 
			
		||||
 | 
			
		||||
   The S3 backend allows to store files in an S3 compatible storage.
 | 
			
		||||
   It was tested with MinIO, which can be self-hosted.
 | 
			
		||||
 | 
			
		||||
3. *Filesystem*
 | 
			
		||||
 | 
			
		||||
   The filesystem can also be used directly, by specifying a
 | 
			
		||||
   directory. Be aware that _all_ nodes must have read and write
 | 
			
		||||
   access into this directory! When running multiple nodes over a
 | 
			
		||||
   network, consider using one of the above instead. Docspell uses a
 | 
			
		||||
   fixed structure for storing the files below the given directory, it
 | 
			
		||||
   cannot be configured.
 | 
			
		||||
 | 
			
		||||
When using S3 or filesystem, remember to backup the database *and* the
 | 
			
		||||
files!
 | 
			
		||||
 | 
			
		||||
Note that Docspell not only stores the files that are uploaded, but
 | 
			
		||||
also some other files for internal use.
 | 
			
		||||
 | 
			
		||||
### Configuring
 | 
			
		||||
 | 
			
		||||
{% warningbubble(title="Note") %}
 | 
			
		||||
 | 
			
		||||
Each node must have the same config for its file backend! When using
 | 
			
		||||
the filesystem, make sure all processes can access the directory with
 | 
			
		||||
read and write permissions.
 | 
			
		||||
 | 
			
		||||
{% end %}
 | 
			
		||||
 | 
			
		||||
The file storage backend can be configured inside the `files` section
 | 
			
		||||
(see the default configs below):
 | 
			
		||||
 | 
			
		||||
```conf
 | 
			
		||||
files {
 | 
			
		||||
  …
 | 
			
		||||
  default-store = "database"
 | 
			
		||||
 | 
			
		||||
  stores = {
 | 
			
		||||
    database =
 | 
			
		||||
      { enabled = true
 | 
			
		||||
        type = "default-database"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    filesystem =
 | 
			
		||||
      { enabled = false
 | 
			
		||||
        type = "file-system"
 | 
			
		||||
        directory = "/some/directory"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    minio =
 | 
			
		||||
     { enabled = false
 | 
			
		||||
       type = "s3"
 | 
			
		||||
       endpoint = "http://localhost:9000"
 | 
			
		||||
       access-key = "username"
 | 
			
		||||
       secret-key = "password"
 | 
			
		||||
       bucket = "docspell"
 | 
			
		||||
     }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `stores` object defines a set of stores and the `default-store`
 | 
			
		||||
selects the one that should be used. All disabled store configurations
 | 
			
		||||
are removed from the list. Thus the `default-store` must be enabled.
 | 
			
		||||
Other enabled stores can be used as the target when copying files (see
 | 
			
		||||
below).
 | 
			
		||||
 | 
			
		||||
A store configuration requires an `enabled` and a `type` property.
Depending on the `type` property, other properties are required; they
are presented above. The available storage types are
 | 
			
		||||
`default-database`, `file-system` and `s3`.
 | 
			
		||||
 | 
			
		||||
If you use the docker setup, you can find the corresponding
 | 
			
		||||
environment variables to the above config snippet
 | 
			
		||||
[below](#environment-variables).
 | 
			
		||||
 | 
			
		||||
### Change Backends
 | 
			
		||||
 | 
			
		||||
It is possible to change backends with a bit of manual effort. When
 | 
			
		||||
doing this, please make sure that the application is not used. It is
 | 
			
		||||
important that no file is uploaded during the following steps.
 | 
			
		||||
 | 
			
		||||
The [cli](@/docs/tools/cli.md) will be used, please set it up first
 | 
			
		||||
and you need to enable the [admin endpoint](#admin-endpoint). Config
 | 
			
		||||
changes mentioned here must be applied to all nodes - joex and
 | 
			
		||||
restserver!
 | 
			
		||||
 | 
			
		||||
1. In the config, enable a second file backend (besides the default)
 | 
			
		||||
   you want to change to and start docspell as normal. Don't change
 | 
			
		||||
   `default-store` yet.
 | 
			
		||||
2. Run the file integrity check in order to see whether all files are
 | 
			
		||||
   ok as they are in the current store. This can be done using the
 | 
			
		||||
   [cli](@/docs/tools/cli.md) by running:
 | 
			
		||||
 | 
			
		||||
   ```bash
 | 
			
		||||
   dsc admin file-integrity-check
 | 
			
		||||
   ```
 | 
			
		||||
3. Run the copy files admin command which will copy all files from the
 | 
			
		||||
   current `default-store` to all other enabled stores.
 | 
			
		||||
 | 
			
		||||
   ```bash
 | 
			
		||||
   dsc admin clone-file-repository
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
   And wait until it's done :-). You can see the progress in the jobs
 | 
			
		||||
   page when logged in as `docspell-system` or just look at the logs.
 | 
			
		||||
4. In the config, change the `default-store` to the one you just
 | 
			
		||||
   copied all the files to and restart docspell.
 | 
			
		||||
5. Login and do some smoke tests. Then run the file integrity check
 | 
			
		||||
   again:
 | 
			
		||||
 | 
			
		||||
   ```bash
 | 
			
		||||
   dsc admin file-integrity-check
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
If all is fine, then you are done and are now using the new file
 | 
			
		||||
backend. If the second integrity check fails, please open an issue.
 | 
			
		||||
You then need to revert the config change of step 4 to use the
 | 
			
		||||
previous `default-store` again.
 | 
			
		||||
 | 
			
		||||
If you want to delete the files from the database, you can do so by
 | 
			
		||||
running the following SQL against the database:
 | 
			
		||||
 | 
			
		||||
```sql
 | 
			
		||||
DELETE FROM filechunk
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
You can copy them back into the database using the steps above.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## File Processing
 | 
			
		||||
 | 
			
		||||
Files are being processed by the joex component. So all the respective
 | 
			
		||||
configuration is in this config only.
 | 
			
		||||
 | 
			
		||||
File processing involves several stages, detailed information can be
 | 
			
		||||
found [here](@/docs/joex/file-processing.md#text-analysis) and in the
 | 
			
		||||
corresponding sections in [joex default config](#joex).
 | 
			
		||||
 | 
			
		||||
Configuration allows to define the external tools and set some
 | 
			
		||||
limitations to control memory usage. The sections are:
 | 
			
		||||
 | 
			
		||||
- `docspell.joex.extraction`
 | 
			
		||||
- `docspell.joex.text-analysis`
 | 
			
		||||
- `docspell.joex.convert`
 | 
			
		||||
 | 
			
		||||
Options to external commands can use variables that are replaced by
 | 
			
		||||
values at runtime. Variables are enclosed in double braces `{{…}}`.
 | 
			
		||||
Please see the default configuration for what variables exist per
 | 
			
		||||
command.
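Purely as an illustration (the program name, arguments and variable names below are hypothetical; the real command definitions and their variables are listed in the joex default config), such a command block could look like:

```conf
# hypothetical sketch of an external command that uses runtime variables
some-tool = {
  command = {
    program = "/usr/bin/some-tool"
    # {{infile}} and {{outfile}} are illustrative variable names only
    args = ["--in", "{{infile}}", "--out", "{{outfile}}"]
    timeout = "2 minutes"
  }
}
```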
 | 
			
		||||
 | 
			
		||||
### Classification
 | 
			
		||||
 | 
			
		||||
In `text-analysis.classification` you can define how many documents at
 | 
			
		||||
most should be used for learning. The default settings should work
 | 
			
		||||
well for most cases. However, it always depends on the amount of data
 | 
			
		||||
and the machine that runs joex. For example, by default the documents
 | 
			
		||||
to learn from are limited to 600 (`classification.item-count`) and
 | 
			
		||||
every text is cut after 5000 characters (`text-analysis.max-length`).
 | 
			
		||||
This is fine if *most* of your documents are small and only a few are
near 5000 characters. But if *all* your documents are very large, you
 | 
			
		||||
probably need to either assign more heap memory or go down with the
 | 
			
		||||
limits.
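For example, a sketch that lowers both limits for a small machine (the values are arbitrary; the paths follow the section names mentioned above):

```conf
docspell.joex.text-analysis {
  # cut text earlier to reduce memory usage
  max-length = 3000
  # learn from fewer documents
  classification.item-count = 200
}
```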
 | 
			
		||||
 | 
			
		||||
Classification can be disabled, too, for when it's not needed.
 | 
			
		||||
 | 
			
		||||
### NLP
 | 
			
		||||
 | 
			
		||||
This setting defines which NLP mode to use. It defaults to `full`,
 | 
			
		||||
which requires more memory for certain languages (with the advantage
 | 
			
		||||
of better results). Other values are `basic`, `regexonly` and
 | 
			
		||||
`disabled`. The modes `full` and `basic` use pre-defined language
models for processing documents of the languages German, English, French and
 | 
			
		||||
Spanish. These require some amount of memory (see below).
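A minimal sketch for switching to the lighter mode (assuming the `nlp` block sits under `text-analysis`, as referenced in the memory usage section below):

```conf
# use the "light" NLP variant described below
docspell.joex.text-analysis.nlp.mode = "basic"
```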
 | 
			
		||||
 | 
			
		||||
The mode `basic` is like the "light" variant to `full`. It doesn't use
 | 
			
		||||
all NLP features, which makes memory consumption much lower, but comes
 | 
			
		||||
with the compromise of less accurate results.
 | 
			
		||||
 | 
			
		||||
The mode `regexonly` doesn't use pre-defined language models, even if
available. It checks your address book against a document to find
metadata. That means, it is language independent. Also, when using
`full` or `basic` with languages where no pre-defined models exist, it
will degrade to `regexonly` for these.
 | 
			
		||||
 | 
			
		||||
The mode `disabled` skips NLP processing completely. This has the least
impact on memory consumption, obviously, but then only the classifier
 | 
			
		||||
is used to find metadata (unless it is disabled, too).
 | 
			
		||||
 | 
			
		||||
You might want to try different modes and see what combination suits
 | 
			
		||||
best your usage pattern and machine running joex. If a powerful
 | 
			
		||||
machine is used, simply leave the defaults. When running on a
Raspberry Pi, for example, you might need to adjust things.
 | 
			
		||||
 | 
			
		||||
### Memory Usage
 | 
			
		||||
 | 
			
		||||
The memory requirements for the joex component depend on the document
 | 
			
		||||
language and the enabled features for text-analysis. The `nlp.mode`
 | 
			
		||||
setting has significant impact, especially when your documents are in
 | 
			
		||||
German. Here are some rough numbers on jvm heap usage (the same file
 | 
			
		||||
was used for all tries):
 | 
			
		||||
 | 
			
		||||
<table class="striped-basic">
 | 
			
		||||
<thead>
 | 
			
		||||
  <tr>
 | 
			
		||||
     <th>nlp.mode</th>
 | 
			
		||||
     <th>English</th>
 | 
			
		||||
     <th>German</th>
 | 
			
		||||
     <th>French</th>
 | 
			
		||||
 </tr>
 | 
			
		||||
</thead>
 | 
			
		||||
<tfoot>
 | 
			
		||||
</tfoot>
 | 
			
		||||
<tbody>
 | 
			
		||||
  <tr><td>full</td><td>420M</td><td>950M</td><td>490M</td></tr>
 | 
			
		||||
  <tr><td>basic</td><td>170M</td><td>380M</td><td>390M</td></tr>
 | 
			
		||||
</tbody>
 | 
			
		||||
</table>
 | 
			
		||||
 | 
			
		||||
Note that these are only rough numbers and they show the maximum used
 | 
			
		||||
heap memory while processing a file.
 | 
			
		||||
 | 
			
		||||
When using `mode=full`, a heap setting of at least `-Xmx1400M` is
 | 
			
		||||
recommended. For `mode=basic` a heap setting of at least `-Xmx500M` is
 | 
			
		||||
recommended.
 | 
			
		||||
 | 
			
		||||
Other languages can't use these two modes, and so don't require this
 | 
			
		||||
amount of memory (but don't have as good results). Then you can go
 | 
			
		||||
with less heap. For these languages, the nlp mode is the same as
 | 
			
		||||
`regexonly`.
 | 
			
		||||
 | 
			
		||||
Training the classifier is also memory intensive, which solely depends
 | 
			
		||||
on the size and number of documents that are being trained. However,
 | 
			
		||||
training the classifier is done periodically and can happen maybe
 | 
			
		||||
every two weeks. When classifying new documents, memory requirements
 | 
			
		||||
are lower, since the model already exists.
 | 
			
		||||
 | 
			
		||||
More details about these modes can be found
 | 
			
		||||
[here](@/docs/joex/file-processing.md#text-analysis).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
The restserver component is very lightweight; here you can use the
defaults.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# JVM Options
 | 
			
		||||
 | 
			
		||||
The start scripts support some options to configure the JVM. One often
 | 
			
		||||
used setting is the maximum heap size of the JVM. By default, java
 | 
			
		||||
determines it based on properties of the current machine. You can
 | 
			
		||||
specify it by giving java startup options to the command:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
This would limit the maximum heap to 1GB. The double dash separates
 | 
			
		||||
internal options and the arguments to the program. Another frequently
 | 
			
		||||
used option is to change the default temp directory. Usually it is
 | 
			
		||||
`/tmp`, but it may be desired to have a dedicated temp directory,
 | 
			
		||||
which can be configured:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The command:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
$ ./docspell-restserver*/bin/docspell-restserver -h
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
gives an overview of supported options.
 | 
			
		||||
 | 
			
		||||
It is recommended to run joex with the G1GC enabled. If you use java8,
 | 
			
		||||
you need to add an option to use G1GC (`-XX:+UseG1GC`), for java11
 | 
			
		||||
this is not necessary (but doesn't hurt either). This could look like
 | 
			
		||||
this:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
./docspell-joex-{{version()}}/bin/docspell-joex -J-Xmx1596M -J-XX:+UseG1GC -- /path/to/joex.conf
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Using these options you can define how much memory the JVM process is
 | 
			
		||||
able to use. This might be necessary to adapt depending on the usage
 | 
			
		||||
scenario and configured text analysis features.
 | 
			
		||||
 | 
			
		||||
Please have a look at the corresponding [section](@/docs/configure/_index.md#memory-usage).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Logging
 | 
			
		||||
 | 
			
		||||
By default, docspell logs to stdout. This works well, when managed by
 | 
			
		||||
systemd or other inits. Logging can be configured in the configuration
 | 
			
		||||
file or via environment variables. There are only two settings:
 | 
			
		||||
 | 
			
		||||
- `minimum-level`: specifies the log level to control the verbosity.
  Levels are ordered from *Trace*, *Debug*, *Info*, *Warn* to
  *Error*.
- `format`: this defines how the logs are formatted. There are two
  formats for humans: *Plain* and *Fancy*. And two more suited for
  machine consumption: *Json* and *Logfmt*. The *Json* format contains
  all details, while the others may omit some for readability.
 | 
			
		||||
 | 
			
		||||
These settings are the same for joex and the restserver component.
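A small sketch for the restserver (assuming the `logging` block sits directly under `docspell.server`; joex is configured the same way under `docspell.joex`, see the default configs below):

```conf
docspell.server.logging {
  # one of Trace, Debug, Info, Warn, Error
  minimum-level = "Warn"
  # one of Plain, Fancy, Json, Logfmt
  format = "Fancy"
}
```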
 | 
			
		||||
 | 
			
		||||
# Default Config
 | 
			
		||||
## Rest Server
 | 
			
		||||
 | 
			
		||||
{{ incl_conf(path="templates/shortcodes/server.conf") }}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Joex
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
{{ incl_conf(path="templates/shortcodes/joex.conf") }}
 | 
			
		||||
 | 
			
		||||
## Environment Variables
 | 
			
		||||
 | 
			
		||||
Environment variables can be used when there is no config file
 | 
			
		||||
supplied. The listing below shows all possible variables and their
 | 
			
		||||
default values.
 | 
			
		||||
 | 
			
		||||
{{ incl_conf(path="templates/shortcodes/config.env.txt") }}
 | 
			
		||||
39  website/site/content/docs/configure/admin-endpoint.md  (Normal file)
@@ -0,0 +1,39 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "Admin Endpoint"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Describes the configuration file and shows all default settings."
 | 
			
		||||
weight = 60
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
# Admin Endpoint
 | 
			
		||||
 | 
			
		||||
The admin endpoint defines some [routes](@/docs/api/intro.md#admin)
 | 
			
		||||
for administration tasks. This is disabled by default and can be
 | 
			
		||||
enabled by providing a secret:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
...
 | 
			
		||||
  admin-endpoint {
 | 
			
		||||
    secret = "123"
 | 
			
		||||
  }
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
This secret must be provided to all requests to a `/api/v1/admin/`
 | 
			
		||||
endpoint.
 | 
			
		||||
 | 
			
		||||
The most convenient way to execute admin tasks is to use the
 | 
			
		||||
[cli](@/docs/tools/cli.md). You get a list of possible admin commands
 | 
			
		||||
via `dsc admin help`.
 | 
			
		||||
 | 
			
		||||
To see the output of the commands, there are these ways:
 | 
			
		||||
 | 
			
		||||
1. Looking at the joex logs, which gives most details.
2. Use the job-queue page when logged in as `docspell-system`.
3. Set up a [webhook](@/docs/webapp/notification.md) to be notified
   when a job finishes. This way you get a small message.
 | 
			
		||||
 | 
			
		||||
All admin tasks (and also some other system tasks) are run under the
 | 
			
		||||
account `docspell-system` (collective and user). You need to create
 | 
			
		||||
this account and set up the notification hooks in there - not in your
 | 
			
		||||
normal account.
 | 
			
		||||
							
								
								
									
124  website/site/content/docs/configure/authentication.md  (Normal file)
@@ -0,0 +1,124 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "Authentication"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Describes the configuration file and shows all default settings."
 | 
			
		||||
weight = 70
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
## Authentication
 | 
			
		||||
 | 
			
		||||
Authentication works in two ways:
 | 
			
		||||
 | 
			
		||||
- with an account-name / password pair
 | 
			
		||||
- with an authentication token
 | 
			
		||||
 | 
			
		||||
The initial authentication must occur with an accountname/password
 | 
			
		||||
pair. This will generate an authentication token which is valid for
 | 
			
		||||
some time. Subsequent calls to secured routes can use this token. The
 | 
			
		||||
token can be given as a normal http header or via a cookie header.
 | 
			
		||||
 | 
			
		||||
These settings apply only to the REST server.
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.server.auth {
 | 
			
		||||
  server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
 | 
			
		||||
  session-valid = "5 minutes"
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `server-secret` is used to sign the token. If multiple REST
 | 
			
		||||
servers are deployed, all must share the same server secret. Otherwise
 | 
			
		||||
tokens from one instance are not valid on another instance. The secret
 | 
			
		||||
can be given as Base64 encoded string or in hex form. Use the prefix
 | 
			
		||||
`hex:` and `b64:`, respectively. If no prefix is given, the UTF8 bytes
 | 
			
		||||
of the string are used.
 | 
			
		||||
 | 
			
		||||
The `session-valid` determines how long a token is valid. This can be
 | 
			
		||||
just some minutes, the web application obtains new ones
 | 
			
		||||
periodically. So a rather short time is recommended.
 | 
			
		||||
 | 
			
		||||
## OpenID Connect / OAuth2
 | 
			
		||||
 | 
			
		||||
You can integrate Docspell into your SSO solution via [OpenID
 | 
			
		||||
Connect](https://openid.net/connect/) (OIDC). This requires to set up
 | 
			
		||||
an OpenID Provider (OP) somewhere and to configure Docspell
 | 
			
		||||
accordingly to act as the relying party.
 | 
			
		||||
 | 
			
		||||
You can define multiple OPs to use. For some examples, please see the
 | 
			
		||||
[default configuration](@/docs/configure/main.md#default-config).
 | 
			
		||||
 | 
			
		||||
The configuration of a provider highly depends on how it is set up.
 | 
			
		||||
Here is an example for a setup using
 | 
			
		||||
[keycloak](https://www.keycloak.org):
 | 
			
		||||
 | 
			
		||||
``` conf
 | 
			
		||||
provider = {
 | 
			
		||||
  provider-id = "keycloak",
 | 
			
		||||
  client-id = "docspell",
 | 
			
		||||
  client-secret = "example-secret-439e-bf06-911e4cdd56a6",
 | 
			
		||||
  scope = "profile", # scope is required for OIDC
 | 
			
		||||
  authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
 | 
			
		||||
  token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
 | 
			
		||||
  #User URL is not used when signature key is set.
 | 
			
		||||
  #user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
 | 
			
		||||
  sign-key = "b64:MII…ZYL09vAwLn8EAcSkCAwEAAQ==",
 | 
			
		||||
  sig-algo = "RS512"
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `provider-id` is some identifier that is used in the URL to
 | 
			
		||||
distinguish between possibly multiple providers. The `client-id` and
 | 
			
		||||
`client-secret` define the two parameters required for a "confidential
 | 
			
		||||
client". The different URLs are best explained at the [keycloak
 | 
			
		||||
docs](https://www.keycloak.org/docs/latest/server_admin/).
 | 
			
		||||
They are available for all OPs in some way. The `user-url` is not
 | 
			
		||||
required, if the access token is already containing the necessary
 | 
			
		||||
data. If not, then docspell performs another request to the
 | 
			
		||||
`user-url`, which must be the user-info endpoint, to obtain the
 | 
			
		||||
required user data.
 | 
			
		||||
 | 
			
		||||
If the data is taken from the token directly and not via a request to
 | 
			
		||||
the user-info endpoint, then the token must be validated using the
 | 
			
		||||
given `sign-key` and `sig-algo`. These two values must then be
specified! However, if the user-info endpoint should be used, then leave
 | 
			
		||||
the `sign-key` empty and specify the correct url in `user-url`. When
 | 
			
		||||
specifying the `sign-key` use a prefix of `b64:` if it is Base64
 | 
			
		||||
encoded or `hex:` if it is hex encoded. Otherwise the unicode bytes
 | 
			
		||||
are used, which is most probably not wanted for this setting.
 | 
			
		||||
 | 
			
		||||
Once the user is authenticated, docspell tries to set up an account and
 | 
			
		||||
does some checks. For this it must get to the username and collective
 | 
			
		||||
name somehow. How it does this, can be specified by the `user-key` and
 | 
			
		||||
`collective-key` settings:
 | 
			
		||||
 | 
			
		||||
``` conf
 | 
			
		||||
# The collective of the user is given in the access token as
 | 
			
		||||
# property `docspell_collective`.
 | 
			
		||||
collective-key = "lookup:docspell_collective",
 | 
			
		||||
# The username to use for the docspell account
 | 
			
		||||
user-key = "preferred_username"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `user-key` is some string that is used to search the JSON response
 | 
			
		||||
from the OP for an object with that key. The search happens
 | 
			
		||||
recursively, so the field can be in a nested object. The found value
 | 
			
		||||
is used as the user name. Keycloak transmits the `preferred_username`
 | 
			
		||||
when asking for the `profile` scope. This can be used as the user
 | 
			
		||||
name.
 | 
			
		||||
 | 
			
		||||
The collective name can be obtained in different ways. For example,
 | 
			
		||||
you can instruct your OP (like keycloak) to provide a collective name
 | 
			
		||||
in the token and/or user-info responses. If you do this, then use the
 | 
			
		||||
`lookup:` prefix as in the example above. This instructs docspell to
 | 
			
		||||
search for a value the same way as the `user-key`. You can also set a
 | 
			
		||||
fixed collective, using `fixed:` prefix; in this case all users are in
 | 
			
		||||
the same collective! A third option is to prefix it with `account:` -
 | 
			
		||||
then the value that is looked up is interpreted as the full account
 | 
			
		||||
name, like `collective/user` and the `user-key` setting is ignored. If
 | 
			
		||||
you want to put each user in its own collective, you can just use the
 | 
			
		||||
same value as in `user-key`, only prefixed with `lookup:`. In the
 | 
			
		||||
example it would be `lookup:preferred_username`.
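As a small sketch of the three prefixes (the collective name `family` and the key `docspell_account` below are made-up placeholders, not defaults):

```conf
# all users share one fixed collective (name is a placeholder)
collective-key = "fixed:family"

# the looked-up value is a full account name like `collective/user`;
# the `user-key` setting is then ignored
collective-key = "account:docspell_account"

# one collective per user, reusing the user name from the token
collective-key = "lookup:preferred_username"
```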
 | 
			
		||||
 | 
			
		||||
If you find that these methods do not suffice for your case, please
 | 
			
		||||
open an issue.
 | 
			
		||||
							
								
								
									
38  website/site/content/docs/configure/baseurl.md  (Normal file)
@@ -0,0 +1,38 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "Base URL"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Describes the configuration file and shows all default settings."
 | 
			
		||||
weight = 90
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
## Baseurl
 | 
			
		||||
 | 
			
		||||
The base url is an important setting that defines the http URL where
 | 
			
		||||
the corresponding component can be reached. It applies to both
 | 
			
		||||
components. For a joex component, the url must be resolvable from a
 | 
			
		||||
REST server component. The REST server also uses this url to create
 | 
			
		||||
absolute urls and to configure the authentication cookie.
 | 
			
		||||
 | 
			
		||||
By default it is built using the information from the `bind` setting,
 | 
			
		||||
which is `http://localhost:7880`.
 | 
			
		||||
 | 
			
		||||
If the default is not changed, docspell will use the request to
 | 
			
		||||
determine the base-url. It first inspects the `X-Forwarded-For` header
 | 
			
		||||
that is often used with reverse proxies. If that is not present, the
 | 
			
		||||
`Host` header of the request is used. However, if the `base-url`
 | 
			
		||||
setting is changed, then only this setting is used.
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
docspell.server.base-url = ...
 | 
			
		||||
docspell.joex.base-url = ...
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
If you are unsure, leave it at its default.
 | 
			
		||||
 | 
			
		||||
### Examples
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
docspell.server.baseurl = "https://docspell.example.com"
 | 
			
		||||
docspell.joex.baseurl = "http://192.168.101.10"
 | 
			
		||||
```
 | 
			
		||||
							
								
								
									
27  website/site/content/docs/configure/bind.md  (Normal file)
@@ -0,0 +1,27 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "Bind"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Describes the configuration file and shows all default settings."
 | 
			
		||||
weight = 12
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
## Bind
 | 
			
		||||
 | 
			
		||||
The host and port the http server binds to. This applies to both
 | 
			
		||||
components. The joex component also exposes a small REST api to
 | 
			
		||||
inspect its state and notify the scheduler.
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.server.bind {
 | 
			
		||||
  address = localhost
 | 
			
		||||
  port = 7880
 | 
			
		||||
}
 | 
			
		||||
docspell.joex.bind {
 | 
			
		||||
  address = localhost
 | 
			
		||||
  port = 7878
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
By default, it binds to `localhost` and some predefined port. This
 | 
			
		||||
must be changed, if components are on different machines.
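For example, a sketch that makes the restserver reachable from other machines (the address value is only an illustration; the joex block works the same way):

```conf
docspell.server.bind {
  # listen on all interfaces instead of localhost
  address = "0.0.0.0"
  port = 7880
}
```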
 | 
			
		||||
							
								
								
									
71  website/site/content/docs/configure/database.md  (Normal file)
@@ -0,0 +1,71 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "Database"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Details about configuring the database."
 | 
			
		||||
weight = 20
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Database
 | 
			
		||||
 | 
			
		||||
The database holds by default all the data and must be configured
 | 
			
		||||
exactly the same on all nodes.
 | 
			
		||||
 | 
			
		||||
The following are supported DBs:
 | 
			
		||||
 | 
			
		||||
- PostgreSQL (recommended)
 | 
			
		||||
- MariaDB
 | 
			
		||||
- H2
 | 
			
		||||
 | 
			
		||||
This has to be specified for the rest server and joex. By default, an
H2 database in the current `/tmp` directory is configured.
 | 
			
		||||
 | 
			
		||||
## Options
 | 
			
		||||
 | 
			
		||||
The config looks like this (both components):
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
docspell.joex.jdbc {
 | 
			
		||||
  url = ...
 | 
			
		||||
  user = ...
 | 
			
		||||
  password = ...
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
docspell.server.backend.jdbc {
 | 
			
		||||
  url = ...
 | 
			
		||||
  user = ...
 | 
			
		||||
  password = ...
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `url` is the connection to the database. It must start with
 | 
			
		||||
`jdbc`, followed by the name of the database. The rest is specific to the
 | 
			
		||||
database used: it is either a path to a file for H2 or a host/database
 | 
			
		||||
url for MariaDB and PostgreSQL.
 | 
			
		||||
 | 
			
		||||
When using H2, the user and password can be chosen freely on first
 | 
			
		||||
start, but must stay the same on subsequent starts. Usually, the user
 | 
			
		||||
is `sa` and the password is left empty. Additionally, the url must
 | 
			
		||||
include these options:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Examples
 | 
			
		||||
 | 
			
		||||
PostgreSQL:
 | 
			
		||||
```
 | 
			
		||||
url = "jdbc:postgresql://localhost:5432/docspelldb"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
MariaDB:
 | 
			
		||||
```
 | 
			
		||||
url = "jdbc:mariadb://localhost:3306/docspelldb"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
H2:
 | 
			
		||||
```
 | 
			
		||||
url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
 | 
			
		||||
```
 | 
			
		||||
							
								
								
									
147  website/site/content/docs/configure/file-backends.md  (Normal file)
@@ -0,0 +1,147 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "File Backends"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Describes the configuration file and shows all default settings."
 | 
			
		||||
weight = 30
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
## File Backends
 | 
			
		||||
 | 
			
		||||
Docspell allows to choose from different storage backends for binary
 | 
			
		||||
files. You can choose between:
 | 
			
		||||
 | 
			
		||||
1. *Database (the recommended default)*
 | 
			
		||||
 | 
			
		||||
   The database can be used to store the files as well. It is the
 | 
			
		||||
   default. It doesn't require any other configuration and works well
 | 
			
		||||
   with multiple instances of restservers and joex nodes.
 | 
			
		||||
2. *S3*
 | 
			
		||||
 | 
			
		||||
   The S3 backend allows to store files in an S3 compatible storage.
 | 
			
		||||
   It was tested with MinIO, which can be self-hosted.
 | 
			
		||||
 | 
			
		||||
3. *Filesystem*
 | 
			
		||||
 | 
			
		||||
   The filesystem can also be used directly, by specifying a
 | 
			
		||||
   directory. Be aware that _all_ nodes must have read and write
 | 
			
		||||
   access into this directory! When running multiple nodes over a
 | 
			
		||||
   network, consider using one of the above instead. Docspell uses a
 | 
			
		||||
   fixed structure for storing the files below the given directory, it
 | 
			
		||||
   cannot be configured.
 | 
			
		||||
 | 
			
		||||
When using S3 or filesystem, remember to backup the database *and* the
 | 
			
		||||
files!
 | 
			
		||||
 | 
			
		||||
Note that Docspell not only stores the files that are uploaded, but
 | 
			
		||||
also some other files for internal use.
 | 
			
		||||
 | 
			
		||||
### Configuring
 | 
			
		||||
 | 
			
		||||
{% warningbubble(title="Note") %}
 | 
			
		||||
 | 
			
		||||
Each node must have the same config for its file backend! When using
 | 
			
		||||
the filesystem, make sure all processes can access the directory with
 | 
			
		||||
read and write permissions.
 | 
			
		||||
 | 
			
		||||
{% end %}
 | 
			
		||||
 | 
			
		||||
The file storage backend can be configured inside the `files` section
 | 
			
		||||
(see the [default configs](@/docs/configure/main.md#default-config)):
 | 
			
		||||
 | 
			
		||||
```conf
 | 
			
		||||
files {
 | 
			
		||||
  …
 | 
			
		||||
  default-store = "database"
 | 
			
		||||
 | 
			
		||||
  stores = {
 | 
			
		||||
    database =
 | 
			
		||||
      { enabled = true
 | 
			
		||||
        type = "default-database"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    filesystem =
 | 
			
		||||
      { enabled = false
 | 
			
		||||
        type = "file-system"
 | 
			
		||||
        directory = "/some/directory"
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    minio =
 | 
			
		||||
     { enabled = false
 | 
			
		||||
       type = "s3"
 | 
			
		||||
       endpoint = "http://localhost:9000"
 | 
			
		||||
       access-key = "username"
 | 
			
		||||
       secret-key = "password"
 | 
			
		||||
       bucket = "docspell"
 | 
			
		||||
     }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The `stores` object defines a set of stores and the `default-store`
 | 
			
		||||
selects the one that should be used. All disabled store configurations
 | 
			
		||||
are removed from the list. Thus the `default-store` must be enabled.
 | 
			
		||||
Other enabled stores can be used as the target when copying files (see
 | 
			
		||||
below).
 | 
			
		||||
 | 
			
		||||
A store configuration requires an `enabled` and a `type` property.
Depending on the `type` property, other properties are required; they
are presented above. The available storage types are
 | 
			
		||||
`default-database`, `file-system` and `s3`.
 | 
			
		||||
 | 
			
		||||
If you use the docker setup, you can find the corresponding
 | 
			
		||||
environment variables to the above config snippet
 | 
			
		||||
[below](#environment-variables).
 | 
			
		||||
 | 
			
		||||
### Change Backends
 | 
			
		||||
 | 
			
		||||
It is possible to change backends with a bit of manual effort. When
 | 
			
		||||
doing this, please make sure that the application is not used. It is
 | 
			
		||||
important that no file is uploaded during the following steps.
 | 
			
		||||
 | 
			
		||||
The [cli](@/docs/tools/cli.md) will be used, please set it up first
 | 
			
		||||
and you need to enable the [admin endpoint](#admin-endpoint). Config
 | 
			
		||||
changes mentioned here must be applied to all nodes - joex and
 | 
			
		||||
restserver!
 | 
			
		||||
 | 
			
		||||
1. In the config, enable a second file backend (besides the default)
 | 
			
		||||
   you want to change to and start docspell as normal. Don't change
 | 
			
		||||
   `default-store` yet.
 | 
			
		||||
2. Run the file integrity check in order to see whether all files are
 | 
			
		||||
   ok as they are in the current store. This can be done using the
 | 
			
		||||
   [cli](@/docs/tools/cli.md) by running:
 | 
			
		||||
 | 
			
		||||
   ```bash
 | 
			
		||||
   dsc admin file-integrity-check
 | 
			
		||||
   ```
 | 
			
		||||
3. Run the copy files admin command which will copy all files from the
 | 
			
		||||
   current `default-store` to all other enabled stores.
 | 
			
		||||
 | 
			
		||||
   ```bash
 | 
			
		||||
   dsc admin clone-file-repository
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
   And wait until it's done :-). You can see the progress in the jobs
 | 
			
		||||
   page when logged in as `docspell-system` or just look at the logs.
 | 
			
		||||
4. In the config, change the `default-store` to the one you just
 | 
			
		||||
   copied all the files to and restart docspell.
 | 
			
		||||
5. Login and do some smoke tests. Then run the file integrity check
 | 
			
		||||
   again:
 | 
			
		||||
 | 
			
		||||
   ```bash
 | 
			
		||||
   dsc admin file-integrity-check
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
If all is fine, then you are done and are now using the new file
 | 
			
		||||
backend. If the second integrity check fails, please open an issue.
 | 
			
		||||
You then need to revert the config change of step 4 to use the
 | 
			
		||||
previous `default-store` again.
 | 
			
		||||
 | 
			
		||||
If you want to delete the files from the database, you can do so by
 | 
			
		||||
running the following SQL against the database:
 | 
			
		||||
 | 
			
		||||
```sql
 | 
			
		||||
DELETE FROM filechunk
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
You can copy them back into the database using the steps above.
 | 
			
		||||
							
								
								
									
122  website/site/content/docs/configure/file-processing.md  (Normal file)
@@ -0,0 +1,122 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "File Processing"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Describes the configuration file and shows all default settings."
 | 
			
		||||
weight = 40
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
## File Processing
 | 
			
		||||
 | 
			
		||||
Files are being processed by the joex component. So all the respective
 | 
			
		||||
configuration is in this config only.
 | 
			
		||||
 | 
			
		||||
File processing involves several stages, detailed information can be
 | 
			
		||||
found [here](@/docs/joex/file-processing.md#text-analysis) and in the
 | 
			
		||||
corresponding sections in [joex default
 | 
			
		||||
config](@/docs/configure/main.md#joex).
 | 
			
		||||
 | 
			
		||||
Configuration allows to define the external tools and set some
 | 
			
		||||
limitations to control memory usage. The sections are:
 | 
			
		||||
 | 
			
		||||
- `docspell.joex.extraction`
 | 
			
		||||
- `docspell.joex.text-analysis`
 | 
			
		||||
- `docspell.joex.convert`
 | 
			
		||||
 | 
			
		||||
Options to external commands can use variables that are replaced by
 | 
			
		||||
values at runtime. Variables are enclosed in double braces `{{…}}`.
 | 
			
		||||
Please see the default configuration for what variables exist per
 | 
			
		||||
command.
 | 
			
		||||
 | 
			
		||||
### Classification
 | 
			
		||||
 | 
			
		||||
In `text-analysis.classification` you can define how many documents at
 | 
			
		||||
most should be used for learning. The default settings should work
 | 
			
		||||
well for most cases. However, it always depends on the amount of data
 | 
			
		||||
and the machine that runs joex. For example, by default the documents
 | 
			
		||||
to learn from are limited to 600 (`classification.item-count`) and
 | 
			
		||||
every text is cut after 5000 characters (`text-analysis.max-length`).
 | 
			
		||||
This is fine if *most* of your documents are small and only a few are
near 5000 characters. But if *all* your documents are very large, you
 | 
			
		||||
probably need to either assign more heap memory or go down with the
 | 
			
		||||
limits.
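For example, a sketch that lowers both limits for a small machine (the values are arbitrary; the paths follow the section names mentioned above):

```conf
docspell.joex.text-analysis {
  # cut text earlier to reduce memory usage
  max-length = 3000
  # learn from fewer documents
  classification.item-count = 200
}
```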
 | 
			
		||||
 | 
			
		||||
Classification can be disabled, too, for when it's not needed.
 | 
			
		||||
 | 
			
		||||
### NLP
 | 
			
		||||
 | 
			
		||||
This setting defines which NLP mode to use. It defaults to `full`,
 | 
			
		||||
which requires more memory for certain languages (with the advantage
 | 
			
		||||
of better results). Other values are `basic`, `regexonly` and
 | 
			
		||||
`disabled`. The modes `full` and `basic` use pre-defined language
models for processing documents of the languages German, English, French and
 | 
			
		||||
Spanish. These require some amount of memory (see below).
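A minimal sketch for switching to the lighter mode (assuming the `nlp` block sits under `text-analysis`, as referenced in the memory usage section below):

```conf
# use the "light" NLP variant described below
docspell.joex.text-analysis.nlp.mode = "basic"
```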
 | 
			
		||||
 | 
			
		||||
The mode `basic` is like the "light" variant to `full`. It doesn't use
 | 
			
		||||
all NLP features, which makes memory consumption much lower, but comes
 | 
			
		||||
with the compromise of less accurate results.
 | 
			
		||||
 | 
			
		||||
The mode `regexonly` doesn't use pre-defined language models, even if
available. It checks your address book against a document to find
metadata. That means, it is language independent. Also, when using
`full` or `basic` with languages where no pre-defined models exist, it
will degrade to `regexonly` for these.
 | 
			
		||||
 | 
			
		||||
The mode `disabled` skips NLP processing completely. This has the least
impact on memory consumption, obviously, but then only the classifier
 | 
			
		||||
is used to find metadata (unless it is disabled, too).
 | 
			
		||||
 | 
			
		||||
You might want to try different modes and see what combination suits
 | 
			
		||||
best your usage pattern and machine running joex. If a powerful
 | 
			
		||||
machine is used, simply leave the defaults. When running on a
Raspberry Pi, for example, you might need to adjust things.
 | 
			
		||||
 | 
			
		||||
### Memory Usage
 | 
			
		||||
 | 
			
		||||
The memory requirements for the joex component depend on the document
 | 
			
		||||
language and the enabled features for text-analysis. The `nlp.mode`
 | 
			
		||||
setting has significant impact, especially when your documents are in
 | 
			
		||||
German. Here are some rough numbers on jvm heap usage (the same file
 | 
			
		||||
was used for all tries):
 | 
			
		||||
 | 
			
		||||
<table class="striped-basic">
 | 
			
		||||
<thead>
 | 
			
		||||
  <tr>
 | 
			
		||||
     <th>nlp.mode</th>
 | 
			
		||||
     <th>English</th>
 | 
			
		||||
     <th>German</th>
 | 
			
		||||
     <th>French</th>
 | 
			
		||||
 </tr>
 | 
			
		||||
</thead>
 | 
			
		||||
<tfoot>
 | 
			
		||||
</tfoot>
 | 
			
		||||
<tbody>
 | 
			
		||||
  <tr><td>full</td><td>420M</td><td>950M</td><td>490M</td></tr>
 | 
			
		||||
  <tr><td>basic</td><td>170M</td><td>380M</td><td>390M</td></tr>
 | 
			
		||||
</tbody>
 | 
			
		||||
</table>
 | 
			
		||||
 | 
			
		||||
Note that these are only rough numbers and they show the maximum used
 | 
			
		||||
heap memory while processing a file.
 | 
			
		||||
 | 
			
		||||
When using `mode=full`, a heap setting of at least `-Xmx1400M` is
 | 
			
		||||
recommended. For `mode=basic` a heap setting of at least `-Xmx500M` is
 | 
			
		||||
recommended.
 | 
			
		||||
 | 
			
		||||
Other languages can't use these two modes, and so don't require this
 | 
			
		||||
amount of memory (but don't have as good results). Then you can go
 | 
			
		||||
with less heap. For these languages, the nlp mode is the same as
 | 
			
		||||
`regexonly`.
 | 
			
		||||
 | 
			
		||||
Training the classifier is also memory intensive, which solely depends
 | 
			
		||||
on the size and number of documents that are being trained. However,
 | 
			
		||||
training the classifier is done periodically and can happen maybe
 | 
			
		||||
every two weeks. When classifying new documents, memory requirements
 | 
			
		||||
are lower, since the model already exists.
 | 
			
		||||
 | 
			
		||||
More details about these modes can be found
 | 
			
		||||
[here](@/docs/joex/file-processing.md#text-analysis).
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
The restserver component is very lightweight; here you can use the
defaults.
 | 
			
		||||
							
								
								
									
176  website/site/content/docs/configure/fulltext-search.md  (Normal file)
@@ -0,0 +1,176 @@
 | 
			
		||||
+++
 | 
			
		||||
title = "Full-Text Search"
 | 
			
		||||
insert_anchor_links = "right"
 | 
			
		||||
description = "Details about configuring the fulltext search."
 | 
			
		||||
weight = 50
 | 
			
		||||
template = "docs.html"
 | 
			
		||||
+++
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Full-Text Search
 | 
			
		||||
 | 
			
		||||
Fulltext search is optional and provided by external systems. There
 | 
			
		||||
are currently [Apache SOLR](https://solr.apache.org) and [PostgreSQL's
 | 
			
		||||
text search](https://www.postgresql.org/docs/14/textsearch.html)
 | 
			
		||||
available.
 | 
			
		||||
 | 
			
		||||
You can enable and configure the fulltext search backends as described
 | 
			
		||||
below and then choose the backend:
 | 
			
		||||
 | 
			
		||||
```conf
 | 
			
		||||
full-text-search {
 | 
			
		||||
  enabled = true
 | 
			
		||||
  # Which backend to use, either solr or postgresql
 | 
			
		||||
  backend = "solr"
 | 
			
		||||
  …
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
All docspell components must provide the same fulltext search
 | 
			
		||||
configuration.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## SOLR
 | 
			
		||||
 | 
			
		||||
[Apache SOLR](https://solr.apache.org) can be used to provide the
 | 
			
		||||
full-text search. This is defined in the `full-text-search.solr`
 | 
			
		||||
subsection:
 | 
			
		||||
 | 
			
		||||
``` bash
 | 
			
		||||
...
 | 
			
		||||
  full-text-search {
 | 
			
		||||
    ...
 | 
			
		||||
    solr = {
 | 
			
		||||
      url = "http://localhost:8983/solr/docspell"
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The default configuration at the end of this page contains more
 | 
			
		||||
information about each setting.
 | 
			
		||||
 | 
			
		||||
The `solr.url` is the mandatory setting that you need to change to
 | 
			
		||||
point to your SOLR instance. Then you need to set the `enabled` flag
 | 
			
		||||
to `true`.
 | 
			
		||||
 | 
			
		||||
When installing docspell manually, just install solr and create a core
 | 
			
		||||
as described in the [solr
 | 
			
		||||
documentation](https://solr.apache.org/guide/8_4/installing-solr.html).
 | 
			
		||||
That will provide you with the connection url (the last part is the
 | 
			
		||||
core name). If Docspell detects an empty core it will run a schema
 | 
			
		||||
setup on start automatically.
 | 
			
		||||
 | 
			
		||||
The `full-text-search.solr` options are the same for joex and the
 | 
			
		||||
restserver.
 | 
			
		||||
 | 
			
		||||
Sometimes it is necessary to re-create the entire index, for example
 | 
			
		||||
if you upgrade SOLR or delete the core to provide a new one (see
 | 
			
		||||
[here](https://solr.apache.org/guide/8_4/reindexing.html) for
 | 
			
		||||
details). Another way is to restart docspell (while clearing the
 | 
			
		||||
index). If docspell detects an empty index at startup, it will submit
 | 
			
		||||
a task to build the index automatically.
 | 
			
		||||
 | 
			
		||||
Note that a collective can also re-index their data using a similar
 | 
			
		||||
endpoint; but this is only deleting their data and doesn't do a full
 | 
			
		||||
re-index.
 | 
			
		||||
 | 
			
		||||
The solr index doesn't contain any new information, it can be
 | 
			
		||||
regenerated any time using the above REST call. Thus it doesn't need
 | 
			
		||||
to be backed up.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## PostgreSQL
 | 
			
		||||
 | 
			
		||||
PostgreSQL provides many additional features, one of them is [text
 | 
			
		||||
search](https://www.postgresql.org/docs/14/textsearch.html). Docspell
 | 
			
		||||
can utilize this to provide the fulltext search feature. This is
 | 
			
		||||
especially useful, if PostgreSQL is used as the primary database for
 | 
			
		||||
docspell.
 | 
			
		||||
 | 
			
		||||
You can choose to use the same database or a separate connection. The
 | 
			
		||||
fulltext search will create a single table `ftspsql_search` that holds
 | 
			
		||||
all necessary data. When doing backups, you can exclude this table as
 | 
			
		||||
it can be recreated from the primary data any time.
 | 
			
		||||
 | 
			
		||||
The configuration is placed inside `full-text-search`:
 | 
			
		||||
 | 
			
		||||
```conf
 | 
			
		||||
full-text-search {
 | 
			
		||||
  …
 | 
			
		||||
  postgresql = {
 | 
			
		||||
    use-default-connection = false
 | 
			
		||||
 | 
			
		||||
    jdbc {
 | 
			
		||||
      url = "jdbc:postgresql://server:5432/db"
 | 
			
		||||
      user = "pguser"
 | 
			
		||||
      password = ""
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pg-config = {
 | 
			
		||||
    }
 | 
			
		||||
    pg-query-parser = "websearch_to_tsquery"
 | 
			
		||||
    pg-rank-normalization = [ 4 ]
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The flag `use-default-connection` can be set to `true` if you use
 | 
			
		||||
PostgreSQL as the primary db to have it also used for the fulltext
 | 
			
		||||
search. If set to `false`, the subsequent `jdbc` block defines the
 | 
			
		||||
connection to the postgres database to use.
 | 
			
		||||
 | 
			
		||||
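For example, when PostgreSQL is already the primary database, a minimal sketch of the backend selection plus connection reuse could look like this (only keys shown above are used):

```conf
full-text-search {
  enabled = true
  # use the PostgreSQL backend …
  backend = "postgresql"
  postgresql = {
    # … and reuse the primary database connection instead of a separate jdbc block
    use-default-connection = true
  }
}
```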
The remaining settings tune PostgreSQL's text search feature. Please visit [their documentation](https://www.postgresql.org/docs/14/textsearch.html) for all the details.

- `pg-config`: this is an optional mapping from document languages as used in Docspell to a PostgreSQL text search configuration. Not all languages are equally well supported out of the box. You can create your own text search config in PostgreSQL (see the sketch after this list) and then define it in this map for your language. For example:

  ```conf
  pg-config = {
    english = "my-english"
    german = "my-german"
  }
  ```

  By default, the predefined configs are used for some languages, with `simple` as the fallback for all others.

  *If you change this setting, you must re-index everything.*
- `pg-query-parser`: the parser applied to the fulltext query. By default it is `websearch_to_tsquery`. (relevant [doc link](https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES))
- `pg-rank-normalization`: this is used to tweak the rank calculation that affects the order of the elements returned from a query. It is an array of numbers out of `1`, `2`, `4`, `8`, `16` or `32`. (relevant [doc link](https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING))
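As a sketch of the custom-config case: the names `my-english` and `my-german` in the `pg-config` example above are hypothetical, and such a configuration must actually exist in the database used for the fulltext search. Assuming `psql` access to that database, it could be derived from one of the built-in configurations and adjusted afterwards:

```bash
# create the hypothetical configs referenced in the pg-config example,
# starting from the predefined english/german configurations
psql -d db -c 'CREATE TEXT SEARCH CONFIGURATION "my-english" (COPY = pg_catalog.english);'
psql -d db -c 'CREATE TEXT SEARCH CONFIGURATION "my-german" (COPY = pg_catalog.german);'
```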
# Re-create the index

There is an [admin route](@/docs/api/intro.md#admin) that allows re-creating the entire index (for all collectives). This is possible via a call:

``` bash
$ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll
```

or use the [cli](@/docs/tools/cli.md):

```bash
dsc admin -a test123 recreate-index
```

Here the `test123` is the key defined with `admin-endpoint.secret`. If it is empty (the default), this call is disabled (as are all admin routes). Otherwise, the POST request will submit a system task that is eventually executed by a joex instance.
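For reference, a minimal sketch of enabling the admin routes in the restserver config; the exact key is assumed to be `docspell.server.admin-endpoint.secret` (check the default configuration for the authoritative listing):

```conf
docspell.server.admin-endpoint {
  # a non-empty secret enables the admin routes
  secret = "test123"
}
```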
Using this endpoint, the entire index (including the schema) will be re-created.
website/site/content/docs/configure/main.md (new file, 192 lines)
@@ -0,0 +1,192 @@
+++
title = "Main"
insert_anchor_links = "right"
description = "Describes the configuration file and shows all default settings."
weight = 10
template = "docs.html"
+++

# Configuration

Docspell's executables (restserver and joex) can take one argument – a configuration file. If that is not given, the defaults are used, overridden by environment variables. A config file overrides default values, so only values that differ from the defaults are necessary. The complete default options and their documentation are at the end of this page.

Besides the config file, another way is to provide individual settings as key-value pairs to the executable via the `-D` option. For example, to override only `base-url` you could add the argument `-Ddocspell.server.base-url=…` to the command. Multiple options are possible. For more than a few values this is very tedious, obviously, so the recommended way is to maintain a config file. If these options *and* a file are provided, then any setting given via the `-D…` option overrides the same setting from the config file.
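As a sketch, overriding a single setting on the command line could look like this (the url value is just an example):

```bash
$ ./docspell-restserver*/bin/docspell-restserver -Ddocspell.server.base-url="http://docspell.example.com" -- /path/to/server-config.conf
```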
Lastly, it is possible to configure docspell via environment variables if there is no config file supplied (if a config file *is* supplied, it is always preferred). Note that this approach is limited, as arrays are not supported. A list of environment variables can be found at the [end of this page](#environment-variables). The environment variable name follows the corresponding config key - where dots are replaced by underscores and dashes are replaced by two underscores. For example, the config key `docspell.server.app-name` can be defined as the env variable `DOCSPELL_SERVER_APP__NAME`.
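Following that rule, a sketch of starting the restserver configured purely via environment variables (no config file) could look like this; the values are examples only:

```bash
# dots become underscores, dashes become double underscores
export DOCSPELL_SERVER_APP__NAME="Docspell"
export DOCSPELL_SERVER_BASE__URL="http://docspell.example.com"
./docspell-restserver*/bin/docspell-restserver
```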
It is also possible to specify environment variables inside a config file (to get a mix of both) - please see the [documentation of the config library](https://github.com/lightbend/config#standard-behavior) for more on this.

# File Format

The format of the configuration files can be [HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation), JSON or whatever this [config library](https://github.com/lightbend/config) understands. The default values below are in HOCON format, which is recommended, since it allows comments and has some [advanced features](https://github.com/lightbend/config#features-of-hocon). Please also see their documentation for more details.

A short description (please check the links for a better understanding): the config consists of key-value pairs and can be written in a JSON-like format (called HOCON). Keys are organized in trees, and a key defines a full path into the tree. There are two ways:
```
a.b.c.d=15
```

or

```
a {
  b {
    c {
      d = 15
    }
  }
}
```

Both forms are exactly the same and are used alongside each other. Usually the braces approach is used to group some more settings, for better readability.

Strings that contain "not-so-common" characters should be enclosed in quotes. It is possible to define values at the top of the file and reuse them in different locations via the `${full.path.to.key}` syntax. When using these variables, they *must not* be enclosed in quotes.
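A small sketch of that reuse syntax, using placeholder keys in the style of the example above:

```conf
# define a value once at the top of the file …
db-url = "jdbc:postgresql://localhost:5432/docspell"

# … and reuse it elsewhere; the referenced variable is not quoted
a.b.url = ${db-url}
a.c.url = ${db-url}
```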
# Config Options

The configuration of both components uses separate namespaces. The configuration for the REST server is below `docspell.server`, while the one for joex is below `docspell.joex`.

You can therefore use two separate config files or one single file containing both namespaces.
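A single file holding both namespaces might be sketched like this (the `…` stands for the respective settings):

```conf
docspell.server {
  # restserver settings
  …
}

docspell.joex {
  # joex settings
  …
}
```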
## App-id

The `app-id` is the identifier of the corresponding instance. It *must be unique* for all instances. By default the REST server uses `rest1` and joex `joex1`. It is recommended to overwrite this setting to have an explicit and stable identifier, should multiple instances be intended.

``` bash
docspell.server.app-id = "rest1"
docspell.joex.app-id = "joex1"
```
## Other options

Please see the menu on the left for details about specific configuration options.

# JVM Options

The start scripts support some options to configure the JVM. One often used setting is the maximum heap size of the JVM. By default, java determines it based on properties of the current machine. You can specify it by passing java startup options to the command:

```
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
```

This would limit the maximum heap to 1GB. The double dash separates internal options and the arguments to the program. Another frequently used option is to change the default temp directory. Usually it is `/tmp`, but it may be desired to have a dedicated temp directory, which can be configured:

```
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
```

The command:

```
$ ./docspell-restserver*/bin/docspell-restserver -h
```

gives an overview of supported options.

It is recommended to run joex with the G1GC enabled. If you use java8, you need to add an option to use G1GC (`-XX:+UseG1GC`); for java11 this is not necessary (but doesn't hurt either). This could look like this:

```
./docspell-joex-{{version()}}/bin/docspell-joex -J-Xmx1596M -J-XX:+UseG1GC -- /path/to/joex.conf
```

Using these options you can define how much memory the JVM process is able to use. This might need to be adapted depending on the usage scenario and configured text analysis features.

Please have a look at the corresponding [section](@/docs/configure/file-processing.md#memory-usage).
# Logging

By default, docspell logs to stdout. This works well when managed by systemd or other inits. Logging can be configured in the configuration file or via environment variables. There are only two settings:

- `minimum-level`: specifies the log level to control the verbosity. Levels are ordered from *Trace*, *Debug*, *Info*, *Warn* to *Error*.
- `format`: this defines how the logs are formatted. There are two formats for humans: *Plain* and *Fancy*; and two more suited for machine consumption: *Json* and *Logfmt*. The *Json* format contains all details, while the others may omit some for readability.

These settings are the same for joex and the restserver component.
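A minimal sketch of these two settings, assuming they sit below `docspell.server.logging` (and likewise `docspell.joex.logging`) as in the default config listed below:

```conf
docspell.server.logging {
  minimum-level = "Info"
  format = "Fancy"
}
```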
# Default Config

## Rest Server

{{ incl_conf(path="templates/shortcodes/server.conf") }}

## Joex

{{ incl_conf(path="templates/shortcodes/joex.conf") }}

## Environment Variables

Environment variables can be used when there is no config file supplied. The listing below shows all possible variables and their default values.

{{ incl_conf(path="templates/shortcodes/config.env.txt") }}
website/site/content/docs/configure/registration.md (new file, 44 lines)
@@ -0,0 +1,44 @@
+++
title = "Registration"
insert_anchor_links = "right"
description = "Describes the configuration file and shows all default settings."
weight = 80
template = "docs.html"
+++

# Registration Options

This defines if and how new users can create accounts. There are 3 options:

- *closed*: no new user can sign up
- *open*: new users can sign up
- *invite*: new users can sign up but require an invitation key

This applies only to the REST server component.

``` bash
docspell.server.backend.signup {
  mode = "open"

  # If mode == 'invite', a password must be provided to generate
  # invitation keys. It must not be empty.
  new-invite-password = ""

  # If mode == 'invite', this is the period an invitation token is
  # considered valid.
  invite-time = "3 days"
}
```

The mode `invite` is intended to open the application only to some users. The admin can create these invitation keys and distribute them to the desired people. For this, the `new-invite-password` must be given. The idea is that only the person who installs docspell knows this. If it is not set, then invitation won't work. New invitation keys can be generated from within the web application or via REST calls (using `curl`, for example).

``` bash
curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
```
@@ -14,7 +14,8 @@ template = "docs.html"
- Handle multiple documents as one unit
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
- [Full-Text Search](@/docs/webapp/finding.md#full-text-search) based
  on [Apache SOLR](https://solr.apache.org)
  on [Apache SOLR](https://solr.apache.org) or [PostgreSQL's text
  search](https://www.postgresql.org/docs/14/textsearch.html)
- Conversion to PDF: all files are converted into a PDF file. PDFs
  with only images (as often returned from scanners) are converted
  into searchable PDF/A pdfs.
@@ -36,7 +37,8 @@ template = "docs.html"
  [REST Api](@/docs/api/_index.md); allows to [generate
  clients](https://openapi-generator.tech/docs/generators) for many
  languages
- [OpenID Connect](@/docs/configure/_index.md#openid-connect-oauth2)
- [OpenID
  Connect](@/docs/configure/authentication.md#openid-connect-oauth2)
  support allows Docspell to integrate into your SSO setup, for
  example with keycloak.
- Two-Factor Authentication using [TOTP](@/docs/webapp/totp.md) built
@@ -74,9 +74,10 @@ $ ./docspell-joex*/bin/docspell-joex
```

This will startup both components using the default configuration.
Please refer to the [configuration page](@/docs/configure/_index.md)
for how to create a custom config file. Once you have your config
file, simply pass it as argument to the command:
Please refer to the [configuration
page](@/docs/configure/main.md) for how to create a custom
config file. Once you have your config file, simply pass it as
argument to the command:

```
$ ./docspell-restserver*/bin/docspell-restserver /path/to/server-config.conf
@@ -110,7 +111,7 @@ Fulltext search is powered by [SOLR](https://solr.apache.org). You
need to install solr and create a core for docspell. Then change the
solr url for both components (restserver and joex) accordingly. See
the relevant section in the [config
page](@/docs/configure/_index.md#full-text-search-solr).
page](@/docs/configure/fulltext-search.md).

### Watching a directory
@@ -102,7 +102,7 @@ When using H2, make sure that all components access the same database
– the jdbc url must point to the same file. Then, it is important to
add the options
`;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE` at the end
of the url. See the [config page](@/docs/configure/_index.md#jdbc) for
of the url. See the [config page](@/docs/configure/database.md) for
an example.

For large installations, PostgreSQL or MariaDB is recommended. Create

@@ -30,10 +30,10 @@ result in long processing times for OCR and text analysis. The board
should provide 4G of RAM (like the current RPi4), especially if also a
database and solr are running next to it. The memory required by joex
depends on the config and document language. Please pick a value that
suits your setup from [here](@/docs/configure/_index.md#memory-usage).
For boards like the RPi, it might be necessary to use
`nlp.mode=basic`, rather than `nlp.mode=full`. You should also set the
joex pool size to 1.
suits your setup from
[here](@/docs/configure/file-processing.md#memory-usage). For boards
like the RPi, it might be necessary to use `nlp.mode=basic`, rather
than `nlp.mode=full`. You should also set the joex pool size to 1.

An example: on this [UP
board](https://up-board.org/up/specifications/) with an Intel Atom
@@ -80,7 +80,7 @@ line are required. As you see for `wkhtmltopdf` the page size is fixed
to DIN A4. Other commands are configured like this as well.

For the default values, please see the [configuration
page](@/docs/configure/_index.md#joex).
page](@/docs/configure/main.md#joex).

## Duplicate Check

@@ -23,7 +23,7 @@ For larger installations, it is probably better to run several joex
components on different machines. That works out of the box, as long
as all components point to the same database and use different
`app-id`s (see [configuring
docspell](@/docs/configure/_index.md#app-id)).
docspell](@/docs/configure/main.md#app-id)).

When files are submitted to docspell, they are stored in the database
and all known joex components are notified about new work. Then they
@@ -323,8 +323,8 @@ full detail.
These are a set of commands that simply call a route at the server to
submit a maintenance task or to reset the password of some user. These
commands require the [admin
secret](@/docs/configure/_index.md#admin-endpoint) either in the
config file or as an argument.
secret](@/docs/configure/admin-endpoint.md) either in the config file
or as an argument.

### Reset user password
@@ -19,15 +19,15 @@ _UI Settings_. Among other things, there is a _Item Cards_ section:
This defines how many of the item notes to display in the card. You
can set it to `0` to not show any notes at all. This is only a "soft
limit", there is also a "hard limit" in [docspell's
configuration](@/docs/configure/_index.md#rest-server) (see `max-note-length`),
that is an upper limit to this value.
configuration](@/docs/configure/main.md#rest-server) (see
`max-note-length`), that is an upper limit to this value.

### Size of item preview

The item preview is an image of the first page of the first
attachment. You can change the order of attachments in the item detail
view. This image has a predefined size, which is specified [docspell's
configuration](@/docs/configure/_index.md#joex) (see
configuration](@/docs/configure/main.md#joex) (see
`extraction.preview.dpi`). The size for displaying it, can be
specified via this setting. A _small_ preview uses about 80px width, a
_medium_ one 160px and _large_ means to use the available space in the
@@ -191,9 +191,9 @@ file to look for duplicates, too.

Docspell will go through all folders and download mails in “batches”.
This size can be set by the admin in the [configuration
file](@/docs/configure/_index.md#joex) and applies to all these tasks
(same for all users). This batch only contains the mail headers and
not the complete mail.
file](@/docs/configure/main.md#joex) and applies to all these
tasks (same for all users). This batch only contains the mail headers
and not the complete mail.

Then each mail is downloaded completely one by one and converted into
an [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions) file
@@ -11,7 +11,8 @@ Docspell has built-in support for two-factor (2FA) authentication
using
[TOTP](https://en.wikipedia.org/wiki/Time-based_One-Time_Password)s.
For anything more, consider a dedicated account management tool and
[OpenID Connect](@/docs/configure/_index.md#openid-connect-oauth2).
[OpenID
Connect](@/docs/configure/authentication.md#openid-connect-oauth2).

## Setup

@@ -65,7 +66,7 @@ client](@/docs/tools/cli.md) to execute an admin command that removes
2FA for a given user.

For this to work, you need to [enable the admin
endpoint](@/docs/configure/_index.md#admin-endpoint). Then execute the
endpoint](@/docs/configure/admin-endpoint.md). Then execute the
`disable-2fa` admin command and specify the complete account.

```