mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-04 12:30:12 +00:00 
			
		
		
		
	Merge pull request #1462 from eikek/postgres-fulltext
Postgres fulltext
This commit is contained in:
		
							
								
								
									
										44
									
								
								build.sbt
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								build.sbt
									
									
									
									
									
								
							@@ -319,19 +319,6 @@ val common = project
 | 
				
			|||||||
  )
 | 
					  )
 | 
				
			||||||
  .dependsOn(loggingApi)
 | 
					  .dependsOn(loggingApi)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
val config = project
 | 
					 | 
				
			||||||
  .in(file("modules/config"))
 | 
					 | 
				
			||||||
  .disablePlugins(RevolverPlugin)
 | 
					 | 
				
			||||||
  .settings(sharedSettings)
 | 
					 | 
				
			||||||
  .withTestSettings
 | 
					 | 
				
			||||||
  .settings(
 | 
					 | 
				
			||||||
    name := "docspell-config",
 | 
					 | 
				
			||||||
    libraryDependencies ++=
 | 
					 | 
				
			||||||
      Dependencies.fs2 ++
 | 
					 | 
				
			||||||
        Dependencies.pureconfig
 | 
					 | 
				
			||||||
  )
 | 
					 | 
				
			||||||
  .dependsOn(common, loggingApi)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
val loggingScribe = project
 | 
					val loggingScribe = project
 | 
				
			||||||
  .in(file("modules/logging/scribe"))
 | 
					  .in(file("modules/logging/scribe"))
 | 
				
			||||||
  .disablePlugins(RevolverPlugin)
 | 
					  .disablePlugins(RevolverPlugin)
 | 
				
			||||||
@@ -619,6 +606,20 @@ val ftssolr = project
 | 
				
			|||||||
  )
 | 
					  )
 | 
				
			||||||
  .dependsOn(common, ftsclient)
 | 
					  .dependsOn(common, ftsclient)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					val ftspsql = project
 | 
				
			||||||
 | 
					  .in(file("modules/fts-psql"))
 | 
				
			||||||
 | 
					  .disablePlugins(RevolverPlugin)
 | 
				
			||||||
 | 
					  .settings(sharedSettings)
 | 
				
			||||||
 | 
					  .withTestSettings
 | 
				
			||||||
 | 
					  .settings(
 | 
				
			||||||
 | 
					    name := "docspell-fts-psql",
 | 
				
			||||||
 | 
					    libraryDependencies ++=
 | 
				
			||||||
 | 
					      Dependencies.doobie ++
 | 
				
			||||||
 | 
					        Dependencies.postgres ++
 | 
				
			||||||
 | 
					        Dependencies.flyway
 | 
				
			||||||
 | 
					  )
 | 
				
			||||||
 | 
					  .dependsOn(common, ftsclient, store % "compile->test;test->test")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
val restapi = project
 | 
					val restapi = project
 | 
				
			||||||
  .in(file("modules/restapi"))
 | 
					  .in(file("modules/restapi"))
 | 
				
			||||||
  .disablePlugins(RevolverPlugin)
 | 
					  .disablePlugins(RevolverPlugin)
 | 
				
			||||||
@@ -715,6 +716,20 @@ val webapp = project
 | 
				
			|||||||
  )
 | 
					  )
 | 
				
			||||||
  .dependsOn(query.js)
 | 
					  .dependsOn(query.js)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Config project shared among the two applications only
 | 
				
			||||||
 | 
					val config = project
 | 
				
			||||||
 | 
					  .in(file("modules/config"))
 | 
				
			||||||
 | 
					  .disablePlugins(RevolverPlugin)
 | 
				
			||||||
 | 
					  .settings(sharedSettings)
 | 
				
			||||||
 | 
					  .withTestSettings
 | 
				
			||||||
 | 
					  .settings(
 | 
				
			||||||
 | 
					    name := "docspell-config",
 | 
				
			||||||
 | 
					    libraryDependencies ++=
 | 
				
			||||||
 | 
					      Dependencies.fs2 ++
 | 
				
			||||||
 | 
					        Dependencies.pureconfig
 | 
				
			||||||
 | 
					  )
 | 
				
			||||||
 | 
					  .dependsOn(common, loggingApi, ftspsql, store)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// --- Application(s)
 | 
					// --- Application(s)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
val joex = project
 | 
					val joex = project
 | 
				
			||||||
@@ -769,6 +784,7 @@ val joex = project
 | 
				
			|||||||
    joexapi,
 | 
					    joexapi,
 | 
				
			||||||
    restapi,
 | 
					    restapi,
 | 
				
			||||||
    ftssolr,
 | 
					    ftssolr,
 | 
				
			||||||
 | 
					    ftspsql,
 | 
				
			||||||
    pubsubNaive,
 | 
					    pubsubNaive,
 | 
				
			||||||
    notificationImpl,
 | 
					    notificationImpl,
 | 
				
			||||||
    schedulerImpl
 | 
					    schedulerImpl
 | 
				
			||||||
@@ -841,6 +857,7 @@ val restserver = project
 | 
				
			|||||||
    backend,
 | 
					    backend,
 | 
				
			||||||
    webapp,
 | 
					    webapp,
 | 
				
			||||||
    ftssolr,
 | 
					    ftssolr,
 | 
				
			||||||
 | 
					    ftspsql,
 | 
				
			||||||
    oidc,
 | 
					    oidc,
 | 
				
			||||||
    pubsubNaive,
 | 
					    pubsubNaive,
 | 
				
			||||||
    notificationImpl,
 | 
					    notificationImpl,
 | 
				
			||||||
@@ -926,6 +943,7 @@ val root = project
 | 
				
			|||||||
    analysis,
 | 
					    analysis,
 | 
				
			||||||
    ftsclient,
 | 
					    ftsclient,
 | 
				
			||||||
    ftssolr,
 | 
					    ftssolr,
 | 
				
			||||||
 | 
					    ftspsql,
 | 
				
			||||||
    files,
 | 
					    files,
 | 
				
			||||||
    store,
 | 
					    store,
 | 
				
			||||||
    joexapi,
 | 
					    joexapi,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -62,7 +62,14 @@ object CreateIndex {
 | 
				
			|||||||
        val items = store
 | 
					        val items = store
 | 
				
			||||||
          .transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
 | 
					          .transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
 | 
				
			||||||
          .map(nn =>
 | 
					          .map(nn =>
 | 
				
			||||||
            TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
 | 
					            TextData.item(
 | 
				
			||||||
 | 
					              nn.id,
 | 
				
			||||||
 | 
					              nn.collective,
 | 
				
			||||||
 | 
					              nn.folder,
 | 
				
			||||||
 | 
					              Option(nn.name),
 | 
				
			||||||
 | 
					              nn.notes,
 | 
				
			||||||
 | 
					              nn.language
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
          )
 | 
					          )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fts.indexData(logger, attachs ++ items)
 | 
					        fts.indexData(logger, attachs ++ items)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -605,7 +605,14 @@ object OItem {
 | 
				
			|||||||
                .transact(RItem.updateNotes(item, collective, notes))
 | 
					                .transact(RItem.updateNotes(item, collective, notes))
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            .flatTap(
 | 
					            .flatTap(
 | 
				
			||||||
              onSuccessIgnoreError(fts.updateItemNotes(logger, item, collective, notes))
 | 
					              onSuccessIgnoreError {
 | 
				
			||||||
 | 
					                store
 | 
				
			||||||
 | 
					                  .transact(RCollective.findLanguage(collective))
 | 
				
			||||||
 | 
					                  .map(_.getOrElse(Language.English))
 | 
				
			||||||
 | 
					                  .flatMap(lang =>
 | 
				
			||||||
 | 
					                    fts.updateItemNotes(logger, item, collective, lang, notes)
 | 
				
			||||||
 | 
					                  )
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def setName(item: Ident, name: String, collective: Ident): F[UpdateResult] =
 | 
					        def setName(item: Ident, name: String, collective: Ident): F[UpdateResult] =
 | 
				
			||||||
@@ -615,7 +622,14 @@ object OItem {
 | 
				
			|||||||
                .transact(RItem.updateName(item, collective, name))
 | 
					                .transact(RItem.updateName(item, collective, name))
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            .flatTap(
 | 
					            .flatTap(
 | 
				
			||||||
              onSuccessIgnoreError(fts.updateItemName(logger, item, collective, name))
 | 
					              onSuccessIgnoreError {
 | 
				
			||||||
 | 
					                store
 | 
				
			||||||
 | 
					                  .transact(RCollective.findLanguage(collective))
 | 
				
			||||||
 | 
					                  .map(_.getOrElse(Language.English))
 | 
				
			||||||
 | 
					                  .flatMap(lang =>
 | 
				
			||||||
 | 
					                    fts.updateItemName(logger, item, collective, lang, name)
 | 
				
			||||||
 | 
					                  )
 | 
				
			||||||
 | 
					              }
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def setNameMultiple(
 | 
					        def setNameMultiple(
 | 
				
			||||||
@@ -733,10 +747,17 @@ object OItem {
 | 
				
			|||||||
            )
 | 
					            )
 | 
				
			||||||
            .flatTap(
 | 
					            .flatTap(
 | 
				
			||||||
              onSuccessIgnoreError(
 | 
					              onSuccessIgnoreError(
 | 
				
			||||||
                OptionT(store.transact(RAttachment.findItemId(attachId)))
 | 
					                OptionT(store.transact(RAttachment.findItemAndLanguage(attachId)))
 | 
				
			||||||
                  .semiflatMap(itemId =>
 | 
					                  .semiflatMap { case (itemId, lang) =>
 | 
				
			||||||
                    fts.updateAttachmentName(logger, itemId, attachId, collective, name)
 | 
					                    fts.updateAttachmentName(
 | 
				
			||||||
                  )
 | 
					                      logger,
 | 
				
			||||||
 | 
					                      itemId,
 | 
				
			||||||
 | 
					                      attachId,
 | 
				
			||||||
 | 
					                      collective,
 | 
				
			||||||
 | 
					                      lang.getOrElse(Language.English),
 | 
				
			||||||
 | 
					                      name
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
 | 
					                  }
 | 
				
			||||||
                  .fold(())(identity)
 | 
					                  .fold(())(identity)
 | 
				
			||||||
              )
 | 
					              )
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -14,7 +14,7 @@ case class Banner(
 | 
				
			|||||||
    configFile: Option[String],
 | 
					    configFile: Option[String],
 | 
				
			||||||
    appId: Ident,
 | 
					    appId: Ident,
 | 
				
			||||||
    baseUrl: LenientUri,
 | 
					    baseUrl: LenientUri,
 | 
				
			||||||
    ftsUrl: Option[LenientUri],
 | 
					    ftsInfo: Option[String],
 | 
				
			||||||
    fileStoreConfig: FileStoreConfig
 | 
					    fileStoreConfig: FileStoreConfig
 | 
				
			||||||
) {
 | 
					) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -35,7 +35,7 @@ case class Banner(
 | 
				
			|||||||
      s"Id:       ${appId.id}",
 | 
					      s"Id:       ${appId.id}",
 | 
				
			||||||
      s"Base-Url: ${baseUrl.asString}",
 | 
					      s"Base-Url: ${baseUrl.asString}",
 | 
				
			||||||
      s"Database: ${jdbcUrl.asString}",
 | 
					      s"Database: ${jdbcUrl.asString}",
 | 
				
			||||||
      s"Fts:      ${ftsUrl.map(_.asString).getOrElse("-")}",
 | 
					      s"Fts:      ${ftsInfo.getOrElse("-")}",
 | 
				
			||||||
      s"Config:   ${configFile.getOrElse("")}",
 | 
					      s"Config:   ${configFile.getOrElse("")}",
 | 
				
			||||||
      s"FileRepo: ${fileStoreConfig}",
 | 
					      s"FileRepo: ${fileStoreConfig}",
 | 
				
			||||||
      ""
 | 
					      ""
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										27
									
								
								modules/config/src/main/scala/docspell/config/FtsType.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								modules/config/src/main/scala/docspell/config/FtsType.scala
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.data.NonEmptyList
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					sealed trait FtsType {
 | 
				
			||||||
 | 
					  def name: String
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					object FtsType {
 | 
				
			||||||
 | 
					  case object Solr extends FtsType { val name = "solr" }
 | 
				
			||||||
 | 
					  case object PostgreSQL extends FtsType { val name = "postgresql" }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  val all: NonEmptyList[FtsType] =
 | 
				
			||||||
 | 
					    NonEmptyList.of(Solr, PostgreSQL)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def fromName(str: String): Either[String, FtsType] =
 | 
				
			||||||
 | 
					    all.find(_.name.equalsIgnoreCase(str)).toRight(s"Unknown fts type: $str")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def unsafeFromName(str: String): FtsType =
 | 
				
			||||||
 | 
					    fromName(str).fold(sys.error, identity)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -10,9 +10,11 @@ import java.nio.file.{Path => JPath}
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import scala.reflect.ClassTag
 | 
					import scala.reflect.ClassTag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.syntax.all._
 | 
				
			||||||
import fs2.io.file.Path
 | 
					import fs2.io.file.Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import docspell.common._
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.ftspsql.{PgQueryParser, RankNormalization}
 | 
				
			||||||
import docspell.logging.{Level, LogConfig}
 | 
					import docspell.logging.{Level, LogConfig}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import com.github.eikek.calev.CalEvent
 | 
					import com.github.eikek.calev.CalEvent
 | 
				
			||||||
@@ -85,11 +87,28 @@ object Implicits {
 | 
				
			|||||||
  implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
 | 
					  implicit val fileStoreTypeReader: ConfigReader[FileStoreType] =
 | 
				
			||||||
    ConfigReader[String].emap(reason(FileStoreType.fromString))
 | 
					    ConfigReader[String].emap(reason(FileStoreType.fromString))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def reason[A: ClassTag](
 | 
					  implicit val pgQueryParserReader: ConfigReader[PgQueryParser] =
 | 
				
			||||||
      f: String => Either[String, A]
 | 
					    ConfigReader[String].emap(reason(PgQueryParser.fromName))
 | 
				
			||||||
  ): String => Either[FailureReason, A] =
 | 
					
 | 
				
			||||||
 | 
					  implicit val pgRankNormalizationReader: ConfigReader[RankNormalization] =
 | 
				
			||||||
 | 
					    ConfigReader[List[Int]].emap(
 | 
				
			||||||
 | 
					      reason(ints => ints.traverse(RankNormalization.byNumber).map(_.reduce(_ && _)))
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  implicit val languageReader: ConfigReader[Language] =
 | 
				
			||||||
 | 
					    ConfigReader[String].emap(reason(Language.fromString))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  implicit def languageMapReader[B: ConfigReader]: ConfigReader[Map[Language, B]] =
 | 
				
			||||||
 | 
					    pureconfig.configurable.genericMapReader[Language, B](reason(Language.fromString))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  implicit val ftsTypeReader: ConfigReader[FtsType] =
 | 
				
			||||||
 | 
					    ConfigReader[String].emap(reason(FtsType.fromName))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def reason[T, A: ClassTag](
 | 
				
			||||||
 | 
					      f: T => Either[String, A]
 | 
				
			||||||
 | 
					  ): T => Either[FailureReason, A] =
 | 
				
			||||||
    in =>
 | 
					    in =>
 | 
				
			||||||
      f(in).left.map(str =>
 | 
					      f(in).left.map(str =>
 | 
				
			||||||
        CannotConvert(in, implicitly[ClassTag[A]].runtimeClass.toString, str)
 | 
					        CannotConvert(in.toString, implicitly[ClassTag[A]].runtimeClass.toString, str)
 | 
				
			||||||
      )
 | 
					      )
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.config
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.ftspsql._
 | 
				
			||||||
 | 
					import docspell.store.JdbcConfig
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					case class PgFtsConfig(
 | 
				
			||||||
 | 
					    useDefaultConnection: Boolean,
 | 
				
			||||||
 | 
					    jdbc: JdbcConfig,
 | 
				
			||||||
 | 
					    pgQueryParser: PgQueryParser,
 | 
				
			||||||
 | 
					    pgRankNormalization: RankNormalization,
 | 
				
			||||||
 | 
					    pgConfig: Map[Language, String]
 | 
				
			||||||
 | 
					) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def toPsqlConfig(stdConn: JdbcConfig): PsqlConfig = {
 | 
				
			||||||
 | 
					    val db =
 | 
				
			||||||
 | 
					      if (useDefaultConnection) stdConn
 | 
				
			||||||
 | 
					      else jdbc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PsqlConfig(
 | 
				
			||||||
 | 
					      db.url,
 | 
				
			||||||
 | 
					      db.user,
 | 
				
			||||||
 | 
					      Password(db.password),
 | 
				
			||||||
 | 
					      pgConfig,
 | 
				
			||||||
 | 
					      pgQueryParser,
 | 
				
			||||||
 | 
					      pgRankNormalization
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					object PgFtsConfig {}
 | 
				
			||||||
@@ -68,19 +68,24 @@ trait FtsClient[F[_]] {
 | 
				
			|||||||
      logger: Logger[F],
 | 
					      logger: Logger[F],
 | 
				
			||||||
      itemId: Ident,
 | 
					      itemId: Ident,
 | 
				
			||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
 | 
					      language: Language,
 | 
				
			||||||
      name: String
 | 
					      name: String
 | 
				
			||||||
  ): F[Unit] =
 | 
					  ): F[Unit] =
 | 
				
			||||||
    updateIndex(logger, TextData.item(itemId, collective, None, Some(name), None))
 | 
					    updateIndex(
 | 
				
			||||||
 | 
					      logger,
 | 
				
			||||||
 | 
					      TextData.item(itemId, collective, None, Some(name), None, language)
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def updateItemNotes(
 | 
					  def updateItemNotes(
 | 
				
			||||||
      logger: Logger[F],
 | 
					      logger: Logger[F],
 | 
				
			||||||
      itemId: Ident,
 | 
					      itemId: Ident,
 | 
				
			||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
 | 
					      language: Language,
 | 
				
			||||||
      notes: Option[String]
 | 
					      notes: Option[String]
 | 
				
			||||||
  ): F[Unit] =
 | 
					  ): F[Unit] =
 | 
				
			||||||
    updateIndex(
 | 
					    updateIndex(
 | 
				
			||||||
      logger,
 | 
					      logger,
 | 
				
			||||||
      TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")))
 | 
					      TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")), language)
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def updateAttachmentName(
 | 
					  def updateAttachmentName(
 | 
				
			||||||
@@ -88,6 +93,7 @@ trait FtsClient[F[_]] {
 | 
				
			|||||||
      itemId: Ident,
 | 
					      itemId: Ident,
 | 
				
			||||||
      attachId: Ident,
 | 
					      attachId: Ident,
 | 
				
			||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
 | 
					      language: Language,
 | 
				
			||||||
      name: Option[String]
 | 
					      name: Option[String]
 | 
				
			||||||
  ): F[Unit] =
 | 
					  ): F[Unit] =
 | 
				
			||||||
    updateIndex(
 | 
					    updateIndex(
 | 
				
			||||||
@@ -97,7 +103,7 @@ trait FtsClient[F[_]] {
 | 
				
			|||||||
        attachId,
 | 
					        attachId,
 | 
				
			||||||
        collective,
 | 
					        collective,
 | 
				
			||||||
        None,
 | 
					        None,
 | 
				
			||||||
        Language.English,
 | 
					        language,
 | 
				
			||||||
        Some(name.getOrElse("")),
 | 
					        Some(name.getOrElse("")),
 | 
				
			||||||
        None
 | 
					        None
 | 
				
			||||||
      )
 | 
					      )
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -18,6 +18,8 @@ sealed trait TextData {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  def folder: Option[Ident]
 | 
					  def folder: Option[Ident]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def language: Language
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
 | 
					  final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
 | 
				
			||||||
    this match {
 | 
					    this match {
 | 
				
			||||||
      case a: TextData.Attachment => f(a)
 | 
					      case a: TextData.Attachment => f(a)
 | 
				
			||||||
@@ -32,7 +34,7 @@ object TextData {
 | 
				
			|||||||
      attachId: Ident,
 | 
					      attachId: Ident,
 | 
				
			||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
      folder: Option[Ident],
 | 
					      folder: Option[Ident],
 | 
				
			||||||
      lang: Language,
 | 
					      language: Language,
 | 
				
			||||||
      name: Option[String],
 | 
					      name: Option[String],
 | 
				
			||||||
      text: Option[String]
 | 
					      text: Option[String]
 | 
				
			||||||
  ) extends TextData {
 | 
					  ) extends TextData {
 | 
				
			||||||
@@ -57,7 +59,8 @@ object TextData {
 | 
				
			|||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
      folder: Option[Ident],
 | 
					      folder: Option[Ident],
 | 
				
			||||||
      name: Option[String],
 | 
					      name: Option[String],
 | 
				
			||||||
      notes: Option[String]
 | 
					      notes: Option[String],
 | 
				
			||||||
 | 
					      language: Language
 | 
				
			||||||
  ) extends TextData {
 | 
					  ) extends TextData {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    val id = Ident.unsafe("item") / item
 | 
					    val id = Ident.unsafe("item") / item
 | 
				
			||||||
@@ -69,8 +72,9 @@ object TextData {
 | 
				
			|||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
      folder: Option[Ident],
 | 
					      folder: Option[Ident],
 | 
				
			||||||
      name: Option[String],
 | 
					      name: Option[String],
 | 
				
			||||||
      notes: Option[String]
 | 
					      notes: Option[String],
 | 
				
			||||||
 | 
					      lang: Language
 | 
				
			||||||
  ): TextData =
 | 
					  ): TextData =
 | 
				
			||||||
    Item(item, collective, folder, name, notes)
 | 
					    Item(item, collective, folder, name, notes, lang)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -0,0 +1,27 @@
 | 
				
			|||||||
 | 
					create table "ftspsql_search"(
 | 
				
			||||||
 | 
					  "id" varchar(254) not null primary key,
 | 
				
			||||||
 | 
					  "item_id" varchar(254) not null,
 | 
				
			||||||
 | 
					  "collective" varchar(254) not null,
 | 
				
			||||||
 | 
					  "lang" varchar(254) not null,
 | 
				
			||||||
 | 
					  "attach_id" varchar(254),
 | 
				
			||||||
 | 
					  "folder_id" varchar(254),
 | 
				
			||||||
 | 
					  "updated_at" timestamptz not null default current_timestamp,
 | 
				
			||||||
 | 
					  --- content columns
 | 
				
			||||||
 | 
					  "attach_name" text,
 | 
				
			||||||
 | 
					  "attach_content" text,
 | 
				
			||||||
 | 
					  "item_name" text,
 | 
				
			||||||
 | 
					  "item_notes" text,
 | 
				
			||||||
 | 
					  --- index column
 | 
				
			||||||
 | 
					  "fts_config" regconfig not null,
 | 
				
			||||||
 | 
					  "text_index" tsvector
 | 
				
			||||||
 | 
					    generated always as (
 | 
				
			||||||
 | 
					     setweight(to_tsvector("fts_config", coalesce("attach_name", '')), 'B') ||
 | 
				
			||||||
 | 
					     setweight(to_tsvector("fts_config", coalesce("item_name", '')), 'B') ||
 | 
				
			||||||
 | 
					     setweight(to_tsvector("fts_config", coalesce("attach_content", '')), 'C') ||
 | 
				
			||||||
 | 
					     setweight(to_tsvector("fts_config", coalesce("item_notes", '')), 'C')) stored
 | 
				
			||||||
 | 
					);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					create index "ftspsql_search_ftsidx" on "ftspsql_search" using GIN ("text_index");
 | 
				
			||||||
 | 
					create index "ftpsql_search_item_idx" on "ftspsql_search"("item_id");
 | 
				
			||||||
 | 
					create index "ftpsql_search_attach_idx" on "ftspsql_search"("attach_id");
 | 
				
			||||||
 | 
					create index "ftpsql_search_folder_idx" on "ftspsql_search"("folder_id");
 | 
				
			||||||
@@ -0,0 +1,43 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.effect._
 | 
				
			||||||
 | 
					import cats.implicits._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import org.flywaydb.core.Flyway
 | 
				
			||||||
 | 
					import org.flywaydb.core.api.output.MigrateResult
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					final class DbMigration[F[_]: Sync](cfg: PsqlConfig) {
 | 
				
			||||||
 | 
					  private[this] val logger = docspell.logging.getLogger[F]
 | 
				
			||||||
 | 
					  private val location: String = "classpath:db/psqlfts"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def run: F[MigrateResult] =
 | 
				
			||||||
 | 
					    for {
 | 
				
			||||||
 | 
					      fw <- createFlyway
 | 
				
			||||||
 | 
					      _ <- logger.info(s"Running FTS migrations")
 | 
				
			||||||
 | 
					      result <- Sync[F].blocking(fw.migrate())
 | 
				
			||||||
 | 
					    } yield result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def createFlyway: F[Flyway] =
 | 
				
			||||||
 | 
					    for {
 | 
				
			||||||
 | 
					      _ <- logger.info(s"Creating Flyway for: $location")
 | 
				
			||||||
 | 
					      fw = Flyway
 | 
				
			||||||
 | 
					        .configure()
 | 
				
			||||||
 | 
					        .table("flyway_fts_history")
 | 
				
			||||||
 | 
					        .cleanDisabled(true)
 | 
				
			||||||
 | 
					        .dataSource(cfg.url.asString, cfg.user, cfg.password.pass)
 | 
				
			||||||
 | 
					        .locations(location)
 | 
				
			||||||
 | 
					        .baselineOnMigrate(true)
 | 
				
			||||||
 | 
					        .load()
 | 
				
			||||||
 | 
					    } yield fw
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					object DbMigration {
 | 
				
			||||||
 | 
					  def apply[F[_]: Sync](cfg: PsqlConfig): DbMigration[F] =
 | 
				
			||||||
 | 
					    new DbMigration[F](cfg)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -0,0 +1,33 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import doobie._
 | 
				
			||||||
 | 
					import doobie.util.log.Success
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					trait DoobieMeta {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  implicit val sqlLogging: LogHandler = LogHandler {
 | 
				
			||||||
 | 
					    case e @ Success(_, _, _, _) =>
 | 
				
			||||||
 | 
					      DoobieMeta.logger.debug("SQL " + e)
 | 
				
			||||||
 | 
					    case e =>
 | 
				
			||||||
 | 
					      DoobieMeta.logger.error(s"SQL Failure: $e")
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  implicit val metaIdent: Meta[Ident] =
 | 
				
			||||||
 | 
					    Meta[String].timap(Ident.unsafe)(_.id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  implicit val metaLanguage: Meta[Language] =
 | 
				
			||||||
 | 
					    Meta[String].timap(Language.unsafe)(_.iso3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					object DoobieMeta {
 | 
				
			||||||
 | 
					  private val logger = org.log4s.getLogger
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -0,0 +1,65 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.syntax.all._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common.{Ident, Language}
 | 
				
			||||||
 | 
					import docspell.ftsclient.TextData
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					final case class FtsRecord(
 | 
				
			||||||
 | 
					    id: Ident,
 | 
				
			||||||
 | 
					    itemId: Ident,
 | 
				
			||||||
 | 
					    collective: Ident,
 | 
				
			||||||
 | 
					    language: Language,
 | 
				
			||||||
 | 
					    attachId: Option[Ident],
 | 
				
			||||||
 | 
					    folderId: Option[Ident],
 | 
				
			||||||
 | 
					    attachName: Option[String],
 | 
				
			||||||
 | 
					    attachContent: Option[String],
 | 
				
			||||||
 | 
					    itemName: Option[String],
 | 
				
			||||||
 | 
					    itemNotes: Option[String]
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					object FtsRecord {
 | 
				
			||||||
 | 
					  def fromTextData(td: TextData): FtsRecord =
 | 
				
			||||||
 | 
					    td match {
 | 
				
			||||||
 | 
					      case TextData.Attachment(
 | 
				
			||||||
 | 
					            item,
 | 
				
			||||||
 | 
					            attachId,
 | 
				
			||||||
 | 
					            collective,
 | 
				
			||||||
 | 
					            folder,
 | 
				
			||||||
 | 
					            language,
 | 
				
			||||||
 | 
					            name,
 | 
				
			||||||
 | 
					            text
 | 
				
			||||||
 | 
					          ) =>
 | 
				
			||||||
 | 
					        FtsRecord(
 | 
				
			||||||
 | 
					          td.id,
 | 
				
			||||||
 | 
					          item,
 | 
				
			||||||
 | 
					          collective,
 | 
				
			||||||
 | 
					          language,
 | 
				
			||||||
 | 
					          attachId.some,
 | 
				
			||||||
 | 
					          folder,
 | 
				
			||||||
 | 
					          name,
 | 
				
			||||||
 | 
					          text,
 | 
				
			||||||
 | 
					          None,
 | 
				
			||||||
 | 
					          None
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					      case TextData.Item(item, collective, folder, name, notes, language) =>
 | 
				
			||||||
 | 
					        FtsRecord(
 | 
				
			||||||
 | 
					          td.id,
 | 
				
			||||||
 | 
					          item,
 | 
				
			||||||
 | 
					          collective,
 | 
				
			||||||
 | 
					          language,
 | 
				
			||||||
 | 
					          None,
 | 
				
			||||||
 | 
					          folder,
 | 
				
			||||||
 | 
					          None,
 | 
				
			||||||
 | 
					          None,
 | 
				
			||||||
 | 
					          name,
 | 
				
			||||||
 | 
					          notes
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -0,0 +1,197 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.data.NonEmptyList
 | 
				
			||||||
 | 
					import fs2.Chunk
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.ftsclient.FtsQuery
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import doobie._
 | 
				
			||||||
 | 
					import doobie.implicits._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Doobie statements for the `ftspsql_search` table that backs the PostgreSQL
					  * full-text index.
					  *
					  * Note on whitespace: `fr"…"` fragments end with a blank, `sql"…"` fragments do
					  * not. Wherever a fragment built with `sql` is followed by another keyword, the
					  * separating blank has to be written explicitly.
					  */
					object FtsRepository extends DoobieMeta {
					  val table = fr"ftspsql_search"
					
					  /** Counts all rows matching `q` and selects the highest rank among them; the
					    * rank is reported as 0 when nothing matches.
					    */
					  def searchSummary(pq: PgQueryParser, rn: RankNormalization)(
					      q: FtsQuery
					  ): ConnectionIO[SearchSummary] = {
					    val selectRank = mkSelectRank(rn)
					    val query = mkQueryPart(pq, q)
					
					    sql"""select count(id), coalesce(max($selectRank), 0)
					         |from $table, $query
					         |where ${mkCondition(q)} AND query @@ text_index 
					         |""".stripMargin
					      .query[SearchSummary]
					      .unique
					  }
					
					  /** Loads one page of matches for `q`, best rank first.
					    *
					    * @param withHighlighting
					    *   when true a `ts_headline` excerpt over the concatenated text columns is
					    *   selected, using the start/stop markers from `q.highlight`; otherwise the
					    *   highlight column is selected as `null`
					    */
					  def search(pq: PgQueryParser, rn: RankNormalization)(
					      q: FtsQuery,
					      withHighlighting: Boolean
					  ): ConnectionIO[Vector[SearchResult]] = {
					    val selectRank = mkSelectRank(rn)
					
					    // Passed to ts_headline as a bound parameter, not spliced into the SQL text.
					    val hlOption =
					      s"startsel=${q.highlight.pre},stopsel=${q.highlight.post}"
					
					    val selectHl =
					      if (!withHighlighting) fr"null as highlight"
					      else
					        fr"""ts_headline(
					            |    fts_config,
					            |    coalesce(attach_name, '') ||
					            |    ' ' || coalesce(attach_content, '') ||
					            |    ' ' || coalesce(item_name, '') ||
					            |    ' ' || coalesce(item_notes, ''), query, $hlOption) as highlight""".stripMargin
					
					    val select =
					      fr"id, item_id, collective, lang, attach_id, folder_id, attach_name, item_name, $selectRank as rank, $selectHl"
					
					    val query = mkQueryPart(pq, q)
					
					    sql"""select $select 
					         |from $table, $query
					         |where ${mkCondition(q)} AND query @@ text_index 
					         |order by rank desc
					         |limit ${q.limit}
					         |offset ${q.offset}
					         |""".stripMargin
					      .query[SearchResult]
					      .to[Vector]
					  }
					
					  /** Builds the WHERE restrictions of a query: always the collective, plus an
					    * `in (…)` filter for item ids and folder ids when the respective collection
					    * in `q` is non-empty.
					    */
					  private def mkCondition(q: FtsQuery): Fragment = {
					    // `column in (v1, v2, …)`, or None when there are no values to match.
					    def anyOf(column: Fragment, values: List[Fragment]): Option[Fragment] =
					      NonEmptyList.fromList(values).map { nel =>
					        val ids = nel.reduceLeft(_ ++ fr"," ++ _)
					        fr"$column in ($ids)"
					      }
					
					    val coll = fr"collective = ${q.collective}"
					    val items = anyOf(fr0"item_id", q.items.toList.map(id => fr"$id"))
					    val folders = anyOf(fr0"folder_id", q.folders.toList.map(id => fr"$id"))
					
					    List(items, folders).flatten.foldLeft(coll)(_ ++ fr"AND" ++ _)
					  }
					
					  /** The FROM-clause item `<parser>(fts_config, <query string>) query`. The
					    * function name comes from the closed set of [[PgQueryParser]] values; the
					    * query string is a bound parameter.
					    */
					  private def mkQueryPart(p: PgQueryParser, q: FtsQuery): Fragment = {
					    val fname = Fragment.const(p.name)
					    fr"$fname(fts_config, ${q.q}) query"
					  }
					
					  /** `ts_rank_cd` call whose normalization argument is the numbers in `rn`
					    * joined with `|`.
					    */
					  private def mkSelectRank(rn: RankNormalization): Fragment = {
					    val bits = rn.value.toNonEmptyList.map(n => sql"$n").reduceLeft(_ ++ sql"|" ++ _)
					    fr"ts_rank_cd(text_index, query, $bits)"
					  }
					
					  /** Runs [[replace]] for every record of the chunk and sums the update counts. */
					  def replaceChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
					    r.traverse(replace(pgConfig)).map(_.foldLeft(0)(_ + _))
					
					  /** Inserts the record; if a row with the same id already exists its language,
					    * folder, text columns and `fts_config` are overwritten instead.
					    *
					    * @param pgConfig
					    *   maps the record language to the name of a PostgreSQL text search
					    *   configuration (cast to `regconfig` in the statement)
					    */
					  def replace(
					      pgConfig: Language => String
					  )(r: FtsRecord): ConnectionIO[Int] =
					    (fr"INSERT INTO $table (id,item_id,collective,lang,attach_id,folder_id,attach_name,attach_content,item_name,item_notes,fts_config) VALUES (" ++
					      commas(
					        sql"${r.id}",
					        sql"${r.itemId}",
					        sql"${r.collective}",
					        sql"${r.language}",
					        sql"${r.attachId}",
					        sql"${r.folderId}",
					        sql"${r.attachName}",
					        sql"${r.attachContent}",
					        sql"${r.itemName}",
					        sql"${r.itemNotes}",
					        sql"${pgConfig(r.language)}::regconfig"
					      ) ++ sql") on conflict (id) do update set " ++
					      assignments(pgConfig, r)).update.run
					
					  /** Overwrites language, folder, text columns and `fts_config` of the row with
					    * the id of `r`. Returns the number of affected rows.
					    */
					  def update(pgConfig: Language => String)(r: FtsRecord): ConnectionIO[Int] =
					    // The assignment list ends in `::regconfig` without a trailing blank, so the
					    // WHERE fragment must bring its own leading blank.
					    (fr"UPDATE $table SET" ++
					      assignments(pgConfig, r) ++
					      fr" WHERE id = ${r.id}").update.run
					
					  /** Runs [[update]] for every record of the chunk and sums the update counts. */
					  def updateChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
					    r.traverse(update(pgConfig)).map(_.foldLeft(0)(_ + _))
					
					  /** Sets the folder of all rows belonging to the given item and collective. */
					  def updateFolder(
					      itemId: Ident,
					      collective: Ident,
					      folder: Option[Ident]
					  ): ConnectionIO[Int] =
					    (fr"UPDATE $table" ++
					      fr"SET folder_id = $folder" ++
					      fr"WHERE item_id = $itemId AND collective = $collective").update.run
					
					  def deleteByItemId(itemId: Ident): ConnectionIO[Int] =
					    sql"DELETE FROM $table WHERE item_id = $itemId".update.run
					
					  def deleteByAttachId(attachId: Ident): ConnectionIO[Int] =
					    sql"DELETE FROM $table WHERE attach_id = $attachId".update.run
					
					  /** Removes every row from the index table. */
					  def deleteAll: ConnectionIO[Int] =
					    sql"DELETE FROM $table".update.run
					
					  /** Removes all rows of the given collective. */
					  def delete(collective: Ident): ConnectionIO[Int] =
					    sql"DELETE FROM $table WHERE collective = $collective".update.run
					
					  /** Drops the `flyway_fts_history` table and the index table, if they exist. */
					  def resetAll: ConnectionIO[Int] = {
					    val dropFlyway = sql"DROP TABLE IF EXISTS flyway_fts_history".update.run
					    val dropSearch = sql"DROP TABLE IF EXISTS $table".update.run
					    for {
					      a <- dropFlyway
					      b <- dropSearch
					    } yield a + b
					  }
					
					  /** The `column = value` list shared by the upsert in [[replace]] and by
					    * [[update]]. The result carries no trailing blank.
					    */
					  private def assignments(pgConfig: Language => String, r: FtsRecord): Fragment =
					    commas(
					      sql"lang = ${r.language}",
					      sql"folder_id = ${r.folderId}",
					      sql"attach_name = ${r.attachName}",
					      sql"attach_content = ${r.attachContent}",
					      sql"item_name = ${r.itemName}",
					      sql"item_notes = ${r.itemNotes}",
					      sql"fts_config = ${pgConfig(r.language)}::regconfig"
					    )
					
					  /** Joins the fragments with `, `. */
					  private def commas(first: Fragment, rest: Fragment*): Fragment =
					    rest.foldLeft(first)(_ ++ fr"," ++ _)
					
					  /** Resolves the text search configuration for a language: `select` wins where
					    * it is defined, otherwise [[defaultPgConfig]] is used.
					    */
					  def getPgConfig(select: PartialFunction[Language, String])(language: Language): String =
					    select.applyOrElse(language, defaultPgConfig)
					
					  /** Default mapping from language to text search configuration name. Czech,
					    * Latvian, Japanese and Hebrew are mapped to `simple`.
					    */
					  def defaultPgConfig(language: Language): String =
					    language match {
					      case Language.English    => "english"
					      case Language.German     => "german"
					      case Language.French     => "french"
					      case Language.Italian    => "italian"
					      case Language.Spanish    => "spanish"
					      case Language.Hungarian  => "hungarian"
					      case Language.Portuguese => "portuguese"
					      case Language.Danish     => "danish"
					      case Language.Finnish    => "finnish"
					      case Language.Norwegian  => "norwegian"
					      case Language.Swedish    => "swedish"
					      case Language.Russian    => "russian"
					      case Language.Romanian   => "romanian"
					      case Language.Dutch      => "dutch"
					      case Language.Czech      => "simple"
					      case Language.Latvian    => "simple"
					      case Language.Japanese   => "simple"
					      case Language.Hebrew     => "simple"
					    }
					}
 | 
				
			||||||
@@ -0,0 +1,38 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.data.NonEmptyList
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** One of the PostgreSQL functions that turn a query string into a `tsquery`. */
					sealed trait PgQueryParser {
					
					  /** Name of the SQL function. */
					  def name: String
					}
					
					object PgQueryParser {
					
					  case object ToTsQuery extends PgQueryParser {
					    val name: String = "to_tsquery"
					  }
					
					  case object Plain extends PgQueryParser {
					    val name: String = "plainto_tsquery"
					  }
					
					  case object Phrase extends PgQueryParser {
					    val name: String = "phraseto_tsquery"
					  }
					
					  case object Websearch extends PgQueryParser {
					    val name: String = "websearch_to_tsquery"
					  }
					
					  /** Every known parser. */
					  val all: NonEmptyList[PgQueryParser] =
					    NonEmptyList.of(ToTsQuery, Plain, Phrase, Websearch)
					
					  /** Looks a parser up by its SQL function name, ignoring case. */
					  def fromName(name: String): Either[String, PgQueryParser] =
					    all.find(parser => parser.name.equalsIgnoreCase(name)) match {
					      case Some(parser) => Right(parser)
					      case None         => Left(s"Unknown pg query parser: $name")
					    }
					
					  /** Like [[fromName]], but fails via `sys.error` for an unknown name. */
					  def unsafeFromName(name: String): PgQueryParser =
					    fromName(name) match {
					      case Right(parser) => parser
					      case Left(message) => sys.error(message)
					    }
					}
 | 
				
			||||||
@@ -0,0 +1,31 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Settings of the PostgreSQL full-text client.
					  *
					  * @param url
					  *   JDBC url of the database
					  * @param user
					  *   database user
					  * @param password
					  *   database password
					  * @param pgConfigSelect
					  *   overrides for the language to text search configuration mapping
					  * @param pgQueryParser
					  *   SQL function used to parse query strings
					  * @param rankNormalization
					  *   normalization options used when ranking matches
					  */
					final case class PsqlConfig(
					    url: LenientUri,
					    user: String,
					    password: Password,
					    pgConfigSelect: PartialFunction[Language, String],
					    pgQueryParser: PgQueryParser,
					    rankNormalization: RankNormalization
					)
					
					object PsqlConfig {
					
					  /** Configuration for the given connection data with no language overrides,
					    * the `Websearch` query parser and `Mhd && Scale` rank normalization.
					    */
					  def defaults(url: LenientUri, user: String, password: Password): PsqlConfig =
					    PsqlConfig(
					      url = url,
					      user = user,
					      password = password,
					      pgConfigSelect = PartialFunction.empty,
					      pgQueryParser = PgQueryParser.Websearch,
					      rankNormalization = RankNormalization.Mhd && RankNormalization.Scale
					    )
					}
 | 
				
			||||||
@@ -0,0 +1,155 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import scala.concurrent.ExecutionContext
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.effect._
 | 
				
			||||||
 | 
					import cats.implicits._
 | 
				
			||||||
 | 
					import fs2.Stream
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.ftsclient._
 | 
				
			||||||
 | 
					import docspell.logging.Logger
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import com.zaxxer.hikari.HikariDataSource
 | 
				
			||||||
 | 
					import doobie._
 | 
				
			||||||
 | 
					import doobie.hikari.HikariTransactor
 | 
				
			||||||
 | 
					import doobie.implicits._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** [[FtsClient]] implementation that runs the [[FtsRepository]] statements
					  * through the given transactor.
					  */
					final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
					    extends FtsClient[F] {
					  val engine = Ident.unsafe("postgres")
					
					  val config = cfg
					  private[ftspsql] val transactor = xa
					
					  // Repository functions with the configuration already applied.
					  private[this] val countMatches =
					    FtsRepository.searchSummary(cfg.pgQueryParser, cfg.rankNormalization) _
					  private[this] val findMatches =
					    FtsRepository.search(cfg.pgQueryParser, cfg.rankNormalization) _
					
					  private[this] val upsertChunk =
					    FtsRepository.replaceChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
					  private[this] val patchChunk =
					    FtsRepository.updateChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
					
					  /** A single migration that runs the database migration. */
					  def initialize: F[List[FtsMigration[F]]] = {
					    val setup: FtsMigration[F] =
					      FtsMigration(
					        0,
					        engine,
					        "initialize",
					        DbMigration[F](cfg).run.as(FtsMigration.Result.WorkDone)
					      )
					
					    Sync[F].pure(List(setup))
					  }
					
					  /** Migrations for starting over: drop the tables, run the database
					    * migration, then request indexing of all data.
					    */
					  def initializeNew: List[FtsMigration[F]] = {
					    val reset: FtsMigration[F] =
					      FtsMigration(
					        10,
					        engine,
					        "reset",
					        FtsRepository.resetAll.transact(xa).as(FtsMigration.Result.workDone)
					      )
					    val schema: FtsMigration[F] =
					      FtsMigration(
					        20,
					        engine,
					        "schema",
					        DbMigration[F](cfg).run.as(FtsMigration.Result.workDone)
					      )
					    val indexAll: FtsMigration[F] =
					      FtsMigration(20, engine, "index all", FtsMigration.Result.indexAll.pure[F])
					
					    List(reset, schema, indexAll)
					  }
					
					  /** Runs the summary query and the (highlighting) search query, each in its
					    * own transaction, and reports the elapsed time of both as `qtime`.
					    */
					  def search(q: FtsQuery): F[FtsResult] = {
					    val nanoTime = Sync[F].delay(System.nanoTime())
					
					    for {
					      started <- nanoTime
					      summary <- countMatches(q).transact(xa)
					      hits <- findMatches(q, true).transact(xa)
					      finished <- nanoTime
					    } yield SearchResult
					      .toFtsResult(summary, hits)
					      .copy(qtime = Duration.nanos(finished - started))
					  }
					
					  /** Upserts the data in chunks of 50 records, one transaction per chunk. */
					  def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
					    data
					      .map(FtsRecord.fromTextData)
					      .chunkN(50)
					      .evalMap { records =>
					        for {
					          _ <- logger.debug(s"Add to fts index ${records.size} records")
					          n <- upsertChunk(records).transact(xa)
					        } yield n
					      }
					      .compile
					      .drain
					
					  /** Updates existing rows in chunks of 50 records, one transaction per chunk. */
					  def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
					    data
					      .map(FtsRecord.fromTextData)
					      .chunkN(50)
					      .evalMap { records =>
					        for {
					          _ <- logger.debug(s"Update fts index with ${records.size} records")
					          n <- patchChunk(records).transact(xa)
					        } yield n
					      }
					      .compile
					      .drain
					
					  def updateFolder(
					      logger: Logger[F],
					      itemId: Ident,
					      collective: Ident,
					      folder: Option[Ident]
					  ): F[Unit] = {
					    val folderId = folder.map(_.id)
					    val message =
					      s"Update folder '$folderId' in fts for collective ${collective.id} and item ${itemId.id}"
					
					    logger.debug(message) *>
					      FtsRepository.updateFolder(itemId, collective, folder).transact(xa).void
					  }
					
					  def removeItem(logger: Logger[F], itemId: Ident): F[Unit] = {
					    val delete = FtsRepository.deleteByItemId(itemId).transact(xa).void
					    logger.debug(s"Removing item from fts index: ${itemId.id}") *> delete
					  }
					
					  def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] = {
					    val delete = FtsRepository.deleteByAttachId(attachId).transact(xa).void
					    logger.debug(s"Removing attachment from fts index: ${attachId.id}") *> delete
					  }
					
					  def clearAll(logger: Logger[F]): F[Unit] = {
					    val delete = FtsRepository.deleteAll.transact(xa).void
					    logger.info(s"Deleting complete FTS index") *> delete
					  }
					
					  def clear(logger: Logger[F], collective: Ident): F[Unit] = {
					    val delete = FtsRepository.delete(collective).transact(xa).void
					    logger.info(s"Deleting index for collective ${collective.id}") *> delete
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					object PsqlFtsClient {
					
					  /** Creates a client backed by its own Hikari data source.
					    *
					    * The data source is created and configured when the resource is acquired
					    * and closed when it is released.
					    *
					    * @param cfg
					    *   connection data and search settings
					    * @param connectEC
					    *   execution context handed to the [[doobie.hikari.HikariTransactor]]
					    */
					  def apply[F[_]: Async](
					      cfg: PsqlConfig,
					      connectEC: ExecutionContext
					  ): Resource[F, PsqlFtsClient[F]] = {
					    val acquire = Sync[F].delay(new HikariDataSource())
					    val free: HikariDataSource => F[Unit] = ds => Sync[F].delay(ds.close())
					
					    // The setters mutate the data source, so they are suspended in F and run as
					    // a proper step of the resource instead of inside a discarded value.
					    def configure(ds: HikariDataSource): F[Unit] =
					      Sync[F].delay {
					        ds.setJdbcUrl(cfg.url.asString)
					        ds.setUsername(cfg.user)
					        ds.setPassword(cfg.password.pass)
					        ds.setDriverClassName("org.postgresql.Driver")
					      }
					
					    for {
					      ds <- Resource.make(acquire)(free)
					      _ <- Resource.eval(configure(ds))
					      xa = HikariTransactor[F](ds, connectEC)
					
					      pc = new PsqlFtsClient[F](cfg, xa)
					    } yield pc
					  }
					
					  /** Creates a client on top of an existing transactor; the caller keeps
					    * ownership of the transactor's lifecycle.
					    */
					  def fromTransactor[F[_]: Async](cfg: PsqlConfig, xa: Transactor[F]): PsqlFtsClient[F] =
					    new PsqlFtsClient[F](cfg, xa)
					}
 | 
				
			||||||
@@ -0,0 +1,46 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.Order
 | 
				
			||||||
 | 
					import cats.data.NonEmptySet
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** A set of normalization option numbers for PostgreSQL's ranking functions. */
					sealed trait RankNormalization { self =>
					
					  /** The option numbers contained in this normalization. */
					  def value: NonEmptySet[Int]
					
					  /** Combines both normalizations into one holding the numbers of either. */
					  def &&(other: RankNormalization): RankNormalization = {
					    val merged = self.value ++ other.value
					    new RankNormalization { val value: NonEmptySet[Int] = merged }
					  }
					}
					
					object RankNormalization {
					// see https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
					
					  case object IgnoreDocLength extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(0)
					  }
					  case object LogDocLength extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(1)
					  }
					  case object DocLength extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(2)
					  }
					  case object Mhd extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(4)
					  }
					  case object UniqueWords extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(8)
					  }
					  case object LogUniqueWords extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(16)
					  }
					  case object Scale extends RankNormalization {
					    val value: NonEmptySet[Int] = NonEmptySet.one(32)
					  }
					
					  /** Finds the predefined normalization that contains the number `n`. */
					  def byNumber(n: Int): Either[String, RankNormalization] =
					    all.find(rn => rn.value.contains(n)) match {
					      case Some(rn) => Right(rn)
					      case None     => Left(s"Unknown rank normalization number: $n")
					    }
					
					  /** Orders normalizations by the sum of their numbers. Must stay declared
					    * before `all`, which needs it to build the set.
					    */
					  implicit val order: Order[RankNormalization] =
					    Order.by(rn => rn.value.reduce)
					
					  /** The predefined single-option normalizations. */
					  val all: NonEmptySet[RankNormalization] =
					    NonEmptySet.of(
					      IgnoreDocLength,
					      LogDocLength,
					      DocLength,
					      Mhd,
					      UniqueWords,
					      LogUniqueWords,
					      Scale
					    )
					}
 | 
				
			||||||
@@ -0,0 +1,53 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.ftsclient.FtsResult
 | 
				
			||||||
 | 
					import docspell.ftsclient.FtsResult.{ItemMatch, MatchData}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** One row returned by the search query of [[FtsRepository]].
					  *
					  * `attachId` is set for rows that index an attachment and empty for rows that
					  * index the item itself; `highlight` is only present when highlighting was
					  * requested.
					  */
					final case class SearchResult(
					    id: Ident,
					    itemId: Ident,
					    collective: Ident,
					    language: Language,
					    attachId: Option[Ident],
					    folderId: Option[Ident],
					    attachName: Option[String],
					    itemName: Option[String],
					    rank: Double,
					    highlight: Option[String]
					)
					
					object SearchResult {
					
					  /** Converts the summary and the result rows into an [[FtsResult]].
					    *
					    * The query time is set to `Duration.zero`; highlights are keyed by the row
					    * id.
					    */
					  def toFtsResult(summary: SearchSummary, results: Vector[SearchResult]): FtsResult = {
					    // Rows with an attachment id become attachment matches (a missing name is
					    // replaced by the empty string), all others item matches.
					    def matchData(row: SearchResult): MatchData =
					      row.attachId match {
					        case Some(aId) => FtsResult.AttachmentData(aId, row.attachName.getOrElse(""))
					        case None      => FtsResult.ItemData
					      }
					
					    val matches =
					      results.map(row =>
					        ItemMatch(row.id, row.itemId, row.collective, row.rank, matchData(row))
					      )
					    val highlights =
					      results.map(row => row.id -> row.highlight.toList)
					
					    FtsResult(
					      Duration.zero,
					      summary.count.toInt,
					      summary.maxScore,
					      highlights.toMap,
					      matches.toList
					    )
					  }
					}
 | 
				
			||||||
@@ -0,0 +1,9 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Aggregate over all rows matching a query, as selected by
					  * `FtsRepository.searchSummary`.
					  *
					  * @param count
					  *   number of matching rows
					  * @param maxScore
					  *   highest rank among the matching rows; 0 if there are none
					  */
					case class SearchSummary(count: Long, maxScore: Double)
 | 
				
			||||||
@@ -0,0 +1,52 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.effect._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.logging.TestLoggingConfig
 | 
				
			||||||
 | 
					import docspell.logging.{Level, LogConfig}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import com.dimafeng.testcontainers.PostgreSQLContainer
 | 
				
			||||||
 | 
					import com.dimafeng.testcontainers.munit.TestContainerForAll
 | 
				
			||||||
 | 
					import doobie.implicits._
 | 
				
			||||||
 | 
					import munit.CatsEffectSuite
 | 
				
			||||||
 | 
					import org.testcontainers.utility.DockerImageName
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Runs the full-text search schema migration against a PostgreSQL container and
					  * checks that the search table can be queried afterwards.
					  */
					class MigrationTest
					    extends CatsEffectSuite
					    with PgFixtures
					    with TestContainerForAll
					    with TestLoggingConfig {
					  override val containerDef: PostgreSQLContainer.Def =
					    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

					  override def docspellLogConfig: LogConfig =
					    LogConfig(Level.Debug, LogConfig.Format.Fancy)

					  override def rootMinimumLevel = Level.Warn

					  test("create schema") {
					    withContainers { cnt =>
					      // Connection settings pointing at the database inside the container.
					      val ftsConfig = PsqlConfig.defaults(
					        LenientUri.unsafe(cnt.jdbcUrl),
					        cnt.username,
					        Password(cnt.password)
					      )
					      val countRows =
					        sql"SELECT count(*) FROM ${FtsRepository.table}".query[Int].unique

					      // Migrate first; the count query then runs against the migrated schema
					      // and must see an empty table.
					      DbMigration[IO](ftsConfig).run
					        .flatMap(_ => runQuery(cnt)(countRows))
					        .map(rowCount => assertEquals(rowCount, 0))
					    }
					  }
					}
 | 
				
			||||||
@@ -0,0 +1,77 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import javax.sql.DataSource
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.effect._
 | 
				
			||||||
 | 
					import cats.syntax.all._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.ftsclient.TextData
 | 
				
			||||||
 | 
					import docspell.store.{JdbcConfig, StoreFixture}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import com.dimafeng.testcontainers.PostgreSQLContainer
 | 
				
			||||||
 | 
					import doobie._
 | 
				
			||||||
 | 
					import doobie.implicits._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Helpers and sample data shared by the tests that run against a PostgreSQL
					  * container.
					  */
					trait PgFixtures {

					  /** Creates an identifier from the given string without validating it. */
					  def ident(n: String): Ident = Ident.unsafe(n)

					  /** Default full-text search settings pointing at the container's database. */
					  def psqlConfig(cnt: PostgreSQLContainer): PsqlConfig = {
					    val url = LenientUri.unsafe(cnt.jdbcUrl)
					    PsqlConfig.defaults(url, cnt.username, Password(cnt.password))
					  }

					  /** Store connection settings pointing at the container's database. */
					  def jdbcConfig(cnt: PostgreSQLContainer): JdbcConfig = {
					    val url = LenientUri.unsafe(cnt.jdbcUrl)
					    JdbcConfig(url, cnt.username, cnt.password)
					  }

					  def dataSource(cnt: PostgreSQLContainer): Resource[IO, DataSource] = {
					    val cfg = jdbcConfig(cnt)
					    StoreFixture.dataSource(cfg)
					  }

					  def transactor(cnt: PostgreSQLContainer): Resource[IO, Transactor[IO]] =
					    dataSource(cnt).flatMap(ds => StoreFixture.makeXA(ds))

					  /** A client on top of a fresh transactor; the schema migration is run when the
					    * resource is acquired.
					    */
					  def psqlFtsClient(cnt: PostgreSQLContainer): Resource[IO, PsqlFtsClient[IO]] =
					    transactor(cnt).flatMap { xa =>
					      val client = PsqlFtsClient.fromTransactor(psqlConfig(cnt), xa)
					      Resource.eval(DbMigration[IO](client.config).run).map(_ => client)
					    }

					  /** Runs a single query using a transactor that is created for this call only. */
					  def runQuery[A](cnt: PostgreSQLContainer)(q: ConnectionIO[A]): IO[A] =
					    transactor(cnt).use(xa => q.transact(xa))

					  implicit class QueryOps[A](self: ConnectionIO[A]) {

					    /** Runs this query with the transactor of the implicit client. */
					    def exec(implicit client: PsqlFtsClient[IO]): IO[A] = {
					      val xa = client.transactor
					      self.transact(xa)
					    }
					  }

					  val collective1 = ident("coll1")
					  val collective2 = ident("coll2")

					  // Sample item belonging to collective1.
					  val itemData: TextData.Item =
					    TextData.Item(
					      item = ident("item-id-1"),
					      collective = collective1,
					      folder = None,
					      name = "mydoc.pdf".some,
					      notes = "my notes are these".some,
					      language = Language.English
					    )

					  // Sample attachment of the same item, also belonging to collective1.
					  val attachData: TextData.Attachment =
					    TextData.Attachment(
					      item = ident("item-id-1"),
					      attachId = ident("attach-id-1"),
					      collective = collective1,
					      folder = None,
					      language = Language.English,
					      name = "mydoc.pdf".some,
					      text = "lorem ipsum dolores est".some
					    )
					}
 | 
				
			||||||
@@ -0,0 +1,151 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Copyright 2020 Eike K. & Contributors
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * SPDX-License-Identifier: AGPL-3.0-or-later
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package docspell.ftspsql
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import cats.effect._
 | 
				
			||||||
 | 
					import cats.syntax.all._
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import docspell.ftsclient.{FtsQuery, TextData}
 | 
				
			||||||
 | 
					import docspell.logging.{Level, LogConfig, TestLoggingConfig}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import com.dimafeng.testcontainers.PostgreSQLContainer
 | 
				
			||||||
 | 
					import com.dimafeng.testcontainers.munit.TestContainerForAll
 | 
				
			||||||
 | 
					import doobie.implicits._
 | 
				
			||||||
 | 
					import munit.CatsEffectSuite
 | 
				
			||||||
 | 
					import org.testcontainers.utility.DockerImageName
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Integration tests for [[PsqlFtsClient]] running against a PostgreSQL container.
					  *
					  * The container is shared by all tests of this suite (`TestContainerForAll`).
					  * Every test that asserts on the indexed data therefore clears the index first, so
					  * it does not depend on rows left behind by another test or on the execution order.
					  */
					class PsqlFtsClientTest
					    extends CatsEffectSuite
					    with PgFixtures
					    with TestContainerForAll
					    with TestLoggingConfig {
					  override val containerDef: PostgreSQLContainer.Def =
					    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

					  val logger = docspell.logging.getLogger[IO]

					  private val table = FtsRepository.table

					  override def docspellLogConfig: LogConfig =
					    LogConfig(Level.Debug, LogConfig.Format.Fancy)

					  override def rootMinimumLevel = Level.Warn

					  test("insert data into index") {
					    withContainers { cnt =>
					      psqlFtsClient(cnt).use { implicit client =>
					        // Checks that exactly the two given records are stored with their values.
					        def assertions(id: TextData.Item, ad: TextData.Attachment) =
					          for {
					            n <- sql"SELECT count(*) from $table".query[Int].unique.exec
					            _ = assertEquals(n, 2)
					            itemStored <-
					              sql"select item_name, item_notes from $table WHERE id = ${id.id}"
					                .query[(Option[String], Option[String])]
					                .unique
					                .exec
					            _ = assertEquals(itemStored, (id.name, id.notes))
					            attachStored <-
					              sql"select attach_name, attach_content from $table where id = ${ad.id}"
					                .query[(Option[String], Option[String])]
					                .unique
					                .exec
					            _ = assertEquals(attachStored, (ad.name, ad.text))
					          } yield ()

					        for {
					          // start from an empty index, independent of other tests
					          _ <- client.clearAll(logger)

					          _ <- client.indexData(logger, itemData, attachData)
					          _ <- assertions(itemData, attachData)

					          // indexing the same data again must not create additional rows
					          _ <- client.indexData(logger, itemData, attachData)
					          _ <- assertions(itemData, attachData)

					          // indexing changed data must replace the stored values
					          _ <- client.indexData(
					            logger,
					            itemData.copy(notes = None),
					            attachData.copy(name = "ha.pdf".some)
					          )
					          _ <- assertions(
					            itemData.copy(notes = None),
					            attachData.copy(name = "ha.pdf".some)
					          )
					        } yield ()
					      }
					    }
					  }

					  test("clear index") {
					    withContainers { cnt =>
					      psqlFtsClient(cnt).use { implicit client =>
					        for {
					          _ <- client.indexData(logger, itemData, attachData)
					          _ <- client.clearAll(logger)
					          n <- sql"select count(*) from $table".query[Int].unique.exec
					          _ = assertEquals(n, 0)
					        } yield ()
					      }
					    }
					  }

					  test("clear index by collective") {
					    withContainers { cnt =>
					      psqlFtsClient(cnt).use { implicit client =>
					        for {
					          // start from an empty index, independent of other tests
					          _ <- client.clearAll(logger)

					          _ <- client.indexData(
					            logger,
					            itemData,
					            attachData,
					            itemData.copy(collective = collective2, item = ident("item-id-2")),
					            attachData.copy(collective = collective2, item = ident("item-id-2"))
					          )
					          n <- sql"select count(*) from $table".query[Int].unique.exec
					          _ = assertEquals(n, 4)

					          // only the two rows of collective2 must remain
					          _ <- client.clear(logger, collective1)
					          n <- sql"select count(*) from $table".query[Int].unique.exec
					          _ = assertEquals(n, 2)
					        } yield ()
					      }
					    }
					  }

					  test("search by query") {
					    // A query restricted to collective1 without item or folder restrictions.
					    def query(s: String): FtsQuery =
					      FtsQuery(
					        q = s,
					        collective = collective1,
					        items = Set.empty,
					        folders = Set.empty,
					        limit = 10,
					        offset = 0,
					        highlight = FtsQuery.HighlightSetting.default
					      )

					    withContainers { cnt =>
					      psqlFtsClient(cnt).use { implicit client =>
					        for {
					          // start from an empty index, independent of other tests
					          _ <- client.clearAll(logger)

					          _ <- client.indexData(
					            logger,
					            itemData,
					            attachData,
					            itemData.copy(collective = collective2, item = ident("item-id-2")),
					            attachData.copy(collective = collective2, item = ident("item-id-2"))
					          )

					          res0 <- client.search(query("lorem uiaeduiae"))
					          _ = assertEquals(res0.count, 0)

					          // headOption keeps a missing result an assertion failure instead of
					          // a NoSuchElementException
					          res1 <- client.search(query("lorem"))
					          _ = assertEquals(res1.count, 1)
					          _ = assertEquals(res1.results.headOption.map(_.id), attachData.id.some)

					          res2 <- client.search(query("note"))
					          _ = assertEquals(res2.count, 1)
					          _ = assertEquals(res2.results.headOption.map(_.id), itemData.id.some)
					        } yield ()
					      }
					    }
					  }
					}
 | 
				
			||||||
@@ -22,7 +22,7 @@ trait JsonCodec {
 | 
				
			|||||||
    new Encoder[TextData.Attachment] {
 | 
					    new Encoder[TextData.Attachment] {
 | 
				
			||||||
      final def apply(td: TextData.Attachment): Json = {
 | 
					      final def apply(td: TextData.Attachment): Json = {
 | 
				
			||||||
        val cnt =
 | 
					        val cnt =
 | 
				
			||||||
          (Field.contentField(td.lang).name, Json.fromString(td.text.getOrElse("")))
 | 
					          (Field.contentField(td.language).name, Json.fromString(td.text.getOrElse("")))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        Json.fromFields(
 | 
					        Json.fromFields(
 | 
				
			||||||
          cnt :: List(
 | 
					          cnt :: List(
 | 
				
			||||||
@@ -165,7 +165,7 @@ trait JsonCodec {
 | 
				
			|||||||
        val setter = List(
 | 
					        val setter = List(
 | 
				
			||||||
          td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)),
 | 
					          td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)),
 | 
				
			||||||
          td.text.map(txt =>
 | 
					          td.text.map(txt =>
 | 
				
			||||||
            (Field.contentField(td.lang).name, Map("set" -> txt.asJson).asJson)
 | 
					            (Field.contentField(td.language).name, Map("set" -> txt.asJson).asJson)
 | 
				
			||||||
          )
 | 
					          )
 | 
				
			||||||
        ).flatten
 | 
					        ).flatten
 | 
				
			||||||
        Json.fromFields(
 | 
					        Json.fromFields(
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -697,6 +697,9 @@ Docpell Update Check
 | 
				
			|||||||
    # Currently the SOLR search platform is supported.
 | 
					    # Currently the SOLR search platform is supported.
 | 
				
			||||||
    enabled = false
 | 
					    enabled = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Which backend to use, either solr or postgresql
 | 
				
			||||||
 | 
					    backend = "solr"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Configuration for the SOLR backend.
 | 
					    # Configuration for the SOLR backend.
 | 
				
			||||||
    solr = {
 | 
					    solr = {
 | 
				
			||||||
      # The URL to solr
 | 
					      # The URL to solr
 | 
				
			||||||
@@ -713,6 +716,43 @@ Docpell Update Check
 | 
				
			|||||||
      q-op = "OR"
 | 
					      q-op = "OR"
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Configuration for PostgreSQL backend
 | 
				
			||||||
 | 
					    postgresql = {
 | 
				
			||||||
 | 
					      # Whether to reuse the default database connection instead of the
 | 
				
			||||||
 | 
					      # `jdbc` settings below. Only works if that database is PostgreSQL.
 | 
				
			||||||
 | 
					      use-default-connection = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # The database connection.
 | 
				
			||||||
 | 
					      jdbc {
 | 
				
			||||||
 | 
					        url = "jdbc:postgresql://server:5432/db"
 | 
				
			||||||
 | 
					        user = "pguser"
 | 
				
			||||||
 | 
					        password = ""
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # A mapping from a language to a postgres text search config. By
 | 
				
			||||||
 | 
					      # default a language is mapped to a predefined config.
 | 
				
			||||||
 | 
					      # PostgreSQL has predefined configs for some languages. This
 | 
				
			||||||
 | 
					      # setting allows creating a custom text search config and
 | 
				
			||||||
 | 
					      # define it here for some or all languages.
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # Example:
 | 
				
			||||||
 | 
					      #  { german = "my-german" }
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
 | 
				
			||||||
 | 
					      pg-config = {
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Define which query parser to use.
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
 | 
				
			||||||
 | 
					      pg-query-parser = "websearch_to_tsquery"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Allows defining a normalization for the ranking.
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
 | 
				
			||||||
 | 
					      pg-rank-normalization = [ 4 ]
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Settings for running the index migration tasks
 | 
					    # Settings for running the index migration tasks
 | 
				
			||||||
    migration = {
 | 
					    migration = {
 | 
				
			||||||
      # Chunk size to use when indexing data from the database. This
 | 
					      # Chunk size to use when indexing data from the database. This
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -13,6 +13,7 @@ import docspell.analysis.TextAnalysisConfig
 | 
				
			|||||||
import docspell.analysis.classifier.TextClassifierConfig
 | 
					import docspell.analysis.classifier.TextClassifierConfig
 | 
				
			||||||
import docspell.backend.Config.Files
 | 
					import docspell.backend.Config.Files
 | 
				
			||||||
import docspell.common._
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.config.{FtsType, PgFtsConfig}
 | 
				
			||||||
import docspell.convert.ConvertConfig
 | 
					import docspell.convert.ConvertConfig
 | 
				
			||||||
import docspell.extract.ExtractConfig
 | 
					import docspell.extract.ExtractConfig
 | 
				
			||||||
import docspell.ftssolr.SolrConfig
 | 
					import docspell.ftssolr.SolrConfig
 | 
				
			||||||
@@ -65,9 +66,25 @@ object Config {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  case class FullTextSearch(
 | 
					  case class FullTextSearch(
 | 
				
			||||||
      enabled: Boolean,
 | 
					      enabled: Boolean,
 | 
				
			||||||
 | 
					      backend: FtsType,
 | 
				
			||||||
      migration: FullTextSearch.Migration,
 | 
					      migration: FullTextSearch.Migration,
 | 
				
			||||||
      solr: SolrConfig
 | 
					      solr: SolrConfig,
 | 
				
			||||||
  )
 | 
					      postgresql: PgFtsConfig
 | 
				
			||||||
 | 
					  ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def info: String =
 | 
				
			||||||
 | 
					      if (!enabled) "Disabled."
 | 
				
			||||||
 | 
					      else
 | 
				
			||||||
 | 
					        backend match {
 | 
				
			||||||
 | 
					          case FtsType.Solr =>
 | 
				
			||||||
 | 
					            s"Solr(${solr.url.asString})"
 | 
				
			||||||
 | 
					          case FtsType.PostgreSQL =>
 | 
				
			||||||
 | 
					            if (postgresql.useDefaultConnection)
 | 
				
			||||||
 | 
					              "PostgreSQL(default)"
 | 
				
			||||||
 | 
					            else
 | 
				
			||||||
 | 
					              s"PostgreSQL(${postgresql.jdbc.url.asString})"
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  object FullTextSearch {
 | 
					  object FullTextSearch {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,7 +9,7 @@ package docspell.joex
 | 
				
			|||||||
import cats.effect.Async
 | 
					import cats.effect.Async
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import docspell.config.Implicits._
 | 
					import docspell.config.Implicits._
 | 
				
			||||||
import docspell.config.{ConfigFactory, Validation}
 | 
					import docspell.config.{ConfigFactory, FtsType, Validation}
 | 
				
			||||||
import docspell.scheduler.CountingScheme
 | 
					import docspell.scheduler.CountingScheme
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import emil.MailAddress
 | 
					import emil.MailAddress
 | 
				
			||||||
@@ -53,6 +53,14 @@ object ConfigFile {
 | 
				
			|||||||
        cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
 | 
					        cfg => cfg.updateCheck.enabled && cfg.updateCheck.subject.els.isEmpty,
 | 
				
			||||||
        "No subject given for enabled update check!"
 | 
					        "No subject given for enabled update check!"
 | 
				
			||||||
      ),
 | 
					      ),
 | 
				
			||||||
      Validation(cfg => cfg.files.validate.map(_ => cfg))
 | 
					      Validation(cfg => cfg.files.validate.map(_ => cfg)),
 | 
				
			||||||
 | 
					      Validation.failWhen(
 | 
				
			||||||
 | 
					        cfg =>
 | 
				
			||||||
 | 
					          cfg.fullTextSearch.enabled &&
 | 
				
			||||||
 | 
					            cfg.fullTextSearch.backend == FtsType.PostgreSQL &&
 | 
				
			||||||
 | 
					            cfg.fullTextSearch.postgresql.useDefaultConnection &&
 | 
				
			||||||
 | 
					            !cfg.jdbc.dbmsName.contains("postgresql"),
 | 
				
			||||||
 | 
					        s"PostgreSQL defined fulltext search backend with default-connection, which is not a PostgreSQL connection!"
 | 
				
			||||||
 | 
					      )
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -102,7 +102,8 @@ object JoexAppImpl extends MailAddressCodec {
 | 
				
			|||||||
      termSignal: SignallingRef[F, Boolean],
 | 
					      termSignal: SignallingRef[F, Boolean],
 | 
				
			||||||
      store: Store[F],
 | 
					      store: Store[F],
 | 
				
			||||||
      httpClient: Client[F],
 | 
					      httpClient: Client[F],
 | 
				
			||||||
      pubSub: PubSub[F]
 | 
					      pubSub: PubSub[F],
 | 
				
			||||||
 | 
					      pools: Pools
 | 
				
			||||||
  ): Resource[F, JoexApp[F]] =
 | 
					  ): Resource[F, JoexApp[F]] =
 | 
				
			||||||
    for {
 | 
					    for {
 | 
				
			||||||
      joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
 | 
					      joexLogger <- Resource.pure(docspell.logging.getLogger[F](s"joex-${cfg.appId.id}"))
 | 
				
			||||||
@@ -120,6 +121,7 @@ object JoexAppImpl extends MailAddressCodec {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
      tasks <- JoexTasks.resource(
 | 
					      tasks <- JoexTasks.resource(
 | 
				
			||||||
        cfg,
 | 
					        cfg,
 | 
				
			||||||
 | 
					        pools,
 | 
				
			||||||
        jobStoreModule,
 | 
					        jobStoreModule,
 | 
				
			||||||
        httpClient,
 | 
					        httpClient,
 | 
				
			||||||
        pubSubT,
 | 
					        pubSubT,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -52,7 +52,7 @@ object JoexServer {
 | 
				
			|||||||
        httpClient
 | 
					        httpClient
 | 
				
			||||||
      )(Topics.all.map(_.topic))
 | 
					      )(Topics.all.map(_.topic))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      joexApp <- JoexAppImpl.create[F](cfg, signal, store, httpClient, pubSub)
 | 
					      joexApp <- JoexAppImpl.create[F](cfg, signal, store, httpClient, pubSub, pools)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      httpApp = Router(
 | 
					      httpApp = Router(
 | 
				
			||||||
        "/internal" -> InternalHeader(settings.internalRouteKey) {
 | 
					        "/internal" -> InternalHeader(settings.internalRouteKey) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -12,7 +12,9 @@ import docspell.analysis.TextAnalyser
 | 
				
			|||||||
import docspell.backend.fulltext.CreateIndex
 | 
					import docspell.backend.fulltext.CreateIndex
 | 
				
			||||||
import docspell.backend.ops._
 | 
					import docspell.backend.ops._
 | 
				
			||||||
import docspell.common._
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.config.FtsType
 | 
				
			||||||
import docspell.ftsclient.FtsClient
 | 
					import docspell.ftsclient.FtsClient
 | 
				
			||||||
 | 
					import docspell.ftspsql.PsqlFtsClient
 | 
				
			||||||
import docspell.ftssolr.SolrFtsClient
 | 
					import docspell.ftssolr.SolrFtsClient
 | 
				
			||||||
import docspell.joex.analysis.RegexNerFile
 | 
					import docspell.joex.analysis.RegexNerFile
 | 
				
			||||||
import docspell.joex.emptytrash.EmptyTrashTask
 | 
					import docspell.joex.emptytrash.EmptyTrashTask
 | 
				
			||||||
@@ -211,6 +213,7 @@ object JoexTasks {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  def resource[F[_]: Async](
 | 
					  def resource[F[_]: Async](
 | 
				
			||||||
      cfg: Config,
 | 
					      cfg: Config,
 | 
				
			||||||
 | 
					      pools: Pools,
 | 
				
			||||||
      jobStoreModule: JobStoreModuleBuilder.Module[F],
 | 
					      jobStoreModule: JobStoreModuleBuilder.Module[F],
 | 
				
			||||||
      httpClient: Client[F],
 | 
					      httpClient: Client[F],
 | 
				
			||||||
      pubSub: PubSubT[F],
 | 
					      pubSub: PubSubT[F],
 | 
				
			||||||
@@ -221,7 +224,7 @@ object JoexTasks {
 | 
				
			|||||||
      joex <- OJoex(pubSub)
 | 
					      joex <- OJoex(pubSub)
 | 
				
			||||||
      store = jobStoreModule.store
 | 
					      store = jobStoreModule.store
 | 
				
			||||||
      upload <- OUpload(store, jobStoreModule.jobs)
 | 
					      upload <- OUpload(store, jobStoreModule.jobs)
 | 
				
			||||||
      fts <- createFtsClient(cfg)(httpClient)
 | 
					      fts <- createFtsClient(cfg, pools, store, httpClient)
 | 
				
			||||||
      createIndex <- CreateIndex.resource(fts, store)
 | 
					      createIndex <- CreateIndex.resource(fts, store)
 | 
				
			||||||
      itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs)
 | 
					      itemOps <- OItem(store, fts, createIndex, jobStoreModule.jobs)
 | 
				
			||||||
      itemSearchOps <- OItemSearch(store)
 | 
					      itemSearchOps <- OItemSearch(store)
 | 
				
			||||||
@@ -249,8 +252,24 @@ object JoexTasks {
 | 
				
			|||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private def createFtsClient[F[_]: Async](
 | 
					  private def createFtsClient[F[_]: Async](
 | 
				
			||||||
      cfg: Config
 | 
					      cfg: Config,
 | 
				
			||||||
  )(client: Client[F]): Resource[F, FtsClient[F]] =
 | 
					      pools: Pools,
 | 
				
			||||||
    if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
 | 
					      store: Store[F],
 | 
				
			||||||
 | 
					      client: Client[F]
 | 
				
			||||||
 | 
					  ): Resource[F, FtsClient[F]] =
 | 
				
			||||||
 | 
					    if (cfg.fullTextSearch.enabled)
 | 
				
			||||||
 | 
					      cfg.fullTextSearch.backend match {
 | 
				
			||||||
 | 
					        case FtsType.Solr =>
 | 
				
			||||||
 | 
					          SolrFtsClient(cfg.fullTextSearch.solr, client)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        case FtsType.PostgreSQL =>
 | 
				
			||||||
 | 
					          val psqlCfg = cfg.fullTextSearch.postgresql.toPsqlConfig(cfg.jdbc)
 | 
				
			||||||
 | 
					          if (cfg.fullTextSearch.postgresql.useDefaultConnection)
 | 
				
			||||||
 | 
					            Resource.pure[F, FtsClient[F]](
 | 
				
			||||||
 | 
					              new PsqlFtsClient[F](psqlCfg, store.transactor)
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
					            PsqlFtsClient(psqlCfg, pools.connectEC)
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
 | 
					    else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -31,7 +31,7 @@ object Main extends IOApp {
 | 
				
			|||||||
        Option(System.getProperty("config.file")),
 | 
					        Option(System.getProperty("config.file")),
 | 
				
			||||||
        cfg.appId,
 | 
					        cfg.appId,
 | 
				
			||||||
        cfg.baseUrl,
 | 
					        cfg.baseUrl,
 | 
				
			||||||
        Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
 | 
					        Some(cfg.fullTextSearch.info).filter(_ => cfg.fullTextSearch.enabled),
 | 
				
			||||||
        cfg.files.defaultStoreConfig
 | 
					        cfg.files.defaultStoreConfig
 | 
				
			||||||
      )
 | 
					      )
 | 
				
			||||||
      _ <- logger.info(s"\n${banner.render("***>")}")
 | 
					      _ <- logger.info(s"\n${banner.render("***>")}")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,9 +47,10 @@ object TextExtraction {
 | 
				
			|||||||
          ctx.args.meta.collective,
 | 
					          ctx.args.meta.collective,
 | 
				
			||||||
          ctx.args.meta.folderId,
 | 
					          ctx.args.meta.folderId,
 | 
				
			||||||
          item.item.name.some,
 | 
					          item.item.name.some,
 | 
				
			||||||
          None
 | 
					          None,
 | 
				
			||||||
 | 
					          ctx.args.meta.language
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)).toSeq: _*)
 | 
					        _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)): _*)
 | 
				
			||||||
        dur <- start
 | 
					        dur <- start
 | 
				
			||||||
        extractedTags = txt.flatMap(_.tags).distinct.toList
 | 
					        extractedTags = txt.flatMap(_.tags).distinct.toList
 | 
				
			||||||
        _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}.")
 | 
					        _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}.")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -289,6 +289,9 @@ docspell.server {
 | 
				
			|||||||
    # Currently the SOLR search platform is supported.
 | 
					    # Currently the SOLR search platform is supported.
 | 
				
			||||||
    enabled = false
 | 
					    enabled = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Which backend to use, either solr or postgresql
 | 
				
			||||||
 | 
					    backend = "solr"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Configuration for the SOLR backend.
 | 
					    # Configuration for the SOLR backend.
 | 
				
			||||||
    solr = {
 | 
					    solr = {
 | 
				
			||||||
      # The URL to solr
 | 
					      # The URL to solr
 | 
				
			||||||
@@ -304,6 +307,43 @@ docspell.server {
 | 
				
			|||||||
      # The default combiner for tokens. One of {AND, OR}.
 | 
					      # The default combiner for tokens. One of {AND, OR}.
 | 
				
			||||||
      q-op = "OR"
 | 
					      q-op = "OR"
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Configuration for PostgreSQL backend
 | 
				
			||||||
 | 
					    postgresql = {
 | 
				
			||||||
 | 
					      # Whether to use the default database, only works if it is
 | 
				
			||||||
 | 
					      # postgresql
 | 
				
			||||||
 | 
					      use-default-connection = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # The database connection.
 | 
				
			||||||
 | 
					      jdbc {
 | 
				
			||||||
 | 
					        url = "jdbc:postgresql://server:5432/db"
 | 
				
			||||||
 | 
					        user = "pguser"
 | 
				
			||||||
 | 
					        password = ""
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # A mapping from a language to a postgres text search config. By
 | 
				
			||||||
 | 
					      # default a language is mapped to a predefined config.
 | 
				
			||||||
 | 
					      # PostgreSQL has predefined configs for some languages. This
 | 
				
			||||||
 | 
					      # setting allows creating a custom text search config and
 | 
				
			||||||
 | 
					      # define it here for some or all languages.
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # Example:
 | 
				
			||||||
 | 
					      #  { german = "my-german" }
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # See https://www.postgresql.org/docs/14/textsearch-tables.html ff.
 | 
				
			||||||
 | 
					      pg-config = {
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Define which query parser to use.
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
 | 
				
			||||||
 | 
					      pg-query-parser = "websearch_to_tsquery"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Allows defining a normalization for the ranking.
 | 
				
			||||||
 | 
					      #
 | 
				
			||||||
 | 
					      # https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
 | 
				
			||||||
 | 
					      pg-rank-normalization = [ 4 ]
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  # Configuration for the backend.
 | 
					  # Configuration for the backend.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,6 +9,7 @@ package docspell.restserver
 | 
				
			|||||||
import docspell.backend.auth.Login
 | 
					import docspell.backend.auth.Login
 | 
				
			||||||
import docspell.backend.{Config => BackendConfig}
 | 
					import docspell.backend.{Config => BackendConfig}
 | 
				
			||||||
import docspell.common._
 | 
					import docspell.common._
 | 
				
			||||||
 | 
					import docspell.config.{FtsType, PgFtsConfig}
 | 
				
			||||||
import docspell.ftssolr.SolrConfig
 | 
					import docspell.ftssolr.SolrConfig
 | 
				
			||||||
import docspell.logging.LogConfig
 | 
					import docspell.logging.LogConfig
 | 
				
			||||||
import docspell.oidc.ProviderConfig
 | 
					import docspell.oidc.ProviderConfig
 | 
				
			||||||
@@ -92,7 +93,26 @@ object Config {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
 | 
					  case class FullTextSearch(
 | 
				
			||||||
 | 
					      enabled: Boolean,
 | 
				
			||||||
 | 
					      backend: FtsType,
 | 
				
			||||||
 | 
					      solr: SolrConfig,
 | 
				
			||||||
 | 
					      postgresql: PgFtsConfig
 | 
				
			||||||
 | 
					  ) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def info: String =
 | 
				
			||||||
 | 
					      if (!enabled) "Disabled."
 | 
				
			||||||
 | 
					      else
 | 
				
			||||||
 | 
					        backend match {
 | 
				
			||||||
 | 
					          case FtsType.Solr =>
 | 
				
			||||||
 | 
					            s"Solr(${solr.url.asString})"
 | 
				
			||||||
 | 
					          case FtsType.PostgreSQL =>
 | 
				
			||||||
 | 
					            if (postgresql.useDefaultConnection)
 | 
				
			||||||
 | 
					              "PostgreSQL(default)"
 | 
				
			||||||
 | 
					            else
 | 
				
			||||||
 | 
					              s"PostgreSQL(${postgresql.jdbc.url.asString})"
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  object FullTextSearch {}
 | 
					  object FullTextSearch {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -13,7 +13,7 @@ import cats.effect.Async
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import docspell.backend.signup.{Config => SignupConfig}
 | 
					import docspell.backend.signup.{Config => SignupConfig}
 | 
				
			||||||
import docspell.config.Implicits._
 | 
					import docspell.config.Implicits._
 | 
				
			||||||
import docspell.config.{ConfigFactory, Validation}
 | 
					import docspell.config.{ConfigFactory, FtsType, Validation}
 | 
				
			||||||
import docspell.oidc.{ProviderConfig, SignatureAlgo}
 | 
					import docspell.oidc.{ProviderConfig, SignatureAlgo}
 | 
				
			||||||
import docspell.restserver.auth.OpenId
 | 
					import docspell.restserver.auth.OpenId
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -106,4 +106,15 @@ object ConfigFile {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  def filesValidate: Validation[Config] =
 | 
					  def filesValidate: Validation[Config] =
 | 
				
			||||||
    Validation(cfg => cfg.backend.files.validate.map(_ => cfg))
 | 
					    Validation(cfg => cfg.backend.files.validate.map(_ => cfg))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def postgresFtsValidate: Validation[Config] =
 | 
				
			||||||
 | 
					    Validation.failWhen(
 | 
				
			||||||
 | 
					      cfg =>
 | 
				
			||||||
 | 
					        cfg.fullTextSearch.enabled &&
 | 
				
			||||||
 | 
					          cfg.fullTextSearch.backend == FtsType.PostgreSQL &&
 | 
				
			||||||
 | 
					          cfg.fullTextSearch.postgresql.useDefaultConnection &&
 | 
				
			||||||
 | 
					          !cfg.backend.jdbc.dbmsName.contains("postgresql"),
 | 
				
			||||||
 | 
					      s"PostgreSQL defined fulltext search backend with default-connection, which is not a PostgreSQL connection!"
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,7 +28,7 @@ object Main extends IOApp {
 | 
				
			|||||||
      Option(System.getProperty("config.file")),
 | 
					      Option(System.getProperty("config.file")),
 | 
				
			||||||
      cfg.appId,
 | 
					      cfg.appId,
 | 
				
			||||||
      cfg.baseUrl,
 | 
					      cfg.baseUrl,
 | 
				
			||||||
      Some(cfg.fullTextSearch.solr.url).filter(_ => cfg.fullTextSearch.enabled),
 | 
					      Some(cfg.fullTextSearch.info).filter(_ => cfg.fullTextSearch.enabled),
 | 
				
			||||||
      cfg.backend.files.defaultStoreConfig
 | 
					      cfg.backend.files.defaultStoreConfig
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    _ <- logger.info(s"\n${banner.render("***>")}")
 | 
					    _ <- logger.info(s"\n${banner.render("***>")}")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -12,7 +12,10 @@ import fs2.concurrent.Topic
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import docspell.backend.BackendApp
 | 
					import docspell.backend.BackendApp
 | 
				
			||||||
import docspell.backend.auth.{AuthToken, ShareToken}
 | 
					import docspell.backend.auth.{AuthToken, ShareToken}
 | 
				
			||||||
 | 
					import docspell.common.Pools
 | 
				
			||||||
 | 
					import docspell.config.FtsType
 | 
				
			||||||
import docspell.ftsclient.FtsClient
 | 
					import docspell.ftsclient.FtsClient
 | 
				
			||||||
 | 
					import docspell.ftspsql.PsqlFtsClient
 | 
				
			||||||
import docspell.ftssolr.SolrFtsClient
 | 
					import docspell.ftssolr.SolrFtsClient
 | 
				
			||||||
import docspell.notification.api.NotificationModule
 | 
					import docspell.notification.api.NotificationModule
 | 
				
			||||||
import docspell.notification.impl.NotificationModuleImpl
 | 
					import docspell.notification.impl.NotificationModuleImpl
 | 
				
			||||||
@@ -155,6 +158,7 @@ object RestAppImpl {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  def create[F[_]: Async](
 | 
					  def create[F[_]: Async](
 | 
				
			||||||
      cfg: Config,
 | 
					      cfg: Config,
 | 
				
			||||||
 | 
					      pools: Pools,
 | 
				
			||||||
      store: Store[F],
 | 
					      store: Store[F],
 | 
				
			||||||
      httpClient: Client[F],
 | 
					      httpClient: Client[F],
 | 
				
			||||||
      pubSub: PubSub[F],
 | 
					      pubSub: PubSub[F],
 | 
				
			||||||
@@ -163,7 +167,7 @@ object RestAppImpl {
 | 
				
			|||||||
    val logger = docspell.logging.getLogger[F](s"restserver-${cfg.appId.id}")
 | 
					    val logger = docspell.logging.getLogger[F](s"restserver-${cfg.appId.id}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for {
 | 
					    for {
 | 
				
			||||||
      ftsClient <- createFtsClient(cfg)(httpClient)
 | 
					      ftsClient <- createFtsClient(cfg, pools, store, httpClient)
 | 
				
			||||||
      pubSubT = PubSubT(pubSub, logger)
 | 
					      pubSubT = PubSubT(pubSub, logger)
 | 
				
			||||||
      javaEmil = JavaMailEmil(cfg.backend.mailSettings)
 | 
					      javaEmil = JavaMailEmil(cfg.backend.mailSettings)
 | 
				
			||||||
      notificationMod <- Resource.eval(
 | 
					      notificationMod <- Resource.eval(
 | 
				
			||||||
@@ -188,8 +192,25 @@ object RestAppImpl {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private def createFtsClient[F[_]: Async](
 | 
					  private def createFtsClient[F[_]: Async](
 | 
				
			||||||
      cfg: Config
 | 
					      cfg: Config,
 | 
				
			||||||
  )(client: Client[F]): Resource[F, FtsClient[F]] =
 | 
					      pools: Pools,
 | 
				
			||||||
    if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client)
 | 
					      store: Store[F],
 | 
				
			||||||
 | 
					      client: Client[F]
 | 
				
			||||||
 | 
					  ): Resource[F, FtsClient[F]] =
 | 
				
			||||||
 | 
					    if (cfg.fullTextSearch.enabled)
 | 
				
			||||||
 | 
					      cfg.fullTextSearch.backend match {
 | 
				
			||||||
 | 
					        case FtsType.Solr =>
 | 
				
			||||||
 | 
					          SolrFtsClient(cfg.fullTextSearch.solr, client)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        case FtsType.PostgreSQL =>
 | 
				
			||||||
 | 
					          val psqlCfg = cfg.fullTextSearch.postgresql.toPsqlConfig(cfg.backend.jdbc)
 | 
				
			||||||
 | 
					          if (cfg.fullTextSearch.postgresql.useDefaultConnection)
 | 
				
			||||||
 | 
					            Resource.pure[F, FtsClient[F]](
 | 
				
			||||||
 | 
					              new PsqlFtsClient[F](psqlCfg, store.transactor)
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					          else
 | 
				
			||||||
 | 
					            PsqlFtsClient(psqlCfg, pools.connectEC)
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
 | 
					    else Resource.pure[F, FtsClient[F]](FtsClient.none[F])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -88,7 +88,7 @@ object RestServer {
 | 
				
			|||||||
        store,
 | 
					        store,
 | 
				
			||||||
        httpClient
 | 
					        httpClient
 | 
				
			||||||
      )(Topics.all.map(_.topic))
 | 
					      )(Topics.all.map(_.topic))
 | 
				
			||||||
      restApp <- RestAppImpl.create[F](cfg, store, httpClient, pubSub, wsTopic)
 | 
					      restApp <- RestAppImpl.create[F](cfg, pools, store, httpClient, pubSub, wsTopic)
 | 
				
			||||||
    } yield (restApp, pubSub, setting)
 | 
					    } yield (restApp, pubSub, setting)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def createHttpApp[F[_]: Async](
 | 
					  def createHttpApp[F[_]: Async](
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,8 @@ trait Store[F[_]] {
 | 
				
			|||||||
  ): FileRepository[F]
 | 
					  ): FileRepository[F]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult]
 | 
					  def add(insert: ConnectionIO[Int], exists: ConnectionIO[Boolean]): F[AddResult]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  def transactor: Transactor[F]
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
object Store {
 | 
					object Store {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -24,8 +24,9 @@ final class StoreImpl[F[_]: Async](
 | 
				
			|||||||
    val fileRepo: FileRepository[F],
 | 
					    val fileRepo: FileRepository[F],
 | 
				
			||||||
    jdbc: JdbcConfig,
 | 
					    jdbc: JdbcConfig,
 | 
				
			||||||
    ds: DataSource,
 | 
					    ds: DataSource,
 | 
				
			||||||
    xa: Transactor[F]
 | 
					    val transactor: Transactor[F]
 | 
				
			||||||
) extends Store[F] {
 | 
					) extends Store[F] {
 | 
				
			||||||
 | 
					  private[this] val xa = transactor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def createFileRepository(
 | 
					  def createFileRepository(
 | 
				
			||||||
      cfg: FileRepositoryConfig,
 | 
					      cfg: FileRepositoryConfig,
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -576,7 +576,8 @@ object QItem {
 | 
				
			|||||||
      collective: Ident,
 | 
					      collective: Ident,
 | 
				
			||||||
      folder: Option[Ident],
 | 
					      folder: Option[Ident],
 | 
				
			||||||
      name: String,
 | 
					      name: String,
 | 
				
			||||||
      notes: Option[String]
 | 
					      notes: Option[String],
 | 
				
			||||||
 | 
					      language: Language
 | 
				
			||||||
  )
 | 
					  )
 | 
				
			||||||
  def allNameAndNotes(
 | 
					  def allNameAndNotes(
 | 
				
			||||||
      coll: Option[Ident],
 | 
					      coll: Option[Ident],
 | 
				
			||||||
@@ -584,10 +585,11 @@ object QItem {
 | 
				
			|||||||
      chunkSize: Int
 | 
					      chunkSize: Int
 | 
				
			||||||
  ): Stream[ConnectionIO, NameAndNotes] = {
 | 
					  ): Stream[ConnectionIO, NameAndNotes] = {
 | 
				
			||||||
    val i = RItem.as("i")
 | 
					    val i = RItem.as("i")
 | 
				
			||||||
 | 
					    val c = RCollective.as("c")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Select(
 | 
					    Select(
 | 
				
			||||||
      select(i.id, i.cid, i.folder, i.name, i.notes),
 | 
					      select(i.id, i.cid, i.folder, i.name, i.notes, c.language),
 | 
				
			||||||
      from(i)
 | 
					      from(i).innerJoin(c, c.id === i.cid)
 | 
				
			||||||
    ).where(
 | 
					    ).where(
 | 
				
			||||||
      i.state.in(ItemState.validStates) &&?
 | 
					      i.state.in(ItemState.validStates) &&?
 | 
				
			||||||
        itemIds.map(ids => i.id.in(ids)) &&?
 | 
					        itemIds.map(ids => i.id.in(ids)) &&?
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -236,8 +236,18 @@ object RAttachment {
 | 
				
			|||||||
      n3 <- DML.delete(T, T.id === attachId)
 | 
					      n3 <- DML.delete(T, T.id === attachId)
 | 
				
			||||||
    } yield n0 + n1 + n2 + n3
 | 
					    } yield n0 + n1 + n2 + n3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] =
 | 
					  def findItemAndLanguage(
 | 
				
			||||||
    Select(T.itemId.s, from(T), T.id === attachId).build.query[Ident].option
 | 
					      attachId: Ident
 | 
				
			||||||
 | 
					  ): ConnectionIO[Option[(Ident, Option[Language])]] = {
 | 
				
			||||||
 | 
					    val a = RAttachment.as("a")
 | 
				
			||||||
 | 
					    val m = RAttachmentMeta.as("m")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Select(
 | 
				
			||||||
 | 
					      select(a.itemId, m.language),
 | 
				
			||||||
 | 
					      from(a).leftJoin(m, m.id === a.id),
 | 
				
			||||||
 | 
					      a.id === attachId
 | 
				
			||||||
 | 
					    ).build.query[(Ident, Option[Language])].option
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def findAll(
 | 
					  def findAll(
 | 
				
			||||||
      coll: Option[Ident],
 | 
					      coll: Option[Ident],
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -23,7 +23,7 @@ class PostgresqlMigrateTest
 | 
				
			|||||||
    with TestContainerForAll
 | 
					    with TestContainerForAll
 | 
				
			||||||
    with TestLoggingConfig {
 | 
					    with TestLoggingConfig {
 | 
				
			||||||
  override val containerDef: PostgreSQLContainer.Def =
 | 
					  override val containerDef: PostgreSQLContainer.Def =
 | 
				
			||||||
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:13"))
 | 
					    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  test("postgres empty schema migration") {
 | 
					  test("postgres empty schema migration") {
 | 
				
			||||||
    assume(Docker.existsUnsafe, "docker doesn't exist!")
 | 
					    assume(Docker.existsUnsafe, "docker doesn't exist!")
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,6 +4,11 @@ let
 | 
				
			|||||||
  full-text-search = {
 | 
					  full-text-search = {
 | 
				
			||||||
    enabled = true;
 | 
					    enabled = true;
 | 
				
			||||||
    solr.url = "http://localhost:${toString config.services.solr.port}/solr/docspell";
 | 
					    solr.url = "http://localhost:${toString config.services.solr.port}/solr/docspell";
 | 
				
			||||||
 | 
					    postgresql = {
 | 
				
			||||||
 | 
					      pg-config = {
 | 
				
			||||||
 | 
					        "german" = "my-germam";
 | 
				
			||||||
 | 
					      };
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
in
 | 
					in
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -213,6 +213,7 @@ Docpell Update Check
 | 
				
			|||||||
    };
 | 
					    };
 | 
				
			||||||
    full-text-search = {
 | 
					    full-text-search = {
 | 
				
			||||||
      enabled = false;
 | 
					      enabled = false;
 | 
				
			||||||
 | 
					      backend = "solr";
 | 
				
			||||||
      solr = {
 | 
					      solr = {
 | 
				
			||||||
        url = "http://localhost:8983/solr/docspell";
 | 
					        url = "http://localhost:8983/solr/docspell";
 | 
				
			||||||
        commit-within = 1000;
 | 
					        commit-within = 1000;
 | 
				
			||||||
@@ -220,6 +221,17 @@ Docpell Update Check
 | 
				
			|||||||
        def-type = "lucene";
 | 
					        def-type = "lucene";
 | 
				
			||||||
        q-op = "OR";
 | 
					        q-op = "OR";
 | 
				
			||||||
      };
 | 
					      };
 | 
				
			||||||
 | 
					      postgresql = {
 | 
				
			||||||
 | 
					        use-default-connection = false;
 | 
				
			||||||
 | 
					        jdbc = {
 | 
				
			||||||
 | 
					          url = "jdbc:postgresql://server:5432/db";
 | 
				
			||||||
 | 
					          user = "pguser";
 | 
				
			||||||
 | 
					          password = "";
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        pg-config = {};
 | 
				
			||||||
 | 
					        pg-query-parser = "websearch_to_tsquery";
 | 
				
			||||||
 | 
					        pg-rank-normalization = [ 4 ];
 | 
				
			||||||
 | 
					      };
 | 
				
			||||||
      migration = {
 | 
					      migration = {
 | 
				
			||||||
        index-all-chunk = 10;
 | 
					        index-all-chunk = 10;
 | 
				
			||||||
      };
 | 
					      };
 | 
				
			||||||
@@ -1371,6 +1383,12 @@ in {
 | 
				
			|||||||
                Currently the SOLR search platform is supported.
 | 
					                Currently the SOLR search platform is supported.
 | 
				
			||||||
              '';
 | 
					              '';
 | 
				
			||||||
            };
 | 
					            };
 | 
				
			||||||
 | 
					            backend = mkOption {
 | 
				
			||||||
 | 
					              type = types.str;
 | 
				
			||||||
 | 
					              default = defaults.full-text-search.backend;
 | 
				
			||||||
 | 
					              description = "The backend to use, either solr or postgresql";
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            solr = mkOption {
 | 
					            solr = mkOption {
 | 
				
			||||||
              type = types.submodule({
 | 
					              type = types.submodule({
 | 
				
			||||||
                options = {
 | 
					                options = {
 | 
				
			||||||
@@ -1408,6 +1426,61 @@ in {
 | 
				
			|||||||
              default = defaults.full-text-search.solr;
 | 
					              default = defaults.full-text-search.solr;
 | 
				
			||||||
              description = "Configuration for the SOLR backend.";
 | 
					              description = "Configuration for the SOLR backend.";
 | 
				
			||||||
            };
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            postgresql = mkOption {
 | 
				
			||||||
 | 
					              type = types.submodule({
 | 
				
			||||||
 | 
					                options = {
 | 
				
			||||||
 | 
					                  use-default-connection = mkOption {
 | 
				
			||||||
 | 
					                    type = types.bool;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.use-default-connection;
 | 
				
			||||||
 | 
					                    description = "Whether to use the primary db connection.";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  jdbc = mkOption {
 | 
				
			||||||
 | 
					                    type = types.submodule ({
 | 
				
			||||||
 | 
					                      options = {
 | 
				
			||||||
 | 
					                        url = mkOption {
 | 
				
			||||||
 | 
					                          type = types.str;
 | 
				
			||||||
 | 
					                          default = defaults.jdbc.url;
 | 
				
			||||||
 | 
					                          description = ''
 | 
				
			||||||
 | 
					                            The URL to the database.
 | 
				
			||||||
 | 
					                          '';
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                        user = mkOption {
 | 
				
			||||||
 | 
					                          type = types.str;
 | 
				
			||||||
 | 
					                          default = defaults.jdbc.user;
 | 
				
			||||||
 | 
					                          description = "The user name to connect to the database.";
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                        password = mkOption {
 | 
				
			||||||
 | 
					                          type = types.str;
 | 
				
			||||||
 | 
					                          default = defaults.jdbc.password;
 | 
				
			||||||
 | 
					                          description = "The password to connect to the database.";
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                      };
 | 
				
			||||||
 | 
					                    });
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.jdbc;
 | 
				
			||||||
 | 
					                    description = "Database connection settings";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  pg-config = mkOption {
 | 
				
			||||||
 | 
					                    type = types.attrs;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.pg-config;
 | 
				
			||||||
 | 
					                    description = "";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  pg-query-parser = mkOption {
 | 
				
			||||||
 | 
					                    type = types.str;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.pg-query-parser;
 | 
				
			||||||
 | 
					                    description = "";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  pg-rank-normalization = mkOption {
 | 
				
			||||||
 | 
					                    type = types.listOf types.int;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.pg-rank-normalization;
 | 
				
			||||||
 | 
					                    description = "";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					              });
 | 
				
			||||||
 | 
					              default = defaults.full-text-search.postgresql;
 | 
				
			||||||
 | 
					              description = "PostgreSQL for fulltext search";
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            migration = mkOption {
 | 
					            migration = mkOption {
 | 
				
			||||||
              type = types.submodule({
 | 
					              type = types.submodule({
 | 
				
			||||||
                options = {
 | 
					                options = {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -62,6 +62,17 @@ let
 | 
				
			|||||||
        def-type = "lucene";
 | 
					        def-type = "lucene";
 | 
				
			||||||
        q-op = "OR";
 | 
					        q-op = "OR";
 | 
				
			||||||
      };
 | 
					      };
 | 
				
			||||||
 | 
					      postgresql = {
 | 
				
			||||||
 | 
					        use-default-connection = false;
 | 
				
			||||||
 | 
					        jdbc = {
 | 
				
			||||||
 | 
					          url = "jdbc:postgresql://server:5432/db";
 | 
				
			||||||
 | 
					          user = "pguser";
 | 
				
			||||||
 | 
					          password = "";
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        pg-config = {};
 | 
				
			||||||
 | 
					        pg-query-parser = "websearch_to_tsquery";
 | 
				
			||||||
 | 
					        pg-rank-normalization = [ 4 ];
 | 
				
			||||||
 | 
					      };
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    auth = {
 | 
					    auth = {
 | 
				
			||||||
      server-secret = "hex:caffee";
 | 
					      server-secret = "hex:caffee";
 | 
				
			||||||
@@ -575,6 +586,60 @@ in {
 | 
				
			|||||||
              default = defaults.full-text-search.solr;
 | 
					              default = defaults.full-text-search.solr;
 | 
				
			||||||
              description = "Configuration for the SOLR backend.";
 | 
					              description = "Configuration for the SOLR backend.";
 | 
				
			||||||
            };
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            postgresql = mkOption {
 | 
				
			||||||
 | 
					              type = types.submodule({
 | 
				
			||||||
 | 
					                options = {
 | 
				
			||||||
 | 
					                  use-default-connection = mkOption {
 | 
				
			||||||
 | 
					                    type = types.bool;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.use-default-connection;
 | 
				
			||||||
 | 
					                    description = "Whether to use the primary db connection.";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  jdbc = mkOption {
 | 
				
			||||||
 | 
					                    type = types.submodule ({
 | 
				
			||||||
 | 
					                      options = {
 | 
				
			||||||
 | 
					                        url = mkOption {
 | 
				
			||||||
 | 
					                          type = types.str;
 | 
				
			||||||
 | 
					                          default = defaults.jdbc.url;
 | 
				
			||||||
 | 
					                          description = ''
 | 
				
			||||||
 | 
					                            The URL to the database.
 | 
				
			||||||
 | 
					                          '';
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                        user = mkOption {
 | 
				
			||||||
 | 
					                          type = types.str;
 | 
				
			||||||
 | 
					                          default = defaults.jdbc.user;
 | 
				
			||||||
 | 
					                          description = "The user name to connect to the database.";
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                        password = mkOption {
 | 
				
			||||||
 | 
					                          type = types.str;
 | 
				
			||||||
 | 
					                          default = defaults.jdbc.password;
 | 
				
			||||||
 | 
					                          description = "The password to connect to the database.";
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                      };
 | 
				
			||||||
 | 
					                    });
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.jdbc;
 | 
				
			||||||
 | 
					                    description = "Database connection settings";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  pg-config = mkOption {
 | 
				
			||||||
 | 
					                    type = types.attrs;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.pg-config;
 | 
				
			||||||
 | 
					                    description = "";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  pg-query-parser = mkOption {
 | 
				
			||||||
 | 
					                    type = types.str;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.pg-query-parser;
 | 
				
			||||||
 | 
					                    description = "";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                  pg-rank-normalization = mkOption {
 | 
				
			||||||
 | 
					                    type = types.listOf types.int;
 | 
				
			||||||
 | 
					                    default = defaults.full-text-search.postgresql.pg-rank-normalization;
 | 
				
			||||||
 | 
					                    description = "";
 | 
				
			||||||
 | 
					                  };
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					              });
 | 
				
			||||||
 | 
					              default = defaults.full-text-search.postgresql;
 | 
				
			||||||
 | 
					              description = "PostgreSQL for fulltext search";
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
          };
 | 
					          };
 | 
				
			||||||
        });
 | 
					        });
 | 
				
			||||||
        default = defaults.full-text-search;
 | 
					        default = defaults.full-text-search;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -59,7 +59,7 @@ via the header `Docspell-Share-Auth`.
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
Docspell can be configured to be a relying party for OpenID Connect.
 | 
					Docspell can be configured to be a relying party for OpenID Connect.
 | 
				
			||||||
Please see [the config
 | 
					Please see [the config
 | 
				
			||||||
section](@/docs/configure/_index.md#openid-connect-oauth2) for
 | 
					section](@/docs/configure/authentication.md#openid-connect-oauth2) for
 | 
				
			||||||
details.
 | 
					details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -80,7 +80,7 @@ $ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/a
 | 
				
			|||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
To enable these endpoints, you must provide a secret in the
 | 
					To enable these endpoints, you must provide a secret in the
 | 
				
			||||||
[configuration](@/docs/configure/_index.md#admin-endpoint).
 | 
					[configuration](@/docs/configure/admin-endpoint.md).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Live Api
 | 
					## Live Api
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -163,7 +163,7 @@ on the same host or network).
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
The endpoint is disabled by default, an admin must change the
 | 
					The endpoint is disabled by default, an admin must change the
 | 
				
			||||||
`docspell.server.integration-endpoint.enabled` flag to `true` in the
 | 
					`docspell.server.integration-endpoint.enabled` flag to `true` in the
 | 
				
			||||||
[configuration file](@/docs/configure/_index.md#rest-server).
 | 
					[configuration file](@/docs/configure/main.md#rest-server).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
If queried by a `GET` request, it returns whether it is enabled and
 | 
					If queried by a `GET` request, it returns whether it is enabled and
 | 
				
			||||||
the collective exists.
 | 
					the collective exists.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3,803 +3,9 @@ title = "Configuration"
 | 
				
			|||||||
insert_anchor_links = "right"
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
description = "Describes the configuration file and shows all default settings."
 | 
					description = "Describes the configuration file and shows all default settings."
 | 
				
			||||||
weight = 40
 | 
					weight = 40
 | 
				
			||||||
template = "docs.html"
 | 
					template = "pages.html"
 | 
				
			||||||
 | 
					sort_by = "weight"
 | 
				
			||||||
 | 
					redirect_to = "docs/configure/main"
 | 
				
			||||||
+++
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Configuration
 | 
					No content here.
 | 
				
			||||||
 | 
					 | 
				
			||||||
Docspell's executables (restserver and joex) can take one argument – a
 | 
					 | 
				
			||||||
configuration file. If that is not given, the defaults are used,
 | 
					 | 
				
			||||||
overridden by environment variables. A config file overrides default
 | 
					 | 
				
			||||||
values, so only values that differ from the defaults are necessary.
 | 
					 | 
				
			||||||
The complete default options and their documentation is at the end of
 | 
					 | 
				
			||||||
this page.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Besides the config file, another way is to provide individual settings
 | 
					 | 
				
			||||||
via key-value pairs to the executable by the `-D` option. For example
 | 
					 | 
				
			||||||
to override only `base-url` you could add the argument
 | 
					 | 
				
			||||||
`-Ddocspell.server.base-url=…` to the command. Multiple options are
 | 
					 | 
				
			||||||
possible. For more than few values this is very tedious, obviously, so
 | 
					 | 
				
			||||||
the recommended way is to maintain a config file. If these options
 | 
					 | 
				
			||||||
*and* a file is provided, then any setting given via the `-D…` option
 | 
					 | 
				
			||||||
overrides the same setting from the config file.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
At last, it is possible to configure docspell via environment
 | 
					 | 
				
			||||||
variables if there is no config file supplied (if a config file *is*
 | 
					 | 
				
			||||||
supplied, it is always preferred). Note that this approach is limited,
 | 
					 | 
				
			||||||
as arrays are not supported. A list of environment variables can be
 | 
					 | 
				
			||||||
found at the [end of this page](#environment-variables). The
 | 
					 | 
				
			||||||
environment variable name follows the corresponding config key - where
 | 
					 | 
				
			||||||
dots are replaced by underscores and dashes are replaced by two
 | 
					 | 
				
			||||||
underscores. For example, the config key `docspell.server.app-name`
 | 
					 | 
				
			||||||
can be defined as env variable `DOCSPELL_SERVER_APP__NAME`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It is also possible to specify environment variables inside a config
 | 
					 | 
				
			||||||
file (to get a mix of both) - please see the [documentation of the
 | 
					 | 
				
			||||||
config library](https://github.com/lightbend/config#standard-behavior)
 | 
					 | 
				
			||||||
for more on this.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# File Format
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The format of the configuration files can be
 | 
					 | 
				
			||||||
[HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation),
 | 
					 | 
				
			||||||
JSON or what this [config
 | 
					 | 
				
			||||||
library](https://github.com/lightbend/config) understands. The default
 | 
					 | 
				
			||||||
values below are in HOCON format, which is recommended, since it
 | 
					 | 
				
			||||||
allows comments and has some [advanced
 | 
					 | 
				
			||||||
features](https://github.com/lightbend/config#features-of-hocon).
 | 
					 | 
				
			||||||
Please also see their documentation for more details.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
A short description (please check the links for better understanding):
 | 
					 | 
				
			||||||
The config consists of key-value pairs and can be written in a
 | 
					 | 
				
			||||||
JSON-like format (called HOCON). Keys are organized in trees, and a
 | 
					 | 
				
			||||||
key defines a full path into the tree. There are two ways:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
a.b.c.d=15
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
or
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
a {
 | 
					 | 
				
			||||||
  b {
 | 
					 | 
				
			||||||
    c {
 | 
					 | 
				
			||||||
      d = 15
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Both are exactly the same and these forms are both used at the same
 | 
					 | 
				
			||||||
time. Usually the braces approach is used to group some more settings,
 | 
					 | 
				
			||||||
for better readability.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Strings that contain "not-so-common" characters should be enclosed in
 | 
					 | 
				
			||||||
quotes. It is possible to define values at the top of the file and
 | 
					 | 
				
			||||||
reuse them on different locations via the `${full.path.to.key}`
 | 
					 | 
				
			||||||
syntax. When using these variables, they *must not* be enclosed in
 | 
					 | 
				
			||||||
quotes.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Important Config Options
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The configuration of both components uses separate namespaces. The
 | 
					 | 
				
			||||||
configuration for the REST server is below `docspell.server`, while
 | 
					 | 
				
			||||||
the one for joex is below `docspell.joex`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can therefore use two separate config files or one single file
 | 
					 | 
				
			||||||
containing both namespaces.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## JDBC
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This configures the connection to the database. This has to be
 | 
					 | 
				
			||||||
specified for the rest server and joex. By default, a H2 database in
 | 
					 | 
				
			||||||
the current `/tmp` directory is configured.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The config looks like this (both components):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
docspell.joex.jdbc {
 | 
					 | 
				
			||||||
  url = ...
 | 
					 | 
				
			||||||
  user = ...
 | 
					 | 
				
			||||||
  password = ...
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
docspell.server.backend.jdbc {
 | 
					 | 
				
			||||||
  url = ...
 | 
					 | 
				
			||||||
  user = ...
 | 
					 | 
				
			||||||
  password = ...
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `url` is the connection to the database. It must start with
 | 
					 | 
				
			||||||
`jdbc`, followed by name of the database. The rest is specific to the
 | 
					 | 
				
			||||||
database used: it is either a path to a file for H2 or a host/database
 | 
					 | 
				
			||||||
url for MariaDB and PostgreSQL.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When using H2, the user and password can be chosen freely on first
 | 
					 | 
				
			||||||
start, but must stay the same on subsequent starts. Usually, the user
 | 
					 | 
				
			||||||
is `sa` and the password is left empty. Additionally, the url must
 | 
					 | 
				
			||||||
include these options:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Examples
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
PostgreSQL:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
url = "jdbc:postgresql://localhost:5432/docspelldb"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
MariaDB:
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
url = "jdbc:mariadb://localhost:3306/docspelldb"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
H2
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Admin Endpoint
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The admin endpoint defines some [routes](@/docs/api/intro.md#admin)
 | 
					 | 
				
			||||||
for administration tasks. This is disabled by default and can be
 | 
					 | 
				
			||||||
enabled by providing a secret:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
...
 | 
					 | 
				
			||||||
  admin-endpoint {
 | 
					 | 
				
			||||||
    secret = "123"
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This secret must be provided to all requests to a `/api/v1/admin/`
 | 
					 | 
				
			||||||
endpoint.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The most convenient way to execute admin tasks is to use the
 | 
					 | 
				
			||||||
[cli](@/docs/tools/cli.md). You get a list of possible admin commands
 | 
					 | 
				
			||||||
via `dsc admin help`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To see the output of the commands, there are these ways:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. looking at the joex logs, which gives most details.
 | 
					 | 
				
			||||||
2. Use the job-queue page when logged in as `docspell-system`
 | 
					 | 
				
			||||||
3. setup a [webhook](@/docs/webapp/notification.md) to be notified
 | 
					 | 
				
			||||||
   when a job finishes. This way you get a small message.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
All admin tasks (and also some other system tasks) are run under the
 | 
					 | 
				
			||||||
account `docspell-system` (collective and user). You need to create
 | 
					 | 
				
			||||||
this account and setup the notification hooks in there - not in your
 | 
					 | 
				
			||||||
normal account.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Full-Text Search: SOLR
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[Apache SOLR](https://solr.apache.org) is used to provide the
 | 
					 | 
				
			||||||
full-text search. Both docspell components must provide the same
 | 
					 | 
				
			||||||
connection setup. This is defined in the `full-text-search.solr`
 | 
					 | 
				
			||||||
subsection:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
...
 | 
					 | 
				
			||||||
  full-text-search {
 | 
					 | 
				
			||||||
    enabled = true
 | 
					 | 
				
			||||||
    ...
 | 
					 | 
				
			||||||
    solr = {
 | 
					 | 
				
			||||||
      url = "http://localhost:8983/solr/docspell"
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The default configuration at the end of this page contains more
 | 
					 | 
				
			||||||
information about each setting.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `solr.url` is the mandatory setting that you need to change to
 | 
					 | 
				
			||||||
point to your SOLR instance. Then you need to set the `enabled` flag
 | 
					 | 
				
			||||||
to `true`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When installing docspell manually, just install solr and create a core
 | 
					 | 
				
			||||||
as described in the [solr
 | 
					 | 
				
			||||||
documentation](https://solr.apache.org/guide/8_4/installing-solr.html).
 | 
					 | 
				
			||||||
That will provide you with the connection url (the last part is the
 | 
					 | 
				
			||||||
core name). If Docspell detects an empty core it will run a schema
 | 
					 | 
				
			||||||
setup on start automatically.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `full-text-search.solr` options are the same for joex and the
 | 
					 | 
				
			||||||
restserver.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There is an [admin route](@/docs/api/intro.md#admin) that allows to
 | 
					 | 
				
			||||||
re-create the entire index (for all collectives). This is possible via
 | 
					 | 
				
			||||||
a call:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
$ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
or use the [cli](@/docs/tools/cli.md):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```bash
 | 
					 | 
				
			||||||
dsc admin -a test123 recreate-index
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Here the `test123` is the key defined with `admin-endpoint.secret`. If
 | 
					 | 
				
			||||||
it is empty (the default), this call is disabled (all admin routes).
 | 
					 | 
				
			||||||
Otherwise, the POST request will submit a system task that is executed
 | 
					 | 
				
			||||||
by a joex instance eventually.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Using this endpoint, the entire index (including the schema) will be
 | 
					 | 
				
			||||||
re-created. This is sometimes necessary, for example if you upgrade
 | 
					 | 
				
			||||||
SOLR or delete the core to provide a new one (see
 | 
					 | 
				
			||||||
[here](https://solr.apache.org/guide/8_4/reindexing.html) for
 | 
					 | 
				
			||||||
details). Another way is to restart docspell (while clearing the
 | 
					 | 
				
			||||||
index). If docspell detects an empty index at startup, it will submit
 | 
					 | 
				
			||||||
a task to build the index automatically.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Note that a collective can also re-index their data using a similar
 | 
					 | 
				
			||||||
endpoint; but this is only deleting their data and doesn't do a full
 | 
					 | 
				
			||||||
re-index.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The solr index doesn't contain any new information, it can be
 | 
					 | 
				
			||||||
regenerated any time using the above REST call. Thus it doesn't need
 | 
					 | 
				
			||||||
to be backed up.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Bind
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The host and port the http server binds to. This applies to both
 | 
					 | 
				
			||||||
components. The joex component also exposes a small REST api to
 | 
					 | 
				
			||||||
inspect its state and notify the scheduler.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
docspell.server.bind {
 | 
					 | 
				
			||||||
  address = localhost
 | 
					 | 
				
			||||||
  port = 7880
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
docspell.joex.bind {
 | 
					 | 
				
			||||||
  address = localhost
 | 
					 | 
				
			||||||
  port = 7878
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
By default, it binds to `localhost` and some predefined port. This
 | 
					 | 
				
			||||||
must be changed, if components are on different machines.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Baseurl
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The base url is an important setting that defines the http URL where
 | 
					 | 
				
			||||||
the corresponding component can be reached. It applies to both
 | 
					 | 
				
			||||||
components. For a joex component, the url must be resolvable from a
 | 
					 | 
				
			||||||
REST server component. The REST server also uses this url to create
 | 
					 | 
				
			||||||
absolute urls and to configure the authentication cookie.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
By default it is built using the information from the `bind` setting,
 | 
					 | 
				
			||||||
which is `http://localhost:7880`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If the default is not changed, docspell will use the request to
 | 
					 | 
				
			||||||
determine the base-url. It first inspects the `X-Forwarded-For` header
 | 
					 | 
				
			||||||
that is often used with reverse proxies. If that is not present, the
 | 
					 | 
				
			||||||
`Host` header of the request is used. However, if the `base-url`
 | 
					 | 
				
			||||||
setting is changed, then only this setting is used.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
docspell.server.base-url = ...
 | 
					 | 
				
			||||||
docspell.joex.base-url = ...
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you are unsure, leave it at its default.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Examples
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
docspell.server.base-url = "https://docspell.example.com"
 | 
					 | 
				
			||||||
docspell.joex.base-url = "http://192.168.101.10"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## App-id
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `app-id` is the identifier of the corresponding instance. It *must
 | 
					 | 
				
			||||||
be unique* for all instances. By default the REST server uses `rest1`
 | 
					 | 
				
			||||||
and joex `joex1`. It is recommended to overwrite this setting to have
 | 
					 | 
				
			||||||
an explicit and stable identifier if multiple instances are
 | 
					 | 
				
			||||||
intended.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
docspell.server.app-id = "rest1"
 | 
					 | 
				
			||||||
docspell.joex.app-id = "joex1"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Registration Options
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This defines if and how new users can create accounts. There are 3
 | 
					 | 
				
			||||||
options:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- *closed* no new user can sign up
 | 
					 | 
				
			||||||
- *open* new users can sign up
 | 
					 | 
				
			||||||
- *invite* new users can sign up but require an invitation key
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This applies only to the REST server component.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
docspell.server.backend.signup {
 | 
					 | 
				
			||||||
  mode = "open"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  # If mode == 'invite', a password must be provided to generate
 | 
					 | 
				
			||||||
  # invitation keys. It must not be empty.
 | 
					 | 
				
			||||||
  new-invite-password = ""
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  # If mode == 'invite', this is the period an invitation token is
 | 
					 | 
				
			||||||
  # considered valid.
 | 
					 | 
				
			||||||
  invite-time = "3 days"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The mode `invite` is intended to open the application only to some
 | 
					 | 
				
			||||||
users. The admin can create these invitation keys and distribute them
 | 
					 | 
				
			||||||
to the desired people. For this, the `new-invite-password` must be
 | 
					 | 
				
			||||||
given. The idea is that only the person who installs docspell knows
 | 
					 | 
				
			||||||
this. If it is not set, then invitation won't work. New invitation
 | 
					 | 
				
			||||||
keys can be generated from within the web application or via REST
 | 
					 | 
				
			||||||
calls (using `curl`, for example).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Authentication
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Authentication works in two ways:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- with an account-name / password pair
 | 
					 | 
				
			||||||
- with an authentication token
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The initial authentication must occur with an accountname/password
 | 
					 | 
				
			||||||
pair. This will generate an authentication token which is valid for
 | 
					 | 
				
			||||||
some time. Subsequent calls to secured routes can use this token. The
 | 
					 | 
				
			||||||
token can be given as a normal http header or via a cookie header.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
These settings apply only to the REST server.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` bash
 | 
					 | 
				
			||||||
docspell.server.auth {
 | 
					 | 
				
			||||||
  server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
 | 
					 | 
				
			||||||
  session-valid = "5 minutes"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `server-secret` is used to sign the token. If multiple REST
 | 
					 | 
				
			||||||
servers are deployed, all must share the same server secret. Otherwise
 | 
					 | 
				
			||||||
tokens from one instance are not valid on another instance. The secret
 | 
					 | 
				
			||||||
can be given as Base64 encoded string or in hex form. Use the prefix
 | 
					 | 
				
			||||||
`hex:` and `b64:`, respectively. If no prefix is given, the UTF8 bytes
 | 
					 | 
				
			||||||
of the string are used.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `session-valid` determines how long a token is valid. This can be
 | 
					 | 
				
			||||||
just some minutes, the web application obtains new ones
 | 
					 | 
				
			||||||
periodically. So a rather short time is recommended.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## OpenID Connect / OAuth2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can integrate Docspell into your SSO solution via [OpenID
 | 
					 | 
				
			||||||
Connect](https://openid.net/connect/) (OIDC). This requires to set up
 | 
					 | 
				
			||||||
an OpenID Provider (OP) somewhere and to configure Docspell
 | 
					 | 
				
			||||||
accordingly to act as the relying party.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can define multiple OPs to use. For some examples, please see the
 | 
					 | 
				
			||||||
default configuration file [below](#rest-server).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The configuration of a provider highly depends on how it is setup.
 | 
					 | 
				
			||||||
Here is an example for a setup using
 | 
					 | 
				
			||||||
[keycloak](https://www.keycloak.org):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` conf
 | 
					 | 
				
			||||||
provider = {
 | 
					 | 
				
			||||||
  provider-id = "keycloak",
 | 
					 | 
				
			||||||
  client-id = "docspell",
 | 
					 | 
				
			||||||
  client-secret = "example-secret-439e-bf06-911e4cdd56a6",
 | 
					 | 
				
			||||||
  scope = "profile", # scope is required for OIDC
 | 
					 | 
				
			||||||
  authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
 | 
					 | 
				
			||||||
  token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
 | 
					 | 
				
			||||||
  #User URL is not used when signature key is set.
 | 
					 | 
				
			||||||
  #user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
 | 
					 | 
				
			||||||
  sign-key = "b64:MII…ZYL09vAwLn8EAcSkCAwEAAQ==",
 | 
					 | 
				
			||||||
  sig-algo = "RS512"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `provider-id` is some identifier that is used in the URL to
 | 
					 | 
				
			||||||
distinguish between possibly multiple providers. The `client-id` and
 | 
					 | 
				
			||||||
`client-secret` define the two parameters required for a "confidential
 | 
					 | 
				
			||||||
client". The different URLs are best explained at the [keycloak
 | 
					 | 
				
			||||||
docs](https://www.keycloak.org/docs/latest/server_admin/).
 | 
					 | 
				
			||||||
They are available for all OPs in some way. The `user-url` is not
 | 
					 | 
				
			||||||
required, if the access token is already containing the necessary
 | 
					 | 
				
			||||||
data. If not, then docspell performs another request to the
 | 
					 | 
				
			||||||
`user-url`, which must be the user-info endpoint, to obtain the
 | 
					 | 
				
			||||||
required user data.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If the data is taken from the token directly and not via a request to
 | 
					 | 
				
			||||||
the user-info endpoint, then the token must be validated using the
 | 
					 | 
				
			||||||
given `sign-key` and `sig-algo`. These two values are then required to
 | 
					 | 
				
			||||||
specify! However, if the user-info endpoint should be used, then leave
 | 
					 | 
				
			||||||
the `sign-key` empty and specify the correct url in `user-url`. When
 | 
					 | 
				
			||||||
specifying the `sign-key` use a prefix of `b64:` if it is Base64
 | 
					 | 
				
			||||||
encoded or `hex:` if it is hex encoded. Otherwise the unicode bytes
 | 
					 | 
				
			||||||
are used, which is most probably not wanted for this setting.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Once the user is authenticated, docspell tries to setup an account and
 | 
					 | 
				
			||||||
does some checks. For this it must get to the username and collective
 | 
					 | 
				
			||||||
name somehow. How it does this, can be specified by the `user-key` and
 | 
					 | 
				
			||||||
`collective-key` settings:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
``` conf
 | 
					 | 
				
			||||||
# The collective of the user is given in the access token as
 | 
					 | 
				
			||||||
# property `docspell_collective`.
 | 
					 | 
				
			||||||
collective-key = "lookup:docspell_collective",
 | 
					 | 
				
			||||||
# The username to use for the docspell account
 | 
					 | 
				
			||||||
user-key = "preferred_username"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `user-key` is some string that is used to search the JSON response
 | 
					 | 
				
			||||||
from the OP for an object with that key. The search happens
 | 
					 | 
				
			||||||
recursively, so the field can be in a nested object. The found value
 | 
					 | 
				
			||||||
is used as the user name. Keycloak transmits the `preferred_username`
 | 
					 | 
				
			||||||
when asking for the `profile` scope. This can be used as the user
 | 
					 | 
				
			||||||
name.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The collective name can be obtained in different ways. For example,
 | 
					 | 
				
			||||||
you can instruct your OP (like keycloak) to provide a collective name
 | 
					 | 
				
			||||||
in the token and/or user-info responses. If you do this, then use the
 | 
					 | 
				
			||||||
`lookup:` prefix as in the example above. This instructs docspell to
 | 
					 | 
				
			||||||
search for a value the same way as the `user-key`. You can also set a
 | 
					 | 
				
			||||||
fixed collective, using `fixed:` prefix; in this case all users are in
 | 
					 | 
				
			||||||
the same collective! A third option is to prefix it with `account:` -
 | 
					 | 
				
			||||||
then the value that is looked up is interpreted as the full account
 | 
					 | 
				
			||||||
name, like `collective/user` and the `user-key` setting is ignored. If
 | 
					 | 
				
			||||||
you want to put each user in its own collective, you can just use the
 | 
					 | 
				
			||||||
same value as in `user-key`, only prefixed with `lookup:`. In the
 | 
					 | 
				
			||||||
example it would be `lookup:preferred_username`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you find that these methods do not suffice for your case, please
 | 
					 | 
				
			||||||
open an issue.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## File Backends
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Docspell allows to choose from different storage backends for binary
 | 
					 | 
				
			||||||
files. You can choose between:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. *Database (the recommended default)*
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   The database can be used to store the files as well. It is the
 | 
					 | 
				
			||||||
   default. It doesn't require any other configuration and works well
 | 
					 | 
				
			||||||
   with multiple instances of restservers and joex nodes.
 | 
					 | 
				
			||||||
2. *S3*
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   The S3 backend allows to store files in an S3 compatible storage.
 | 
					 | 
				
			||||||
   It was tested with MinIO, which is possible to self host.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. *Filesystem*
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   The filesystem can also be used directly, by specifying a
 | 
					 | 
				
			||||||
   directory. Be aware that _all_ nodes must have read and write
 | 
					 | 
				
			||||||
   access into this directory! When running multiple nodes over a
 | 
					 | 
				
			||||||
   network, consider using one of the above instead. Docspell uses a
 | 
					 | 
				
			||||||
   fixed structure for storing the files below the given directory, it
 | 
					 | 
				
			||||||
   cannot be configured.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When using S3 or filesystem, remember to backup the database *and* the
 | 
					 | 
				
			||||||
files!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Note that Docspell not only stores the files that are uploaded, but
 | 
					 | 
				
			||||||
also some other files for internal use.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Configuring
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
{% warningbubble(title="Note") %}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Each node must have the same config for its file backend! When using
 | 
					 | 
				
			||||||
the filesystem, make sure all processes can access the directory with
 | 
					 | 
				
			||||||
read and write permissions.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
{% end %}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The file storage backend can be configured inside the `files` section
 | 
					 | 
				
			||||||
(see the default configs below):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```conf
 | 
					 | 
				
			||||||
files {
 | 
					 | 
				
			||||||
  …
 | 
					 | 
				
			||||||
  default-store = "database"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  stores = {
 | 
					 | 
				
			||||||
    database =
 | 
					 | 
				
			||||||
      { enabled = true
 | 
					 | 
				
			||||||
        type = "default-database"
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    filesystem =
 | 
					 | 
				
			||||||
      { enabled = false
 | 
					 | 
				
			||||||
        type = "file-system"
 | 
					 | 
				
			||||||
        directory = "/some/directory"
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    minio =
 | 
					 | 
				
			||||||
     { enabled = false
 | 
					 | 
				
			||||||
       type = "s3"
 | 
					 | 
				
			||||||
       endpoint = "http://localhost:9000"
 | 
					 | 
				
			||||||
       access-key = "username"
 | 
					 | 
				
			||||||
       secret-key = "password"
 | 
					 | 
				
			||||||
       bucket = "docspell"
 | 
					 | 
				
			||||||
     }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The `stores` object defines a set of stores and the `default-store`
 | 
					 | 
				
			||||||
selects the one that should be used. All disabled store configurations
 | 
					 | 
				
			||||||
are removed from the list. Thus the `default-store` must be enabled.
 | 
					 | 
				
			||||||
Other enabled stores can be used as the target when copying files (see
 | 
					 | 
				
			||||||
below).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
A store configuration requires an `enabled` and `type` property.
 | 
					 | 
				
			||||||
Depending on the `type` property, other properties are required, they
 | 
					 | 
				
			||||||
are presented above. The available storage types are
 | 
					 | 
				
			||||||
`default-database`, `file-system` and `s3`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you use the docker setup, you can find the corresponding
 | 
					 | 
				
			||||||
environment variables to the above config snippet
 | 
					 | 
				
			||||||
[below](#environment-variables).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Change Backends
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It is possible to change backends with a bit of manual effort. When
 | 
					 | 
				
			||||||
doing this, please make sure that the application is not used. It is
 | 
					 | 
				
			||||||
important that no file is uploaded during the following steps.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The [cli](@/docs/tools/cli.md) will be used, please set it up first
 | 
					 | 
				
			||||||
and you need to enable the [admin endpoint](#admin-endpoint). Config
 | 
					 | 
				
			||||||
changes mentioned here must be applied to all nodes - joex and
 | 
					 | 
				
			||||||
restserver!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. In the config, enable a second file backend (besides the default)
 | 
					 | 
				
			||||||
   you want to change to and start docspell as normal. Don't change
 | 
					 | 
				
			||||||
   `default-store` yet.
 | 
					 | 
				
			||||||
2. Run the file integrity check in order to see whether all files are
 | 
					 | 
				
			||||||
   ok as they are in the current store. This can be done using the
 | 
					 | 
				
			||||||
   [cli](@/docs/tools/cli.md) by running:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   ```bash
 | 
					 | 
				
			||||||
   dsc admin file-integrity-check
 | 
					 | 
				
			||||||
   ```
 | 
					 | 
				
			||||||
3. Run the copy files admin command which will copy all files from the
 | 
					 | 
				
			||||||
   current `default-store` to all other enabled stores.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   ```bash
 | 
					 | 
				
			||||||
   dsc admin clone-file-repository
 | 
					 | 
				
			||||||
   ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   And wait until it's done :-). You can see the progress in the jobs
 | 
					 | 
				
			||||||
   page when logged in as `docspell-system` or just look at the logs.
 | 
					 | 
				
			||||||
4. In the config, change the `default-store` to the one you just
 | 
					 | 
				
			||||||
   copied all the files to and restart docspell.
 | 
					 | 
				
			||||||
5. Login and do some smoke tests. Then run the file integrity check
 | 
					 | 
				
			||||||
   again:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   ```bash
 | 
					 | 
				
			||||||
   dsc admin file-integrity-check
 | 
					 | 
				
			||||||
   ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If all is fine, then you are done and are now using the new file
 | 
					 | 
				
			||||||
backend. If the second integrity check fails, please open an issue.
 | 
					 | 
				
			||||||
You need then to revert the config change of step 4 to use the
 | 
					 | 
				
			||||||
previous `default-store` again.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you want to delete the files from the database, you can do so by
 | 
					 | 
				
			||||||
running the following SQL against the database:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```sql
 | 
					 | 
				
			||||||
DELETE FROM filechunk
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You can copy them back into the database using the steps above.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## File Processing
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Files are being processed by the joex component. So all the respective
 | 
					 | 
				
			||||||
configuration is in this config only.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
File processing involves several stages, detailed information can be
 | 
					 | 
				
			||||||
found [here](@/docs/joex/file-processing.md#text-analysis) and in the
 | 
					 | 
				
			||||||
corresponding sections in [joex default config](#joex).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Configuration allows to define the external tools and set some
 | 
					 | 
				
			||||||
limitations to control memory usage. The sections are:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- `docspell.joex.extraction`
 | 
					 | 
				
			||||||
- `docspell.joex.text-analysis`
 | 
					 | 
				
			||||||
- `docspell.joex.convert`
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Options to external commands can use variables that are replaced by
 | 
					 | 
				
			||||||
values at runtime. Variables are enclosed in double braces `{{…}}`.
 | 
					 | 
				
			||||||
Please see the default configuration for what variables exist per
 | 
					 | 
				
			||||||
command.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Classification
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In `text-analysis.classification` you can define how many documents at
 | 
					 | 
				
			||||||
most should be used for learning. The default settings should work
 | 
					 | 
				
			||||||
well for most cases. However, it always depends on the amount of data
 | 
					 | 
				
			||||||
and the machine that runs joex. For example, by default the documents
 | 
					 | 
				
			||||||
to learn from are limited to 600 (`classification.item-count`) and
 | 
					 | 
				
			||||||
every text is cut after 5000 characters (`text-analysis.max-length`).
 | 
					 | 
				
			||||||
This is fine if *most* of your documents are small and only a few are
 | 
					 | 
				
			||||||
near 5000 characters. But if *all* your documents are very large, you
 | 
					 | 
				
			||||||
probably need to either assign more heap memory or go down with the
 | 
					 | 
				
			||||||
limits.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Classification can be disabled, too, for when it's not needed.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### NLP
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This setting defines which NLP mode to use. It defaults to `full`,
 | 
					 | 
				
			||||||
which requires more memory for certain languages (with the advantage
 | 
					 | 
				
			||||||
of better results). Other values are `basic`, `regexonly` and
 | 
					 | 
				
			||||||
`disabled`. The modes `full` and `basic` use pre-defined language
 | 
					 | 
				
			||||||
models for processing documents of languages German, English, French and
 | 
					 | 
				
			||||||
Spanish. These require some amount of memory (see below).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The mode `basic` is like the "light" variant to `full`. It doesn't use
 | 
					 | 
				
			||||||
all NLP features, which makes memory consumption much lower, but comes
 | 
					 | 
				
			||||||
with the compromise of less accurate results.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The mode `regexonly` doesn't use pre-defined language models, even if
 | 
					 | 
				
			||||||
available. It checks your address book against a document to find
 | 
					 | 
				
			||||||
metadata. That means, it is language independent. Also, when using
 | 
					 | 
				
			||||||
`full` or `basic` with languages where no pre-defined models exist, it
 | 
					 | 
				
			||||||
will degrade to `regexonly` for these.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The mode `disabled` skips NLP processing completely. This has the least
 | 
					 | 
				
			||||||
impact on memory consumption, obviously, but then only the classifier
 | 
					 | 
				
			||||||
is used to find metadata (unless it is disabled, too).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
You might want to try different modes and see what combination suits
 | 
					 | 
				
			||||||
best your usage pattern and machine running joex. If a powerful
 | 
					 | 
				
			||||||
machine is used, simply leave the defaults. When running on a
 | 
					 | 
				
			||||||
raspberry pi, for example, you might need to adjust things.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
### Memory Usage
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The memory requirements for the joex component depend on the document
 | 
					 | 
				
			||||||
language and the enabled features for text-analysis. The `nlp.mode`
 | 
					 | 
				
			||||||
setting has significant impact, especially when your documents are in
 | 
					 | 
				
			||||||
German. Here are some rough numbers on jvm heap usage (the same file
 | 
					 | 
				
			||||||
was used for all tries):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<table class="striped-basic">
 | 
					 | 
				
			||||||
<thead>
 | 
					 | 
				
			||||||
  <tr>
 | 
					 | 
				
			||||||
     <th>nlp.mode</th>
 | 
					 | 
				
			||||||
     <th>English</th>
 | 
					 | 
				
			||||||
     <th>German</th>
 | 
					 | 
				
			||||||
     <th>French</th>
 | 
					 | 
				
			||||||
 </tr>
 | 
					 | 
				
			||||||
</thead>
 | 
					 | 
				
			||||||
<tfoot>
 | 
					 | 
				
			||||||
</tfoot>
 | 
					 | 
				
			||||||
<tbody>
 | 
					 | 
				
			||||||
  <tr><td>full</td><td>420M</td><td>950M</td><td>490M</td></tr>
 | 
					 | 
				
			||||||
  <tr><td>basic</td><td>170M</td><td>380M</td><td>390M</td></tr>
 | 
					 | 
				
			||||||
</tbody>
 | 
					 | 
				
			||||||
</table>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Note that these are only rough numbers and they show the maximum used
 | 
					 | 
				
			||||||
heap memory while processing a file.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When using `mode=full`, a heap setting of at least `-Xmx1400M` is
 | 
					 | 
				
			||||||
recommended. For `mode=basic` a heap setting of at least `-Xmx500M` is
 | 
					 | 
				
			||||||
recommended.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Other languages can't use these two modes, and so don't require this
 | 
					 | 
				
			||||||
amount of memory (but don't have as good results). Then you can go
 | 
					 | 
				
			||||||
with less heap. For these languages, the nlp mode is the same as
 | 
					 | 
				
			||||||
`regexonly`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Training the classifier is also memory intensive, which solely depends
 | 
					 | 
				
			||||||
on the size and number of documents that are being trained. However,
 | 
					 | 
				
			||||||
training the classifier is done periodically and can happen maybe
 | 
					 | 
				
			||||||
every two weeks. When classifying new documents, memory requirements
 | 
					 | 
				
			||||||
are lower, since the model already exists.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
More details about these modes can be found
 | 
					 | 
				
			||||||
[here](@/docs/joex/file-processing.md#text-analysis).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The restserver component is very lightweight, here you can use
 | 
					 | 
				
			||||||
defaults.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# JVM Options
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The start scripts support some options to configure the JVM. One often
 | 
					 | 
				
			||||||
used setting is the maximum heap size of the JVM. By default, java
 | 
					 | 
				
			||||||
determines it based on properties of the current machine. You can
 | 
					 | 
				
			||||||
specify it by giving java startup options to the command:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This would limit the maximum heap to 1GB. The double dash separates
 | 
					 | 
				
			||||||
internal options and the arguments to the program. Another frequently
 | 
					 | 
				
			||||||
used option is to change the default temp directory. Usually it is
 | 
					 | 
				
			||||||
`/tmp`, but it may be desired to have a dedicated temp directory,
 | 
					 | 
				
			||||||
which can be configured:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The command:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
$ ./docspell-restserver*/bin/docspell-restserver -h
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gives an overview of supported options.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It is recommended to run joex with the G1GC enabled. If you use java8,
 | 
					 | 
				
			||||||
you need to add an option to use G1GC (`-XX:+UseG1GC`), for java11
 | 
					 | 
				
			||||||
this is not necessary (but doesn't hurt either). This could look like
 | 
					 | 
				
			||||||
this:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
./docspell-joex-{{version()}}/bin/docspell-joex -J-Xmx1596M -J-XX:+UseG1GC -- /path/to/joex.conf
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Using these options you can define how much memory the JVM process is
 | 
					 | 
				
			||||||
able to use. This might be necessary to adapt depending on the usage
 | 
					 | 
				
			||||||
scenario and configured text analysis features.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Please have a look at the corresponding [section](@/docs/configure/_index.md#memory-usage).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Logging
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
By default, docspell logs to stdout. This works well, when managed by
 | 
					 | 
				
			||||||
systemd or other inits. Logging can be configured in the configuration
 | 
					 | 
				
			||||||
file or via environment variables. There are only two settings:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- `minimum-level` specifies the log level to control the verbosity.
 | 
					 | 
				
			||||||
  Levels are ordered from: *Trace*, *Debug*, *Info*, *Warn* and
 | 
					 | 
				
			||||||
  *Error*
 | 
					 | 
				
			||||||
- `format` this defines how the logs are formatted. There are two
 | 
					 | 
				
			||||||
  formats for humans: *Plain* and *Fancy*. And two more suited for
 | 
					 | 
				
			||||||
  machine consumption: *Json* and *Logfmt*. The *Json* format contains
 | 
					 | 
				
			||||||
  all details, while the others may omit some for readability
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
These settings are the same for joex and the restserver component.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Default Config
 | 
					 | 
				
			||||||
## Rest Server
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
{{ incl_conf(path="templates/shortcodes/server.conf") }}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Joex
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
{{ incl_conf(path="templates/shortcodes/joex.conf") }}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Environment Variables
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Environment variables can be used when there is no config file
 | 
					 | 
				
			||||||
supplied. The listing below shows all possible variables and their
 | 
					 | 
				
			||||||
default values.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
{{ incl_conf(path="templates/shortcodes/config.env.txt") }}
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										39
									
								
								website/site/content/docs/configure/admin-endpoint.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								website/site/content/docs/configure/admin-endpoint.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,39 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Admin Endpoint"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes how to enable and use the admin endpoint."
 | 
				
			||||||
 | 
					weight = 60
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Admin Endpoint
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The admin endpoint defines some [routes](@/docs/api/intro.md#admin)
 | 
				
			||||||
 | 
					for administration tasks. This is disabled by default and can be
 | 
				
			||||||
 | 
					enabled by providing a secret:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					...
 | 
				
			||||||
 | 
					  admin-endpoint {
 | 
				
			||||||
 | 
					    secret = "123"
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This secret must be provided to all requests to a `/api/v1/admin/`
 | 
				
			||||||
 | 
					endpoint.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The most convenient way to execute admin tasks is to use the
 | 
				
			||||||
 | 
					[cli](@/docs/tools/cli.md). You get a list of possible admin commands
 | 
				
			||||||
 | 
					via `dsc admin help`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					To see the output of the commands, there are these ways:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. looking at the joex logs, which gives most details.
 | 
				
			||||||
 | 
					2. Use the job-queue page when logged in as `docspell-system`
 | 
				
			||||||
 | 
					3. setup a [webhook](@/docs/webapp/notification.md) to be notified
 | 
				
			||||||
 | 
					   when a job finishes. This way you get a small message.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					All admin tasks (and also some other system tasks) are run under the
 | 
				
			||||||
 | 
					account `docspell-system` (collective and user). You need to create
 | 
				
			||||||
 | 
					this account and setup the notification hooks in there - not in your
 | 
				
			||||||
 | 
					normal account.
 | 
				
			||||||
							
								
								
									
										124
									
								
								website/site/content/docs/configure/authentication.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								website/site/content/docs/configure/authentication.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,124 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Authentication"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes how to configure authentication and OpenID Connect / OAuth2."
 | 
				
			||||||
 | 
					weight = 70
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Authentication
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Authentication works in two ways:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- with an account-name / password pair
 | 
				
			||||||
 | 
					- with an authentication token
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The initial authentication must occur with an accountname/password
 | 
				
			||||||
 | 
					pair. This will generate an authentication token which is valid for
 | 
				
			||||||
 | 
					some time. Subsequent calls to secured routes can use this token. The
 | 
				
			||||||
 | 
					token can be given as a normal http header or via a cookie header.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					These settings apply only to the REST server.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					docspell.server.auth {
 | 
				
			||||||
 | 
					  server-secret = "hex:caffee" # or "b64:Y2FmZmVlCg=="
 | 
				
			||||||
 | 
					  session-valid = "5 minutes"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `server-secret` is used to sign the token. If multiple REST
 | 
				
			||||||
 | 
					servers are deployed, all must share the same server secret. Otherwise
 | 
				
			||||||
 | 
					tokens from one instance are not valid on another instance. The secret
 | 
				
			||||||
 | 
					can be given as Base64 encoded string or in hex form. Use the prefix
 | 
				
			||||||
 | 
					`hex:` and `b64:`, respectively. If no prefix is given, the UTF8 bytes
 | 
				
			||||||
 | 
					of the string are used.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `session-valid` determines how long a token is valid. This can be
 | 
				
			||||||
 | 
					just some minutes, the web application obtains new ones
 | 
				
			||||||
 | 
					periodically. So a rather short time is recommended.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## OpenID Connect / OAuth2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can integrate Docspell into your SSO solution via [OpenID
 | 
				
			||||||
 | 
					Connect](https://openid.net/connect/) (OIDC). This requires to set up
 | 
				
			||||||
 | 
					an OpenID Provider (OP) somewhere and to configure Docspell
 | 
				
			||||||
 | 
					accordingly to act as the relying party.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can define multiple OPs to use. For some examples, please see the
 | 
				
			||||||
 | 
					[default configuration](@/docs/configure/main.md#default-config).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The configuration of a provider highly depends on how it is setup.
 | 
				
			||||||
 | 
					Here is an example for a setup using
 | 
				
			||||||
 | 
					[keycloak](https://www.keycloak.org):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` conf
 | 
				
			||||||
 | 
					provider = {
 | 
				
			||||||
 | 
					  provider-id = "keycloak",
 | 
				
			||||||
 | 
					  client-id = "docspell",
 | 
				
			||||||
 | 
					  client-secret = "example-secret-439e-bf06-911e4cdd56a6",
 | 
				
			||||||
 | 
					  scope = "profile", # scope is required for OIDC
 | 
				
			||||||
 | 
					  authorize-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/auth",
 | 
				
			||||||
 | 
					  token-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/token",
 | 
				
			||||||
 | 
					  #User URL is not used when signature key is set.
 | 
				
			||||||
 | 
					  #user-url = "http://localhost:8080/auth/realms/home/protocol/openid-connect/userinfo",
 | 
				
			||||||
 | 
					  sign-key = "b64:MII…ZYL09vAwLn8EAcSkCAwEAAQ==",
 | 
				
			||||||
 | 
					  sig-algo = "RS512"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `provider-id` is some identifier that is used in the URL to
 | 
				
			||||||
 | 
					distinguish between possibly multiple providers. The `client-id` and
 | 
				
			||||||
 | 
					`client-secret` define the two parameters required for a "confidential
 | 
				
			||||||
 | 
					client". The different URLs are best explained at the [keycloak
 | 
				
			||||||
 | 
					docs](https://www.keycloak.org/docs/latest/server_admin/).
 | 
				
			||||||
 | 
					They are available for all OPs in some way. The `user-url` is not
 | 
				
			||||||
 | 
					required, if the access token is already containing the necessary
 | 
				
			||||||
 | 
					data. If not, then docspell performs another request to the
 | 
				
			||||||
 | 
					`user-url`, which must be the user-info endpoint, to obtain the
 | 
				
			||||||
 | 
					required user data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the data is taken from the token directly and not via a request to
 | 
				
			||||||
 | 
					the user-info endpoint, then the token must be validated using the
 | 
				
			||||||
 | 
					given `sign-key` and `sig-algo`. These two values are then required to
 | 
				
			||||||
 | 
					specify! However, if the user-info endpoint should be used, then leave
 | 
				
			||||||
 | 
					the `sign-key` empty and specify the correct url in `user-url`. When
 | 
				
			||||||
 | 
					specifying the `sign-key` use a prefix of `b64:` if it is Base64
 | 
				
			||||||
 | 
					encoded or `hex:` if it is hex encoded. Otherwise the unicode bytes
 | 
				
			||||||
 | 
					are used, which is most probably not wanted for this setting.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Once the user is authenticated, docspell tries to setup an account and
 | 
				
			||||||
 | 
					does some checks. For this it must get to the username and collective
 | 
				
			||||||
 | 
					name somehow. How it does this, can be specified by the `user-key` and
 | 
				
			||||||
 | 
					`collective-key` settings:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` conf
 | 
				
			||||||
 | 
					# The collective of the user is given in the access token as
 | 
				
			||||||
 | 
					# property `docspell_collective`.
 | 
				
			||||||
 | 
					collective-key = "lookup:docspell_collective",
 | 
				
			||||||
 | 
					# The username to use for the docspell account
 | 
				
			||||||
 | 
					user-key = "preferred_username"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `user-key` is some string that is used to search the JSON response
 | 
				
			||||||
 | 
					from the OP for an object with that key. The search happens
 | 
				
			||||||
 | 
					recursively, so the field can be in a nested object. The found value
 | 
				
			||||||
 | 
					is used as the user name. Keycloak transmits the `preferred_username`
 | 
				
			||||||
 | 
					when asking for the `profile` scope. This can be used as the user
 | 
				
			||||||
 | 
					name.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The collective name can be obtained by different ways. For example,
 | 
				
			||||||
 | 
					you can instruct your OP (like keycloak) to provide a collective name
 | 
				
			||||||
 | 
					in the token and/or user-info responses. If you do this, then use the
 | 
				
			||||||
 | 
					`lookup:` prefix as in the example above. This instructs docspell to
 | 
				
			||||||
 | 
					search for a value the same way as the `user-key`. You can also set a
 | 
				
			||||||
 | 
					fixed collective, using `fixed:` prefix; in this case all users are in
 | 
				
			||||||
 | 
					the same collective! A third option is to prefix it with `account:` -
 | 
				
			||||||
 | 
					then the value that is looked up is interpreted as the full account
 | 
				
			||||||
 | 
					name, like `collective/user` and the `user-key` setting is ignored. If
 | 
				
			||||||
 | 
					you want to put each user in its own collective, you can just use the
 | 
				
			||||||
 | 
					same value as in `user-key`, only prefixed with `lookup:`. In the
 | 
				
			||||||
 | 
					example it would be `lookup:preferred_username`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you find that these methods do not suffice for your case, please
 | 
				
			||||||
 | 
					open an issue.
 | 
				
			||||||
							
								
								
									
										38
									
								
								website/site/content/docs/configure/baseurl.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								website/site/content/docs/configure/baseurl.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Base URL"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes the configuration file and shows all default settings."
 | 
				
			||||||
 | 
					weight = 90
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Baseurl
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The base url is an important setting that defines the http URL where
 | 
				
			||||||
 | 
					the corresponding component can be reached. It applies to both
 | 
				
			||||||
 | 
					components. For a joex component, the url must be resolvable from a
 | 
				
			||||||
 | 
					REST server component. The REST server also uses this url to create
 | 
				
			||||||
 | 
					absolute urls and to configure the authentication cookie.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By default it is built using the information from the `bind` setting,
 | 
				
			||||||
 | 
					which is `http://localhost:7880`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the default is not changed, docspell will use the request to
 | 
				
			||||||
 | 
					determine the base-url. It first inspects the `X-Forwarded-For` header
 | 
				
			||||||
 | 
					that is often used with reverse proxies. If that is not present, the
 | 
				
			||||||
 | 
					`Host` header of the request is used. However, if the `base-url`
 | 
				
			||||||
 | 
					setting is changed, then only this setting is used.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					docspell.server.base-url = ...
 | 
				
			||||||
 | 
					docspell.joex.base-url = ...
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you are unsure, leave it at its default.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Examples
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					docspell.server.base-url = "https://docspell.example.com"
 | 
				
			||||||
 | 
					docspell.joex.base-url = "http://192.168.101.10"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
							
								
								
									
										27
									
								
								website/site/content/docs/configure/bind.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								website/site/content/docs/configure/bind.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,27 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Bind"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes the configuration file and shows all default settings."
 | 
				
			||||||
 | 
					weight = 12
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Bind
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The host and port the http server binds to. This applies to both
 | 
				
			||||||
 | 
					components. The joex component also exposes a small REST api to
 | 
				
			||||||
 | 
					inspect its state and notify the scheduler.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					docspell.server.bind {
 | 
				
			||||||
 | 
					  address = localhost
 | 
				
			||||||
 | 
					  port = 7880
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					docspell.joex.bind {
 | 
				
			||||||
 | 
					  address = localhost
 | 
				
			||||||
 | 
					  port = 7878
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By default, it binds to `localhost` and some predefined port. This
 | 
				
			||||||
 | 
					must be changed, if components are on different machines.
 | 
				
			||||||
							
								
								
									
										71
									
								
								website/site/content/docs/configure/database.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								website/site/content/docs/configure/database.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,71 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Database"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Details about configuring the database."
 | 
				
			||||||
 | 
					weight = 20
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Database
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The database holds by default all the data and must be configured
 | 
				
			||||||
 | 
					exactly the same on all nodes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The following are supported DBs:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- PostgreSQL (recommended)
 | 
				
			||||||
 | 
					- MariaDB
 | 
				
			||||||
 | 
					- H2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This has to be specified for the rest server and joex. By default, a
 | 
				
			||||||
 | 
					H2 database in the current `/tmp` directory is configured.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Options
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The config looks like this (both components):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					docspell.joex.jdbc {
 | 
				
			||||||
 | 
					  url = ...
 | 
				
			||||||
 | 
					  user = ...
 | 
				
			||||||
 | 
					  password = ...
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					docspell.server.backend.jdbc {
 | 
				
			||||||
 | 
					  url = ...
 | 
				
			||||||
 | 
					  user = ...
 | 
				
			||||||
 | 
					  password = ...
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `url` is the connection to the database. It must start with
 | 
				
			||||||
 | 
					`jdbc`, followed by the name of the database. The rest is specific to the
 | 
				
			||||||
 | 
					database used: it is either a path to a file for H2 or a host/database
 | 
				
			||||||
 | 
					url for MariaDB and PostgreSQL.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When using H2, the user and password can be chosen freely on first
 | 
				
			||||||
 | 
					start, but must stay the same on subsequent starts. Usually, the user
 | 
				
			||||||
 | 
					is `sa` and the password is left empty. Additionally, the url must
 | 
				
			||||||
 | 
					include these options:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Examples
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PostgreSQL:
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					url = "jdbc:postgresql://localhost:5432/docspelldb"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MariaDB:
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					url = "jdbc:mariadb://localhost:3306/docspelldb"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					H2
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
							
								
								
									
										147
									
								
								website/site/content/docs/configure/file-backends.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								website/site/content/docs/configure/file-backends.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,147 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "File Backends"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes the configuration file and shows all default settings."
 | 
				
			||||||
 | 
					weight = 30
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## File Backends
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Docspell allows to choose from different storage backends for binary
 | 
				
			||||||
 | 
					files. You can choose between:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. *Database (the recommended default)*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   The database can be used to store the files as well. It is the
 | 
				
			||||||
 | 
					   default. It doesn't require any other configuration and works well
 | 
				
			||||||
 | 
					   with multiple instances of restservers and joex nodes.
 | 
				
			||||||
 | 
					2. *S3*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   The S3 backend allows to store files in an S3 compatible storage.
 | 
				
			||||||
 | 
					   It was tested with MinIO, which is possible to self host.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. *Filesystem*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   The filesystem can also be used directly, by specifying a
 | 
				
			||||||
 | 
					   directory. Be aware that _all_ nodes must have read and write
 | 
				
			||||||
 | 
					   access into this directory! When running multiple nodes over a
 | 
				
			||||||
 | 
					   network, consider using one of the above instead. Docspell uses a
 | 
				
			||||||
 | 
					   fixed structure for storing the files below the given directory, it
 | 
				
			||||||
 | 
					   cannot be configured.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When using S3 or filesystem, remember to backup the database *and* the
 | 
				
			||||||
 | 
					files!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note that Docspell not only stores the files that are uploaded, but
 | 
				
			||||||
 | 
					also some other files for internal use.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Configuring
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{% warningbubble(title="Note") %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Each node must have the same config for its file backend! When using
 | 
				
			||||||
 | 
					the filesystem, make sure all processes can access the directory with
 | 
				
			||||||
 | 
					read and write permissions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{% end %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The file storage backend can be configured inside the `files` section
 | 
				
			||||||
 | 
					(see the [default configs](@/docs/configure/main.md#default-config)):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```conf
 | 
				
			||||||
 | 
					files {
 | 
				
			||||||
 | 
					  …
 | 
				
			||||||
 | 
					  default-store = "database"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  stores = {
 | 
				
			||||||
 | 
					    database =
 | 
				
			||||||
 | 
					      { enabled = true
 | 
				
			||||||
 | 
					        type = "default-database"
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    filesystem =
 | 
				
			||||||
 | 
					      { enabled = false
 | 
				
			||||||
 | 
					        type = "file-system"
 | 
				
			||||||
 | 
					        directory = "/some/directory"
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    minio =
 | 
				
			||||||
 | 
					     { enabled = false
 | 
				
			||||||
 | 
					       type = "s3"
 | 
				
			||||||
 | 
					       endpoint = "http://localhost:9000"
 | 
				
			||||||
 | 
					       access-key = "username"
 | 
				
			||||||
 | 
					       secret-key = "password"
 | 
				
			||||||
 | 
					       bucket = "docspell"
 | 
				
			||||||
 | 
					     }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `stores` object defines a set of stores and the `default-store`
 | 
				
			||||||
 | 
					selects the one that should be used. All disabled store configurations
 | 
				
			||||||
 | 
					are removed from the list. Thus the `default-store` must be enabled.
 | 
				
			||||||
 | 
					Other enabled stores can be used as the target when copying files (see
 | 
				
			||||||
 | 
					below).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A store configuration requires an `enabled` and `type` property.
 | 
				
			||||||
 | 
					Depending on the `type` property, other properties are required, they
 | 
				
			||||||
 | 
					are presented above. The available storage types are
 | 
				
			||||||
 | 
					`default-database`, `file-system` and `s3`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you use the docker setup, you can find the corresponding
 | 
				
			||||||
 | 
					environment variables to the above config snippet
 | 
				
			||||||
 | 
					[below](#environment-variables).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Change Backends
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It is possible to change backends with a bit of manual effort. When
 | 
				
			||||||
 | 
					doing this, please make sure that the application is not used. It is
 | 
				
			||||||
 | 
					important that no file is uploaded during the following steps.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The [cli](@/docs/tools/cli.md) will be used, please set it up first
 | 
				
			||||||
 | 
					and you need to enable the [admin endpoint](#admin-endpoint). Config
 | 
				
			||||||
 | 
					changes mentioned here must be applied to all nodes - joex and
 | 
				
			||||||
 | 
					restserver!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. In the config, enable a second file backend (besides the default)
 | 
				
			||||||
 | 
					   you want to change to and start docspell as normal. Don't change
 | 
				
			||||||
 | 
					   `default-store` yet.
 | 
				
			||||||
 | 
					2. Run the file integrity check in order to see whether all files are
 | 
				
			||||||
 | 
					   ok as they are in the current store. This can be done using the
 | 
				
			||||||
 | 
					   [cli](@/docs/tools/cli.md) by running:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ```bash
 | 
				
			||||||
 | 
					   dsc admin file-integrity-check
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					3. Run the copy files admin command which will copy all files from the
 | 
				
			||||||
 | 
					   current `default-store` to all other enabled stores.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ```bash
 | 
				
			||||||
 | 
					   dsc admin clone-file-repository
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   And wait until it's done :-). You can see the progress in the jobs
 | 
				
			||||||
 | 
					   page when logged in as `docspell-system` or just look at the logs.
 | 
				
			||||||
 | 
					4. In the config, change the `default-store` to the one you just
 | 
				
			||||||
 | 
					   copied all the files to and restart docspell.
 | 
				
			||||||
 | 
					5. Login and do some smoke tests. Then run the file integrity check
 | 
				
			||||||
 | 
					   again:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ```bash
 | 
				
			||||||
 | 
					   dsc admin file-integrity-check
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If all is fine, then you are done and are now using the new file
 | 
				
			||||||
 | 
					backend. If the second integrity check fails, please open an issue.
 | 
				
			||||||
 | 
					You need then to revert the config change of step 4 to use the
 | 
				
			||||||
 | 
					previous `default-store` again.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you want to delete the files from the database, you can do so by
 | 
				
			||||||
 | 
					running the following SQL against the database:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```sql
 | 
				
			||||||
 | 
					DELETE FROM filechunk
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can copy them back into the database using the steps above.
 | 
				
			||||||
							
								
								
									
										122
									
								
								website/site/content/docs/configure/file-processing.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								website/site/content/docs/configure/file-processing.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,122 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "File Processing"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes the configuration file and shows all default settings."
 | 
				
			||||||
 | 
					weight = 40
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## File Processing
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Files are being processed by the joex component. So all the respective
 | 
				
			||||||
 | 
					configuration is in this config only.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					File processing involves several stages, detailed information can be
 | 
				
			||||||
 | 
					found [here](@/docs/joex/file-processing.md#text-analysis) and in the
 | 
				
			||||||
 | 
					corresponding sections in [joex default
 | 
				
			||||||
 | 
					config](@/docs/configure/main.md#joex).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Configuration allows to define the external tools and set some
 | 
				
			||||||
 | 
					limitations to control memory usage. The sections are:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- `docspell.joex.extraction`
 | 
				
			||||||
 | 
					- `docspell.joex.text-analysis`
 | 
				
			||||||
 | 
					- `docspell.joex.convert`
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Options to external commands can use variables that are replaced by
 | 
				
			||||||
 | 
					values at runtime. Variables are enclosed in double braces `{{…}}`.
 | 
				
			||||||
 | 
					Please see the default configuration for what variables exist per
 | 
				
			||||||
 | 
					command.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Classification
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In `text-analysis.classification` you can define how many documents at
 | 
				
			||||||
 | 
					most should be used for learning. The default settings should work
 | 
				
			||||||
 | 
					well for most cases. However, it always depends on the amount of data
 | 
				
			||||||
 | 
					and the machine that runs joex. For example, by default the documents
 | 
				
			||||||
 | 
					to learn from are limited to 600 (`classification.item-count`) and
 | 
				
			||||||
 | 
					every text is cut after 5000 characters (`text-analysis.max-length`).
 | 
				
			||||||
 | 
					This is fine if *most* of your documents are small and only a few are
 | 
				
			||||||
 | 
					near 5000 characters. But if *all* your documents are very large, you
 | 
				
			||||||
 | 
					probably need to either assign more heap memory or go down with the
 | 
				
			||||||
 | 
					limits.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Classification can be disabled, too, for when it's not needed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### NLP
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This setting defines which NLP mode to use. It defaults to `full`,
 | 
				
			||||||
 | 
					which requires more memory for certain languages (with the advantage
 | 
				
			||||||
 | 
					of better results). Other values are `basic`, `regexonly` and
 | 
				
			||||||
 | 
					`disabled`. The modes `full` and `basic` use pre-defined language
 | 
				
			||||||
 | 
					models for processing documents of languages German, English, French and
 | 
				
			||||||
 | 
					Spanish. These require some amount of memory (see below).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The mode `basic` is like the "light" variant to `full`. It doesn't use
 | 
				
			||||||
 | 
					all NLP features, which makes memory consumption much lower, but comes
 | 
				
			||||||
 | 
					with the compromise of less accurate results.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The mode `regexonly` doesn't use pre-defined language models, even if
 | 
				
			||||||
 | 
					available. It checks your address book against a document to find
 | 
				
			||||||
 | 
					metadata. That means, it is language independent. Also, when using
 | 
				
			||||||
 | 
					`full` or `basic` with languages where no pre-defined models exist, it
 | 
				
			||||||
 | 
					will degrade to `regexonly` for these.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The mode `disabled` skips NLP processing completely. This has least
 | 
				
			||||||
 | 
					impact in memory consumption, obviously, but then only the classifier
 | 
				
			||||||
 | 
					is used to find metadata (unless it is disabled, too).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You might want to try different modes and see what combination suits
 | 
				
			||||||
 | 
					best your usage pattern and machine running joex. If a powerful
 | 
				
			||||||
 | 
					machine is used, simply leave the defaults. When running on an
 | 
				
			||||||
 | 
					raspberry pi, for example, you might need to adjust things.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Memory Usage
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The memory requirements for the joex component depend on the document
 | 
				
			||||||
 | 
					language and the enabled features for text-analysis. The `nlp.mode`
 | 
				
			||||||
 | 
					setting has significant impact, especially when your documents are in
 | 
				
			||||||
 | 
					German. Here are some rough numbers on jvm heap usage (the same file
 | 
				
			||||||
 | 
					was used for all tries):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					<table class="striped-basic">
 | 
				
			||||||
 | 
					<thead>
 | 
				
			||||||
 | 
					  <tr>
 | 
				
			||||||
 | 
					     <th>nlp.mode</th>
 | 
				
			||||||
 | 
					     <th>English</th>
 | 
				
			||||||
 | 
					     <th>German</th>
 | 
				
			||||||
 | 
					     <th>French</th>
 | 
				
			||||||
 | 
					 </tr>
 | 
				
			||||||
 | 
					</thead>
 | 
				
			||||||
 | 
					<tfoot>
 | 
				
			||||||
 | 
					</tfoot>
 | 
				
			||||||
 | 
					<tbody>
 | 
				
			||||||
 | 
					  <tr><td>full</td><td>420M</td><td>950M</td><td>490M</td></tr>
 | 
				
			||||||
 | 
					  <tr><td>basic</td><td>170M</td><td>380M</td><td>390M</td></tr>
 | 
				
			||||||
 | 
					</tbody>
 | 
				
			||||||
 | 
					</table>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note that these are only rough numbers and they show the maximum used
 | 
				
			||||||
 | 
					heap memory while processing a file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When using `mode=full`, a heap setting of at least `-Xmx1400M` is
 | 
				
			||||||
 | 
					recommended. For `mode=basic` a heap setting of at least `-Xmx500M` is
 | 
				
			||||||
 | 
					recommended.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Other languages can't use these two modes, and so don't require this
 | 
				
			||||||
 | 
					amount of memory (but don't have as good results). Then you can go
 | 
				
			||||||
 | 
					with less heap. For these languages, the nlp mode is the same as
 | 
				
			||||||
 | 
					`regexonly`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Training the classifier is also memory intensive, which solely depends
 | 
				
			||||||
 | 
					on the size and number of documents that are being trained. However,
 | 
				
			||||||
 | 
					training the classifier is done periodically and can happen maybe
 | 
				
			||||||
 | 
					every two weeks. When classifying new documents, memory requirements
 | 
				
			||||||
 | 
					are lower, since the model already exists.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					More details about these modes can be found
 | 
				
			||||||
 | 
					[here](@/docs/joex/file-processing.md#text-analysis).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The restserver component is very lightweight, here you can use
 | 
				
			||||||
 | 
					defaults.
 | 
				
			||||||
							
								
								
									
										176
									
								
								website/site/content/docs/configure/fulltext-search.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										176
									
								
								website/site/content/docs/configure/fulltext-search.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,176 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Full-Text Search"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Details about configuring the fulltext search."
 | 
				
			||||||
 | 
					weight = 50
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Full-Text Search
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Fulltext search is optional and provided by external systems. There
 | 
				
			||||||
 | 
					are currently [Apache SOLR](https://solr.apache.org) and [PostgreSQL's
 | 
				
			||||||
 | 
					text search](https://www.postgresql.org/docs/14/textsearch.html)
 | 
				
			||||||
 | 
					available.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can enable and configure the fulltext search backends as described
 | 
				
			||||||
 | 
					below and then choose the backend:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```conf
 | 
				
			||||||
 | 
					full-text-search {
 | 
				
			||||||
 | 
					  enabled = true
 | 
				
			||||||
 | 
					  # Which backend to use, either solr or postgresql
 | 
				
			||||||
 | 
					  backend = "solr"
 | 
				
			||||||
 | 
					  …
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					All docspell components must provide the same fulltext search
 | 
				
			||||||
 | 
					configuration.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## SOLR
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[Apache SOLR](https://solr.apache.org) can be used to provide the
 | 
				
			||||||
 | 
					full-text search. This is defined in the `full-text-search.solr`
 | 
				
			||||||
 | 
					subsection:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					...
 | 
				
			||||||
 | 
					  full-text-search {
 | 
				
			||||||
 | 
					    ...
 | 
				
			||||||
 | 
					    solr = {
 | 
				
			||||||
 | 
					      url = "http://localhost:8983/solr/docspell"
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The default configuration at the end of this page contains more
 | 
				
			||||||
 | 
					information about each setting.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `solr.url` is the mandatory setting that you need to change to
 | 
				
			||||||
 | 
					point to your SOLR instance. Then you need to set the `enabled` flag
 | 
				
			||||||
 | 
					to `true`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When installing docspell manually, just install solr and create a core
 | 
				
			||||||
 | 
					as described in the [solr
 | 
				
			||||||
 | 
					documentation](https://solr.apache.org/guide/8_4/installing-solr.html).
 | 
				
			||||||
 | 
					That will provide you with the connection url (the last part is the
 | 
				
			||||||
 | 
					core name). If Docspell detects an empty core it will run a schema
 | 
				
			||||||
 | 
					setup on start automatically.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `full-text-search.solr` options are the same for joex and the
 | 
				
			||||||
 | 
					restserver.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Sometimes it is necessary to re-create the entire index, for example
 | 
				
			||||||
 | 
					if you upgrade SOLR or delete the core to provide a new one (see
 | 
				
			||||||
 | 
					[here](https://solr.apache.org/guide/8_4/reindexing.html) for
 | 
				
			||||||
 | 
					details). Another way is to restart docspell (while clearing the
 | 
				
			||||||
 | 
					index). If docspell detects an empty index at startup, it will submit
 | 
				
			||||||
 | 
					a task to build the index automatically.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Note that a collective can also re-index their data using a similar
 | 
				
			||||||
 | 
					endpoint; but this is only deleting their data and doesn't do a full
 | 
				
			||||||
 | 
					re-index.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The solr index doesn't contain any new information, it can be
 | 
				
			||||||
 | 
					regenerated any time using the REST call described below. Thus it doesn't need
 | 
				
			||||||
 | 
					to be backed up.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## PostgreSQL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PostgreSQL provides many additional features, one of them is [text
 | 
				
			||||||
 | 
					search](https://www.postgresql.org/docs/14/textsearch.html). Docspell
 | 
				
			||||||
 | 
					can utilize this to provide the fulltext search feature. This is
 | 
				
			||||||
 | 
					especially useful, if PostgreSQL is used as the primary database for
 | 
				
			||||||
 | 
					docspell.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can choose to use the same database or a separate connection. The
 | 
				
			||||||
 | 
					fulltext search will create a single table `ftspsql_search` that holds
 | 
				
			||||||
 | 
					all necessary data. When doing backups, you can exclude this table as
 | 
				
			||||||
 | 
					it can be recreated from the primary data any time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The configuration is placed inside `full-text-search`:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```conf
 | 
				
			||||||
 | 
					full-text-search {
 | 
				
			||||||
 | 
					  …
 | 
				
			||||||
 | 
					  postgresql = {
 | 
				
			||||||
 | 
					    use-default-connection = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    jdbc {
 | 
				
			||||||
 | 
					      url = "jdbc:postgresql://server:5432/db"
 | 
				
			||||||
 | 
					      user = "pguser"
 | 
				
			||||||
 | 
					      password = ""
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pg-config = {
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    pg-query-parser = "websearch_to_tsquery"
 | 
				
			||||||
 | 
					    pg-rank-normalization = [ 4 ]
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The flag `use-default-connection` can be set to `true` if you use
 | 
				
			||||||
 | 
					PostgreSQL as the primary db to have it also used for the fulltext
 | 
				
			||||||
 | 
					search. If set to `false`, the subsequent `jdbc` block defines the
 | 
				
			||||||
 | 
					connection to the postgres database to use.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					What follows are some settings to tune PostgreSQL's text search feature.
 | 
				
			||||||
 | 
					Please visit [their
 | 
				
			||||||
 | 
					documentation](https://www.postgresql.org/docs/14/textsearch.html) for
 | 
				
			||||||
 | 
					all the details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- `pg-config`: this is an optional mapping from document languages as
 | 
				
			||||||
 | 
					  used in Docspell to a PostgreSQL text search configuration. Not all
 | 
				
			||||||
 | 
					  languages are equally well supported out of the box. You can create
 | 
				
			||||||
 | 
					  your own text search config in PostgreSQL and then define it in this
 | 
				
			||||||
 | 
					  map for your language. For example:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ```conf
 | 
				
			||||||
 | 
					  pg-config = {
 | 
				
			||||||
 | 
					    english = "my-english"
 | 
				
			||||||
 | 
					    german = "my-german"
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  By default, the predefined configs are used for some languages and
 | 
				
			||||||
 | 
					  otherwise fall back to `simple`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  *If you change this setting, you must re-index everything.*
 | 
				
			||||||
 | 
					- `pg-query-parser`: the parser applied to the fulltext query. By
 | 
				
			||||||
 | 
					  default it is `websearch_to_tsquery`. (relevant [doc
 | 
				
			||||||
 | 
					  link](https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES))
 | 
				
			||||||
 | 
					- `pg-rank-normalization`: this is used to tweak rank calculation that
 | 
				
			||||||
 | 
					  affects the order of the elements returned from a query. It is an
 | 
				
			||||||
 | 
					  array of numbers out of `1`, `2`, `4`, `8`, `16` or `32`. (relevant
 | 
				
			||||||
 | 
					  [doc
 | 
				
			||||||
 | 
					  link](https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Re-create the index
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					There is an [admin route](@/docs/api/intro.md#admin) that allows you to
 | 
				
			||||||
 | 
					re-create the entire index (for all collectives). This is possible via
 | 
				
			||||||
 | 
					a call:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					$ curl -XPOST -H "Docspell-Admin-Secret: test123" http://localhost:7880/api/v1/admin/fts/reIndexAll
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					or use the [cli](@/docs/tools/cli.md):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```bash
 | 
				
			||||||
 | 
					dsc admin -a test123 recreate-index
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Here the `test123` is the key defined with `admin-endpoint.secret`. If
 | 
				
			||||||
 | 
					it is empty (the default), this call is disabled (all admin routes).
 | 
				
			||||||
 | 
					Otherwise, the POST request will submit a system task that is executed
 | 
				
			||||||
 | 
					by a joex instance eventually.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Using this endpoint, the entire index (including the schema) will be
 | 
				
			||||||
 | 
					re-created.
 | 
				
			||||||
							
								
								
									
										192
									
								
								website/site/content/docs/configure/main.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								website/site/content/docs/configure/main.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,192 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Main"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes the configuration file and shows all default settings."
 | 
				
			||||||
 | 
					weight = 10
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Configuration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Docspell's executables (restserver and joex) can take one argument – a
 | 
				
			||||||
 | 
					configuration file. If that is not given, the defaults are used,
 | 
				
			||||||
 | 
					overridden by environment variables. A config file overrides default
 | 
				
			||||||
 | 
					values, so only values that differ from the defaults are necessary.
 | 
				
			||||||
 | 
					The complete default options and their documentation are at the end of
 | 
				
			||||||
 | 
					this page.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Besides the config file, another way is to provide individual settings
 | 
				
			||||||
 | 
					via key-value pairs to the executable by the `-D` option. For example
 | 
				
			||||||
 | 
					to override only `base-url` you could add the argument
 | 
				
			||||||
 | 
					`-Ddocspell.server.base-url=…` to the command. Multiple options are
 | 
				
			||||||
 | 
					possible. For more than a few values this is very tedious, obviously, so
 | 
				
			||||||
 | 
					the recommended way is to maintain a config file. If these options
 | 
				
			||||||
 | 
					*and* a file are provided, then any setting given via the `-D…` option
 | 
				
			||||||
 | 
					overrides the same setting from the config file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Lastly, it is possible to configure docspell via environment
 | 
				
			||||||
 | 
					variables if there is no config file supplied (if a config file *is*
 | 
				
			||||||
 | 
					supplied, it is always preferred). Note that this approach is limited,
 | 
				
			||||||
 | 
					as arrays are not supported. A list of environment variables can be
 | 
				
			||||||
 | 
					found at the [end of this page](#environment-variables). The
 | 
				
			||||||
 | 
					environment variable name follows the corresponding config key - where
 | 
				
			||||||
 | 
					dots are replaced by underscores and dashes are replaced by two
 | 
				
			||||||
 | 
					underscores. For example, the config key `docspell.server.app-name`
 | 
				
			||||||
 | 
					can be defined as env variable `DOCSPELL_SERVER_APP__NAME`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It is also possible to specify environment variables inside a config
 | 
				
			||||||
 | 
					file (to get a mix of both) - please see the [documentation of the
 | 
				
			||||||
 | 
					config library](https://github.com/lightbend/config#standard-behavior)
 | 
				
			||||||
 | 
					for more on this.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# File Format
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The format of the configuration files can be
 | 
				
			||||||
 | 
					[HOCON](https://github.com/lightbend/config/blob/master/HOCON.md#hocon-human-optimized-config-object-notation),
 | 
				
			||||||
 | 
					JSON or what this [config
 | 
				
			||||||
 | 
					library](https://github.com/lightbend/config) understands. The default
 | 
				
			||||||
 | 
					values below are in HOCON format, which is recommended, since it
 | 
				
			||||||
 | 
					allows comments and has some [advanced
 | 
				
			||||||
 | 
					features](https://github.com/lightbend/config#features-of-hocon).
 | 
				
			||||||
 | 
					Please also see their documentation for more details.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					A short description (please check the links for better understanding):
 | 
				
			||||||
 | 
					The config consists of key-value pairs and can be written in a
 | 
				
			||||||
 | 
					JSON-like format (called HOCON). Keys are organized in trees, and a
 | 
				
			||||||
 | 
					key defines a full path into the tree. There are two ways:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					a.b.c.d=15
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					or
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					a {
 | 
				
			||||||
 | 
					  b {
 | 
				
			||||||
 | 
					    c {
 | 
				
			||||||
 | 
					      d = 15
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Both are exactly the same and these forms are both used at the same
 | 
				
			||||||
 | 
					time. Usually the braces approach is used to group some more settings,
 | 
				
			||||||
 | 
					for better readability.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Strings that contain "not-so-common" characters should be enclosed in
 | 
				
			||||||
 | 
					quotes. It is possible to define values at the top of the file and
 | 
				
			||||||
 | 
					reuse them on different locations via the `${full.path.to.key}`
 | 
				
			||||||
 | 
					syntax. When using these variables, they *must not* be enclosed in
 | 
				
			||||||
 | 
					quotes.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Config Options
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The configuration of both components uses separate namespaces. The
 | 
				
			||||||
 | 
					configuration for the REST server is below `docspell.server`, while
 | 
				
			||||||
 | 
					the one for joex is below `docspell.joex`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can therefore use two separate config files or one single file
 | 
				
			||||||
 | 
					containing both namespaces.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## App-id
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The `app-id` is the identifier of the corresponding instance. It *must
 | 
				
			||||||
 | 
					be unique* for all instances. By default the REST server uses `rest1`
 | 
				
			||||||
 | 
					and joex `joex1`. It is recommended to overwrite this setting to have
 | 
				
			||||||
 | 
					an explicit and stable identifier if multiple instances are
 | 
				
			||||||
 | 
					intended.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					docspell.server.app-id = "rest1"
 | 
				
			||||||
 | 
					docspell.joex.app-id = "joex1"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Other options
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Please see the menu on the left for details about specific
 | 
				
			||||||
 | 
					configuration options.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# JVM Options
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The start scripts support some options to configure the JVM. One often
 | 
				
			||||||
 | 
					used setting is the maximum heap size of the JVM. By default, java
 | 
				
			||||||
 | 
					determines it based on properties of the current machine. You can
 | 
				
			||||||
 | 
					specify it by giving java startup options to the command:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -- /path/to/server-config.conf
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This would limit the maximum heap to 1GB. The double dash (`--`) separates
 | 
				
			||||||
 | 
					internal options and the arguments to the program. Another frequently
 | 
				
			||||||
 | 
					used option is to change the default temp directory. Usually it is
 | 
				
			||||||
 | 
					`/tmp`, but it may be desired to have a dedicated temp directory,
 | 
				
			||||||
 | 
					which can be configured:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					$ ./docspell-restserver*/bin/docspell-restserver -J-Xmx1G -Djava.io.tmpdir=/path/to/othertemp -- /path/to/server-config.conf
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The command:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					$ ./docspell-restserver*/bin/docspell-restserver -h
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					gives an overview of supported options.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					It is recommended to run joex with the G1GC enabled. If you use java8,
 | 
				
			||||||
 | 
					you need to add an option to use G1GC (`-XX:+UseG1GC`), for java11
 | 
				
			||||||
 | 
					this is not necessary (but doesn't hurt either). This could look like
 | 
				
			||||||
 | 
					this:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					./docspell-joex-{{version()}}/bin/docspell-joex -J-Xmx1596M -J-XX:+UseG1GC -- /path/to/joex.conf
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Using these options you can define how much memory the JVM process is
 | 
				
			||||||
 | 
					able to use. This might be necessary to adapt depending on the usage
 | 
				
			||||||
 | 
					scenario and configured text analysis features.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Please have a look at the corresponding
 | 
				
			||||||
 | 
					[section](@/docs/configure/file-processing.md#memory-usage).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Logging
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					By default, docspell logs to stdout. This works well, when managed by
 | 
				
			||||||
 | 
					systemd or other inits. Logging can be configured in the configuration
 | 
				
			||||||
 | 
					file or via environment variables. There are only two settings:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- `minimum-level` specifies the log level to control the verbosity.
 | 
				
			||||||
 | 
					  Levels are ordered from: *Trace*, *Debug*, *Info*, *Warn* and
 | 
				
			||||||
 | 
					  *Error*
 | 
				
			||||||
 | 
					- `format` this defines how the logs are formatted. There are two
 | 
				
			||||||
 | 
					  formats for humans: *Plain* and *Fancy*. And two more suited for
 | 
				
			||||||
 | 
					  machine consumption: *Json* and *Logfmt*. The *Json* format contains
 | 
				
			||||||
 | 
					  all details, while the others may omit some for readability
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					These settings are the same for joex and the restserver component.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Default Config
 | 
				
			||||||
 | 
					## Rest Server
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{{ incl_conf(path="templates/shortcodes/server.conf") }}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Joex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{{ incl_conf(path="templates/shortcodes/joex.conf") }}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Environment Variables
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Environment variables can be used when there is no config file
 | 
				
			||||||
 | 
					supplied. The listing below shows all possible variables and their
 | 
				
			||||||
 | 
					default values.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{{ incl_conf(path="templates/shortcodes/config.env.txt") }}
 | 
				
			||||||
							
								
								
									
										44
									
								
								website/site/content/docs/configure/registration.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								website/site/content/docs/configure/registration.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,44 @@
 | 
				
			|||||||
 | 
					+++
 | 
				
			||||||
 | 
					title = "Registration"
 | 
				
			||||||
 | 
					insert_anchor_links = "right"
 | 
				
			||||||
 | 
					description = "Describes how to configure the sign-up of new users."
 | 
				
			||||||
 | 
					weight = 80
 | 
				
			||||||
 | 
					template = "docs.html"
 | 
				
			||||||
 | 
					+++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Registration Options
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This defines if and how new users can create accounts. There are 3
 | 
				
			||||||
 | 
					options:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- *closed* no new user can sign up
 | 
				
			||||||
 | 
					- *open* new users can sign up
 | 
				
			||||||
 | 
					- *invite* new users can sign up but require an invitation key
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This applies only to the REST server component.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					docspell.server.backend.signup {
 | 
				
			||||||
 | 
					  mode = "open"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # If mode == 'invite', a password must be provided to generate
 | 
				
			||||||
 | 
					  # invitation keys. It must not be empty.
 | 
				
			||||||
 | 
					  new-invite-password = ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # If mode == 'invite', this is the period an invitation token is
 | 
				
			||||||
 | 
					  # considered valid.
 | 
				
			||||||
 | 
					  invite-time = "3 days"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The mode `invite` is intended to open the application only to some
 | 
				
			||||||
 | 
					users. The admin can create these invitation keys and distribute them
 | 
				
			||||||
 | 
					to the desired people. For this, the `new-invite-password` must be
 | 
				
			||||||
 | 
					given. The idea is that only the person who installs docspell knows
 | 
				
			||||||
 | 
					this. If it is not set, then invitation won't work. New invitation
 | 
				
			||||||
 | 
					keys can be generated from within the web application or via REST
 | 
				
			||||||
 | 
					calls (using `curl`, for example).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					``` bash
 | 
				
			||||||
 | 
					curl -X POST -d '{"password":"blabla"}' "http://localhost:7880/api/v1/open/signup/newinvite"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
@@ -14,7 +14,8 @@ template = "docs.html"
 | 
				
			|||||||
- Handle multiple documents as one unit
 | 
					- Handle multiple documents as one unit
 | 
				
			||||||
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
 | 
					- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
 | 
				
			||||||
- [Full-Text Search](@/docs/webapp/finding.md#full-text-search) based
 | 
					- [Full-Text Search](@/docs/webapp/finding.md#full-text-search) based
 | 
				
			||||||
  on [Apache SOLR](https://solr.apache.org)
 | 
					  on [Apache SOLR](https://solr.apache.org) or [PostgreSQL's text
 | 
				
			||||||
 | 
					  search](https://www.postgresql.org/docs/14/textsearch.html)
 | 
				
			||||||
- Conversion to PDF: all files are converted into a PDF file. PDFs
 | 
					- Conversion to PDF: all files are converted into a PDF file. PDFs
 | 
				
			||||||
  with only images (as often returned from scanners) are converted
 | 
					  with only images (as often returned from scanners) are converted
 | 
				
			||||||
  into searchable PDF/A pdfs.
 | 
					  into searchable PDF/A pdfs.
 | 
				
			||||||
@@ -36,7 +37,8 @@ template = "docs.html"
 | 
				
			|||||||
  [REST Api](@/docs/api/_index.md); allows to [generate
 | 
					  [REST Api](@/docs/api/_index.md); allows to [generate
 | 
				
			||||||
  clients](https://openapi-generator.tech/docs/generators) for many
 | 
					  clients](https://openapi-generator.tech/docs/generators) for many
 | 
				
			||||||
  languages
 | 
					  languages
 | 
				
			||||||
- [OpenID Connect](@/docs/configure/_index.md#openid-connect-oauth2)
 | 
					- [OpenID
 | 
				
			||||||
 | 
					  Connect](@/docs/configure/authentication.md#openid-connect-oauth2)
 | 
				
			||||||
  support allows Docspell to integrate into your SSO setup, for
 | 
					  support allows Docspell to integrate into your SSO setup, for
 | 
				
			||||||
  example with keycloak.
 | 
					  example with keycloak.
 | 
				
			||||||
- Two-Factor Authentication using [TOTP](@/docs/webapp/totp.md) built
 | 
					- Two-Factor Authentication using [TOTP](@/docs/webapp/totp.md) built
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -74,9 +74,10 @@ $ ./docspell-joex*/bin/docspell-joex
 | 
				
			|||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This will startup both components using the default configuration.
 | 
					This will startup both components using the default configuration.
 | 
				
			||||||
Please refer to the [configuration page](@/docs/configure/_index.md)
 | 
					Please refer to the [configuration
 | 
				
			||||||
for how to create a custom config file. Once you have your config
 | 
					page](@/docs/configure/main.md) for how to create a custom
 | 
				
			||||||
file, simply pass it as argument to the command:
 | 
					config file. Once you have your config file, simply pass it as
 | 
				
			||||||
 | 
					argument to the command:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
$ ./docspell-restserver*/bin/docspell-restserver /path/to/server-config.conf
 | 
					$ ./docspell-restserver*/bin/docspell-restserver /path/to/server-config.conf
 | 
				
			||||||
@@ -110,7 +111,7 @@ Fulltext search is powered by [SOLR](https://solr.apache.org). You
 | 
				
			|||||||
need to install solr and create a core for docspell. Then change the
 | 
					need to install solr and create a core for docspell. Then change the
 | 
				
			||||||
solr url for both components (restserver and joex) accordingly. See
 | 
					solr url for both components (restserver and joex) accordingly. See
 | 
				
			||||||
the relevant section in the [config
 | 
					the relevant section in the [config
 | 
				
			||||||
page](@/docs/configure/_index.md#full-text-search-solr).
 | 
					page](@/docs/configure/fulltext-search.md).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Watching a directory
 | 
					### Watching a directory
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -102,7 +102,7 @@ When using H2, make sure that all components access the same database
 | 
				
			|||||||
– the jdbc url must point to the same file. Then, it is important to
 | 
					– the jdbc url must point to the same file. Then, it is important to
 | 
				
			||||||
add the options
 | 
					add the options
 | 
				
			||||||
`;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE` at the end
 | 
					`;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE` at the end
 | 
				
			||||||
of the url. See the [config page](@/docs/configure/_index.md#jdbc) for
 | 
					of the url. See the [config page](@/docs/configure/database.md) for
 | 
				
			||||||
an example.
 | 
					an example.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
For large installations, PostgreSQL or MariaDB is recommended. Create
 | 
					For large installations, PostgreSQL or MariaDB is recommended. Create
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,10 +30,10 @@ result in long processing times for OCR and text analysis. The board
 | 
				
			|||||||
should provide 4G of RAM (like the current RPi4), especially if also a
 | 
					should provide 4G of RAM (like the current RPi4), especially if also a
 | 
				
			||||||
database and solr are running next to it. The memory required by joex
 | 
					database and solr are running next to it. The memory required by joex
 | 
				
			||||||
depends on the config and document language. Please pick a value that
 | 
					depends on the config and document language. Please pick a value that
 | 
				
			||||||
suits your setup from [here](@/docs/configure/_index.md#memory-usage).
 | 
					suits your setup from
 | 
				
			||||||
For boards like the RPi, it might be necessary to use
 | 
					[here](@/docs/configure/file-processing.md#memory-usage). For boards
 | 
				
			||||||
`nlp.mode=basic`, rather than `nlp.mode=full`. You should also set the
 | 
					like the RPi, it might be necessary to use `nlp.mode=basic`, rather
 | 
				
			||||||
joex pool size to 1.
 | 
					than `nlp.mode=full`. You should also set the joex pool size to 1.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
An example: on this [UP
 | 
					An example: on this [UP
 | 
				
			||||||
board](https://up-board.org/up/specifications/) with an Intel Atom
 | 
					board](https://up-board.org/up/specifications/) with an Intel Atom
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -80,7 +80,7 @@ line are required. As you see for `wkhtmltopdf` the page size is fixed
 | 
				
			|||||||
to DIN A4. Other commands are configured like this as well.
 | 
					to DIN A4. Other commands are configured like this as well.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
For the default values, please see the [configuration
 | 
					For the default values, please see the [configuration
 | 
				
			||||||
page](@/docs/configure/_index.md#joex).
 | 
					page](@/docs/configure/main.md#joex).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Duplicate Check
 | 
					## Duplicate Check
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -23,7 +23,7 @@ For larger installations, it is probably better to run several joex
 | 
				
			|||||||
components on different machines. That works out of the box, as long
 | 
					components on different machines. That works out of the box, as long
 | 
				
			||||||
as all components point to the same database and use different
 | 
					as all components point to the same database and use different
 | 
				
			||||||
`app-id`s (see [configuring
 | 
					`app-id`s (see [configuring
 | 
				
			||||||
docspell](@/docs/configure/_index.md#app-id)).
 | 
					docspell](@/docs/configure/main.md#app-id)).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
When files are submitted to docspell, they are stored in the database
 | 
					When files are submitted to docspell, they are stored in the database
 | 
				
			||||||
and all known joex components are notified about new work. Then they
 | 
					and all known joex components are notified about new work. Then they
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -323,8 +323,8 @@ full detail.
 | 
				
			|||||||
These are a set of commands that simply call a route at the server to
 | 
					These are a set of commands that simply call a route at the server to
 | 
				
			||||||
submit a maintenance task or to reset the password of some user. These
 | 
					submit a maintenance task or to reset the password of some user. These
 | 
				
			||||||
commands require the [admin
 | 
					commands require the [admin
 | 
				
			||||||
secret](@/docs/configure/_index.md#admin-endpoint) either in the
 | 
					secret](@/docs/configure/admin-endpoint.md) either in the config file
 | 
				
			||||||
config file or as an argument.
 | 
					or as an argument.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Reset user password
 | 
					### Reset user password
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -19,15 +19,15 @@ _UI Settings_. Among other things, there is an _Item Cards_ section:
 | 
				
			|||||||
This defines how many of the item notes to display in the card. You
 | 
					This defines how many of the item notes to display in the card. You
 | 
				
			||||||
can set it to `0` to not show any notes at all. This is only a "soft
 | 
					can set it to `0` to not show any notes at all. This is only a "soft
 | 
				
			||||||
limit"; there is also a "hard limit" in [docspell's
 | 
					limit"; there is also a "hard limit" in [docspell's
 | 
				
			||||||
configuration](@/docs/configure/_index.md#rest-server) (see `max-note-length`),
 | 
					configuration](@/docs/configure/main.md#rest-server) (see
 | 
				
			||||||
that is an upper limit to this value.
 | 
					`max-note-length`), that is an upper limit to this value.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Size of item preview
 | 
					### Size of item preview
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The item preview is an image of the first page of the first
 | 
					The item preview is an image of the first page of the first
 | 
				
			||||||
attachment. You can change the order of attachments in the item detail
 | 
					attachment. You can change the order of attachments in the item detail
 | 
				
			||||||
view. This image has a predefined size, which is specified in [docspell's
 | 
					view. This image has a predefined size, which is specified in [docspell's
 | 
				
			||||||
configuration](@/docs/configure/_index.md#joex) (see
 | 
					configuration](@/docs/configure/main.md#joex) (see
 | 
				
			||||||
`extraction.preview.dpi`). The size for displaying it can be
 | 
					`extraction.preview.dpi`). The size for displaying it can be
 | 
				
			||||||
specified via this setting. A _small_ preview uses about 80px width, a
 | 
					specified via this setting. A _small_ preview uses about 80px width, a
 | 
				
			||||||
_medium_ one 160px and _large_ means to use the available space in the
 | 
					_medium_ one 160px and _large_ means to use the available space in the
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -191,9 +191,9 @@ file to look for duplicates, too.
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
Docspell will go through all folders and download mails in “batches”.
 | 
					Docspell will go through all folders and download mails in “batches”.
 | 
				
			||||||
This size can be set by the admin in the [configuration
 | 
					This size can be set by the admin in the [configuration
 | 
				
			||||||
file](@/docs/configure/_index.md#joex) and applies to all these tasks
 | 
					file](@/docs/configure/main.md#joex) and applies to all these
 | 
				
			||||||
(same for all users). This batch only contains the mail headers and
 | 
					tasks (same for all users). This batch only contains the mail headers
 | 
				
			||||||
not the complete mail.
 | 
					and not the complete mail.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Then each mail is downloaded completely one by one and converted into
 | 
					Then each mail is downloaded completely one by one and converted into
 | 
				
			||||||
an [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions) file
 | 
					an [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions) file
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -11,7 +11,8 @@ Docspell has built-in support for two-factor (2FA) authentication
 | 
				
			|||||||
using
 | 
					using
 | 
				
			||||||
[TOTP](https://en.wikipedia.org/wiki/Time-based_One-Time_Password)s.
 | 
					[TOTP](https://en.wikipedia.org/wiki/Time-based_One-Time_Password)s.
 | 
				
			||||||
For anything more, consider a dedicated account management tool and
 | 
					For anything more, consider a dedicated account management tool and
 | 
				
			||||||
[OpenID Connect](@/docs/configure/_index.md#openid-connect-oauth2).
 | 
					[OpenID
 | 
				
			||||||
 | 
					Connect](@/docs/configure/authentication.md#openid-connect-oauth2).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Setup
 | 
					## Setup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -65,7 +66,7 @@ client](@/docs/tools/cli.md) to execute an admin command that removes
 | 
				
			|||||||
2FA for a given user.
 | 
					2FA for a given user.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
For this to work, you need to [enable the admin
 | 
					For this to work, you need to [enable the admin
 | 
				
			||||||
endpoint](@/docs/configure/_index.md#admin-endpoint). Then execute the
 | 
					endpoint](@/docs/configure/admin-endpoint.md). Then execute the
 | 
				
			||||||
`disable-2fa` admin command and specify the complete account.
 | 
					`disable-2fa` admin command and specify the complete account.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user