Merge pull request #160 from eikek/fts

Fts
This commit is contained in:
mergify[bot] 2020-06-25 21:19:42 +00:00 committed by GitHub
commit 50b3554c9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
85 changed files with 3237 additions and 287 deletions

View File

@ -259,6 +259,28 @@ val analysis = project.in(file("modules/analysis")).
Dependencies.fs2 ++
Dependencies.stanfordNlpCore
).dependsOn(common, files % "test->test")
val ftsclient = project.in(file("modules/fts-client")).
disablePlugins(RevolverPlugin).
settings(sharedSettings).
settings(testSettings).
settings(
name := "docspell-fts-client",
libraryDependencies ++= Seq.empty
).dependsOn(common)
val ftssolr = project.in(file("modules/fts-solr")).
disablePlugins(RevolverPlugin).
settings(sharedSettings).
settings(testSettings).
settings(
name := "docspell-fts-solr",
libraryDependencies ++=
Dependencies.http4sClient ++
Dependencies.http4sCirce ++
Dependencies.http4sDsl ++
Dependencies.circe
).dependsOn(common, ftsclient)
val restapi = project.in(file("modules/restapi")).
disablePlugins(RevolverPlugin).
@ -303,7 +325,7 @@ val backend = project.in(file("modules/backend")).
Dependencies.bcrypt ++
Dependencies.http4sClient ++
Dependencies.emil
).dependsOn(store, joexapi)
).dependsOn(store, joexapi, ftsclient)
val webapp = project.in(file("modules/webapp")).
disablePlugins(RevolverPlugin).
@ -336,7 +358,9 @@ val joex = project.in(file("modules/joex")).
name := "docspell-joex",
libraryDependencies ++=
Dependencies.fs2 ++
Dependencies.http4s ++
Dependencies.http4sServer ++
Dependencies.http4sCirce ++
Dependencies.http4sDsl ++
Dependencies.circe ++
Dependencies.pureconfig ++
Dependencies.emilTnef ++
@ -350,7 +374,7 @@ val joex = project.in(file("modules/joex")).
addCompilerPlugin(Dependencies.betterMonadicFor),
buildInfoPackage := "docspell.joex",
reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"local"/"dev.conf"}")
).dependsOn(store, backend, extract, convert, analysis, joexapi, restapi)
).dependsOn(store, backend, extract, convert, analysis, joexapi, restapi, ftssolr)
val restserver = project.in(file("modules/restserver")).
enablePlugins(BuildInfoPlugin
@ -364,7 +388,9 @@ val restserver = project.in(file("modules/restserver")).
settings(
name := "docspell-restserver",
libraryDependencies ++=
Dependencies.http4s ++
Dependencies.http4sServer ++
Dependencies.http4sCirce ++
Dependencies.http4sDsl ++
Dependencies.circe ++
Dependencies.pureconfig ++
Dependencies.yamusca ++
@ -386,7 +412,7 @@ val restserver = project.in(file("modules/restserver")).
}.taskValue,
Compile/unmanagedResourceDirectories ++= Seq((Compile/resourceDirectory).value.getParentFile/"templates"),
reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"local"/"dev.conf"}")
).dependsOn(restapi, joexapi, backend, webapp)
).dependsOn(restapi, joexapi, backend, webapp, ftssolr)
@ -472,6 +498,8 @@ val root = project.in(file(".")).
, extract
, convert
, analysis
, ftsclient
, ftssolr
, files
, store
, joexapi

View File

@ -1,12 +1,17 @@
package docspell.backend
import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource}
import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
import docspell.backend.auth.Login
import docspell.backend.ops._
import docspell.backend.signup.OSignup
import docspell.joexapi.client.JoexClient
import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.usertask.UserTaskStore
import docspell.ftsclient.FtsClient
import scala.concurrent.ExecutionContext
import emil.javamail.{JavaMailEmil, Settings}
@ -25,6 +30,7 @@ trait BackendApp[F[_]] {
def job: OJob[F]
def item: OItem[F]
def itemSearch: OItemSearch[F]
def fulltext: OFulltext[F]
def mail: OMail[F]
def joex: OJoex[F]
def userTask: OUserTask[F]
@ -35,7 +41,8 @@ object BackendApp {
def create[F[_]: ConcurrentEffect: ContextShift](
cfg: Config,
store: Store[F],
httpClientEc: ExecutionContext,
httpClient: Client[F],
ftsClient: FtsClient[F],
blocker: Blocker
): Resource[F, BackendApp[F]] =
for {
@ -48,12 +55,13 @@ object BackendApp {
tagImpl <- OTag[F](store)
equipImpl <- OEquipment[F](store)
orgImpl <- OOrganization(store)
joexImpl <- OJoex.create(httpClientEc, store)
joexImpl <- OJoex(JoexClient(httpClient), store)
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
nodeImpl <- ONode(store)
jobImpl <- OJob(store, joexImpl)
itemImpl <- OItem(store)
itemImpl <- OItem(store, ftsClient)
itemSearchImpl <- OItemSearch(store)
fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
mailImpl <- OMail(store, javaEmil)
@ -71,6 +79,7 @@ object BackendApp {
val job = jobImpl
val item = itemImpl
val itemSearch = itemSearchImpl
val fulltext = fulltextImpl
val mail = mailImpl
val joex = joexImpl
val userTask = userTaskImpl
@ -81,9 +90,11 @@ object BackendApp {
connectEC: ExecutionContext,
httpClientEc: ExecutionContext,
blocker: Blocker
): Resource[F, BackendApp[F]] =
)(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] =
for {
store <- Store.create(cfg.jdbc, connectEC, blocker)
backend <- create(cfg, store, httpClientEc, blocker)
store <- Store.create(cfg.jdbc, connectEC, blocker)
httpClient <- BlazeClientBuilder[F](httpClientEc).resource
ftsClient <- ftsFactory(httpClient)
backend <- create(cfg, store, httpClient, ftsClient, blocker)
} yield backend
}

View File

@ -0,0 +1,90 @@
package docspell.backend
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.store.records.RJob
/** Builds the [[RJob]] records for the background tasks the backend submits. */
object JobFactory {

  /** Creates one job that runs the task `ProcessItemArgs.taskName` for `args`.
    *
    * The job is filed under the account's collective, submitted in the
    * name of the account's user and uses `args.makeSubject` as subject.
    */
  def processItem[F[_]: Sync](
      args: ProcessItemArgs,
      account: AccountId,
      prio: Priority,
      tracker: Option[Ident]
  ): F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
      job = RJob.newJob(
        id,
        ProcessItemArgs.taskName,
        account.collective,
        args,
        args.makeSubject,
        now,
        account.user,
        prio,
        tracker
      )
    } yield job

  /** Creates one job per element of `args`.
    *
    * All jobs share the same submission timestamp, but every job gets
    * its own random id. (Reusing a single id for the whole batch would
    * make the jobs indistinguishable by id.)
    */
  def processItems[F[_]: Sync](
      args: Vector[ProcessItemArgs],
      account: AccountId,
      prio: Priority,
      tracker: Option[Ident]
  ): F[Vector[RJob]] = {
    def create(now: Timestamp, arg: ProcessItemArgs): F[RJob] =
      Ident
        .randomId[F]
        .map(id =>
          RJob.newJob(
            id,
            ProcessItemArgs.taskName,
            account.collective,
            arg,
            arg.makeSubject,
            now,
            account.user,
            prio,
            tracker
          )
        )

    for {
      now  <- Timestamp.current[F]
      jobs <- args.traverse(arg => create(now, arg))
    } yield jobs
  }

  /** Creates the low-priority job that re-creates the complete full-text
    * index. It runs in the system task group and is tracked with
    * `DocspellSystem.migrationTaskTracker`.
    */
  def reIndexAll[F[_]: Sync]: F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
    } yield RJob.newJob(
      id,
      ReIndexTaskArgs.taskName,
      DocspellSystem.taskGroup,
      ReIndexTaskArgs(None),
      s"Recreate full-text index",
      now,
      DocspellSystem.taskGroup,
      Priority.Low,
      Some(DocspellSystem.migrationTaskTracker)
    )

  /** Creates the low-priority job that re-creates the full-text index of
    * the account's collective. The tracker is derived from the task
    * arguments via `ReIndexTaskArgs.tracker`.
    */
  def reIndex[F[_]: Sync](account: AccountId): F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
      args = ReIndexTaskArgs(Some(account.collective))
    } yield RJob.newJob(
      id,
      ReIndexTaskArgs.taskName,
      account.collective,
      args,
      s"Recreate full-text index",
      now,
      account.user,
      Priority.Low,
      Some(ReIndexTaskArgs.tracker(args))
    )
}

View File

@ -0,0 +1,243 @@
package docspell.backend.ops
import cats.effect._
import cats.implicits._
import fs2.Stream
import docspell.common._
import docspell.ftsclient._
import docspell.backend.JobFactory
import docspell.store.Store
import docspell.store.records.RJob
import docspell.store.queue.JobQueue
import docspell.store.queries.QItem
import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
/** Operations that combine the item search with the full-text index. */
trait OFulltext[F[_]] {

  /** Finds items matching the query `q` that also have a hit in the
    * full-text index for `fts`.
    */
  def findItems(
      q: Query,
      fts: OFulltext.FtsInput,
      batch: Batch
  ): F[Vector[OFulltext.FtsItem]]

  /** Same as `findItems` but does more queries per item to find all tags. */
  def findItemsWithTags(
      q: Query,
      fts: OFulltext.FtsInput,
      batch: Batch
  ): F[Vector[OFulltext.FtsItemWithTags]]

  /** Queries the full-text index first and then loads the matching
    * items (with tags) of the given collective.
    */
  def findIndexOnly(
      fts: OFulltext.FtsInput,
      collective: Ident,
      batch: Batch
  ): F[Vector[OFulltext.FtsItemWithTags]]

  /** Clears the full-text index completely and launches a task that
    * indexes all data.
    */
  def reindexAll: F[Unit]

  /** Clears the full-text index for the given collective and starts a
    * task indexing all their data.
    */
  def reindexCollective(account: AccountId): F[Unit]
}
object OFulltext {

  /** The raw full-text query and the markers placed around highlighted
    * matches.
    */
  case class FtsInput(
      query: String,
      highlightPre: String = "***",
      highlightPost: String = "***"
  )

  /** One index hit belonging to an item: its score, what matched and the
    * highlighted text snippets.
    */
  case class FtsDataItem(
      score: Double,
      matchData: FtsResult.MatchData,
      context: List[String]
  )

  /** Index information attached to an item: the figures of the whole
    * full-text response plus the hits of this item.
    */
  case class FtsData(
      maxScore: Double,
      count: Int,
      qtime: Duration,
      items: List[FtsDataItem]
  )

  case class FtsItem(item: ListItem, ftsData: FtsData)
  case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)

  def apply[F[_]: Effect](
      itemSearch: OItemSearch[F],
      fts: FtsClient[F],
      store: Store[F],
      queue: JobQueue[F],
      joex: OJoex[F]
  ): Resource[F, OFulltext[F]] =
    Resource.pure[F, OFulltext[F]](new OFulltext[F] {

      def reindexAll: F[Unit] =
        for {
          job <- JobFactory.reIndexAll[F]
          _   <- queue.insertIfNew(job) *> joex.notifyAllNodes
        } yield ()

      def reindexCollective(account: AccountId): F[Unit] =
        for {
          // A pending job with the global tracker makes a per-collective
          // job unnecessary; nothing is submitted in that case.
          exist <- store.transact(
            RJob.findNonFinalByTracker(DocspellSystem.migrationTaskTracker)
          )
          job <- JobFactory.reIndex(account)
          _ <-
            if (exist.isDefined) ().pure[F]
            else queue.insertIfNew(job) *> joex.notifyAllNodes
        } yield ()

      def findIndexOnly(
          ftsQ: OFulltext.FtsInput,
          collective: Ident,
          batch: Batch
      ): F[Vector[OFulltext.FtsItemWithTags]] = {
        val fq = FtsQuery(
          ftsQ.query,
          collective,
          Set.empty,
          batch.limit,
          batch.offset,
          FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
        )
        for {
          ftsR <- fts.search(fq)
          ftsItems = ftsR.results.groupBy(_.itemId)
          select   = ftsR.results.map(r => QItem.SelectedItem(r.itemId, r.score)).toSet
          itemsWithTags <-
            store
              .transact(
                QItem.findItemsWithTags(
                  collective,
                  QItem.findSelectedItems(QItem.Query.empty(collective), select)
                )
              )
              .take(batch.limit.toLong)
              .compile
              .toVector
          res =
            itemsWithTags
              .collect(convertFtsData(ftsR, ftsItems))
              .map({ case (li, fd) => FtsItemWithTags(li, fd) })
        } yield res
      }

      def findItems(q: Query, ftsQ: FtsInput, batch: Batch): F[Vector[FtsItem]] =
        findItemsFts(q, ftsQ, batch.first, itemSearch.findItems, convertFtsData[ListItem])
          .drop(batch.offset.toLong)
          .take(batch.limit.toLong)
          .map({ case (li, fd) => FtsItem(li, fd) })
          .compile
          .toVector

      def findItemsWithTags(
          q: Query,
          ftsQ: FtsInput,
          batch: Batch
      ): F[Vector[FtsItemWithTags]] =
        findItemsFts(
          q,
          ftsQ,
          batch.first,
          itemSearch.findItemsWithTags,
          convertFtsData[ListItemWithTags]
        )
          .drop(batch.offset.toLong)
          .take(batch.limit.toLong)
          .map({ case (li, fd) => FtsItemWithTags(li, fd) })
          .compile
          .toVector

      // Helper

      /** Streams the items that pass both searches. Pages are requested
        * as long as the SQL search returns full pages; the first short
        * page is still included.
        */
      private def findItemsFts[A: ItemId](
          q: Query,
          ftsQ: FtsInput,
          batch: Batch,
          search: (Query, Batch) => F[Vector[A]],
          convert: (
              FtsResult,
              Map[Ident, List[FtsResult.ItemMatch]]
          ) => PartialFunction[A, (A, FtsData)]
      ): Stream[F, (A, FtsData)] =
        findItemsFts0(q, ftsQ, batch, search, convert)
          .takeThrough(_._1 >= batch.limit)
          .flatMap(x => Stream.emits(x._2))

      /** Emits, page by page, the size of the SQL result together with
        * those of its items that have a hit in the full-text index.
        */
      private def findItemsFts0[A: ItemId](
          q: Query,
          ftsQ: FtsInput,
          batch: Batch,
          search: (Query, Batch) => F[Vector[A]],
          convert: (
              FtsResult,
              Map[Ident, List[FtsResult.ItemMatch]]
          ) => PartialFunction[A, (A, FtsData)]
      ): Stream[F, (Int, Vector[(A, FtsData)])] = {
        val fq = FtsQuery(
          ftsQ.query,
          q.collective,
          Set.empty,
          0,
          0,
          FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
        )

        val qres: F[(Int, Vector[(A, FtsData)])] =
          search(q, batch).flatMap { items =>
            if (items.isEmpty)
              // An empty item set means "unrestricted" to the index, so
              // asking it here would be a pointless request that cannot
              // contribute any result.
              (0, Vector.empty[(A, FtsData)]).pure[F]
            else {
              val ids = items.map(a => ItemId[A].itemId(a)).toSet
              // must find all index results involving the items.
              // Currently there is one result per item + one result per
              // attachment
              val limit = items.map(a => ItemId[A].fileCount(a)).sum + items.size
              fts.search(fq.copy(items = ids, limit = limit)).map { ftsR =>
                val ftsItems = ftsR.results.groupBy(_.itemId)
                (items.size, items.collect(convert(ftsR, ftsItems)))
              }
            }
          }

        Stream.eval(qres) ++ findItemsFts0(q, ftsQ, batch.next, search, convert)
      }

      /** Pairs an item with its index hits; undefined for items that have
        * no hit in `ftrItems`.
        */
      private def convertFtsData[A: ItemId](
          ftr: FtsResult,
          ftrItems: Map[Ident, List[FtsResult.ItemMatch]]
      ): PartialFunction[A, (A, FtsData)] = {
        case a if ftrItems.contains(ItemId[A].itemId(a)) =>
          val ftsDataItems = ftrItems
            .getOrElse(ItemId[A].itemId(a), Nil)
            .map(im => FtsDataItem(im.score, im.data, ftr.highlight.getOrElse(im.id, Nil)))
          (a, FtsData(ftr.maxScore, ftr.count, ftr.qtime, ftsDataItems))
      }
    })

  /** Type class giving access to the item id and the file count of a
    * search result.
    */
  trait ItemId[A] {
    def itemId(a: A): Ident

    def fileCount(a: A): Int
  }

  object ItemId {
    def apply[A](implicit ev: ItemId[A]): ItemId[A] = ev

    def from[A](f: A => Ident, g: A => Int): ItemId[A] =
      new ItemId[A] {
        def itemId(a: A)    = f(a)
        def fileCount(a: A) = g(a)
      }

    implicit val listItemId: ItemId[ListItem] =
      ItemId.from(_.id, _.fileCount)

    implicit val listItemWithTagsId: ItemId[ListItemWithTags] =
      ItemId.from(_.item.id, _.item.fileCount)
  }
}

View File

@ -5,10 +5,12 @@ import cats.implicits._
import cats.effect.{Effect, Resource}
import doobie._
import doobie.implicits._
import org.log4s.getLogger
import docspell.store.{AddResult, Store}
import docspell.store.queries.{QAttachment, QItem}
import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp}
import docspell.common._
import docspell.store.records._
import docspell.ftsclient.FtsClient
trait OItem[F[_]] {
@ -38,7 +40,7 @@ trait OItem[F[_]] {
def setNotes(item: Ident, notes: Option[String], collective: Ident): F[AddResult]
def setName(item: Ident, notes: String, collective: Ident): F[AddResult]
def setName(item: Ident, name: String, collective: Ident): F[AddResult]
def setState(item: Ident, state: ItemState, collective: Ident): F[AddResult]
@ -67,11 +69,12 @@ trait OItem[F[_]] {
object OItem {
def apply[F[_]: Effect](store: Store[F]): Resource[F, OItem[F]] =
def apply[F[_]: Effect](store: Store[F], fts: FtsClient[F]): Resource[F, OItem[F]] =
for {
otag <- OTag(store)
oorg <- OOrganization(store)
oequip <- OEquipment(store)
logger <- Resource.pure[F, Logger[F]](Logger.log4s(getLogger))
oitem <- Resource.pure[F, OItem[F]](new OItem[F] {
def moveAttachmentBefore(
itemId: Ident,
@ -259,12 +262,18 @@ object OItem {
.transact(RItem.updateNotes(item, collective, notes))
.attempt
.map(AddResult.fromUpdate)
.flatTap(
onSuccessIgnoreError(fts.updateItemNotes(logger, item, collective, notes))
)
def setName(item: Ident, name: String, collective: Ident): F[AddResult] =
store
.transact(RItem.updateName(item, collective, name))
.attempt
.map(AddResult.fromUpdate)
.flatTap(
onSuccessIgnoreError(fts.updateItemName(logger, item, collective, name))
)
def setState(item: Ident, state: ItemState, collective: Ident): F[AddResult] =
store
@ -293,13 +302,17 @@ object OItem {
.map(AddResult.fromUpdate)
def deleteItem(itemId: Ident, collective: Ident): F[Int] =
QItem.delete(store)(itemId, collective)
QItem
.delete(store)(itemId, collective)
.flatTap(_ => fts.removeItem(logger, itemId))
def getProposals(item: Ident, collective: Ident): F[MetaProposalList] =
store.transact(QAttachment.getMetaProposals(item, collective))
def deleteAttachment(id: Ident, collective: Ident): F[Int] =
QAttachment.deleteSingleAttachment(store)(id, collective)
QAttachment
.deleteSingleAttachment(store)(id, collective)
.flatTap(_ => fts.removeAttachment(logger, id))
def setAttachmentName(
attachId: Ident,
@ -310,6 +323,29 @@ object OItem {
.transact(RAttachment.updateName(attachId, collective, name))
.attempt
.map(AddResult.fromUpdate)
.flatTap(
onSuccessIgnoreError(
OptionT(store.transact(RAttachment.findItemId(attachId)))
.semiflatMap(itemId =>
fts.updateAttachmentName(logger, itemId, attachId, collective, name)
)
.fold(())(identity)
)
)
private def onSuccessIgnoreError(update: F[Unit])(ar: AddResult): F[Unit] =
ar match {
case AddResult.Success =>
update.attempt.flatMap {
case Right(()) => ().pure[F]
case Left(ex) =>
logger.warn(s"Error updating full-text index: ${ex.getMessage}")
}
case AddResult.Failure(_) =>
().pure[F]
case AddResult.EntityExists(_) =>
().pure[F]
}
})
} yield oitem
}

View File

@ -110,11 +110,15 @@ object OItemSearch {
.compile
.toVector
def findItemsWithTags(q: Query, batch: Batch): F[Vector[ListItemWithTags]] =
def findItemsWithTags(q: Query, batch: Batch): F[Vector[ListItemWithTags]] = {
val search = QItem.findItems(q, batch)
store
.transact(QItem.findItemsWithTags(q, batch).take(batch.limit.toLong))
.transact(
QItem.findItemsWithTags(q.collective, search).take(batch.limit.toLong)
)
.compile
.toVector
}
def findAttachment(id: Ident, collective: Ident): F[Option[AttachmentData[F]]] =
store

View File

@ -5,7 +5,7 @@ import cats.Functor
import cats.data.{EitherT, OptionT}
import cats.effect._
import cats.implicits._
import docspell.backend.Config
import docspell.backend.{Config, JobFactory}
import fs2.Stream
import docspell.common._
import docspell.common.syntax.all._
@ -204,26 +204,7 @@ object OUpload {
account: AccountId,
prio: Priority,
tracker: Option[Ident]
): F[Vector[RJob]] = {
def create(id: Ident, now: Timestamp, arg: ProcessItemArgs): RJob =
RJob.newJob(
id,
ProcessItemArgs.taskName,
account.collective,
arg,
arg.makeSubject,
now,
account.user,
prio,
tracker
)
for {
id <- Ident.randomId[F]
now <- Timestamp.current[F]
jobs = args.map(a => create(id, now, a))
} yield jobs
}
): F[Vector[RJob]] =
JobFactory.processItems[F](args, account, prio, tracker)
})
}

View File

@ -0,0 +1,8 @@
package docspell.common
/** Identifiers reserved for tasks that are submitted by docspell itself. */
object DocspellSystem {

  /** Group (and submitter) used for system tasks. */
  val taskGroup: Ident = Ident.unsafe("docspell-system")

  /** Tracker id of the task that re-creates the complete full-text index. */
  val migrationTaskTracker: Ident = Ident.unsafe("full-text-index-tracker")
}

View File

@ -25,6 +25,9 @@ case class Duration(nanos: Long) {
def formatExact: String =
s"$millis ms"
override def toString(): String =
s"Duration(${millis}ms)"
}
object Duration {

View File

@ -15,6 +15,9 @@ case class Ident(id: String) {
def nonEmpty: Boolean =
!isEmpty
def /(next: Ident): Ident =
new Ident(id + "." + next.id)
}
object Ident {

View File

@ -0,0 +1,24 @@
package docspell.common
import io.circe._
import io.circe.generic.semiauto._
/** Arguments of the re-index task. Without a collective the task is
  * tracked with the global tracker id, see `tracker`.
  */
final case class ReIndexTaskArgs(collective: Option[Ident])

object ReIndexTaskArgs {
  val taskName = Ident.unsafe("full-text-reindex")

  /** The tracker id for the given arguments: the global tracker id,
    * prefixed with the collective id if one is given.
    */
  def tracker(args: ReIndexTaskArgs): Ident =
    args.collective.fold(DocspellSystem.migrationTaskTracker)(
      _ / DocspellSystem.migrationTaskTracker
    )

  implicit val jsonEncoder: Encoder[ReIndexTaskArgs] =
    deriveEncoder[ReIndexTaskArgs]

  implicit val jsonDecoder: Decoder[ReIndexTaskArgs] =
    deriveDecoder[ReIndexTaskArgs]
}

View File

@ -0,0 +1,132 @@
package docspell.ftsclient
import fs2.Stream
import cats.implicits._
import cats.effect._
import org.log4s.getLogger
import docspell.common._
/** The fts client is the interface for docspell to a fulltext search
  * engine.
  *
  * It defines all operations required for integration into docspell.
  * It uses data structures from docspell. Implementation modules need
  * to translate it to the engine that provides the features.
  */
trait FtsClient[F[_]] {

  /** Initialization tasks. This is called exactly once and then never
    * again (except when re-indexing everything). It may be used to
    * set up the database.
    */
  def initialize: F[Unit]

  /** Run a full-text search. */
  def search(q: FtsQuery): F[FtsResult]

  /** Continually run a full-text search and concatenate the results.
    *
    * Pages are requested until one comes back with fewer results than
    * `q.limit`. A query with a non-positive limit cannot be paged
    * (`nextPage` would not advance), so it is run exactly once.
    */
  def searchAll(q: FtsQuery): Stream[F, FtsResult] =
    Stream.eval(search(q)).flatMap { result =>
      val lastPage = q.limit <= 0 || result.results.size < q.limit
      if (lastPage) Stream.emit(result)
      else Stream.emit(result) ++ searchAll(q.nextPage)
    }

  /** Push all data to the index. Data with the same `id` is replaced.
    * Values that are `None` are removed from the index (or set to an
    * empty string).
    */
  def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit]

  def indexData(logger: Logger[F], data: TextData*): F[Unit] =
    indexData(logger, Stream.emits(data))

  /** Push all data to the index, but only update existing entries. No
    * new entries are created and values that are given as `None` are
    * skipped.
    */
  def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit]

  def updateIndex(logger: Logger[F], data: TextData*): F[Unit] =
    updateIndex(logger, Stream.emits(data))

  /** Updates only the name of an existing item entry. */
  def updateItemName(
      logger: Logger[F],
      itemId: Ident,
      collective: Ident,
      name: String
  ): F[Unit] =
    updateIndex(logger, TextData.item(itemId, collective, Some(name), None))

  /** Updates only the notes of an existing item entry; missing notes are
    * sent as an empty string.
    */
  def updateItemNotes(
      logger: Logger[F],
      itemId: Ident,
      collective: Ident,
      notes: Option[String]
  ): F[Unit] =
    updateIndex(
      logger,
      TextData.item(itemId, collective, None, Some(notes.getOrElse("")))
    )

  /** Updates only the name of an existing attachment entry; a missing
    * name is sent as an empty string. No text is passed along, the
    * language argument is only needed to construct the value.
    */
  def updateAttachmentName(
      logger: Logger[F],
      itemId: Ident,
      attachId: Ident,
      collective: Ident,
      name: Option[String]
  ): F[Unit] =
    updateIndex(
      logger,
      TextData.attachment(
        itemId,
        attachId,
        collective,
        Language.English,
        Some(name.getOrElse("")),
        None
      )
    )

  def removeItem(logger: Logger[F], itemId: Ident): F[Unit]

  def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit]

  /** Clears the index, removing everything. */
  def clearAll(logger: Logger[F]): F[Unit]

  /** Clears the index from all data belonging to the given collective. */
  def clear(logger: Logger[F], collective: Ident): F[Unit]
}
object FtsClient {

  /** A client that does nothing but log that full-text search is
    * disabled. Searching yields `FtsResult.empty`.
    */
  def none[F[_]: Sync]: FtsClient[F] =
    new FtsClient[F] {
      // Used by the operations that are not handed a logger.
      private[this] val ownLogger = Logger.log4s[F](getLogger)

      def initialize: F[Unit] =
        ownLogger.info("Full-text search is disabled!")

      def search(q: FtsQuery): F[FtsResult] =
        ownLogger.warn("Full-text search is disabled!").as(FtsResult.empty)

      def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
        warnDisabled(logger)

      def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
        warnDisabled(logger)

      def removeItem(logger: Logger[F], itemId: Ident): F[Unit] =
        warnDisabled(logger)

      def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] =
        warnDisabled(logger)

      def clearAll(logger: Logger[F]): F[Unit] =
        warnDisabled(logger)

      def clear(logger: Logger[F], collective: Ident): F[Unit] =
        warnDisabled(logger)

      // Writes the warning to the logger supplied by the caller.
      private def warnDisabled(logger: Logger[F]): F[Unit] =
        logger.warn("Full-text search is disabled!")
    }
}

View File

@ -0,0 +1,34 @@
package docspell.ftsclient
import docspell.common._
/** A fulltext query.
  *
  * The query string `q` is passed on as is; it is up to each
  * implementation to interpret it for the system in use.
  *
  * A search must be confined to the given collective. If `items` is
  * non-empty, it must additionally be confined to these items; an
  * empty set means no such restriction.
  */
final case class FtsQuery(
    q: String,
    collective: Ident,
    items: Set[Ident],
    limit: Int,
    offset: Int,
    highlight: FtsQuery.HighlightSetting
) {

  /** The same query, moved forward by one page of `limit` results. */
  def nextPage: FtsQuery = {
    val nextOffset = limit + offset
    copy(offset = nextOffset)
  }
}

object FtsQuery {

  /** The markers that are put before and after a highlighted match. */
  case class HighlightSetting(pre: String, post: String)

  object HighlightSetting {
    val default = HighlightSetting("**", "**")
  }
}

View File

@ -0,0 +1,31 @@
package docspell.ftsclient
import docspell.common._
import FtsResult.ItemMatch
/** The response of a full-text search: query time, result count, the
  * maximum score, highlighted snippets per entry id and the matches.
  */
final case class FtsResult(
    qtime: Duration,
    count: Int,
    maxScore: Double,
    highlight: Map[Ident, List[String]],
    results: List[ItemMatch]
)

object FtsResult {

  /** A result without any matches. */
  val empty =
    FtsResult(
      qtime = Duration.millis(0),
      count = 0,
      maxScore = 0.0,
      highlight = Map.empty,
      results = Nil
    )

  /** Tells whether an attachment or the item itself matched. */
  sealed trait MatchData
  case class AttachmentData(attachId: Ident, attachName: String) extends MatchData
  case object ItemData                                           extends MatchData

  /** A single match in the index. */
  case class ItemMatch(
      id: Ident,
      itemId: Ident,
      collectiveId: Ident,
      score: Double,
      data: MatchData
  )
}

View File

@ -0,0 +1,63 @@
package docspell.ftsclient
import docspell.common._
/** The text of an item or of one of its attachments, as it is handed
  * to the full-text index.
  */
sealed trait TextData {

  /** The id of the index entry. */
  def id: Ident

  def item: Ident

  def collective: Ident

  /** Applies `f` to an attachment and `g` to an item. */
  final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
    this match {
      case attach: TextData.Attachment => f(attach)
      case itm: TextData.Item          => g(itm)
    }
}

object TextData {

  /** Text of an attachment; its entry id is `item / attachId`. */
  final case class Attachment(
      item: Ident,
      attachId: Ident,
      collective: Ident,
      lang: Language,
      name: Option[String],
      text: Option[String]
  ) extends TextData {

    val id = item / attachId
  }

  /** Creates an [[Attachment]], typed as the general [[TextData]]. */
  def attachment(
      item: Ident,
      attachId: Ident,
      collective: Ident,
      lang: Language,
      name: Option[String],
      text: Option[String]
  ): TextData =
    Attachment(
      item = item,
      attachId = attachId,
      collective = collective,
      lang = lang,
      name = name,
      text = text
    )

  /** Text of an item; its entry id is the item id prefixed with `item`. */
  final case class Item(
      item: Ident,
      collective: Ident,
      name: Option[String],
      notes: Option[String]
  ) extends TextData {

    val id = Ident.unsafe("item") / item
  }

  /** Creates an [[Item]], typed as the general [[TextData]]. */
  def item(
      item: Ident,
      collective: Ident,
      name: Option[String],
      notes: Option[String]
  ): TextData =
    Item(item = item, collective = collective, name = name, notes = notes)
}

View File

@ -0,0 +1,38 @@
package docspell.ftssolr
import io.circe._
import docspell.common._
/** The name of a field in the SOLR documents. */
final class Field(val name: String) extends AnyVal {

  override def toString(): String = s"Field($name)"
}

object Field {

  def apply(name: String): Field =
    new Field(name)

  // Fields present in every document
  val id            = Field("id")
  val itemId        = Field("itemId")
  val collectiveId  = Field("collectiveId")
  val discriminator = Field("discriminator")

  // Fields of attachment documents
  val attachmentId   = Field("attachmentId")
  val attachmentName = Field("attachmentName")
  val content        = Field("content")
  val content_de     = Field("content_de")
  val content_en     = Field("content_en")

  // Fields of item documents
  val itemName  = Field("itemName")
  val itemNotes = Field("itemNotes")

  /** The field holding the attachment text written in the given language. */
  def contentField(lang: Language): Field =
    lang match {
      case Language.German  => content_de
      case Language.English => content_en
    }

  /** A field is encoded as its plain name. */
  implicit val jsonEncoder: Encoder[Field] =
    Encoder.encodeString.contramap(field => field.name)
}

View File

@ -0,0 +1,136 @@
package docspell.ftssolr
import io.circe._
import io.circe.syntax._
import docspell.common._
import docspell.ftsclient._
/** JSON codecs translating between docspell's full-text data structures
  * and the JSON documents exchanged with SOLR.
  */
trait JsonCodec {
/** Encodes an attachment as a complete document. A missing name or text
  * is written as an empty string; the text goes into the content field
  * selected by the attachment's language.
  */
implicit def attachmentEncoder(implicit
enc: Encoder[Ident]
): Encoder[TextData.Attachment] =
new Encoder[TextData.Attachment] {
final def apply(td: TextData.Attachment): Json = {
val cnt =
(Field.contentField(td.lang).name, Json.fromString(td.text.getOrElse("")))
Json.fromFields(
cnt :: List(
(Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)),
(Field.attachmentId.name, enc(td.attachId)),
(Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))),
(Field.discriminator.name, Json.fromString("attachment"))
)
)
}
}
/** Encodes an item as a complete document. A missing name or missing
  * notes are written as an empty string.
  */
implicit def itemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] =
new Encoder[TextData.Item] {
final def apply(td: TextData.Item): Json =
Json.obj(
(Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)),
(Field.itemName.name, Json.fromString(td.name.getOrElse(""))),
(Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))),
(Field.discriminator.name, Json.fromString("item"))
)
}
/** Encodes any `TextData` by delegating to the attachment or the item
  * encoder.
  */
implicit def textDataEncoder(implicit
ae: Encoder[TextData.Attachment],
ie: Encoder[TextData.Item]
): Encoder[TextData] =
Encoder(_.fold(ae.apply, ie.apply))
/** Decodes a query response. The highlight snippets, which arrive
  * grouped by field name under each entry id, are flattened into one
  * list per entry id.
  */
implicit def ftsResultDecoder: Decoder[FtsResult] =
new Decoder[FtsResult] {
final def apply(c: HCursor): Decoder.Result[FtsResult] =
for {
qtime <- c.downField("responseHeader").get[Duration]("QTime")
count <- c.downField("response").get[Int]("numFound")
maxScore <- c.downField("response").get[Double]("maxScore")
results <- c.downField("response").get[List[FtsResult.ItemMatch]]("docs")
highlightng <- c.get[Map[Ident, Map[String, List[String]]]]("highlighting")
highlight = highlightng.map(kv => kv._1 -> kv._2.values.flatten.toList)
} yield FtsResult(qtime, count, maxScore, highlight, results)
}
/** Decodes one document of a query response into a match. */
implicit def decodeItemMatch: Decoder[FtsResult.ItemMatch] =
new Decoder[FtsResult.ItemMatch] {
final def apply(c: HCursor): Decoder.Result[FtsResult.ItemMatch] =
for {
itemId <- c.get[Ident](Field.itemId.name)
id <- c.get[Ident](Field.id.name)
coll <- c.get[Ident](Field.collectiveId.name)
score <- c.get[Double]("score")
md <- decodeMatchData(c)
} yield FtsResult.ItemMatch(id, itemId, coll, score, md)
}
/** Decodes the kind of match from the discriminator field: the value
  * "attachment" yields attachment data, every other value is treated
  * as an item match.
  */
def decodeMatchData: Decoder[FtsResult.MatchData] =
new Decoder[FtsResult.MatchData] {
final def apply(c: HCursor): Decoder.Result[FtsResult.MatchData] =
for {
disc <- c.get[String]("discriminator")
md <-
if ("attachment" == disc)
for {
aId <- c.get[Ident](Field.attachmentId.name)
aName <- c.get[String](Field.attachmentName.name)
} yield FtsResult.AttachmentData(aId, aName)
else Right(FtsResult.ItemData)
} yield md
}
// Allow `Ident` to be used as the key of a JSON object.
implicit def identKeyEncoder: KeyEncoder[Ident] =
new KeyEncoder[Ident] {
override def apply(ident: Ident): String = ident.id
}
implicit def identKeyDecoder: KeyDecoder[Ident] =
new KeyDecoder[Ident] {
override def apply(ident: String): Option[Ident] = Ident(ident).toOption
}
/** Encodes an attachment as a partial update: the document id plus a
  * `{"set": value}` object for the name and the text, each only if it
  * is defined. Not implicit; it is used through the `SetFields` encoder.
  */
def setAttachmentEncoder(implicit
enc: Encoder[Ident]
): Encoder[TextData.Attachment] =
new Encoder[TextData.Attachment] {
final def apply(td: TextData.Attachment): Json = {
val setter = List(
td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)),
td.text.map(txt =>
(Field.contentField(td.lang).name, Map("set" -> txt.asJson).asJson)
)
).flatten
Json.fromFields(
(Field.id.name, enc(td.id)) :: setter
)
}
}
/** Encodes an item as a partial update: the document id plus a
  * `{"set": value}` object for the name and the notes, each only if it
  * is defined.
  */
def setItemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] =
new Encoder[TextData.Item] {
final def apply(td: TextData.Item): Json = {
val setter = List(
td.name.map(n => (Field.itemName.name, Map("set" -> n.asJson).asJson)),
td.notes.map(n => (Field.itemNotes.name, Map("set" -> n.asJson).asJson))
).flatten
Json.fromFields(
(Field.id.name, enc(td.id)) :: setter
)
}
}
// NOTE(review): this shares its name with the `Encoder[TextData]`
// definition in this trait; the two are overloads that differ in their
// parameter lists.
/** Encodes the wrapped `TextData` as a partial update. */
implicit def textDataEncoder: Encoder[SetFields] =
Encoder(_.td.fold(setAttachmentEncoder.apply, setItemEncoder.apply))
}
/** Makes all codecs available through `import JsonCodec._`. */
object JsonCodec extends JsonCodec

View File

@ -0,0 +1,68 @@
package docspell.ftssolr
import io.circe._
import io.circe.generic.semiauto._
import docspell.ftsclient.FtsQuery
/** The data of a query request sent to SOLR. */
final case class QueryData(
    query: String,
    filter: String,
    limit: Int,
    offset: Int,
    fields: List[Field],
    params: Map[String, String]
) {

  /** The same query, moved forward by one page of `limit` results. */
  def nextPage: QueryData =
    copy(offset = offset + limit)

  /** Adds the parameters that turn on highlighting for the given fields,
    * using `pre` and `post` as markers around a match.
    */
  def withHighLight(fields: List[Field], pre: String, post: String): QueryData = {
    val highlightParams = Map(
      "hl"                   -> "on",
      "hl.requireFieldMatch" -> "true",
      "hl.fl"                -> fields.map(_.name).mkString(","),
      "hl.simple.pre"        -> pre,
      "hl.simple.post"       -> post
    )
    copy(params = params ++ highlightParams)
  }
}

object QueryData {

  implicit val jsonEncoder: Encoder[QueryData] =
    deriveEncoder[QueryData]

  /** Translates a [[FtsQuery]] into the request data.
    *
    * The query string is applied to each field in `search` and the
    * results are filtered by the collective and, if given, the item ids.
    * Highlighting is enabled for the `search` fields.
    */
  def apply(
      cfg: SolrConfig,
      search: List[Field],
      fields: List[Field],
      fq: FtsQuery
  ): QueryData = {
    val userQuery  = sanitize(fq.q)
    val fieldQuery = search.map(f => s"${f.name}:($userQuery)").mkString(" OR ")

    val collectiveFilter = s"""${Field.collectiveId.name}:"${fq.collective.id}""""
    val filterQuery =
      if (fq.items.isEmpty) collectiveFilter
      else {
        val itemIds = fq.items.map(_.id).mkString(" ")
        s"""$collectiveFilter AND ${Field.itemId.name}:($itemIds)"""
      }

    val base = QueryData(
      fieldQuery,
      filterQuery,
      fq.limit,
      fq.offset,
      fields,
      Map("defType" -> cfg.defType, "q.op" -> cfg.qOp)
    )
    base.withHighLight(search, fq.highlight.pre, fq.highlight.post)
  }

  // Replaces every run of parentheses and commas with a single space.
  private def sanitize(q: String): String =
    q.replaceAll("[\\(,\\)]+", " ")
}

View File

@ -0,0 +1,5 @@
package docspell.ftssolr
import docspell.ftsclient._
/** Wraps a `TextData` value so that it can be given a JSON encoding of
  * its own, separate from the encoding of a plain `TextData`.
  */
final case class SetFields(td: TextData)

View File

@ -0,0 +1,13 @@
package docspell.ftssolr
import docspell.common._
/** Settings of the SOLR full-text client.
  *
  * @param url the base url of SOLR
  * @param commitWithin NOTE(review): presumably the commit delay handed to
  *   SOLR on updates; its use and unit are not visible here — confirm
  * @param logVerbose whether the http logging also includes the bodies
  * @param defType sent as the `defType` parameter with every query
  * @param qOp sent as the `q.op` parameter with every query
  */
final case class SolrConfig(
    url: LenientUri,
    commitWithin: Int,
    logVerbose: Boolean,
    defType: String,
    qOp: String
)

object SolrConfig

View File

@ -0,0 +1,89 @@
package docspell.ftssolr
import fs2.Stream
import cats.effect._
import cats.implicits._
import org.http4s.client.Client
import org.http4s.client.middleware.Logger
import org.log4s.getLogger
import docspell.common._
import docspell.ftsclient._
/** The [[FtsClient]] backed by SOLR. Every operation is delegated to one
  * of the three given SOLR services.
  */
final class SolrFtsClient[F[_]: Effect](
    solrUpdate: SolrUpdate[F],
    solrSetup: SolrSetup[F],
    solrQuery: SolrQuery[F]
) extends FtsClient[F] {

  def initialize: F[Unit] =
    solrSetup.setupSchema

  def search(q: FtsQuery): F[FtsResult] =
    solrQuery.query(q)

  def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
    modifyIndex(logger, data)(solrUpdate.add)

  def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
    modifyIndex(logger, data)(solrUpdate.update)

  /** Feeds the data chunk by chunk to `f`. A chunk that fails is logged
    * as an error and does not stop the remaining chunks.
    */
  def modifyIndex(logger: Logger[F], data: Stream[F, TextData])(
      f: List[TextData] => F[Unit]
  ): F[Unit] = {
    def sendChunk(entries: List[TextData]): F[Unit] =
      f(entries).attempt.flatMap {
        case Right(()) => ().pure[F]
        case Left(ex)  => logger.error(ex)("Error updating with chunk of data")
      }

    logger.debug("Updating SOLR index") *>
      data.chunks.evalMap(chunk => sendChunk(chunk.toList)).compile.drain
  }

  def removeItem(logger: Logger[F], itemId: Ident): F[Unit] = {
    val deleteQuery = s"${Field.itemId.name}:${itemId.id}"
    logger.debug(s"Remove item '${itemId.id}' from index") *>
      solrUpdate.delete(deleteQuery, None)
  }

  def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] = {
    val deleteQuery = s"${Field.attachmentId.name}:${attachId.id}"
    logger.debug(s"Remove attachment '${attachId.id}' from index") *>
      solrUpdate.delete(deleteQuery, None)
  }

  def clearAll(logger: Logger[F]): F[Unit] =
    logger.info("Deleting complete full-text index!") *>
      solrUpdate.delete("*:*", Option(0))

  def clear(logger: Logger[F], collective: Ident): F[Unit] = {
    val deleteQuery = s"${Field.collectiveId.name}:${collective.id}"
    logger.info(s"Deleting full-text index for collective ${collective.id}") *>
      solrUpdate.delete(deleteQuery, Option(0))
  }
}
object SolrFtsClient {
  private[this] val logger = getLogger

  /** Creates a [[FtsClient]] that talks to the solr server given in `cfg`.
    *
    * The http client is wrapped in a logging middleware first. The
    * returned resource is pure: nothing is acquired or released here.
    */
  def apply[F[_]: ConcurrentEffect](
      cfg: SolrConfig,
      httpClient: Client[F]
  ): Resource[F, FtsClient[F]] = {
    val loggingClient = loggingMiddleware(cfg, httpClient)
    val ftsClient: FtsClient[F] =
      new SolrFtsClient(
        SolrUpdate(cfg, loggingClient),
        SolrSetup(cfg, loggingClient),
        SolrQuery(cfg, loggingClient)
      )

    Resource.pure[F, FtsClient[F]](ftsClient)
  }

  /** Wraps `client` so that requests and responses are written to this
    * object's logger at trace level. Headers are always logged, bodies
    * only when `cfg.logVerbose` is set.
    */
  private def loggingMiddleware[F[_]: Concurrent](
      cfg: SolrConfig,
      client: Client[F]
  ): Client[F] = {
    val traceLog: String => F[Unit] =
      msg => Sync[F].delay(logger.trace(msg))

    Logger(
      logHeaders = true,
      logBody = cfg.logVerbose,
      logAction = Some(traceLog)
    )(client)
  }
}

View File

@ -0,0 +1,60 @@
package docspell.ftssolr
import cats.effect._
import org.http4s._
import org.http4s.client.Client
import org.http4s.circe._
import org.http4s.circe.CirceEntityDecoder._
import org.http4s.client.dsl.Http4sClientDsl
import _root_.io.circe.syntax._
import docspell.ftsclient._
import JsonCodec._
/** Runs search requests against solr. */
trait SolrQuery[F[_]] {
// Sends the already prepared query data to solr and returns the
// decoded result.
def query(q: QueryData): F[FtsResult]
// Creates the query data from the given full-text query and runs it.
def query(q: FtsQuery): F[FtsResult]
}
object SolrQuery {

  /** Creates a [[SolrQuery]] that posts queries as JSON to the `query`
    * path below the url configured in `cfg`.
    */
  def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
    val dsl = new Http4sClientDsl[F] {}
    import dsl._

    new SolrQuery[F] {
      val url = Uri.unsafeFromString(cfg.url.asString) / "query"

      // The fields the query string is matched against.
      private val searchFields =
        List(
          Field.content,
          Field.content_de,
          Field.content_en,
          Field.itemName,
          Field.itemNotes,
          Field.attachmentName
        )

      // The field list handed to `QueryData`.
      // NOTE(review): presumably the fields solr returns per hit -- confirm.
      private val resultFields =
        List(
          Field.id,
          Field.itemId,
          Field.collectiveId,
          Field("score"),
          Field.attachmentId,
          Field.attachmentName,
          Field.discriminator
        )

      def query(q: QueryData): F[FtsResult] = {
        val req = Method.POST(q.asJson, url)
        client.expect[FtsResult](req)
      }

      def query(q: FtsQuery): F[FtsResult] =
        query(QueryData(cfg, searchFields, resultFields, q))
    }
  }
}

View File

@ -0,0 +1,121 @@
package docspell.ftssolr
import cats.effect._
import org.http4s._
import cats.implicits._
import org.http4s.client.Client
import org.http4s.circe._
import org.http4s.client.dsl.Http4sClientDsl
import _root_.io.circe.syntax._
import _root_.io.circe._
import _root_.io.circe.generic.semiauto._
import docspell.common._
/** Prepares the solr schema for docspell. */
trait SolrSetup[F[_]] {
// Creates all fields docspell needs. The implementation in the
// companion object first tries to delete each field (ignoring a
// failure) and then adds it again.
def setupSchema: F[Unit]
}
object SolrSetup {

  /** Creates a [[SolrSetup]] that sends schema commands as JSON to the
    * `schema` path below the url configured in `cfg`.
    */
  def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
    val dsl = new Http4sClientDsl[F] {}
    import dsl._

    new SolrSetup[F] {
      val url = (Uri.unsafeFromString(cfg.url.asString) / "schema")
        .withQueryParam("commitWithin", cfg.commitWithin.toString)

      def setupSchema: F[Unit] = {
        val stringFields =
          List(
            Field.id,
            Field.itemId,
            Field.collectiveId,
            Field.discriminator,
            Field.attachmentId
          ).traverse(addStringField)

        val textFields =
          List(
            Field.attachmentName,
            Field.content,
            Field.itemName,
            Field.itemNotes
          ).traverse(addTextField(None))

        // one additional content field per supported language
        val languageFields = Language.all.traverse {
          case l @ Language.German =>
            addTextField(l.some)(Field.content_de)
          case l @ Language.English =>
            addTextField(l.some)(Field.content_en)
        }

        (stringFields *> textFields *> languageFields).void
      }

      private def run(cmd: Json): F[Unit] = {
        val req = Method.POST(cmd, url)
        client.expect[Unit](req)
      }

      // Deletes the field (a failure is ignored, e.g. when the field
      // does not exist) and then adds it with the given definition.
      private def replaceField(field: Field, definition: AddField): F[Unit] =
        run(DeleteField.command(DeleteField(field))).attempt *>
          run(AddField.command(definition))

      private def addStringField(field: Field): F[Unit] =
        replaceField(field, AddField.string(field))

      private def addTextField(lang: Option[Language])(field: Field): F[Unit] = {
        val definition = lang match {
          case None                   => AddField.text(field)
          case Some(Language.German)  => AddField.textDE(field)
          case Some(Language.English) => AddField.textEN(field)
        }
        replaceField(field, definition)
      }
    }
  }

  // Schema Commands: The structure is for conveniently creating the
  // solr json. All fields must be stored, because of highlighting and
  // single-updates only work when all fields are stored.

  case class AddField(
      name: Field,
      `type`: String,
      stored: Boolean,
      indexed: Boolean,
      multiValued: Boolean
  )

  object AddField {
    implicit val encoder: Encoder[AddField] =
      deriveEncoder[AddField]

    /** Wraps the field definition into an `add-field` command. */
    def command(body: AddField): Json =
      Json.obj("add-field" -> body.asJson)

    def string(field: Field): AddField =
      singleValued(field, "string")

    def text(field: Field): AddField =
      singleValued(field, "text_general")

    def textDE(field: Field): AddField =
      singleValued(field, "text_de")

    def textEN(field: Field): AddField =
      singleValued(field, "text_en")

    // A stored, indexed and single-valued field of the given solr type.
    private def singleValued(field: Field, solrType: String): AddField =
      AddField(field, solrType, stored = true, indexed = true, multiValued = false)
  }

  case class DeleteField(name: Field)

  object DeleteField {
    implicit val encoder: Encoder[DeleteField] =
      deriveEncoder[DeleteField]

    /** Wraps the field reference into a `delete-field` command. */
    def command(body: DeleteField): Json =
      Json.obj("delete-field" -> body.asJson)
  }
}

View File

@ -0,0 +1,78 @@
package docspell.ftssolr
import cats.effect._
import org.http4s._
import org.http4s.client.Client
import org.http4s.circe._
import org.http4s.client.dsl.Http4sClientDsl
import _root_.io.circe._
import _root_.io.circe.syntax._
import docspell.ftsclient._
import JsonCodec._
/** Modifies the data in the solr index. */
trait SolrUpdate[F[_]] {
// Posts the given data to solr's `update` endpoint.
def add(tds: List[TextData]): F[Unit]
// Posts only those elements that carry at least one changed value,
// each wrapped in `SetFields`.
def update(tds: List[TextData]): F[Unit]
// Deletes everything matching the query `q`. With `commitWithin` being
// `None` the configured value is used, a value <= 0 requests an
// immediate commit and any other value replaces the configured one.
def delete(q: String, commitWithin: Option[Int]): F[Unit]
}
object SolrUpdate {

  /** Creates a [[SolrUpdate]] that posts JSON to the `update` path below
    * the url configured in `cfg`.
    */
  def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = {
    val dsl = new Http4sClientDsl[F] {}
    import dsl._

    new SolrUpdate[F] {
      val url = (Uri.unsafeFromString(cfg.url.asString) / "update")
        .withQueryParam("commitWithin", cfg.commitWithin.toString)
        .withQueryParam("overwrite", "true")
        .withQueryParam("wt", "json")

      def add(tds: List[TextData]): F[Unit] = {
        val req = Method.POST(tds.asJson, url)
        client.expect[Unit](req)
      }

      def update(tds: List[TextData]): F[Unit] = {
        val changes = tds.filter(hasChange).map(SetFields)
        val req     = Method.POST(changes.asJson, url)
        client.expect[Unit](req)
      }

      def delete(q: String, commitWithin: Option[Int]): F[Unit] = {
        val uri = commitWithin match {
          case Some(n) if n <= 0 =>
            // commit immediately instead of within some time span
            url.removeQueryParam("commitWithin").withQueryParam("commit", "true")
          case Some(n) =>
            url.withQueryParam("commitWithin", n.toString)
          case None =>
            url
        }
        val req = Method.POST(Delete(q).asJson, uri)
        client.expect[Unit](req)
      }

      // True, if at least one of the optional values is present.
      private def hasChange(td: TextData): Boolean =
        td match {
          case att: TextData.Attachment =>
            att.name.isDefined || att.text.isDefined
          case item: TextData.Item =>
            item.name.isDefined || item.notes.isDefined
        }
    }
  }

  case class Delete(query: String)

  object Delete {
    // Encodes to `{"delete": {"query": <query>}}`.
    implicit val jsonEncoder: Encoder[Delete] =
      Encoder.instance(d => Json.obj("delete" -> Json.obj("query" -> d.query.asJson)))
  }
}

View File

@ -364,4 +364,38 @@ docspell.joex {
# By default all files are allowed.
valid-mime-types = [ ]
}
# Configuration of the full-text search engine.
full-text-search {
# The full-text search feature can be disabled. It requires an
# additional index server which needs additional memory and disk
# space. It can be enabled later any time.
#
# Currently the SOLR search platform is supported.
enabled = false
# Configuration for the SOLR backend.
solr = {
# The URL to solr
url = "http://localhost:8983/solr/docspell"
# Used to tell solr when to commit the data. It is sent as solr's
# `commitWithin` parameter (a time span in milliseconds).
commit-within = 1000
# If true, logs request and response bodies
log-verbose = false
# The defType parameter to lucene that defines the parser to
# use. You might want to try "edismax" or look here:
# https://lucene.apache.org/solr/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing
def-type = "lucene"
# The default combiner for tokens. One of {AND, OR}.
q-op = "OR"
}
# Settings for running the index migration tasks
migration = {
# Chunk size to use when indexing data from the database. This
# many attachments are loaded into memory and pushed to the
# full-text index.
index-all-chunk = 10
}
}
}

View File

@ -8,6 +8,7 @@ import docspell.convert.ConvertConfig
import docspell.extract.ExtractConfig
import docspell.joex.hk.HouseKeepingConfig
import docspell.backend.Config.Files
import docspell.ftssolr.SolrConfig
case class Config(
appId: Ident,
@ -23,7 +24,8 @@ case class Config(
convert: ConvertConfig,
sendMail: MailSendConfig,
files: Files,
mailDebug: Boolean
mailDebug: Boolean,
fullTextSearch: Config.FullTextSearch
)
object Config {
@ -34,4 +36,15 @@ object Config {
math.min(mailChunkSize, maxMails)
}
case class UserTasks(scanMailbox: ScanMailbox)
// Configuration of the full-text search engine (config section
// `full-text-search`).
//
// - enabled: whether the full-text search feature is used at all
// - migration: settings for running the index migration tasks
// - solr: configuration for the SOLR backend
case class FullTextSearch(
enabled: Boolean,
migration: FullTextSearch.Migration,
solr: SolrConfig
)
object FullTextSearch {
// indexAllChunk: chunk size to use when indexing data from the
// database; this many attachments are loaded into memory and pushed
// to the full-text index.
final case class Migration(indexAllChunk: Int)
}
}

View File

@ -3,10 +3,15 @@ package docspell.joex
import cats.implicits._
import cats.effect._
import emil.javamail._
import fs2.concurrent.SignallingRef
import scala.concurrent.ExecutionContext
import org.http4s.client.Client
import org.http4s.client.blaze.BlazeClientBuilder
import docspell.common._
import docspell.backend.ops._
import docspell.joex.hk._
import docspell.joex.notify._
import docspell.joex.fts.{MigrationTask, ReIndexTask}
import docspell.joex.scanmailbox._
import docspell.joex.process.ItemHandler
import docspell.joex.scheduler._
@ -14,13 +19,14 @@ import docspell.joexapi.client.JoexClient
import docspell.store.Store
import docspell.store.queue._
import docspell.store.records.RJobLog
import fs2.concurrent.SignallingRef
import scala.concurrent.ExecutionContext
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
cfg: Config,
nodeOps: ONode[F],
store: Store[F],
queue: JobQueue[F],
pstore: PeriodicTaskStore[F],
termSignal: SignallingRef[F, Boolean],
val scheduler: Scheduler[F],
@ -50,7 +56,10 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
periodicScheduler.shutdown *> scheduler.shutdown(false) *> termSignal.set(true)
private def scheduleBackgroundTasks: F[Unit] =
HouseKeepingTask.periodicTask[F](cfg.houseKeeping.schedule).flatMap(pstore.insert)
HouseKeepingTask
.periodicTask[F](cfg.houseKeeping.schedule)
.flatMap(pstore.insert) *>
MigrationTask.job.flatMap(queue.insertIfNew)
}
object JoexAppImpl {
@ -63,13 +72,15 @@ object JoexAppImpl {
blocker: Blocker
): Resource[F, JoexApp[F]] =
for {
client <- JoexClient.resource(clientEC)
httpClient <- BlazeClientBuilder[F](clientEC).resource
client = JoexClient(httpClient)
store <- Store.create(cfg.jdbc, connectEC, blocker)
queue <- JobQueue(store)
pstore <- PeriodicTaskStore.create(store)
nodeOps <- ONode(store)
joex <- OJoex(client, store)
upload <- OUpload(store, queue, cfg.files, joex)
fts <- createFtsClient(cfg)(httpClient)
javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
@ -77,7 +88,7 @@ object JoexAppImpl {
.withTask(
JobTask.json(
ProcessItemArgs.taskName,
ItemHandler.newItem[F](cfg),
ItemHandler.newItem[F](cfg, fts),
ItemHandler.onCancel[F]
)
)
@ -95,6 +106,20 @@ object JoexAppImpl {
ScanMailboxTask.onCancel[F]
)
)
.withTask(
JobTask.json(
MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, fts),
MigrationTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, fts),
ReIndexTask.onCancel[F]
)
)
.withTask(
JobTask.json(
HouseKeepingTask.taskName,
@ -111,7 +136,13 @@ object JoexAppImpl {
client,
Timer[F]
)
app = new JoexAppImpl(cfg, nodeOps, store, pstore, termSignal, sch, psch)
app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR
// Chooses the full-text client: the solr client when full-text search
// is enabled in the config, otherwise `FtsClient.none`.
private def createFtsClient[F[_]: ConcurrentEffect: ContextShift](
    cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] = {
  val ftsCfg = cfg.fullTextSearch

  if (!ftsCfg.enabled) Resource.pure[F, FtsClient[F]](FtsClient.none[F])
  else SolrFtsClient(ftsCfg.solr, client)
}
}

View File

@ -0,0 +1,24 @@
package docspell.joex.fts
import docspell.common.Logger
import docspell.joex.Config
import docspell.joex.scheduler.Context
import docspell.store.Store
import docspell.ftsclient.FtsClient
/** Everything a `FtsWork` gets to work with: the full-text config, the
  * store, the full-text client and a logger.
  */
case class FtsContext[F[_]](
    cfg: Config.FullTextSearch,
    store: Store[F],
    fts: FtsClient[F],
    logger: Logger[F]
)

object FtsContext {

  /** Creates the context for a running job; store and logger are taken
    * from the job's scheduler context.
    */
  def apply[F[_]](
      cfg: Config.FullTextSearch,
      fts: FtsClient[F],
      ctx: Context[F, _]
  ): FtsContext[F] =
    new FtsContext[F](cfg, ctx.store, fts, ctx.logger)
}

View File

@ -0,0 +1,93 @@
package docspell.joex.fts
import cats.effect._
import cats.data.{Kleisli, NonEmptyList}
import cats.{ApplicativeError, FlatMap, Semigroup}
import cats.implicits._
import docspell.common._
import docspell.ftsclient._
import docspell.joex.scheduler.Context
import docspell.joex.Config
import docspell.store.queries.{QAttachment, QItem}
/** Constructors and combinators for `FtsWork` values, which are
  * functions from a [[FtsContext]] to an effect returning `Unit`.
  */
object FtsWork {

  /** Lifts a function on the context into a `FtsWork`. */
  def apply[F[_]](f: FtsContext[F] => F[Unit]): FtsWork[F] =
    Kleisli(f)

  /** Runs all given works one after another, in argument order. */
  def all[F[_]: FlatMap](
      m0: FtsWork[F],
      mn: FtsWork[F]*
  ): FtsWork[F] =
    NonEmptyList.of(m0, mn: _*).reduce(semigroup[F])

  /** Combines two works by running the first and then the second. */
  implicit def semigroup[F[_]: FlatMap]: Semigroup[FtsWork[F]] =
    Semigroup.instance((mt1, mt2) => mt1.flatMap(_ => mt2))

  // some tasks

  /** A work that only uses the logger of the context. */
  def log[F[_]](f: Logger[F] => F[Unit]): FtsWork[F] =
    FtsWork(ctx => f(ctx.logger))

  /** Calls `initialize` on the full-text client. */
  def initialize[F[_]]: FtsWork[F] =
    FtsWork(_.fts.initialize)

  /** Clears the index of the given collective, or the complete index if
    * no collective is given.
    */
  def clearIndex[F[_]](coll: Option[Ident]): FtsWork[F] =
    coll match {
      case Some(cid) =>
        FtsWork(ctx => ctx.fts.clear(ctx.logger, cid))
      case None =>
        FtsWork(ctx => ctx.fts.clearAll(ctx.logger))
    }

  /** Loads attachment texts and then item names/notes from the database
    * and sends them to the full-text index. With a collective given, the
    * database queries are restricted to it.
    */
  def insertAll[F[_]: Effect](coll: Option[Ident]): FtsWork[F] =
    FtsWork
      .all(
        FtsWork(ctx =>
          ctx.fts.indexData(
            ctx.logger,
            ctx.store
              .transact(
                QAttachment
                  .allAttachmentMetaAndName(coll, ctx.cfg.migration.indexAllChunk)
              )
              .map(caa =>
                TextData
                  .attachment(
                    caa.item,
                    caa.id,
                    caa.collective,
                    caa.lang,
                    caa.name,
                    caa.content
                  )
              )
          )
        ),
        FtsWork(ctx =>
          ctx.fts.indexData(
            ctx.logger,
            ctx.store
              // items only carry name and notes, so a larger chunk is used
              .transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5))
              .map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes))
          )
        )
      )

  object syntax {
    implicit final class FtsWorkOps[F[_]](mt: FtsWork[F]) {

      /** Runs this work and then `mn`. */
      def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] =
        all(mt, mn)

      /** Runs this work and, if it fails, runs `other` instead.
        *
        * The error of this work is discarded, so the combined work
        * succeeds whenever `other` succeeds. This uses `handleErrorWith`
        * and not `onError`: the latter runs its callback but re-raises
        * the original error afterwards, which would still abort every
        * work chained after this one.
        */
      def recoverWith(
          other: FtsWork[F]
      )(implicit ev: ApplicativeError[F, Throwable]): FtsWork[F] =
        Kleisli(ctx => mt.run(ctx).handleErrorWith(_ => other.run(ctx)))

      /** Adapts this work to run with a scheduler context; the
        * [[FtsContext]] is created from it and the given arguments.
        */
      def forContext(
          cfg: Config.FullTextSearch,
          fts: FtsClient[F]
      ): Kleisli[F, Context[F, _], Unit] =
        mt.local(ctx => FtsContext(cfg, fts, ctx))
    }
  }
}

View File

@ -0,0 +1,66 @@
package docspell.joex.fts
import cats.effect._
import cats.implicits._
import cats.data.{Kleisli, OptionT}
import cats.Traverse
import docspell.common._
import docspell.joex.Config
import docspell.store.{AddResult, Store}
import docspell.store.records.RFtsMigration
import docspell.ftsclient._
/** A single change to apply to the full-text index.
  *
  * @param version     together with `engine`, used to look up whether
  *                    this migration has already been recorded
  * @param engine      the id of the full-text engine this applies to
  * @param description a short description, used in log messages and
  *                    stored with the migration record
  * @param task        the work to run
  */
case class Migration[F[_]](
version: Int,
engine: Ident,
description: String,
task: FtsWork[F]
)
object Migration {

  /** Creates a function that applies a list of migrations in order,
    * using a [[FtsContext]] made of the given arguments.
    */
  def apply[F[_]: Effect](
      cfg: Config.FullTextSearch,
      fts: FtsClient[F],
      store: Store[F],
      logger: Logger[F]
  ): Kleisli[F, List[Migration[F]], Unit] = {
    val ctx = FtsContext(cfg, store, fts, logger)
    Kleisli(migs => Traverse[List].traverse(migs)(applySingle[F](ctx)).void)
  }

  /** Applies one migration unless it is already recorded in the database.
    *
    * The record is inserted before the task runs. If the task fails, the
    * error is logged, the record is deleted again and the error is
    * re-raised.
    */
  def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] = {
    // Yields the new record, or None if one exists for version and engine.
    val insertRecord: F[Option[RFtsMigration]] =
      for {
        rec <- RFtsMigration.create(m.version, m.engine, m.description)
        res <- ctx.store.add(
          RFtsMigration.insert(rec),
          RFtsMigration.exists(m.version, m.engine)
        )
        ret <- res match {
          case AddResult.Success         => rec.some.pure[F]
          case AddResult.EntityExists(_) => None.pure[F]
          case AddResult.Failure(ex)     => Effect[F].raiseError(ex)
        }
      } yield ret

    def runTask(rec: RFtsMigration): F[Unit] =
      m.task.run(ctx).attempt.flatMap {
        case Right(()) =>
          ().pure[F]
        case Left(ex) =>
          ctx.logger.error(ex)(
            s"Applying index migration ${m.version}/${m.description} failed"
          ) *>
            ctx.store.transact(RFtsMigration.deleteById(rec.id)) *>
            Effect[F].raiseError[Unit](ex)
      }

    OptionT
      .liftF(ctx.logger.info(s"Apply ${m.version}/${m.description}"))
      .flatMap(_ => OptionT(insertRecord))
      .semiflatMap(runTask)
      .getOrElseF(
        ctx.logger.info(s"Migration ${m.version}/${m.description} already applied.")
      )
  }
}

View File

@ -0,0 +1,53 @@
package docspell.joex.fts
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.ftsclient._
import docspell.store.records.RJob
/** The job that applies all pending full-text index migrations. */
object MigrationTask {
  val taskName = Ident.unsafe("full-text-index")

  private val solrEngine = Ident.unsafe("solr")

  /** The task: logs a message and then applies all `migrationTasks`. */
  def apply[F[_]: ConcurrentEffect](
      cfg: Config.FullTextSearch,
      fts: FtsClient[F]
  ): Task[F, Unit, Unit] =
    Task
      .log[F, Unit](_.info(s"Running full-text-index migrations now"))
      .flatMap { _ =>
        Task { ctx =>
          val applyAll = Migration[F](cfg, fts, ctx.store, ctx.logger)
          applyAll.run(migrationTasks[F])
        }
      }

  def onCancel[F[_]: Sync]: Task[F, Unit, Unit] =
    Task.log[F, Unit](_.warn("Cancelling full-text-index task"))

  /** Creates a new low-priority job in the system task group that runs
    * this task.
    */
  def job[F[_]: Sync]: F[RJob] =
    Ident.randomId[F].flatMap { id =>
      Timestamp.current[F].map { now =>
        RJob.newJob(
          id,
          taskName,
          DocspellSystem.taskGroup,
          (),
          "Create full-text index",
          now,
          DocspellSystem.taskGroup,
          Priority.Low,
          Some(DocspellSystem.migrationTaskTracker)
        )
      }
    }

  /** All known migrations, in the order they are applied. */
  def migrationTasks[F[_]: Effect]: List[Migration[F]] =
    List(
      Migration[F](1, solrEngine, "initialize", FtsWork.initialize[F]),
      Migration[F](2, solrEngine, "Index all from database", FtsWork.insertAll[F](None))
    )
}

View File

@ -0,0 +1,54 @@
package docspell.joex.fts
import cats.effect._
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.ftsclient._
import FtsWork.syntax._
/** The job that clears the full-text index and fills it again with the
  * data from the database, for one collective or for everything.
  */
object ReIndexTask {
  type Args = ReIndexTaskArgs

  val taskName = ReIndexTaskArgs.taskName
  val tracker  = DocspellSystem.migrationTaskTracker

  def apply[F[_]: ConcurrentEffect](
      cfg: Config.FullTextSearch,
      fts: FtsClient[F]
  ): Task[F, Args, Unit] =
    Task
      .log[F, Args](_.info(s"Running full-text re-index now"))
      .flatMap { _ =>
        Task { ctx =>
          val collective = ctx.args.collective
          val work =
            clearData[F](collective) ++
              FtsWork.log[F](_.info("Inserting data from database")) ++
              FtsWork.insertAll[F](collective)

          work.forContext(cfg, fts).run(ctx)
        }
      }

  def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
    Task.log[F, Args](_.warn("Cancelling full-text re-index task"))

  // Clears the index data of the collective. When no collective is
  // given, the complete index is cleared and `initialize` is run
  // afterwards. The clearing is wrapped in `recoverWith` with a work
  // that logs that clearing failed.
  private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] = {
    val clearing =
      FtsWork.log[F](_.info("Clearing index data")) ++
        FtsWork
          .clearIndex[F](collective)
          .recoverWith(
            FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
          )

    collective match {
      case Some(_) =>
        clearing
      case None =>
        clearing ++
          FtsWork.log[F](_.info("Running index initialize")) ++
          FtsWork.initialize[F]
    }
  }
}
}

View File

@ -0,0 +1,9 @@
package docspell.joex
import cats.data.Kleisli
package object fts {
// Some work on the full-text index: a function from a `FtsContext` to
// an effect that returns no value.
type FtsWork[F[_]] = Kleisli[F, FtsContext[F], Unit]
}

View File

@ -11,7 +11,6 @@ import docspell.store.records._
object HouseKeepingTask {
private val periodicId = Ident.unsafe("docspell-houskeeping")
val systemGroup: Ident = Ident.unsafe("docspell-system")
val taskName: Ident = Ident.unsafe("housekeeping")
@ -29,10 +28,10 @@ object HouseKeepingTask {
.createJson(
true,
taskName,
systemGroup,
DocspellSystem.taskGroup,
(),
"Docspell house-keeping",
systemGroup,
DocspellSystem.taskGroup,
Priority.Low,
ce
)

View File

@ -8,6 +8,7 @@ import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.store.queries.QItem
import docspell.store.records.RItem
import docspell.ftsclient.FtsClient
object ItemHandler {
def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
@ -16,11 +17,12 @@ object ItemHandler {
)
def newItem[F[_]: ConcurrentEffect: ContextShift](
cfg: Config
cfg: Config,
fts: FtsClient[F]
): Task[F, ProcessItemArgs, Unit] =
CreateItem[F]
.flatMap(itemStateTask(ItemState.Processing))
.flatMap(safeProcess[F](cfg))
.flatMap(safeProcess[F](cfg, fts))
.map(_ => ())
def itemStateTask[F[_]: Sync, A](
@ -36,11 +38,12 @@ object ItemHandler {
Task(_.isLastRetry)
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
cfg: Config
cfg: Config,
fts: FtsClient[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
isLastRetry[F].flatMap {
case true =>
ProcessItem[F](cfg)(data).attempt.flatMap({
ProcessItem[F](cfg, fts)(data).attempt.flatMap({
case Right(d) =>
Task.pure(d)
case Left(ex) =>
@ -50,7 +53,7 @@ object ItemHandler {
.andThen(_ => Sync[F].raiseError(ex))
})
case false =>
ProcessItem[F](cfg)(data).flatMap(itemStateTask(ItemState.Created))
ProcessItem[F](cfg, fts)(data).flatMap(itemStateTask(ItemState.Created))
}
def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =

View File

@ -5,17 +5,19 @@ import docspell.common.ProcessItemArgs
import docspell.analysis.TextAnalysisConfig
import docspell.joex.scheduler.Task
import docspell.joex.Config
import docspell.ftsclient.FtsClient
object ProcessItem {
def apply[F[_]: ConcurrentEffect: ContextShift](
cfg: Config
cfg: Config,
fts: FtsClient[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
ExtractArchive(item)
.flatMap(Task.setProgress(20))
.flatMap(ConvertPdf(cfg.convert, _))
.flatMap(Task.setProgress(40))
.flatMap(TextExtraction(cfg.extraction, _))
.flatMap(TextExtraction(cfg.extraction, fts))
.flatMap(Task.setProgress(60))
.flatMap(analysisOnly[F](cfg.textAnalysis))
.flatMap(Task.setProgress(80))

View File

@ -3,17 +3,17 @@ package docspell.joex.process
import bitpeace.{Mimetype, RangeDef}
import cats.data.OptionT
import cats.implicits._
import cats.effect.{ContextShift, Sync}
import cats.effect._
import docspell.common._
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
import docspell.store.syntax.MimeTypes._
import docspell.ftsclient.{FtsClient, TextData}
object TextExtraction {
def apply[F[_]: Sync: ContextShift](
cfg: ExtractConfig,
def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -21,28 +21,52 @@ object TextExtraction {
_ <- ctx.logger.info("Starting text extraction")
start <- Duration.stopTime[F]
txt <- item.attachments.traverse(
extractTextIfEmpty(ctx, cfg, ctx.args.meta.language, item)
extractTextIfEmpty(
ctx,
cfg,
ctx.args.meta.language,
ctx.args.meta.collective,
item
)
)
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm)))
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1)))
idxItem =
TextData.item(item.item.id, ctx.args.meta.collective, item.item.name.some, None)
_ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*)
dur <- start
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
} yield item.copy(metas = txt)
} yield item.copy(metas = txt.map(_._1))
}
def extractTextIfEmpty[F[_]: Sync: ContextShift](
ctx: Context[F, _],
cfg: ExtractConfig,
lang: Language,
collective: Ident,
item: ItemData
)(ra: RAttachment): F[RAttachmentMeta] = {
)(ra: RAttachment): F[(RAttachmentMeta, TextData)] = {
def makeTextData(rm: RAttachmentMeta): (RAttachmentMeta, TextData) =
(
rm,
TextData.attachment(
item.item.id,
ra.id,
collective,
lang,
ra.name,
rm.content
)
)
val rm = item.findOrCreate(ra.id)
rm.content match {
case Some(_) =>
ctx.logger.info("TextExtraction skipped, since text is already available.") *>
rm.pure[F]
makeTextData(rm).pure[F]
case None =>
extractTextToMeta[F](ctx, cfg, lang, item)(ra)
.map(makeTextData)
}
}

View File

@ -6,6 +6,17 @@ permalink: demo
# {{ page.title }}
## Finding Items
<video width="100%" controls>
<source src="../static/docspell-search-2020-06-24.webm" type="video/webm">
Your browser does not support the video tag.
</video>
<img width="100%" src="img/docspell-demo.gif" title="Demo">
## Basic Idea (First Version)
<video width="100%" controls>
<source src="../static/docspell-demo.webm" type="video/webm">
Your browser does not support the video tag.
</video>

View File

@ -0,0 +1,49 @@
---
layout: docs
title: Fulltext Search Engine
---
# Choose a Fulltext Search Engine
It should be possible to search the contents of all documents.
## Context and Problem Statement
To allow searching the documents contents efficiently, a separate
index is necessary. The "defacto standard" for fulltext search on the
JVM is something backed by [Lucene](https://lucene.apache.org).
Another option is to use a RDBMS that supports fulltext search.
This adds another component to the mix, which increases the complexity
of the setup and the software. Since docspell works great without this
feature, it shouldn't have a huge impact on the application, i.e. if
the fulltext search component is down or broken, docspell should still
work (just the fulltext search is then not working).
## Considered Options
* [Apache SOLR](https://lucene.apache.org/solr)
* [ElasticSearch](https://www.elastic.co/elasticsearch/)
* [PostgreSQL](https://www.postgresql.org/docs/12/textsearch.html)
* All of them or a subset
## Decision Outcome
If docspell is running on PostgreSQL, it would be nice to also use it
for fulltext search to save the cost of running another component. But
I don't want to lock the database to PostgreSQL *only* because of the
fulltext search feature.
ElasticSearch and Apache SOLR are quite similar in features. SOLR is
part of Lucene and therefore lives in the Apache ecosystem. I would
choose SOLR over ElasticSearch, because I used it before.
The last option (supporting all) is interesting, since it would allow
those who use PostgreSQL as the database for docspell to also use it
for fulltext search.
In a first step, identify what docspell needs from a fulltext search
component and create this interface and an implementation for Apache
SOLR. This enables all users to use the fulltext search feature. As a
later step, an implementation based on PostgreSQL and/or ElasticSearch
could be provided, too.

View File

@ -72,6 +72,68 @@ H2
url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE"
```
### Full-Text Search: SOLR
[Apache SOLR](https://lucene.apache.org/solr) is used to provide the
full-text search. Both docspell components must provide the same
connection setup. This is defined in the `full-text-search.solr`
subsection:
```
...
full-text-search {
enabled = true
...
solr = {
url = "http://localhost:8983/solr/docspell"
}
}
```
The default configuration at the end of this page contains more
information about each setting.
The `solr.url` is the mandatory setting that you need to change to
point to your SOLR instance. Then you need to set the `enabled` flag
to `true`.
When installing docspell manually, just install solr and create a core
as described in the [solr
documentation](https://lucene.apache.org/solr/guide/8_4/installing-solr.html).
That will provide you with the connection url (the last part is the
core name).
While the `full-text-search.solr` options are the same for joex and
the restserver, there are some settings that differ. The restserver
has this additional setting, that may be of interest:
```
full-text-search {
recreate-key = "test123"
}
```
This key is required if you want docspell to drop and re-create the
entire index. This is possible via a REST call:
``` shell
$ curl -XPOST http://localhost:7880/api/v1/open/fts/reIndexAll/test123
```
Here the `test123` is the key defined with `recreate-key`. If it is
empty (the default), this REST call is disabled. Otherwise, the POST
request will submit a system task that is executed by a joex instance
eventually.
Using this endpoint, the index will be re-created. This is sometimes
necessary, for example if you upgrade SOLR or delete the core to
provide a new one (see
[here](https://lucene.apache.org/solr/guide/8_4/reindexing.html) for
details). Note that a collective can also re-index their data using a
similar endpoint; but this only clears and re-indexes the data of that
collective and doesn't re-create the entire index.
### Bind
The host and port the http server binds to. This applies to both

View File

@ -1,6 +1,6 @@
---
layout: docs
title: Find and Review
title: Curate Metadata
permalink: doc/curate
---

View File

@ -6,9 +6,9 @@ permalink: doc/finding
# {{ page.title }}
Items can be searched by their annotated meta data. The landing page
shows a list of current items. Items are displayed sorted by their
date, newest first.
Items can be searched by their annotated meta data and their contents
using full text search. The landing page shows a list of current
items. Items are displayed sorted by their date, newest first.
Docspell has two modes for searching: a simple search bar and a search
menu with many options. Both are active at the same time, but only one
@ -19,32 +19,51 @@ is visible. You can switch between them without affecting the results.
<img style="float:right;" src="../img/search-bar.png" height="50">
By default, the search bar is shown. It searches in the name
properties of the following meta data:
By default, the search bar is shown. It provides a refined view of the
search menu. The dropdown contains different options to do a quick
search.
- the item name
- the notes
- correspondent organization and person
- concerning person and equipment
### *All Names* and *Contents*
A wildcard `*` can be used at the start or end of a search term to do
a substring match. A `*` means "everything". So a term `*company`
matches all names ending in `company` and `*company*` matches all
names containing the word `company`. The matching is case insensitive.
These two options correspond to the same named field in the search
menu. If you switch between search menu and search bar (by clicking
the icon on the left), you'll see that they are the same fields.
Typing in the search bar also fills the corresponding field in the
search menu (and vice versa).
Docspell adds a `*` to the front and end of a term automatically,
unless one of the following is true:
- The *All Names* searches in the item name, item notes, names of
correspondent organization and person, and names of concerning person
and equipment. It uses a simple substring search.
- The option *Contents* searches the contents of all attachments
(documents), attachment names, the item name and item notes. It uses
full text search. However, it does not search the names of attached
meta data.
- The term already has a wildcard.
- The term is enclosed in quotes `"`.
When searching with one of these fields active, it simply submits the
(hidden) search menu. So if the menu has other fields filled out, they
will affect the result, too. Using one of these fields, the bar is
just a reduced view of the search menu.
You can go to the search menu by clicking the left icon in the search
bar.
So you can choose tags or correspondents in the search menu and
further restrict the results using full text search. The results will
be returned sorted by the item date, newest first.
If the search bar shows a little blue bubble, it means that there are
more search fields filled out in the search menu. In this case the
results are not only restricted by the search term given in the
search-bar, but also by what is specified in the search menu.
If the left button in the search bar shows a little blue bubble, it
means that there are more search fields filled out in the search menu
that you currently can't see. In this case the results are not only
restricted by the search term given in the search-bar, but also by
what is specified in the search menu.
### *Contents Only*
This option has no corresponding part in the search menu. Searching
with this option active, there is only a full text search done in the
attachments contents, attachment names, item name and item notes.
The results are not ordered by item date, but by relevance with
respect to the search term. This ordering is returned from the full
text search engine and is simply transferred unmodified.
## Search Menu
@ -104,9 +123,61 @@ within this range. Items without a due date are not shown.
Specify whether to show only incoming, only outgoing or all items.
## Customize Substring Search
The substring search of the *All Names* and *Name* field can be
customized in the following way: A wildcard `*` can be used at the
start or end of a search term to do a substring match. A `*` means
"everything". So a term `*company` matches all names ending in
`company` and `*company*` matches all names containing the word
`company`. The matching is case insensitive.
Docspell adds a `*` to the front and end of a term automatically,
unless one of the following is true:
- The term already has a wildcard.
- The term is enclosed in quotes `"`.
## Full Text Search
### The Query
The query string for full text search is very powerful. Docspell
currently supports [Apache SOLR](https://lucene.apache.org/solr/) as
full text search backend, so you may want to have a look at their
[documentation on query
syntax](https://lucene.apache.org/solr/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing)
for an in-depth guide.
- Wildcards: `?` matches any single character, `*` matches zero or
more characters
- Fuzzy search: Appending a `~` to a term results in a fuzzy search
(search this term and similarly spelled ones)
- Proximity Search: Search for terms that are "near" each other, again
using `~` appended to a search phrase. Example: `"cheese cake"~5`.
- Boosting: apply more weight to a term with `^`. Example: `cheese^4
cake` cheese is 4x more important.
Docspell will preprocess the search query to prepare a query for SOLR.
It will by default search all indexed fields, which are: attachment
contents, attachment names, item name and item notes.
### The Results
When using full text search, each item in the result list is annotated
with the highlighted occurrence of the match.
<div class="thumbnail">
<img src="../img/search-content-results.png">
</div>
## Screencast
<video width="100%" controls>
<source src="../static/docspell-search-2020-06-13.webm" type="video/webm">
<source src="../static/docspell-search-2020-06-24.webm" type="video/webm">
Your browser does not support the video tag.
</video>

View File

@ -11,13 +11,18 @@ permalink: features
account)
- Handle multiple documents as one unit
- OCR using [tesseract](https://github.com/tesseract-ocr/tesseract)
- [Full-Text Search](doc/finding#full-text-search) based on [Apache
SOLR](https://lucene.apache.org/solr)
- Conversion to PDF: all files are converted into a PDF file
- Non-destructive: all your uploaded files are never modified and can
always be downloaded untouched
- Text is analysed to find and attach meta data automatically
- [Manage document processing](doc/processing): cancel jobs, set
priorities
- Everything available via a documented [REST Api](api)
- Everything available via a [documented](https://www.openapis.org/)
[REST Api](api); allows to [generate
clients](https://openapi-generator.tech/docs/generators) for
(almost) any language
- mobile-friendly Web-UI
- [Create “share-urls”](doc/uploading#anonymous-upload) to upload files
anonymously
@ -28,6 +33,8 @@ permalink: features
- REST server and document processing are separate applications which
can be scaled-out independently
- Everything stored in a SQL database: PostgreSQL, MariaDB or H2
- H2 is embedded, a "one-file-only" database, avoids installing db
servers
- Files supported:
- Documents:
- PDF
@ -55,8 +62,7 @@ permalink: features
# Limitations
These are current known limitations that may be of interest for
considering docspell at the moment. Hopefully they will be resolved
eventually….
considering docspell at the moment.
- No fulltext search implemented. This is now planned for an upcoming
release.
- Documents cannot be modified.
- You can remove and add documents but there is no versioning.

View File

@ -33,15 +33,15 @@ options:
- title: Finding Items
url: doc/finding
- title: Curate Metadata
url: doc/curate
- title: Uploads
url: doc/uploading
- title: Processing Queue
url: doc/processing
- title: Find and Review
url: doc/curate
- title: E-Mail Settings
url: doc/emailsettings

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 42 KiB

View File

@ -151,6 +151,46 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/open/fts/reIndexAll/{id}:
post:
tags: [Full-Text Index]
summary: Re-creates the full-text index.
description: |
Clears the full-text index and inserts all data from the
database. This might take a while to complete. The response
returns immediately. A task is submitted that will be executed
by a job executor. Note that this affects all data of all
collectives.
The `id` is required and refers to the key given in the config
file to ensure that only admins can call this route.
parameters:
- $ref: "#/components/parameters/id"
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/fts/reIndex:
post:
tags: [Full-Text Index]
summary: Re-creates the full-text index for the current collective
description: |
Clears the full-text index for all data belonging to the
current collective and inserts the data from the database. The
response is immediately returned and a task is submitted that
will be executed by a job executor.
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/BasicResult"
/sec/checkfile/{checksum}:
get:
tags: [ Upload ]
@ -987,9 +1027,13 @@ paths:
summary: Search for items.
description: |
Search for items given a search form. The results are grouped
by month by default. Tags are *not* resolved! The results will
always contain an empty list for item tags. Use
`/searchWithTags` to also retrieve all tags of an item.
by month and are sorted by item date (newest first). Tags are
*not* resolved. The results will always contain an empty list
for item tags. Use `/searchWithTags` to also retrieve all tags
of an item.
The `fullText` field can be used to restrict the results by
using full-text search in the documents' contents.
security:
- authTokenHeader: []
requestBody:
@ -1011,7 +1055,11 @@ paths:
description: |
Search for items given a search form. The results are grouped
by month by default. For each item, its tags are also
returned. This uses more queries and is therefore slower.
returned. This uses more queries and is therefore slower, but
returns all tags to an item.
The `fullText` field can be used to restrict the results by
using full-text search in the documents' contents.
security:
- authTokenHeader: []
requestBody:
@ -1026,6 +1074,37 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/ItemLightList"
/sec/item/searchIndex:
post:
tags: [ Item ]
summary: Search for items using full-text search only.
description: |
Search for items by only using the full-text search index.
Unlike the other search routes, this one only asks the
full-text search index and returns only one group that
contains the results in the same order as given from the
index. Most full-text search engines use an ordering that
reflects the relevance with respect to the search term.
The other search routes always order the results by some
property (the item date) and thus the relevance ordering is
destroyed when using the full-text search.
security:
- authTokenHeader: []
requestBody:
content:
application/json:
schema:
$ref: "#/components/schemas/ItemFtsSearch"
responses:
200:
description: Ok
content:
application/json:
schema:
$ref: "#/components/schemas/ItemLightList"
/sec/item/{id}:
get:
tags: [ Item ]
@ -2255,6 +2334,28 @@ paths:
components:
schemas:
ItemFtsSearch:
description: |
Query description for a full-text only search.
required:
- query
- offset
- limit
properties:
offset:
type: integer
format: int32
limit:
type: integer
format: int32
description: |
The maximum number of results to return. Note that this
limit is a soft limit, there is some hard limit on the
server, too.
query:
type: string
description: |
A query searching the contents of documents.
MoveAttachment:
description: |
Data to move an attachment to another position.
@ -3386,6 +3487,10 @@ components:
description: |
Search in item names, correspondents, concerned entities
and notes.
fullText:
type: string
description: |
A query searching the contents of documents.
corrOrg:
type: string
format: ident
@ -3458,6 +3563,29 @@ components:
type: array
items:
$ref: "#/components/schemas/Tag"
highlighting:
description: |
Optional contextual information of a search query. Each
item refers to some field where a search match was found
(e.g. the name of an attachment or the item notes) and a
list of lines giving surrounding context of the match.
type: array
items:
$ref: "#/components/schemas/HighlightEntry"
HighlightEntry:
description: |
Highlighting information for a single field (maybe attachment
name or item notes).
required:
- name
- lines
properties:
name:
type: string
lines:
type: array
items:
type: string
IdName:
description: |
The identifier and a human readable name of some entity.

View File

@ -22,7 +22,7 @@ docspell.server {
# within the client config, but it is restricted by the server to
# the number defined here. An admin might choose a lower number
# depending on the available resources.
max-item-page-size = 500
max-item-page-size = 200
# Authentication.
auth {
@ -84,6 +84,40 @@ docspell.server {
}
}
# Configuration of the full-text search engine.
full-text-search {
# The full-text search feature can be disabled. It requires an
# additional index server which needs additional memory and disk
# space. It can be enabled later any time.
#
# Currently the SOLR search platform is supported.
enabled = false
# When re-creating the complete index via a REST call, this key
# is required. If left empty (the default), recreating the index
# is disabled.
#
# Example curl command:
# curl -XPOST http://localhost:7880/api/v1/open/fts/reIndexAll/test123
recreate-key = ""
# Configuration for the SOLR backend.
solr = {
# The URL to solr
url = "http://localhost:8983/solr/docspell"
# Used to tell solr when to commit the data
commit-within = 1000
# If true, logs request and response bodies
log-verbose = false
# The defType parameter to lucene that defines the parser to
# use. You might want to try "edismax" or look here:
# https://lucene.apache.org/solr/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing
def-type = "lucene"
# The default combiner for tokens. One of {AND, OR}.
q-op = "OR"
}
}
# Configuration for the backend.
backend {
# Enable or disable debugging for e-mail related functionality. This

View File

@ -1,9 +1,10 @@
package docspell.restserver
import java.net.InetAddress
import docspell.common._
import docspell.backend.auth.Login
import docspell.backend.{Config => BackendConfig}
import docspell.common._
import docspell.ftssolr.SolrConfig
case class Config(
appName: String,
@ -13,7 +14,8 @@ case class Config(
backend: BackendConfig,
auth: Login.Config,
integrationEndpoint: Config.IntegrationEndpoint,
maxItemPageSize: Int
maxItemPageSize: Int,
fullTextSearch: Config.FullTextSearch
)
object Config {
@ -50,4 +52,9 @@ object Config {
}
}
}
/** Settings of the full-text search feature.
  *
  * `enabled` switches the feature on or off, `recreateKey` is the key
  * compared against the one given to the open `reIndexAll` route and
  * `solr` holds the configuration of the SOLR backend.
  */
case class FullTextSearch(enabled: Boolean, recreateKey: Ident, solr: SolrConfig)
object FullTextSearch {}
}

View File

@ -2,8 +2,11 @@ package docspell.restserver
import cats.implicits._
import cats.effect._
import org.http4s.client.Client
import docspell.backend.BackendApp
import docspell.common.NodeType
import docspell.ftsclient.FtsClient
import docspell.ftssolr.SolrFtsClient
import scala.concurrent.ExecutionContext
@ -26,9 +29,16 @@ object RestAppImpl {
blocker: Blocker
): Resource[F, RestApp[F]] =
for {
backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker)
backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker)(
createFtsClient[F](cfg)
)
app = new RestAppImpl[F](cfg, backend)
appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
} yield appR
/** Selects the full-text search client for the given configuration:
  * the SOLR backed client if full-text search is enabled, otherwise
  * the `FtsClient.none` instance.
  */
private def createFtsClient[F[_]: ConcurrentEffect: ContextShift](
    cfg: Config
)(client: Client[F]): Resource[F, FtsClient[F]] = {
  val ftsCfg = cfg.fullTextSearch
  if (!ftsCfg.enabled) Resource.pure[F, FtsClient[F]](FtsClient.none[F])
  else SolrFtsClient(ftsCfg.solr, client)
}
}

View File

@ -78,7 +78,8 @@ object RestServer {
"email/sent" -> SentMailRoutes(restApp.backend, token),
"usertask/notifydueitems" -> NotifyDueItemsRoutes(cfg, restApp.backend, token),
"usertask/scanmailbox" -> ScanMailboxRoutes(restApp.backend, token),
"calevent/check" -> CalEventCheckRoutes()
"calevent/check" -> CalEventCheckRoutes(),
"fts" -> FullTextIndexRoutes.secured(cfg, restApp.backend, token)
)
def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] =
@ -87,7 +88,8 @@ object RestServer {
"signup" -> RegisterRoutes(restApp.backend, cfg),
"upload" -> UploadRoutes.open(restApp.backend, cfg),
"checkfile" -> CheckFileRoutes.open(restApp.backend),
"integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg)
"integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg),
"fts" -> FullTextIndexRoutes.open(cfg, restApp.backend)
)
def redirectTo[F[_]: Effect](path: String): HttpRoutes[F] = {

View File

@ -14,8 +14,9 @@ import bitpeace.FileMeta
import docspell.backend.ops.OCollective.{InsightData, PassChangeResult}
import docspell.backend.ops.OJob.JobCancelResult
import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult}
import docspell.backend.ops.{OItemSearch, OJob, OOrganization, OUpload}
import docspell.backend.ops.{OFulltext, OItemSearch, OJob, OOrganization, OUpload}
import docspell.store.AddResult
import docspell.ftsclient.FtsResult
import org.http4s.multipart.Multipart
import org.http4s.headers.`Content-Type`
import org.log4s.Logger
@ -124,6 +125,7 @@ trait Conversions {
m.dueDateFrom,
m.dueDateUntil,
m.allNames,
None,
None
)
@ -138,6 +140,17 @@ trait Conversions {
ItemLightList(gs)
}
/** Converts full-text search hits into an `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC item
  * date string; the groups are ordered by that name, descending.
  */
def mkItemListFts(v: Vector[OFulltext.FtsItem]): ItemLightList = {
  val byMonth = v.groupBy(_.item.date.toUtcDate.toString.substring(0, 7))

  val sortedGroups = byMonth.toList
    .map { case (month, items) =>
      ItemLightGroup(month, items.map(mkItemLight).toList)
    }
    .sortWith((left, right) => left.name.compareTo(right.name) >= 0)

  ItemLightList(sortedGroups)
}
def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = {
val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7))
@ -149,6 +162,21 @@ trait Conversions {
ItemLightList(gs)
}
/** Same as `mkItemListFts`, but for hits that carry their tags.
  *
  * Items are grouped by the first seven characters of their UTC item
  * date string; the groups are ordered by that name, descending.
  */
def mkItemListWithTagsFts(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = {
  val byMonth = v.groupBy(_.item.item.date.toUtcDate.toString.substring(0, 7))

  val sortedGroups = byMonth.toList
    .map { case (month, items) =>
      ItemLightGroup(month, items.map(mkItemLightWithTags).toList)
    }
    .sortWith((left, right) => left.name.compareTo(right.name) >= 0)

  ItemLightList(sortedGroups)
}
/** Puts all hits, in the given order, into one single group named
  * "Results". An empty input yields a list without any group.
  */
def mkItemListWithTagsFtsPlain(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = {
  val groups =
    if (v.nonEmpty) List(ItemLightGroup("Results", v.map(mkItemLightWithTags).toList))
    else Nil
  ItemLightList(groups)
}
def mkItemLight(i: OItemSearch.ListItem): ItemLight =
ItemLight(
i.id,
@ -163,12 +191,35 @@ trait Conversions {
i.concPerson.map(mkIdName),
i.concEquip.map(mkIdName),
i.fileCount,
Nil,
Nil
)
/** Converts a full-text hit: the plain item conversion, extended with
  * the highlighting derived from the hit's full-text data.
  */
def mkItemLight(i: OFulltext.FtsItem): ItemLight =
  mkItemLight(i.item).copy(highlighting = mkHighlight(i.ftsData))
/** Converts the item and attaches its converted tags. */
def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight = {
  val light = mkItemLight(i.item)
  light.copy(tags = i.tags.map(mkTag))
}
/** Converts a full-text hit with tags: the tagged item conversion,
  * extended with the highlighting derived from the hit's full-text data.
  */
def mkItemLightWithTags(i: OFulltext.FtsItemWithTags): ItemLight =
  mkItemLightWithTags(i.item).copy(highlighting = mkHighlight(i.ftsData))
/** Creates the highlight entries for one item.
  *
  * Entries without any context lines are dropped; the rest is ordered
  * by score, highest first. Attachment matches are labelled with the
  * attachment name, item matches with the fixed label "Item".
  */
private def mkHighlight(ftsData: OFulltext.FtsData): List[HighlightEntry] =
  ftsData.items
    .filter(_.context.nonEmpty)
    .sortBy(entry => -entry.score)
    .map { entry =>
      val label = entry.matchData match {
        case FtsResult.AttachmentData(_, attachName) => attachName
        case FtsResult.ItemData                      => "Item"
      }
      HighlightEntry(label, entry.context)
    }
// job
def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = {
def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = {

View File

@ -0,0 +1,60 @@
package docspell.restserver.routes
import cats.effect._
import cats.implicits._
import cats.data.OptionT
import org.http4s._
//import org.http4s.circe.CirceEntityDecoder._
import org.http4s.circe.CirceEntityEncoder._
import org.http4s.dsl.Http4sDsl
import docspell.common._
import docspell.backend.BackendApp
import docspell.backend.auth.AuthToken
import docspell.restserver.Config
import docspell.restserver.conv.Conversions
/** Routes that trigger re-creating the full-text index.
  *
  * While full-text search is disabled in the configuration, both route
  * sets answer every request with a "not found" response.
  */
object FullTextIndexRoutes {

  /** `POST /reIndex`: re-creates the index for the account of the
    * given token. The outcome (success or error) is reported as a
    * basic result in a 200 response.
    */
  def secured[F[_]: Effect](
      cfg: Config,
      backend: BackendApp[F],
      user: AuthToken
  ): HttpRoutes[F] =
    if (cfg.fullTextSearch.enabled) {
      val dsl = Http4sDsl[F]
      import dsl._

      HttpRoutes.of {
        case POST -> Root / "reIndex" =>
          backend.fulltext
            .reindexCollective(user.account)
            .attempt
            .flatMap(outcome =>
              Ok(Conversions.basicResult(outcome, "Full-text index will be re-created."))
            )
      }
    } else notFound[F]

  /** `POST /reIndexAll/{key}`: re-creates the complete index, provided
    * the given key is non-empty and equals the configured recreate-key.
    * A wrong key is reported as a failed basic result in a 200 response.
    */
  def open[F[_]: Effect](cfg: Config, backend: BackendApp[F]): HttpRoutes[F] =
    if (cfg.fullTextSearch.enabled) {
      val dsl = Http4sDsl[F]
      import dsl._

      HttpRoutes.of {
        case POST -> Root / "reIndexAll" / Ident(id) =>
          (if (id.nonEmpty && id == cfg.fullTextSearch.recreateKey)
             backend.fulltext.reindexAll.attempt
           else Left(new Exception("The provided key is invalid.")).pure[F])
            .flatMap(outcome =>
              Ok(Conversions.basicResult(outcome, "Full-text index will be re-created."))
            )
      }
    } else notFound[F]

  /** Routes answering every request with a "not found" response. */
  private def notFound[F[_]: Effect]: HttpRoutes[F] =
    HttpRoutes { _ =>
      OptionT.pure(Response.notFound[F])
    }
}

View File

@ -5,6 +5,7 @@ import cats.implicits._
import docspell.backend.BackendApp
import docspell.backend.auth.AuthToken
import docspell.backend.ops.OItemSearch.Batch
import docspell.backend.ops.OFulltext
import docspell.common.{Ident, ItemState}
import org.http4s.HttpRoutes
import org.http4s.dsl.Http4sDsl
@ -34,11 +35,25 @@ object ItemRoutes {
_ <- logger.ftrace(s"Got search mask: $mask")
query = Conversions.mkQuery(mask, user.account.collective)
_ <- logger.ftrace(s"Running query: $query")
items <- backend.itemSearch.findItems(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
resp <- Ok(Conversions.mkItemList(items))
resp <- mask.fullText match {
case Some(fq) if cfg.fullTextSearch.enabled =>
for {
items <- backend.fulltext.findItems(
query,
OFulltext.FtsInput(fq),
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListFts(items))
} yield ok
case _ =>
for {
items <- backend.itemSearch.findItems(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemList(items))
} yield ok
}
} yield resp
case req @ POST -> Root / "searchWithTags" =>
@ -47,11 +62,45 @@ object ItemRoutes {
_ <- logger.ftrace(s"Got search mask: $mask")
query = Conversions.mkQuery(mask, user.account.collective)
_ <- logger.ftrace(s"Running query: $query")
items <- backend.itemSearch.findItemsWithTags(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
resp <- Ok(Conversions.mkItemListWithTags(items))
resp <- mask.fullText match {
case Some(fq) if cfg.fullTextSearch.enabled =>
for {
items <- backend.fulltext.findItemsWithTags(
query,
OFulltext.FtsInput(fq),
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListWithTagsFts(items))
} yield ok
case _ =>
for {
items <- backend.itemSearch.findItemsWithTags(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListWithTags(items))
} yield ok
}
} yield resp
// Full-text-only search: the query goes to `backend.fulltext.findIndexOnly`
// and the hits are rendered with `mkItemListWithTagsFtsPlain`.
case req @ POST -> Root / "searchIndex" =>
for {
mask <- req.as[ItemFtsSearch]
resp <- mask.query match {
// only queries with at least two characters are executed
case q if q.length > 1 =>
val ftsIn = OFulltext.FtsInput(q)
for {
items <- backend.fulltext.findIndexOnly(
ftsIn,
user.account.collective,
// the requested limit is capped by the configured maximum page size
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListWithTagsFtsPlain(items))
} yield ok
// anything shorter is rejected with a 400 response
case _ =>
BadRequest(BasicResult(false, "Query string too short"))
}
} yield resp
case GET -> Root / Ident(id) =>

View File

@ -13,7 +13,8 @@ case class Flags(
baseUrl: LenientUri,
signupMode: SignupConfig.Mode,
docspellAssetPath: String,
integrationEnabled: Boolean
integrationEnabled: Boolean,
fullTextSearchEnabled: Boolean
)
object Flags {
@ -23,7 +24,8 @@ object Flags {
cfg.baseUrl,
cfg.backend.signup.mode,
s"/app/assets/docspell-webapp/${BuildInfo.version}",
cfg.integrationEndpoint.enabled
cfg.integrationEndpoint.enabled,
cfg.fullTextSearch.enabled
)
implicit val jsonEncoder: Encoder[Flags] =

View File

@ -0,0 +1,10 @@
-- Table `fts_migration` (backtick-quoted identifiers, MySQL/MariaDB style).
-- Presumably records applied full-text index migrations; verify against
-- the code that writes to it.
CREATE TABLE `fts_migration` (
`id` varchar(254) not null primary key,
`version` int not null,
`fts_engine` varchar(254) not null,
`description` varchar(254) not null,
`created` timestamp not null
);
-- A (version, fts_engine) pair may occur only once.
CREATE UNIQUE INDEX `fts_migration_version_engine_idx`
ON `fts_migration`(`version`, `fts_engine`);

View File

@ -0,0 +1,10 @@
-- Table "fts_migration" (double-quoted identifiers).
-- Presumably records applied full-text index migrations; verify against
-- the code that writes to it.
CREATE TABLE "fts_migration" (
"id" varchar(254) not null primary key,
"version" int not null,
"fts_engine" varchar(254) not null,
"description" varchar(254) not null,
"created" timestamp not null
);
-- A (version, fts_engine) pair may occur only once.
CREATE UNIQUE INDEX "fts_migration_version_engine_idx"
ON "fts_migration"("version", "fts_engine");

View File

@ -6,7 +6,7 @@ import cats.effect.Sync
import cats.data.OptionT
import doobie._
import doobie.implicits._
import docspell.common.{Ident, MetaProposalList}
import docspell.common._
import docspell.store.Store
import docspell.store.impl.Implicits._
import docspell.store.records._
@ -138,4 +138,39 @@ object QAttachment {
q.query[RAttachmentMeta].option
}
/** One result row of `allAttachmentMetaAndName`: attachment id, the id
  * of its item, the collective of that item, the language of that
  * collective, the attachment name and the content column of the
  * attachment meta record.
  */
case class ContentAndName(
id: Ident,
item: Ident,
collective: Ident,
lang: Language,
name: Option[String],
content: Option[String]
)
/** Streams name and content of attachments, joined with their
  * attachment meta record, item and collective.
  *
  * @param coll if given, only attachments whose item belongs to this
  *   collective are selected; otherwise no restriction is applied
  * @param chunkSize chunk size passed to the underlying query stream
  */
def allAttachmentMetaAndName(
    coll: Option[Ident],
    chunkSize: Int
): Stream[ConnectionIO, ContentAndName] = {
  // table aliases: a = attachment, m = attachment meta, i = item, c = collective
  val attachId    = RAttachment.Columns.id.prefix("a")
  val attachItem  = RAttachment.Columns.itemId.prefix("a")
  val attachName  = RAttachment.Columns.name.prefix("a")
  val metaId      = RAttachmentMeta.Columns.id.prefix("m")
  val metaContent = RAttachmentMeta.Columns.content.prefix("m")
  val itemKey     = RItem.Columns.id.prefix("i")
  val itemColl    = RItem.Columns.cid.prefix("i")
  val collId      = RCollective.Columns.id.prefix("c")
  val collLang    = RCollective.Columns.language.prefix("c")

  val joinMeta = RAttachmentMeta.table ++ fr"m ON" ++ attachId.is(metaId)
  val joinItem = RItem.table ++ fr"i ON" ++ itemKey.is(attachItem)
  val joinColl = RCollective.table ++ fr"c ON" ++ collId.is(itemColl)
  val tables = RAttachment.table ++ fr"a INNER JOIN" ++ joinMeta ++
    fr"INNER JOIN" ++ joinItem ++
    fr"INNER JOIN" ++ joinColl

  val collectiveFilter = coll match {
    case Some(cid) => itemColl.is(cid)
    case None      => Fragment.empty
  }

  selectSimple(
    Seq(attachId, attachItem, itemColl, collLang, attachName, metaContent),
    tables,
    collectiveFilter
  ).query[ContentAndName]
    .streamWithChunkSize(chunkSize)
}
}

View File

@ -3,6 +3,7 @@ package docspell.store.queries
import bitpeace.FileMeta
import cats.effect.Sync
import cats.data.OptionT
import cats.data.NonEmptyList
import cats.implicits._
import cats.effect.concurrent.Ref
import fs2.Stream
@ -165,6 +166,7 @@ object QItem {
dueDateFrom: Option[Timestamp],
dueDateTo: Option[Timestamp],
allNames: Option[String],
itemIds: Option[Set[Ident]],
orderAsc: Option[RItem.Columns.type => Column]
)
@ -186,6 +188,7 @@ object QItem {
None,
None,
None,
None,
None
)
}
@ -193,6 +196,12 @@ object QItem {
/** A window into a result list: skip `offset` entries, then take at
  * most `limit`.
  */
case class Batch(offset: Int, limit: Int) {

  /** This batch with its limit capped at `n`. */
  def restrictLimitTo(n: Int): Batch =
    copy(limit = math.min(n, limit))

  /** The batch directly following this one, with the same limit. */
  def next: Batch =
    copy(offset = offset + limit)

  /** The batch starting at offset 0, with the same limit. */
  def first: Batch =
    copy(offset = 0)
}
object Batch {
@ -205,7 +214,12 @@ object QItem {
Batch(0, c)
}
def findItems(q: Query, batch: Batch): Stream[ConnectionIO, ListItem] = {
private def findItemsBase(
q: Query,
distinct: Boolean,
moreCols: Seq[Fragment],
ctes: (String, Fragment)*
): Fragment = {
val IC = RItem.Columns
val AC = RAttachment.Columns
val PC = RPerson.Columns
@ -217,29 +231,31 @@ object QItem {
val equipCols = List(REquipment.Columns.eid, REquipment.Columns.name)
val finalCols = commas(
IC.id.prefix("i").f,
IC.name.prefix("i").f,
IC.state.prefix("i").f,
coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f),
IC.dueDate.prefix("i").f,
IC.source.prefix("i").f,
IC.incoming.prefix("i").f,
IC.created.prefix("i").f,
fr"COALESCE(a.num, 0)",
OC.oid.prefix("o0").f,
OC.name.prefix("o0").f,
PC.pid.prefix("p0").f,
PC.name.prefix("p0").f,
PC.pid.prefix("p1").f,
PC.name.prefix("p1").f,
EC.eid.prefix("e1").f,
EC.name.prefix("e1").f,
q.orderAsc match {
case Some(co) =>
coalesce(co(IC).prefix("i").f, IC.created.prefix("i").f)
case None =>
IC.created.prefix("i").f
}
Seq(
IC.id.prefix("i").f,
IC.name.prefix("i").f,
IC.state.prefix("i").f,
coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f),
IC.dueDate.prefix("i").f,
IC.source.prefix("i").f,
IC.incoming.prefix("i").f,
IC.created.prefix("i").f,
fr"COALESCE(a.num, 0)",
OC.oid.prefix("o0").f,
OC.name.prefix("o0").f,
PC.pid.prefix("p0").f,
PC.name.prefix("p0").f,
PC.pid.prefix("p1").f,
PC.name.prefix("p1").f,
EC.eid.prefix("e1").f,
EC.name.prefix("e1").f,
q.orderAsc match {
case Some(co) =>
coalesce(co(IC).prefix("i").f, IC.created.prefix("i").f)
case None =>
IC.created.prefix("i").f
}
) ++ moreCols
)
val withItem = selectSimple(itemCols, RItem.table, IC.cid.is(q.collective))
@ -249,19 +265,32 @@ object QItem {
val withAttach = fr"SELECT COUNT(" ++ AC.id.f ++ fr") as num, " ++ AC.itemId.f ++
fr"from" ++ RAttachment.table ++ fr"GROUP BY (" ++ AC.itemId.f ++ fr")"
val selectKW = if (distinct) fr"SELECT DISTINCT" else fr"SELECT"
val query = withCTE(
"items" -> withItem,
"persons" -> withPerson,
"orgs" -> withOrgs,
"equips" -> withEquips,
"attachs" -> withAttach
(Seq(
"items" -> withItem,
"persons" -> withPerson,
"orgs" -> withOrgs,
"equips" -> withEquips,
"attachs" -> withAttach
) ++ ctes): _*
) ++
fr"SELECT DISTINCT" ++ finalCols ++ fr" FROM items i" ++
selectKW ++ finalCols ++ fr" FROM items i" ++
fr"LEFT JOIN attachs a ON" ++ IC.id.prefix("i").is(AC.itemId.prefix("a")) ++
fr"LEFT JOIN persons p0 ON" ++ IC.corrPerson.prefix("i").is(PC.pid.prefix("p0")) ++
fr"LEFT JOIN orgs o0 ON" ++ IC.corrOrg.prefix("i").is(OC.oid.prefix("o0")) ++
fr"LEFT JOIN persons p1 ON" ++ IC.concPerson.prefix("i").is(PC.pid.prefix("p1")) ++
fr"LEFT JOIN equips e1 ON" ++ IC.concEquipment.prefix("i").is(EC.eid.prefix("e1"))
query
}
def findItems(q: Query, batch: Batch): Stream[ConnectionIO, ListItem] = {
val IC = RItem.Columns
val PC = RPerson.Columns
val OC = ROrganization.Columns
val EC = REquipment.Columns
val query = findItemsBase(q, true, Seq.empty)
// inclusive tags are AND-ed
val tagSelectsIncl = q.tagsInclude
@ -326,7 +355,15 @@ object QItem {
)
.getOrElse(Fragment.empty),
q.dueDateFrom.map(d => IC.dueDate.prefix("i").isGt(d)).getOrElse(Fragment.empty),
q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty)
q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty),
q.itemIds
.map(ids =>
NonEmptyList
.fromList(ids.toList)
.map(nel => IC.id.prefix("i").isIn(nel))
.getOrElse(IC.id.prefix("i").is(""))
)
.getOrElse(Fragment.empty)
)
val order = q.orderAsc match {
@ -347,14 +384,39 @@ object QItem {
frag.query[ListItem].stream
}
/** An item id paired with a weight; `findSelectedItems` orders its
  * results by this weight, highest first.
  */
case class SelectedItem(itemId: Ident, weight: Double)
/** Loads the list data for exactly the given items, ordered by their
  * weight (highest first).
  *
  * The id/weight pairs are passed to the database as a `VALUES` table
  * named `tids`, which is inner-joined with the base item query. An
  * empty set yields an empty stream without running a query.
  */
def findSelectedItems(
    q: Query,
    items: Set[SelectedItem]
): Stream[ConnectionIO, ListItem] =
  if (items.nonEmpty) {
    val itemIdCol = RItem.Columns.id.prefix("i")
    val rows = items
      .map(sel => fr"(${sel.itemId}, ${sel.weight})")
      .reduce((acc, row) => acc ++ fr"," ++ row)
    val weightsCte = ("tids(item_id, weight)", fr"(VALUES" ++ rows ++ fr")")

    val sql = findItemsBase(q, true, Seq(fr"tids.weight"), weightsCte) ++
      fr"INNER JOIN tids ON" ++ itemIdCol.f ++ fr" = tids.item_id" ++
      fr"ORDER BY tids.weight DESC"

    logger.trace(s"fts query: $sql")
    sql.query[ListItem].stream
  } else Stream.empty
case class ListItemWithTags(item: ListItem, tags: List[RTag])
/** Same as `findItems` but resolves the tags for each item. Note that
* this is implemented by running an additional query per item.
*/
def findItemsWithTags(
q: Query,
batch: Batch
collective: Ident,
search: Stream[ConnectionIO, ListItem]
): Stream[ConnectionIO, ListItemWithTags] = {
def findTag(
cache: Ref[ConnectionIO, Map[Ident, RTag]],
@ -377,19 +439,20 @@ object QItem {
for {
resolvedTags <- Stream.eval(Ref.of[ConnectionIO, Map[Ident, RTag]](Map.empty))
item <- findItems(q, batch)
item <- search
tagItems <- Stream.eval(RTagItem.findByItem(item.id))
tags <- Stream.eval(tagItems.traverse(ti => findTag(resolvedTags, ti)))
ftags = tags.flatten.filter(t => t.collective == q.collective)
ftags = tags.flatten.filter(t => t.collective == collective)
} yield ListItemWithTags(item, ftags.toList.sortBy(_.name))
}
def delete[F[_]: Sync](store: Store[F])(itemId: Ident, collective: Ident): F[Int] =
for {
tn <- store.transact(RTagItem.deleteItemTags(itemId))
rn <- QAttachment.deleteItemAttachments(store)(itemId, collective)
tn <- store.transact(RTagItem.deleteItemTags(itemId))
mn <- store.transact(RSentMail.deleteByItem(itemId))
n <- store.transact(RItem.deleteByIdAndCollective(itemId, collective))
} yield tn + rn + n
} yield tn + rn + n + mn
def findByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Vector[RItem]] = {
val IC = RItem.Columns
@ -455,4 +518,25 @@ object QItem {
prefix(suffix(value))
}
/** One result row of `allNameAndNotes`: the id, collective, name and
  * notes columns of an item.
  */
final case class NameAndNotes(
id: Ident,
collective: Ident,
name: String,
notes: Option[String]
)
/** Streams id, collective, name and notes of items.
  *
  * @param coll if given, only items of this collective are selected;
  *   otherwise no restriction is applied
  * @param chunkSize chunk size passed to the underlying query stream
  */
def allNameAndNotes(
    coll: Option[Ident],
    chunkSize: Int
): Stream[ConnectionIO, NameAndNotes] = {
  val IC = RItem.Columns
  val collectiveFilter = coll match {
    case Some(cid) => IC.cid.is(cid)
    case None      => Fragment.empty
  }

  selectSimple(Seq(IC.id, IC.cid, IC.name, IC.notes), RItem.table, collectiveFilter)
    .query[NameAndNotes]
    .streamWithChunkSize(chunkSize)
}
}

View File

@ -51,15 +51,4 @@ object QPeriodicTask {
selectSimple(RPeriodicTask.Columns.all, RPeriodicTask.table, where) ++ order
sql.query[RPeriodicTask].streamWithChunkSize(2).take(1).compile.last
}
def findNonFinal(pid: Ident): ConnectionIO[Option[RJob]] =
selectSimple(
RJob.Columns.all,
RJob.table,
and(
RJob.Columns.tracker.is(pid),
RJob.Columns.state.isOneOf(JobState.all.diff(JobState.done).toSeq)
)
).query[RJob].option
}

View File

@ -11,8 +11,19 @@ import org.log4s._
trait JobQueue[F[_]] {
/** Inserts the job into the queue to get picked up as soon as
* possible. The job must have a new unique id.
*/
def insert(job: RJob): F[Unit]
/** Inserts the job into the queue only, if there is no job with the
* same tracker-id running at the moment. The job id must be a new
* unique id.
*
* If the job has no tracker defined, it is simply inserted.
*/
def insertIfNew(job: RJob): F[Unit]
def insertAll(jobs: Seq[RJob]): F[Unit]
def nextJob(
@ -46,6 +57,19 @@ object JobQueue {
else ().pure[F]
}
// Looks up a non-final job with the same tracker first; the job is
// only inserted if none is found. Jobs without a tracker are always
// inserted.
def insertIfNew(job: RJob): F[Unit] = {
  val activeJob: F[Option[RJob]] = job.tracker match {
    case Some(tid) => store.transact(RJob.findNonFinalByTracker(tid))
    case None      => Option.empty[RJob].pure[F]
  }

  activeJob.flatMap(found => if (found.isDefined) ().pure[F] else insert(job))
}
def insertAll(jobs: Seq[RJob]): F[Unit] =
jobs.toList
.traverse(j => insert(j).attempt)

View File

@ -100,7 +100,7 @@ object PeriodicTaskStore {
}
def findNonFinalJob(pjobId: Ident): F[Option[RJob]] =
store.transact(QPeriodicTask.findNonFinal(pjobId))
store.transact(RJob.findNonFinalByTracker(pjobId))
def insert(task: RPeriodicTask): F[Unit] = {
val update = store.transact(RPeriodicTask.update(task))

View File

@ -183,4 +183,6 @@ object RAttachment {
n2 <- deleteFrom(table, id.is(attachId)).update.run
} yield n0 + n1 + n2
/** Looks up the item id stored with the given attachment, if such an
  * attachment exists.
  */
def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] = {
  val byAttachId = id.is(attachId)
  selectSimple(Seq(itemId), table, byAttachId).query[Ident].option
}
}

View File

@ -0,0 +1,59 @@
package docspell.store.records
import cats.implicits._
import cats.effect._
import doobie._
import doobie.implicits._
import docspell.common._
import docspell.store.impl._
import docspell.store.impl.Implicits._
/** A row of the `fts_migration` table.
  *
  * @param id          identifier of this record
  * @param version     migration version number
  * @param ftsEngine   identifier of the full-text search engine the
  *                    migration belongs to (presumably e.g. the solr
  *                    backend; confirm against callers)
  * @param description free-text description of the migration
  * @param created     timestamp taken when the record was created
  */
final case class RFtsMigration(
id: Ident,
version: Int,
ftsEngine: Ident,
description: String,
created: Timestamp
)
object RFtsMigration {

  /** Builds a new record with a random id and the current timestamp.
    * Nothing is written to the database here.
    */
  def create[F[_]: Sync](
      version: Int,
      ftsEngine: Ident,
      description: String
  ): F[RFtsMigration] =
    Ident.randomId[F].flatMap { newId =>
      Timestamp.current[F].map { now =>
        RFtsMigration(newId, version, ftsEngine, description, now)
      }
    }

  val table = fr"fts_migration"

  /** Column definitions of the `fts_migration` table. */
  object Columns {
    val id          = Column("id")
    val version     = Column("version")
    val ftsEngine   = Column("fts_engine")
    val description = Column("description")
    val created     = Column("created")

    val all = List(id, version, ftsEngine, description, created)
  }
  import Columns._

  /** Inserts the given record. The statement is run with
    * `LogHandler.nop` as its log handler.
    *
    * @return the update count reported for the insert
    */
  def insert(v: RFtsMigration): ConnectionIO[Int] = {
    val values =
      fr"${v.id},${v.version},${v.ftsEngine},${v.description},${v.created}"

    insertRow(table, all, values).updateWithLogHandler(LogHandler.nop).run
  }

  /** Tells whether at least one record exists for the given version
    * and engine.
    */
  def exists(vers: Int, engine: Ident): ConnectionIO[Boolean] = {
    val sameMigration = and(version.is(vers), ftsEngine.is(engine))

    selectCount(id, table, sameMigration)
      .query[Int]
      .unique
      .map(count => count > 0)
  }

  /** Deletes the record with the given id and returns the update count. */
  def deleteById(rId: Ident): ConnectionIO[Int] = {
    val byId = id.is(rId)
    deleteFrom(table, byId).update.run
  }
}

View File

@ -255,4 +255,12 @@ object RJob {
.map(_ => 1)
.compile
.foldMonoid
/** Finds a job with the given tracker id whose state is not contained
  * in `JobState.done`.
  *
  * Only the first matching row is read. doobie's `option` raises an
  * error when a query yields more than one row, and nothing visible
  * here enforces that a tracker id occurs at most once among the
  * not-yet-done jobs.
  *
  * @param trackerId the tracker id to look for
  * @return a matching job, or `None` if there is none
  */
def findNonFinalByTracker(trackerId: Ident): ConnectionIO[Option[RJob]] = {
  val notDone = JobState.all.diff(JobState.done).toSeq

  selectSimple(
    all,
    table,
    and(tracker.is(trackerId), state.isOneOf(notDone))
  ).query[RJob].stream.take(1).compile.last
}
}

View File

@ -3,6 +3,7 @@ package docspell.store.records
import fs2.Stream
import cats.effect._
import cats.implicits._
import cats.data.NonEmptyList
import doobie._
import doobie.implicits._
import docspell.common._
@ -115,4 +116,15 @@ object RSentMail {
/** Deletes the sent-mail row with the given id and returns the update count. */
def delete(mailId: Ident): ConnectionIO[Int] = {
  val byId = id.is(mailId)
  deleteFrom(table, byId).update.run
}
/** Removes everything recorded about mails sent for the given item.
  *
  * The ids of the affected mails are collected first, then the
  * item-to-mail rows are deleted and finally the mails themselves.
  *
  * @return the sum of both update counts
  */
def deleteByItem(item: Ident): ConnectionIO[Int] =
  for {
    mailIds <- RSentMailItem.findSentMailIdsByItem(item)
    links   <- RSentMailItem.deleteAllByItem(item)
    // An empty id list means there is no mail to delete.
    mails <- NonEmptyList
      .fromList(mailIds.toList)
      .fold(0.pure[ConnectionIO])(nel => deleteFrom(table, id.isIn(nel)).update.run)
  } yield mails + links
}

View File

@ -54,4 +54,10 @@ object RSentMailItem {
/** Deletes all rows that reference the given sent mail and returns the
  * update count.
  */
def deleteMail(mailId: Ident): ConnectionIO[Int] = {
  val byMail = sentMailId.is(mailId)
  deleteFrom(table, byMail).update.run
}
/** Collects the `sentMailId` values of all rows that reference the
  * given item, as a set.
  */
def findSentMailIdsByItem(item: Ident): ConnectionIO[Set[Ident]] = {
  val select = selectSimple(Seq(sentMailId), table, itemId.is(item))
  select.query[Ident].to[Set]
}
/** Deletes all rows that reference the given item and returns the
  * update count.
  */
def deleteAllByItem(item: Ident): ConnectionIO[Int] = {
  val byItem = itemId.is(item)
  deleteFrom(table, byItem).update.run
}
}

View File

@ -45,6 +45,7 @@ module Api exposing
, getTags
, getUsers
, itemDetail
, itemIndexSearch
, itemSearch
, login
, loginSession
@ -77,6 +78,7 @@ module Api exposing
, setUnconfirmed
, startOnceNotifyDueItems
, startOnceScanMailbox
, startReIndex
, submitNotifyDueItems
, updateNotifyDueItems
, updateScanMailbox
@ -104,6 +106,7 @@ import Api.Model.ImapSettings exposing (ImapSettings)
import Api.Model.ImapSettingsList exposing (ImapSettingsList)
import Api.Model.InviteResult exposing (InviteResult)
import Api.Model.ItemDetail exposing (ItemDetail)
import Api.Model.ItemFtsSearch exposing (ItemFtsSearch)
import Api.Model.ItemInsights exposing (ItemInsights)
import Api.Model.ItemLightList exposing (ItemLightList)
import Api.Model.ItemProposals exposing (ItemProposals)
@ -147,6 +150,20 @@ import Util.Http as Http2
--- Full-Text
{-| Sends an authenticated POST with an empty body to the `fts/reIndex`
endpoint and decodes the response as a `BasicResult`.
-}
startReIndex : Flags -> (Result Http.Error BasicResult -> msg) -> Cmd msg
startReIndex flags receive =
    let
        endpoint =
            flags.config.baseUrl ++ "/api/v1/sec/fts/reIndex"

        expectResult =
            Http.expectJson receive Api.Model.BasicResult.decoder
    in
    Http2.authPost
        { url = endpoint
        , account = getAccount flags
        , body = Http.emptyBody
        , expect = expectResult
        }
--- Scan Mailboxes
@ -1092,6 +1109,20 @@ moveAttachmentBefore flags itemId data receive =
}
{-| Posts the given full-text query as JSON to the `item/searchIndex`
endpoint and decodes the response as an `ItemLightList`.
-}
itemIndexSearch :
    Flags
    -> ItemFtsSearch
    -> (Result Http.Error ItemLightList -> msg)
    -> Cmd msg
itemIndexSearch flags query receive =
    let
        endpoint =
            flags.config.baseUrl ++ "/api/v1/sec/item/searchIndex"

        payload =
            Http.jsonBody (Api.Model.ItemFtsSearch.encode query)

        expectItems =
            Http.expectJson receive Api.Model.ItemLightList.decoder
    in
    Http2.authPost
        { url = endpoint
        , account = getAccount flags
        , body = payload
        , expect = expectItems
        }
itemSearch : Flags -> ItemSearch -> (Result Http.Error ItemLightList -> msg) -> Cmd msg
itemSearch flags search receive =
Http2.authPost

View File

@ -170,7 +170,7 @@ viewLogin model =
viewHome : Model -> Html Msg
viewHome model =
Html.map HomeMsg (Page.Home.View.view model.uiSettings model.homeModel)
Html.map HomeMsg (Page.Home.View.view model.flags model.uiSettings model.homeModel)
menuEntry : Model -> Page -> List (Html Msg) -> Html Msg

View File

@ -7,6 +7,8 @@ module Comp.CollectiveSettingsForm exposing
, view
)
import Api
import Api.Model.BasicResult exposing (BasicResult)
import Api.Model.CollectiveSettings exposing (CollectiveSettings)
import Comp.Dropdown
import Data.Flags exposing (Flags)
@ -14,13 +16,17 @@ import Data.Language exposing (Language)
import Data.UiSettings exposing (UiSettings)
import Html exposing (..)
import Html.Attributes exposing (..)
import Html.Events exposing (onCheck)
import Html.Events exposing (onCheck, onClick, onInput)
import Http
import Util.Http
{-| State of the collective settings form.

  - `langModel`: dropdown state for the document language
  - `intEnabled`: whether the integration endpoint is switched on
  - `initSettings`: the settings the form was initialised with
  - `fullTextConfirmText`: current content of the confirmation input
    next to the re-index button
  - `fullTextReIndexResult`: outcome of the last re-index attempt, or
    `Nothing` if there is none to show

-}
type alias Model =
{ langModel : Comp.Dropdown.Model Language
, intEnabled : Bool
, initSettings : CollectiveSettings
, fullTextConfirmText : String
, fullTextReIndexResult : Maybe BasicResult
}
@ -44,6 +50,8 @@ init settings =
}
, intEnabled = settings.integrationEnabled
, initSettings = settings
, fullTextConfirmText = ""
, fullTextReIndexResult = Nothing
}
@ -61,10 +69,13 @@ getSettings model =
{-| Messages handled by the collective settings form.

  - `LangDropdownMsg`: forwarded message of the language dropdown
  - `ToggleIntegrationEndpoint`: toggles the integration endpoint flag
  - `SetFullTextConfirm`: new content of the re-index confirmation input
  - `TriggerReIndex`: the re-index button was clicked
  - `TriggerReIndexResult`: response of the re-index request

-}
type Msg
= LangDropdownMsg (Comp.Dropdown.Msg Language)
| ToggleIntegrationEndpoint
| SetFullTextConfirm String
| TriggerReIndex
| TriggerReIndexResult (Result Http.Error BasicResult)
update : Flags -> Msg -> Model -> ( Model, Cmd Msg, Maybe CollectiveSettings )
update _ msg model =
update flags msg model =
case msg of
LangDropdownMsg m ->
let
@ -90,17 +101,70 @@ update _ msg model =
in
( nextModel, Cmd.none, Just (getSettings nextModel) )
SetFullTextConfirm str ->
( { model | fullTextConfirmText = str }, Cmd.none, Nothing )
TriggerReIndex ->
case String.toLower model.fullTextConfirmText of
"ok" ->
( { model | fullTextReIndexResult = Nothing }
, Api.startReIndex flags TriggerReIndexResult
, Nothing
)
_ ->
( { model
| fullTextReIndexResult =
Just
(BasicResult False <|
"Please type OK in the field if you really "
++ "want to start re-indexing your data."
)
}
, Cmd.none
, Nothing
)
TriggerReIndexResult (Ok br) ->
( { model | fullTextReIndexResult = Just br }, Cmd.none, Nothing )
TriggerReIndexResult (Err err) ->
( { model
| fullTextReIndexResult =
Just (BasicResult False (Util.Http.errorToString err))
}
, Cmd.none
, Nothing
)
view : Flags -> UiSettings -> Model -> Html Msg
view flags settings model =
div [ class "ui form" ]
[ div [ class "field" ]
div
[ classList
[ ( "ui form", True )
, ( "error", Maybe.map .success model.fullTextReIndexResult == Just False )
, ( "success", Maybe.map .success model.fullTextReIndexResult == Just True )
]
]
[ h3 [ class "ui dividing header" ]
[ text "Document Language"
]
, div [ class "field" ]
[ label [] [ text "Document Language" ]
, Html.map LangDropdownMsg (Comp.Dropdown.view settings model.langModel)
, span [ class "small-info" ]
[ text "The language of your documents. This helps text recognition (OCR) and text analysis."
]
]
, h3
[ classList
[ ( "ui dividing header", True )
, ( "invisible hidden", not flags.config.integrationEnabled )
]
]
[ text "Integration Endpoint"
]
, div
[ classList
[ ( "field", True )
@ -121,4 +185,50 @@ view flags settings model =
]
]
]
, h3
[ classList
[ ( "ui dividing header", True )
, ( "invisible hidden", not flags.config.fullTextSearchEnabled )
]
]
[ text "Full-Text Search"
]
, div
[ classList
[ ( "inline field", True )
, ( "invisible hidden", not flags.config.fullTextSearchEnabled )
]
]
[ div [ class "ui action input" ]
[ input
[ type_ "text"
, value model.fullTextConfirmText
, onInput SetFullTextConfirm
]
[]
, button
[ class "ui primary right labeled icon button"
, onClick TriggerReIndex
]
[ i [ class "refresh icon" ] []
, text "Re-Index All Data"
]
]
, div [ class "small-info" ]
    -- The two text nodes are rendered back to back, so the first one
    -- carries the space that separates the sentences.
    [ text "This starts a task that clears the full-text index and re-indexes all your data again. "
    , text "You must type OK before clicking the button to avoid accidental re-indexing."
    ]
, div
[ classList
[ ( "ui message", True )
, ( "error", Maybe.map .success model.fullTextReIndexResult == Just False )
, ( "success", Maybe.map .success model.fullTextReIndexResult == Just True )
, ( "hidden invisible", model.fullTextReIndexResult == Nothing )
]
]
[ Maybe.map .message model.fullTextReIndexResult
|> Maybe.withDefault ""
|> text
]
]
]

View File

@ -9,6 +9,7 @@ module Comp.FixedDropdown exposing
, update
, view
, viewString
, viewStyled
)
import Html exposing (..)
@ -69,11 +70,12 @@ update msg model =
( model, Just item.id )
view : Maybe (Item a) -> Model a -> Html (Msg a)
view selected model =
viewStyled : String -> Maybe (Item a) -> Model a -> Html (Msg a)
viewStyled classes selected model =
div
[ classList
[ ( "ui selection dropdown", True )
, ( classes, True )
, ( "open", model.menuOpen )
]
, onClick ToggleMenu
@ -102,6 +104,11 @@ view selected model =
]
{-| Renders the dropdown without extra css classes; this is
`viewStyled` with an empty class string.
-}
view : Maybe (Item a) -> Model a -> Html (Msg a)
view =
    viewStyled ""
{-| Variant of `view` for plain strings: the selected string is used
for both arguments of `Item`.
-}
viewString : Maybe String -> Model String -> Html (Msg String)
viewString selected model =
    let
        toItem str =
            Item str str
    in
    view (Maybe.map toItem selected) model

View File

@ -8,6 +8,7 @@ module Comp.ItemCardList exposing
, view
)
import Api.Model.HighlightEntry exposing (HighlightEntry)
import Api.Model.ItemLight exposing (ItemLight)
import Api.Model.ItemLightGroup exposing (ItemLightGroup)
import Api.Model.ItemLightList exposing (ItemLightList)
@ -19,6 +20,7 @@ import Data.UiSettings exposing (UiSettings)
import Html exposing (..)
import Html.Attributes exposing (..)
import Html.Events exposing (onClick)
import Markdown
import Ports
import Util.List
import Util.String
@ -74,21 +76,10 @@ update _ msg model =
else
let
firstNew =
Data.Items.first list
scrollCmd =
case firstNew of
Just item ->
Ports.scrollToElem item.id
Nothing ->
Cmd.none
newModel =
{ model | results = Data.Items.concat model.results list }
in
( newModel, scrollCmd, Nothing )
( newModel, Cmd.none, Nothing )
SelectItem item ->
( model, Cmd.none, Just item )
@ -247,4 +238,41 @@ viewItem settings item =
]
]
]
, div
[ classList
[ ( "content search-highlight", True )
, ( "invisible hidden", item.highlighting == [] )
]
]
[ div [ class "ui list" ]
(List.map renderHighlightEntry item.highlighting)
]
]
{-| Renders one highlight entry as a list item: a header showing the
entry name followed by one markdown-rendered block per line of the
entry.
-}
renderHighlightEntry : HighlightEntry -> Html Msg
renderHighlightEntry entry =
    let
        -- Cleans a fragment before it is handed to the markdown
        -- renderer: removes triple backticks, turns tabs into spaces,
        -- replaces double newlines by single ones and trims every line.
        normalize =
            String.trim
                >> String.replace "```" ""
                >> String.replace "\t" " "
                >> String.replace "\n\n" "\n"
                >> String.lines
                >> List.map String.trim
                >> String.join "\n"

        heading =
            div [ class "header" ]
                [ i [ class "caret right icon" ] []
                , text (entry.name ++ ":")
                ]

        renderLine line =
            Markdown.toHtml [ class "description" ] (normalize line)
    in
    div [ class "item" ]
        [ div [ class "content" ]
            (heading :: List.map renderLine entry.lines)
        ]

View File

@ -25,8 +25,9 @@ import Data.UiSettings exposing (UiSettings)
import DatePicker exposing (DatePicker)
import Html exposing (..)
import Html.Attributes exposing (..)
import Html.Events exposing (onCheck, onInput)
import Html.Events exposing (onCheck, onClick, onInput)
import Http
import Util.Html exposing (KeyCode(..))
import Util.Maybe
import Util.Tag
import Util.Update
@ -55,7 +56,9 @@ type alias Model =
, untilDueDate : Maybe Int
, nameModel : Maybe String
, allNameModel : Maybe String
, fulltextModel : Maybe String
, datePickerInitialized : Bool
, showNameHelp : Bool
}
@ -111,7 +114,9 @@ init =
, untilDueDate = Nothing
, nameModel = Nothing
, allNameModel = Nothing
, fulltextModel = Nothing
, datePickerInitialized = False
, showNameHelp = False
}
@ -135,7 +140,10 @@ type Msg
| GetPersonResp (Result Http.Error ReferenceList)
| SetName String
| SetAllName String
| SetFulltext String
| ResetForm
| KeyUpMsg (Maybe KeyCode)
| ToggleNameHelp
getDirection : Model -> Maybe Direction
@ -188,6 +196,7 @@ getItemSearch model =
, allNames =
model.allNameModel
|> Maybe.map amendWildcards
, fullText = model.fulltextModel
}
@ -471,7 +480,7 @@ update flags settings msg model =
( { model | nameModel = next }
, Cmd.none
)
(model.nameModel /= next)
False
SetAllName str ->
let
@ -482,15 +491,35 @@ update flags settings msg model =
( { model | allNameModel = next }
, Cmd.none
)
(model.allNameModel /= next)
False
SetFulltext str ->
let
next =
Util.Maybe.fromString str
in
NextState
( { model | fulltextModel = next }
, Cmd.none
)
False
KeyUpMsg (Just Enter) ->
NextState ( model, Cmd.none ) True
KeyUpMsg _ ->
NextState ( model, Cmd.none ) False
ToggleNameHelp ->
NextState ( { model | showNameHelp = not model.showNameHelp }, Cmd.none ) False
-- View
view : UiSettings -> Model -> Html Msg
view settings model =
view : Flags -> UiSettings -> Model -> Html Msg
view flags settings model =
let
formHeader icon headline =
div [ class "ui small dividing header" ]
@ -500,6 +529,21 @@ view settings model =
]
]
formHeaderHelp icon headline tagger =
div [ class "ui small dividing header" ]
[ a
[ class "right-float"
, href "#"
, onClick tagger
]
[ i [ class "small grey help link icon" ] []
]
, icon
, div [ class "content" ]
[ text headline
]
]
nameIcon =
i [ class "left align icon" ] []
in
@ -517,17 +561,54 @@ view settings model =
]
]
]
, formHeader nameIcon "Names"
, div
[ classList
[ ( "field", True )
, ( "invisible hidden", not flags.config.fullTextSearchEnabled )
]
]
[ label [] [ text "Content Search" ]
, input
[ type_ "text"
, onInput SetFulltext
, Util.Html.onKeyUpCode KeyUpMsg
, model.fulltextModel |> Maybe.withDefault "" |> value
]
[]
, span [ class "small-info" ]
[ text "Fulltext search in document contents."
]
]
, formHeaderHelp nameIcon "Names" ToggleNameHelp
, span
[ classList
[ ( "small-info", True )
, ( "invisible hidden", not model.showNameHelp )
]
]
[ text "Use wildcards "
, code [] [ text "*" ]
, text " at beginning or end. Added automatically if not "
, text "present and not quoted. Press "
, em [] [ text "Enter" ]
, text " to start searching."
]
, div [ class "field" ]
[ label [] [ text "All Names" ]
, input
[ type_ "text"
, onInput SetAllName
, Util.Html.onKeyUpCode KeyUpMsg
, model.allNameModel |> Maybe.withDefault "" |> value
]
[]
, span [ class "small-info" ]
[ text "Looks in correspondents, concerned, item name and notes."
, span
[ classList
[ ( "small-info", True )
, ( "invisible hidden", not model.showNameHelp )
]
]
[ text "Looks in correspondents, concerned entities, item name and notes."
]
]
, div [ class "field" ]
@ -535,18 +616,18 @@ view settings model =
, input
[ type_ "text"
, onInput SetName
, Util.Html.onKeyUpCode KeyUpMsg
, model.nameModel |> Maybe.withDefault "" |> value
]
[]
, span [ class "small-info" ]
[ text "Looks in item name."
, span
[ classList
[ ( "small-info", True )
, ( "invisible hidden", not model.showNameHelp )
]
]
[ text "Looks in item name only."
]
]
, span [ class "small-info" ]
[ text "Use wildcards "
, code [] [ text "*" ]
, text " at beginning or end. Added automatically if not "
, text "present and not quoted."
]
, formHeader (Icons.tagsIcon "") "Tags"
, div [ class "field" ]

View File

@ -15,6 +15,7 @@ type alias Config =
, signupMode : String
, docspellAssetPath : String
, integrationEnabled : Bool
, fullTextSearchEnabled : Bool
}

View File

@ -1,15 +1,19 @@
module Page.Home.Data exposing
( Model
, Msg(..)
, SearchType(..)
, ViewMode(..)
, defaultSearchType
, doSearchCmd
, init
, itemNav
, resultsBelowLimit
, searchTypeString
)
import Api
import Api.Model.ItemLightList exposing (ItemLightList)
import Comp.FixedDropdown
import Comp.ItemCardList
import Comp.SearchMenu
import Data.Flags exposing (Flags)
@ -17,6 +21,7 @@ import Data.Items
import Data.UiSettings exposing (UiSettings)
import Http
import Throttle exposing (Throttle)
import Util.Html exposing (KeyCode(..))
type alias Model =
@ -29,11 +34,22 @@ type alias Model =
, moreAvailable : Bool
, moreInProgress : Bool
, throttle : Throttle Msg
, searchTypeDropdown : Comp.FixedDropdown.Model SearchType
, searchType : SearchType
, contentOnlySearch : Maybe String
}
init : Flags -> Model
init _ =
init flags =
let
searchTypeOptions =
if flags.config.fullTextSearchEnabled then
[ BasicSearch, ContentSearch, ContentOnlySearch ]
else
[ BasicSearch ]
in
{ searchMenuModel = Comp.SearchMenu.init
, itemListModel = Comp.ItemCardList.init
, searchInProgress = False
@ -43,9 +59,23 @@ init _ =
, moreAvailable = True
, moreInProgress = False
, throttle = Throttle.create 1
, searchTypeDropdown =
Comp.FixedDropdown.initMap searchTypeString
searchTypeOptions
, searchType = defaultSearchType flags
, contentOnlySearch = Nothing
}
{-| The search type to start with: `ContentSearch` when full-text
search is enabled in the config, `BasicSearch` otherwise.
-}
defaultSearchType : Flags -> SearchType
defaultSearchType flags =
    case flags.config.fullTextSearchEnabled of
        True ->
            ContentSearch

        False ->
            BasicSearch
type Msg
= Init
| SearchMenuMsg Comp.SearchMenu.Msg
@ -58,6 +88,28 @@ type Msg
| LoadMore
| UpdateThrottle
| SetBasicSearch String
| SearchTypeMsg (Comp.FixedDropdown.Msg SearchType)
| KeyUpMsg (Maybe KeyCode)
| SetContentOnly String
{-| The kinds of search selectable for the quick-search input. Their
display labels are "All Names", "Contents" and "Contents Only".
-}
type SearchType
= BasicSearch
| ContentSearch
| ContentOnlySearch
{-| Display label of a search type.
-}
searchTypeString : SearchType -> String
searchTypeString searchType =
    case searchType of
        ContentOnlySearch ->
            "Contents Only"

        ContentSearch ->
            "Contents"

        BasicSearch ->
            "All Names"
type ViewMode
@ -81,6 +133,19 @@ itemNav id model =
{-| Creates the search command for the current search type.
`ContentOnlySearch` goes to the index search, the other two types use
the default search.
-}
doSearchCmd : Flags -> UiSettings -> Int -> Model -> Cmd Msg
doSearchCmd flags settings offset model =
    let
        runSearch =
            case model.searchType of
                ContentOnlySearch ->
                    doSearchIndexCmd

                BasicSearch ->
                    doSearchDefaultCmd

                ContentSearch ->
                    doSearchDefaultCmd
    in
    runSearch flags settings offset model
doSearchDefaultCmd : Flags -> UiSettings -> Int -> Model -> Cmd Msg
doSearchDefaultCmd flags settings offset model =
let
smask =
Comp.SearchMenu.getItemSearch model.searchMenuModel
@ -98,6 +163,27 @@ doSearchCmd flags settings offset model =
Api.itemSearch flags mask ItemSearchAddResp
{-| Creates the index-search command from `model.contentOnlySearch`.
Without a query no request is made. The response is tagged with
`ItemSearchResp` for offset 0 and with `ItemSearchAddResp` for any
other offset.
-}
doSearchIndexCmd : Flags -> UiSettings -> Int -> Model -> Cmd Msg
doSearchIndexCmd flags settings offset model =
    let
        receive =
            if offset == 0 then
                ItemSearchResp

            else
                ItemSearchAddResp

        search q =
            Api.itemIndexSearch flags
                { query = q
                , limit = settings.itemSearchPageSize
                , offset = offset
                }
                receive
    in
    model.contentOnlySearch
        |> Maybe.map search
        |> Maybe.withDefault Cmd.none
resultsBelowLimit : UiSettings -> Model -> Bool
resultsBelowLimit settings model =
let

View File

@ -1,6 +1,7 @@
module Page.Home.Update exposing (update)
import Browser.Navigation as Nav
import Comp.FixedDropdown
import Comp.ItemCardList
import Comp.SearchMenu
import Data.Flags exposing (Flags)
@ -9,6 +10,8 @@ import Page exposing (Page(..))
import Page.Home.Data exposing (..)
import Throttle
import Time
import Util.Html exposing (KeyCode(..))
import Util.Maybe
import Util.Update
@ -25,7 +28,10 @@ update key flags settings msg model =
ResetSearch ->
let
nm =
{ model | searchOffset = 0 }
{ model
| searchOffset = 0
, searchType = defaultSearchType flags
}
in
update key flags settings (SearchMenuMsg Comp.SearchMenu.ResetForm) nm
@ -150,10 +156,43 @@ update key flags settings msg model =
SetBasicSearch str ->
let
m =
SearchMenuMsg (Comp.SearchMenu.SetAllName str)
smMsg =
case model.searchType of
BasicSearch ->
SearchMenuMsg (Comp.SearchMenu.SetAllName str)
ContentSearch ->
SearchMenuMsg (Comp.SearchMenu.SetFulltext str)
ContentOnlySearch ->
SetContentOnly str
in
update key flags settings m model
update key flags settings smMsg model
SetContentOnly str ->
withSub
( { model | contentOnlySearch = Util.Maybe.fromString str }
, Cmd.none
)
SearchTypeMsg lm ->
let
( sm, mv ) =
Comp.FixedDropdown.update lm model.searchTypeDropdown
in
withSub
( { model
| searchTypeDropdown = sm
, searchType = Maybe.withDefault model.searchType mv
}
, Cmd.none
)
KeyUpMsg (Just Enter) ->
update key flags settings DoSearch model
KeyUpMsg _ ->
withSub ( model, Cmd.none )
@ -196,6 +235,6 @@ withSub ( m, c ) =
( m
, c
, Throttle.ifNeeded
(Time.every 150 (\_ -> UpdateThrottle))
(Time.every 500 (\_ -> UpdateThrottle))
m.throttle
)

View File

@ -1,18 +1,21 @@
module Page.Home.View exposing (view)
import Api.Model.ItemSearch
import Comp.FixedDropdown
import Comp.ItemCardList
import Comp.SearchMenu
import Data.Flags exposing (Flags)
import Data.UiSettings exposing (UiSettings)
import Html exposing (..)
import Html.Attributes exposing (..)
import Html.Events exposing (onClick, onInput)
import Page exposing (Page(..))
import Page.Home.Data exposing (..)
import Util.Html
view : UiSettings -> Model -> Html Msg
view settings model =
view : Flags -> UiSettings -> Model -> Html Msg
view flags settings model =
div [ class "home-page ui padded grid" ]
[ div
[ classList
@ -23,20 +26,19 @@ view settings model =
]
]
[ div
[ class "ui top attached ablue-comp menu"
[ class "ui top attached ablue-comp icon menu"
]
[ a
[ class "item"
[ class "borderless item"
, href "#"
, onClick ToggleSearchMenu
, title "Hide menu"
]
[ i [ class "ui angle down icon" ] []
, text "Search"
]
, div [ class "right floated menu" ]
[ a
[ class "icon item"
[ class "borderless item"
, onClick ResetSearch
, title "Reset form"
, href "#"
@ -44,7 +46,7 @@ view settings model =
[ i [ class "undo icon" ] []
]
, a
[ class "icon item"
[ class "borderless item"
, onClick DoSearch
, title "Run search query"
, href ""
@ -61,7 +63,7 @@ view settings model =
]
]
, div [ class "ui attached fluid segment" ]
[ Html.map SearchMenuMsg (Comp.SearchMenu.view settings model.searchMenuModel)
[ Html.map SearchMenuMsg (Comp.SearchMenu.view flags settings model.searchMenuModel)
]
]
, div
@ -73,49 +75,7 @@ view settings model =
, ( "item-card-list", True )
]
]
[ div
[ classList
[ ( "invisible hidden", not model.menuCollapsed )
, ( "ui menu container", True )
]
]
[ a
[ class "item"
, onClick ToggleSearchMenu
, href "#"
, title "Open search menu"
]
[ i [ class "angle left icon" ] []
, i [ class "icons" ]
[ i [ class "grey bars icon" ] []
, i [ class "bottom left corner search icon" ] []
, if hasMoreSearch model then
i [ class "top right blue corner circle icon" ] []
else
span [ class "hidden invisible" ] []
]
]
, div [ class "ui category search item" ]
[ div [ class "ui transparent icon input" ]
[ input
[ type_ "text"
, placeholder "Basic search"
, onInput SetBasicSearch
, Maybe.map value model.searchMenuModel.allNameModel
|> Maybe.withDefault (value "")
]
[]
, i
[ classList
[ ( "search link icon", not model.searchInProgress )
, ( "loading spinner icon", model.searchInProgress )
]
]
[]
]
]
]
[ viewSearchBar flags model
, case model.viewMode of
Listing ->
Html.map ItemCardListMsg
@ -157,6 +117,92 @@ view settings model =
]
{-| Renders the quick-search bar: a link toggling the search menu, the
text input with its search icon, and the search-type dropdown. The
bar carries the "invisible hidden" classes unless the search menu is
collapsed.
-}
viewSearchBar : Flags -> Model -> Html Msg
viewSearchBar flags model =
    let
        contentOnly =
            model.searchType == ContentOnlySearch

        currentType =
            Comp.FixedDropdown.Item
                model.searchType
                (searchTypeString model.searchType)

        -- The input shows the value that belongs to the active search type.
        currentQuery =
            case model.searchType of
                BasicSearch ->
                    model.searchMenuModel.allNameModel

                ContentSearch ->
                    model.searchMenuModel.fulltextModel

                ContentOnlySearch ->
                    model.contentOnlySearch

        dropdownClass =
            if flags.config.fullTextSearchEnabled then
                "compact"

            else
                "hidden invisible"

        toggleTitle =
            if contentOnly then
                "Search menu disabled"

            else
                "Open search menu"

        cornerIcon =
            if contentOnly then
                i [ class "top right red corner delete icon" ] []

            else if hasMoreSearch model then
                i [ class "top right blue corner circle icon" ] []

            else
                span [ class "hidden invisible" ] []

        menuToggle =
            a
                [ class "item"
                , onClick ToggleSearchMenu
                , href "#"
                , title toggleTitle
                ]
                [ i [ class "angle left icon" ] []
                , i [ class "icons" ]
                    [ i [ class "grey bars icon" ] []
                    , i [ class "bottom left corner search icon" ] []
                    , cornerIcon
                    ]
                ]

        searchIcon =
            i
                [ classList
                    [ ( "search link icon", not model.searchInProgress )
                    , ( "loading spinner icon", model.searchInProgress )
                    ]
                , href "#"
                , onClick DoSearch
                ]
                []

        queryInput =
            input
                [ type_ "text"
                , placeholder "Quick Search "
                , onInput SetBasicSearch
                , Util.Html.onKeyUpCode KeyUpMsg
                , value (Maybe.withDefault "" currentQuery)
                ]
                []

        typeDropdown =
            Html.map SearchTypeMsg
                (Comp.FixedDropdown.viewStyled dropdownClass
                    (Just currentType)
                    model.searchTypeDropdown
                )
    in
    div
        [ classList
            [ ( "invisible hidden", not model.menuCollapsed )
            , ( "ui secondary stackable menu container", True )
            ]
        ]
        [ menuToggle
        , div [ class "item" ]
            [ div [ class "ui left icon right action input" ]
                [ searchIcon
                , queryInput
                , typeDropdown
                ]
            ]
        ]
hasMoreSearch : Model -> Bool
hasMoreSearch model =
let
@ -164,6 +210,14 @@ hasMoreSearch model =
Comp.SearchMenu.getItemSearch model.searchMenuModel
is_ =
{ is | allNames = Nothing }
case model.searchType of
BasicSearch ->
{ is | allNames = Nothing }
ContentSearch ->
{ is | fullText = Nothing }
ContentOnlySearch ->
Api.Model.ItemSearch.empty
in
is_ /= Api.Model.ItemSearch.empty

View File

@ -9,6 +9,7 @@ module Util.Html exposing
, onDragOver
, onDropFiles
, onKeyUp
, onKeyUpCode
)
import File exposing (File)
@ -76,6 +77,11 @@ onKeyUp tagger =
on "keyup" (D.map tagger keyCode)
{-| Like `onKeyUp`, but the numeric key code is first passed through
`intToKeyCode` before it is given to the tagger.
-}
onKeyUpCode : (Maybe KeyCode -> msg) -> Attribute msg
onKeyUpCode tagger =
    onKeyUp (\code -> tagger (intToKeyCode code))
{-| Click handler registered through `preventDefaultOn`: it always
succeeds with the given message, passed through `alwaysPreventDefault`.
-}
onClickk : msg -> Attribute msg
onClickk msg =
    D.succeed msg
        |> D.map alwaysPreventDefault
        |> Html.Events.preventDefaultOn "click"

View File

@ -88,6 +88,22 @@
right: -8px;
}
/* Search-highlight section of an item card: faint yellow tint, smaller
   text, and a scrollbar once the content exceeds 25em in height. */
.default-layout .ui.cards .ui.card .content.search-highlight {
background: rgba(246, 255, 158, 0.1);
font-size: smaller;
max-height: 25em;
overflow: auto;
}
/* Header of a highlight entry; no declarations so far. */
.default-layout .ui.cards .ui.card .content.search-highlight .ui.list .item .content .header {
}
/* Text of a highlight entry: dimmed and slightly indented. */
.default-layout .ui.cards .ui.card .content.search-highlight .ui.list .item .content .description {
color: rgba(0,0,0,.6);
margin-left: 0.75em;
}
/* Emphasis nested directly in strong text gets a yellow-green background;
   presumably this is how matched terms arrive in the rendered snippet.
   TODO confirm against the search backend's highlight markup. */
.default-layout .ui.cards .ui.card .content.search-highlight .ui.list .item .content .description strong > em {
background: rgba(220, 255, 71, 0.6);
}
.markdown-preview {
overflow: auto;
max-height: 300px;

View File

@ -32,20 +32,20 @@ elmApp.ports.setAllProgress.subscribe(function(input) {
}, 100);
});
elmApp.ports.scrollToElem.subscribe(function(id) {
if (id && id != "") {
window.setTimeout(function() {
var el = document.getElementById(id);
if (el) {
if (el["scrollIntoViewIfNeeded"]) {
el.scrollIntoViewIfNeeded();
} else {
el.scrollIntoView();
}
}
}, 20);
}
});
// elmApp.ports.scrollToElem.subscribe(function(id) {
// if (id && id != "") {
// window.setTimeout(function() {
// var el = document.getElementById(id);
// if (el) {
// if (el["scrollIntoViewIfNeeded"]) {
// el.scrollIntoViewIfNeeded();
// } else {
// el.scrollIntoView();
// }
// }
// }, 20);
// }
// });
elmApp.ports.saveUiSettings.subscribe(function(args) {
if (Array.isArray(args) && args.length == 2) {

View File

@ -13,7 +13,7 @@ let
app-name = "Docspell";
app-id = "rest1";
base-url = "http://localhost:7880";
max-item-page-size = 500;
max-item-page-size = 200;
bind = {
address = "localhost";
port = 7880;

View File

@ -159,16 +159,22 @@ object Dependencies {
"co.fs2" %% "fs2-io" % Fs2Version
)
val http4s = Seq(
"org.http4s" %% "http4s-blaze-server" % Http4sVersion,
"org.http4s" %% "http4s-circe" % Http4sVersion,
"org.http4s" %% "http4s-dsl" % Http4sVersion
)
// http4s is split into one dependency group per artifact so that a
// module can pull in only the parts it uses.

// Blaze client artifact.
val http4sClient = Seq(
"org.http4s" %% "http4s-blaze-client" % Http4sVersion
)
// Circe integration artifact.
val http4sCirce = Seq(
"org.http4s" %% "http4s-circe" % Http4sVersion
)
// DSL artifact.
val http4sDsl = Seq(
"org.http4s" %% "http4s-dsl" % Http4sVersion
)
// Blaze server artifact.
val http4sServer = Seq(
"org.http4s" %% "http4s-blaze-server" % Http4sVersion
)
val circe = Seq(
"io.circe" %% "circe-generic" % CirceVersion,
"io.circe" %% "circe-parser" % CirceVersion