diff --git a/build.sbt b/build.sbt index 7e97df57..051a767b 100644 --- a/build.sbt +++ b/build.sbt @@ -259,6 +259,28 @@ val analysis = project.in(file("modules/analysis")). Dependencies.fs2 ++ Dependencies.stanfordNlpCore ).dependsOn(common, files % "test->test") + +val ftsclient = project.in(file("modules/fts-client")). + disablePlugins(RevolverPlugin). + settings(sharedSettings). + settings(testSettings). + settings( + name := "docspell-fts-client", + libraryDependencies ++= Seq.empty + ).dependsOn(common) + +val ftssolr = project.in(file("modules/fts-solr")). + disablePlugins(RevolverPlugin). + settings(sharedSettings). + settings(testSettings). + settings( + name := "docspell-fts-solr", + libraryDependencies ++= + Dependencies.http4sClient ++ + Dependencies.http4sCirce ++ + Dependencies.http4sDsl ++ + Dependencies.circe + ).dependsOn(common, ftsclient) val restapi = project.in(file("modules/restapi")). disablePlugins(RevolverPlugin). @@ -303,7 +325,7 @@ val backend = project.in(file("modules/backend")). Dependencies.bcrypt ++ Dependencies.http4sClient ++ Dependencies.emil - ).dependsOn(store, joexapi) + ).dependsOn(store, joexapi, ftsclient) val webapp = project.in(file("modules/webapp")). disablePlugins(RevolverPlugin). @@ -336,7 +358,9 @@ val joex = project.in(file("modules/joex")). name := "docspell-joex", libraryDependencies ++= Dependencies.fs2 ++ - Dependencies.http4s ++ + Dependencies.http4sServer ++ + Dependencies.http4sCirce ++ + Dependencies.http4sDsl ++ Dependencies.circe ++ Dependencies.pureconfig ++ Dependencies.emilTnef ++ @@ -350,7 +374,7 @@ val joex = project.in(file("modules/joex")). addCompilerPlugin(Dependencies.betterMonadicFor), buildInfoPackage := "docspell.joex", reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"local"/"dev.conf"}") - ).dependsOn(store, backend, extract, convert, analysis, joexapi, restapi) + ).dependsOn(store, backend, extract, convert, analysis, joexapi, restapi, ftssolr) val restserver = project.in(file("modules/restserver")). enablePlugins(BuildInfoPlugin @@ -364,7 +388,9 @@ val restserver = project.in(file("modules/restserver")). settings( name := "docspell-restserver", libraryDependencies ++= - Dependencies.http4s ++ + Dependencies.http4sServer ++ + Dependencies.http4sCirce ++ + Dependencies.http4sDsl ++ Dependencies.circe ++ Dependencies.pureconfig ++ Dependencies.yamusca ++ @@ -386,7 +412,7 @@ val restserver = project.in(file("modules/restserver")). }.taskValue, Compile/unmanagedResourceDirectories ++= Seq((Compile/resourceDirectory).value.getParentFile/"templates"), reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"local"/"dev.conf"}") - ).dependsOn(restapi, joexapi, backend, webapp) + ).dependsOn(restapi, joexapi, backend, webapp, ftssolr) @@ -472,6 +498,8 @@ val root = project.in(file(".")). , extract , convert , analysis + , ftsclient + , ftssolr , files , store , joexapi diff --git a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala index 18ab38c8..acb4e08d 100644 --- a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala +++ b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala @@ -1,12 +1,17 @@ package docspell.backend import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource} +import org.http4s.client.Client +import org.http4s.client.blaze.BlazeClientBuilder + import docspell.backend.auth.Login import docspell.backend.ops._ import docspell.backend.signup.OSignup +import docspell.joexapi.client.JoexClient import docspell.store.Store import docspell.store.queue.JobQueue import docspell.store.usertask.UserTaskStore +import docspell.ftsclient.FtsClient import scala.concurrent.ExecutionContext import emil.javamail.{JavaMailEmil, Settings} @@ -25,6 +30,7 @@ trait BackendApp[F[_]] { def job: OJob[F] def item: OItem[F] def itemSearch: OItemSearch[F] + def fulltext: OFulltext[F] def mail: OMail[F] def joex: OJoex[F] def userTask: OUserTask[F] @@ -35,7 +41,8 @@ object BackendApp { def create[F[_]: ConcurrentEffect: ContextShift]( cfg: Config, store: Store[F], - httpClientEc: ExecutionContext, + httpClient: Client[F], + ftsClient: FtsClient[F], blocker: Blocker ): Resource[F, BackendApp[F]] = for { @@ -48,12 +55,13 @@ object BackendApp { tagImpl <- OTag[F](store) equipImpl <- OEquipment[F](store) orgImpl <- OOrganization(store) - joexImpl <- OJoex.create(httpClientEc, store) + joexImpl <- OJoex(JoexClient(httpClient), store) uploadImpl <- OUpload(store, queue, cfg.files, joexImpl) nodeImpl <- ONode(store) jobImpl <- OJob(store, joexImpl) - itemImpl <- OItem(store) + itemImpl <- OItem(store, ftsClient) itemSearchImpl <- OItemSearch(store) + fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl) javaEmil = JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug)) mailImpl <- OMail(store, javaEmil) @@ -71,6 +79,7 @@ object BackendApp { val job = jobImpl val item = itemImpl val itemSearch = itemSearchImpl + val fulltext = fulltextImpl val mail = mailImpl val joex = joexImpl val userTask = userTaskImpl @@ -81,9 +90,11 @@ object BackendApp { connectEC: ExecutionContext, httpClientEc: ExecutionContext, blocker: Blocker - ): Resource[F, BackendApp[F]] = + )(ftsFactory: Client[F] => Resource[F, FtsClient[F]]): Resource[F, BackendApp[F]] = for { - store <- Store.create(cfg.jdbc, connectEC, blocker) - backend <- create(cfg, store, httpClientEc, blocker) + store <- Store.create(cfg.jdbc, connectEC, blocker) + httpClient <- BlazeClientBuilder[F](httpClientEc).resource + ftsClient <- ftsFactory(httpClient) + backend <- create(cfg, store, httpClient, ftsClient, blocker) } yield backend } diff --git a/modules/backend/src/main/scala/docspell/backend/JobFactory.scala b/modules/backend/src/main/scala/docspell/backend/JobFactory.scala new file mode 100644 index 00000000..42d6a654 --- /dev/null +++ b/modules/backend/src/main/scala/docspell/backend/JobFactory.scala @@ -0,0 +1,90 @@ +package docspell.backend + +import cats.effect._ +import cats.implicits._ +import docspell.common._ +import docspell.store.records.RJob + +object JobFactory { + + def processItem[F[_]: Sync]( + args: ProcessItemArgs, + account: AccountId, + prio: Priority, + tracker: Option[Ident] + ): F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + job = RJob.newJob( + id, + ProcessItemArgs.taskName, + account.collective, + args, + args.makeSubject, + now, + account.user, + prio, + tracker + ) + } yield job + + def processItems[F[_]: Sync]( + args: Vector[ProcessItemArgs], + account: AccountId, + prio: Priority, + tracker: Option[Ident] + ): F[Vector[RJob]] = { + def create(id: Ident, now: Timestamp, arg: ProcessItemArgs): RJob = + RJob.newJob( + id, + ProcessItemArgs.taskName, + account.collective, + arg, + arg.makeSubject, + now, + account.user, + prio, + tracker + ) + + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + jobs = args.map(a => create(id, now, a)) + } yield jobs + } + + def reIndexAll[F[_]: Sync]: F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + } yield RJob.newJob( + id, + ReIndexTaskArgs.taskName, + DocspellSystem.taskGroup, + ReIndexTaskArgs(None), + s"Recreate full-text index", + now, + DocspellSystem.taskGroup, + Priority.Low, + Some(DocspellSystem.migrationTaskTracker) + ) + + def reIndex[F[_]: Sync](account: AccountId): F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + args = ReIndexTaskArgs(Some(account.collective)) + } yield RJob.newJob( + id, + ReIndexTaskArgs.taskName, + account.collective, + args, + s"Recreate full-text index", + now, + account.user, + Priority.Low, + Some(ReIndexTaskArgs.tracker(args)) + ) +} diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala b/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala new file mode 100644 index 00000000..33bedffc --- /dev/null +++ b/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala @@ -0,0 +1,243 @@ +package docspell.backend.ops + +import cats.effect._ +import cats.implicits._ +import fs2.Stream +import docspell.common._ +import docspell.ftsclient._ +import docspell.backend.JobFactory +import docspell.store.Store +import docspell.store.records.RJob +import docspell.store.queue.JobQueue +import docspell.store.queries.QItem +import OItemSearch.{Batch, ListItem, ListItemWithTags, Query} + +trait OFulltext[F[_]] { + + def findItems( + q: Query, + fts: OFulltext.FtsInput, + batch: Batch + ): F[Vector[OFulltext.FtsItem]] + + /** Same as `findItems` but does more queries per item to find all tags. */ + def findItemsWithTags( + q: Query, + fts: OFulltext.FtsInput, + batch: Batch + ): F[Vector[OFulltext.FtsItemWithTags]] + + def findIndexOnly( + fts: OFulltext.FtsInput, + collective: Ident, + batch: Batch + ): F[Vector[OFulltext.FtsItemWithTags]] + + /** Clears the full-text index completely and launches a task that + * indexes all data. + */ + def reindexAll: F[Unit] + + /** Clears the full-text index for the given collective and starts a + * task indexing all their data. + */ + def reindexCollective(account: AccountId): F[Unit] +} + +object OFulltext { + + case class FtsInput( + query: String, + highlightPre: String = "***", + highlightPost: String = "***" + ) + + case class FtsDataItem( + score: Double, + matchData: FtsResult.MatchData, + context: List[String] + ) + case class FtsData( + maxScore: Double, + count: Int, + qtime: Duration, + items: List[FtsDataItem] + ) + case class FtsItem(item: ListItem, ftsData: FtsData) + case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData) + + def apply[F[_]: Effect]( + itemSearch: OItemSearch[F], + fts: FtsClient[F], + store: Store[F], + queue: JobQueue[F], + joex: OJoex[F] + ): Resource[F, OFulltext[F]] = + Resource.pure[F, OFulltext[F]](new OFulltext[F] { + def reindexAll: F[Unit] = + for { + job <- JobFactory.reIndexAll[F] + _ <- queue.insertIfNew(job) *> joex.notifyAllNodes + } yield () + + def reindexCollective(account: AccountId): F[Unit] = + for { + exist <- store.transact( + RJob.findNonFinalByTracker(DocspellSystem.migrationTaskTracker) + ) + job <- JobFactory.reIndex(account) + _ <- + if (exist.isDefined) ().pure[F] + else queue.insertIfNew(job) *> joex.notifyAllNodes + } yield () + + def findIndexOnly( + ftsQ: OFulltext.FtsInput, + collective: Ident, + batch: Batch + ): F[Vector[OFulltext.FtsItemWithTags]] = { + val fq = FtsQuery( + ftsQ.query, + collective, + Set.empty, + batch.limit, + batch.offset, + FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost) + ) + for { + ftsR <- fts.search(fq) + ftsItems = ftsR.results.groupBy(_.itemId) + select = ftsR.results.map(r => QItem.SelectedItem(r.itemId, r.score)).toSet + itemsWithTags <- + store + .transact( + QItem.findItemsWithTags( + collective, + QItem.findSelectedItems(QItem.Query.empty(collective), select) + ) + ) + .take(batch.limit.toLong) + .compile + .toVector + res = + itemsWithTags + .collect(convertFtsData(ftsR, ftsItems)) + .map({ case (li, fd) => FtsItemWithTags(li, fd) }) + } yield res + } + + def findItems(q: Query, ftsQ: FtsInput, batch: Batch): F[Vector[FtsItem]] = + findItemsFts(q, ftsQ, batch.first, itemSearch.findItems, convertFtsData[ListItem]) + .drop(batch.offset.toLong) + .take(batch.limit.toLong) + .map({ case (li, fd) => FtsItem(li, fd) }) + .compile + .toVector + + def findItemsWithTags( + q: Query, + ftsQ: FtsInput, + batch: Batch + ): F[Vector[FtsItemWithTags]] = + findItemsFts( + q, + ftsQ, + batch.first, + itemSearch.findItemsWithTags, + convertFtsData[ListItemWithTags] + ) + .drop(batch.offset.toLong) + .take(batch.limit.toLong) + .map({ case (li, fd) => FtsItemWithTags(li, fd) }) + .compile + .toVector + + // Helper + + private def findItemsFts[A: ItemId, B]( + q: Query, + ftsQ: FtsInput, + batch: Batch, + search: (Query, Batch) => F[Vector[A]], + convert: ( + FtsResult, + Map[Ident, List[FtsResult.ItemMatch]] + ) => PartialFunction[A, (A, FtsData)] + ): Stream[F, (A, FtsData)] = + findItemsFts0(q, ftsQ, batch, search, convert) + .takeThrough(_._1 >= batch.limit) + .flatMap(x => Stream.emits(x._2)) + + private def findItemsFts0[A: ItemId, B]( + q: Query, + ftsQ: FtsInput, + batch: Batch, + search: (Query, Batch) => F[Vector[A]], + convert: ( + FtsResult, + Map[Ident, List[FtsResult.ItemMatch]] + ) => PartialFunction[A, (A, FtsData)] + ): Stream[F, (Int, Vector[(A, FtsData)])] = { + val sqlResult = search(q, batch) + val fq = FtsQuery( + ftsQ.query, + q.collective, + Set.empty, + 0, + 0, + FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost) + ) + + val qres = + for { + items <- sqlResult + ids = items.map(a => ItemId[A].itemId(a)) + // must find all index results involving the items. + // Currently there is one result per item + one result per + // attachment + limit = items.map(a => ItemId[A].fileCount(a)).sum + items.size + ftsQ = fq.copy(items = ids.toSet, limit = limit) + ftsR <- fts.search(ftsQ) + ftsItems = ftsR.results.groupBy(_.itemId) + res = items.collect(convert(ftsR, ftsItems)) + } yield (items.size, res) + + Stream.eval(qres) ++ findItemsFts0(q, ftsQ, batch.next, search, convert) + } + + private def convertFtsData[A: ItemId]( + ftr: FtsResult, + ftrItems: Map[Ident, List[FtsResult.ItemMatch]] + ): PartialFunction[A, (A, FtsData)] = { + case a if ftrItems.contains(ItemId[A].itemId(a)) => + val ftsDataItems = ftrItems + .get(ItemId[A].itemId(a)) + .getOrElse(Nil) + .map(im => + FtsDataItem(im.score, im.data, ftr.highlight.get(im.id).getOrElse(Nil)) + ) + (a, FtsData(ftr.maxScore, ftr.count, ftr.qtime, ftsDataItems)) + } + }) + + trait ItemId[A] { + def itemId(a: A): Ident + + def fileCount(a: A): Int + } + object ItemId { + def apply[A](implicit ev: ItemId[A]): ItemId[A] = ev + + def from[A](f: A => Ident, g: A => Int): ItemId[A] = + new ItemId[A] { + def itemId(a: A) = f(a) + def fileCount(a: A) = g(a) + } + + implicit val listItemId: ItemId[ListItem] = + ItemId.from(_.id, _.fileCount) + + implicit val listItemWithTagsId: ItemId[ListItemWithTags] = + ItemId.from(_.item.id, _.item.fileCount) + } +} diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala index 1c503266..9a9cd646 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala @@ -5,10 +5,12 @@ import cats.implicits._ import cats.effect.{Effect, Resource} import doobie._ import doobie.implicits._ +import org.log4s.getLogger import docspell.store.{AddResult, Store} import docspell.store.queries.{QAttachment, QItem} -import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp} +import docspell.common._ import docspell.store.records._ +import docspell.ftsclient.FtsClient trait OItem[F[_]] { @@ -38,7 +40,7 @@ trait OItem[F[_]] { def setNotes(item: Ident, notes: Option[String], collective: Ident): F[AddResult] - def setName(item: Ident, notes: String, collective: Ident): F[AddResult] + def setName(item: Ident, name: String, collective: Ident): F[AddResult] def setState(item: Ident, state: ItemState, collective: Ident): F[AddResult] @@ -67,11 +69,12 @@ trait OItem[F[_]] { object OItem { - def apply[F[_]: Effect](store: Store[F]): Resource[F, OItem[F]] = + def apply[F[_]: Effect](store: Store[F], fts: FtsClient[F]): Resource[F, OItem[F]] = for { otag <- OTag(store) oorg <- OOrganization(store) oequip <- OEquipment(store) + logger <- Resource.pure[F, Logger[F]](Logger.log4s(getLogger)) oitem <- Resource.pure[F, OItem[F]](new OItem[F] { def moveAttachmentBefore( itemId: Ident, @@ -259,12 +262,18 @@ object OItem { .transact(RItem.updateNotes(item, collective, notes)) .attempt .map(AddResult.fromUpdate) + .flatTap( + onSuccessIgnoreError(fts.updateItemNotes(logger, item, collective, notes)) + ) def setName(item: Ident, name: String, collective: Ident): F[AddResult] = store .transact(RItem.updateName(item, collective, name)) .attempt .map(AddResult.fromUpdate) + .flatTap( + onSuccessIgnoreError(fts.updateItemName(logger, item, collective, name)) + ) def setState(item: Ident, state: ItemState, collective: Ident): F[AddResult] = store @@ -293,13 +302,17 @@ object OItem { .map(AddResult.fromUpdate) def deleteItem(itemId: Ident, collective: Ident): F[Int] = - QItem.delete(store)(itemId, collective) + QItem + .delete(store)(itemId, collective) + .flatTap(_ => fts.removeItem(logger, itemId)) def getProposals(item: Ident, collective: Ident): F[MetaProposalList] = store.transact(QAttachment.getMetaProposals(item, collective)) def deleteAttachment(id: Ident, collective: Ident): F[Int] = - QAttachment.deleteSingleAttachment(store)(id, collective) + QAttachment + .deleteSingleAttachment(store)(id, collective) + .flatTap(_ => fts.removeAttachment(logger, id)) def setAttachmentName( attachId: Ident, @@ -310,6 +323,29 @@ object OItem { .transact(RAttachment.updateName(attachId, collective, name)) .attempt .map(AddResult.fromUpdate) + .flatTap( + onSuccessIgnoreError( + OptionT(store.transact(RAttachment.findItemId(attachId))) + .semiflatMap(itemId => + fts.updateAttachmentName(logger, itemId, attachId, collective, name) + ) + .fold(())(identity) + ) + ) + + private def onSuccessIgnoreError(update: F[Unit])(ar: AddResult): F[Unit] = + ar match { + case AddResult.Success => + update.attempt.flatMap { + case Right(()) => ().pure[F] + case Left(ex) => + logger.warn(s"Error updating full-text index: ${ex.getMessage}") + } + case AddResult.Failure(_) => + ().pure[F] + case AddResult.EntityExists(_) => + ().pure[F] + } }) } yield oitem } diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala index a1478aad..d6bf64ee 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItemSearch.scala @@ -110,11 +110,15 @@ object OItemSearch { .compile .toVector - def findItemsWithTags(q: Query, batch: Batch): F[Vector[ListItemWithTags]] = + def findItemsWithTags(q: Query, batch: Batch): F[Vector[ListItemWithTags]] = { + val search = QItem.findItems(q, batch) store - .transact(QItem.findItemsWithTags(q, batch).take(batch.limit.toLong)) + .transact( + QItem.findItemsWithTags(q.collective, search).take(batch.limit.toLong) + ) .compile .toVector + } def findAttachment(id: Ident, collective: Ident): F[Option[AttachmentData[F]]] = store diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala index 70fda316..44f89a9f 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OUpload.scala @@ -5,7 +5,7 @@ import cats.Functor import cats.data.{EitherT, OptionT} import cats.effect._ import cats.implicits._ -import docspell.backend.Config +import docspell.backend.{Config, JobFactory} import fs2.Stream import docspell.common._ import docspell.common.syntax.all._ @@ -204,26 +204,7 @@ object OUpload { account: AccountId, prio: Priority, tracker: Option[Ident] - ): F[Vector[RJob]] = { - def create(id: Ident, now: Timestamp, arg: ProcessItemArgs): RJob = - RJob.newJob( - id, - ProcessItemArgs.taskName, - account.collective, - arg, - arg.makeSubject, - now, - account.user, - prio, - tracker - ) - - for { - id <- Ident.randomId[F] - now <- Timestamp.current[F] - jobs = args.map(a => create(id, now, a)) - } yield jobs - - } + ): F[Vector[RJob]] = + JobFactory.processItems[F](args, account, prio, tracker) }) } diff --git a/modules/common/src/main/scala/docspell/common/DocspellSystem.scala b/modules/common/src/main/scala/docspell/common/DocspellSystem.scala new file mode 100644 index 00000000..21247829 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/DocspellSystem.scala @@ -0,0 +1,8 @@ +package docspell.common + +object DocspellSystem { + + val taskGroup = Ident.unsafe("docspell-system") + val migrationTaskTracker = Ident.unsafe("full-text-index-tracker") + +} diff --git a/modules/common/src/main/scala/docspell/common/Duration.scala b/modules/common/src/main/scala/docspell/common/Duration.scala index bb47059e..dfda4652 100644 --- a/modules/common/src/main/scala/docspell/common/Duration.scala +++ b/modules/common/src/main/scala/docspell/common/Duration.scala @@ -25,6 +25,9 @@ case class Duration(nanos: Long) { def formatExact: String = s"$millis ms" + + override def toString(): String = + s"Duration(${millis}ms)" } object Duration { diff --git a/modules/common/src/main/scala/docspell/common/Ident.scala b/modules/common/src/main/scala/docspell/common/Ident.scala index a1d6cb8a..3d752053 100644 --- a/modules/common/src/main/scala/docspell/common/Ident.scala +++ b/modules/common/src/main/scala/docspell/common/Ident.scala @@ -15,6 +15,9 @@ case class Ident(id: String) { def nonEmpty: Boolean = !isEmpty + + def /(next: Ident): Ident = + new Ident(id + "." + next.id) } object Ident { diff --git a/modules/common/src/main/scala/docspell/common/ReIndexTaskArgs.scala b/modules/common/src/main/scala/docspell/common/ReIndexTaskArgs.scala new file mode 100644 index 00000000..d7624048 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/ReIndexTaskArgs.scala @@ -0,0 +1,24 @@ +package docspell.common + +import io.circe._ +import io.circe.generic.semiauto._ + +final case class ReIndexTaskArgs(collective: Option[Ident]) + +object ReIndexTaskArgs { + val taskName = Ident.unsafe("full-text-reindex") + + def tracker(args: ReIndexTaskArgs): Ident = + args.collective match { + case Some(cid) => + cid / DocspellSystem.migrationTaskTracker + case None => + DocspellSystem.migrationTaskTracker + } + + implicit val jsonEncoder: Encoder[ReIndexTaskArgs] = + deriveEncoder[ReIndexTaskArgs] + + implicit val jsonDecoder: Decoder[ReIndexTaskArgs] = + deriveDecoder[ReIndexTaskArgs] +} diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala new file mode 100644 index 00000000..703162cc --- /dev/null +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala @@ -0,0 +1,132 @@ +package docspell.ftsclient + +import fs2.Stream +import cats.implicits._ +import cats.effect._ +import org.log4s.getLogger +import docspell.common._ + +/** The fts client is the interface for docspell to a fulltext search + * engine. + * + * It defines all operations required for integration into docspell. + * It uses data structures from docspell. Implementation modules need + * to translate it to the engine that provides the features. + */ +trait FtsClient[F[_]] { + + /** Initialization tasks. This is called exactly once and then never + * again (except when re-indexing everything). It may be used to + * setup the database. + */ + def initialize: F[Unit] + + /** Run a full-text search. */ + def search(q: FtsQuery): F[FtsResult] + + /** Continually run a full-text search and concatenate the results. */ + def searchAll(q: FtsQuery): Stream[F, FtsResult] = + Stream.eval(search(q)).flatMap { result => + if (result.results.size < q.limit) Stream.emit(result) + else Stream.emit(result) ++ searchAll(q.nextPage) + } + + /** Push all data to the index. Data with same `id' is replaced. + * Values that are `None' are removed from the index (or set to an + * empty string). + */ + def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] + + def indexData(logger: Logger[F], data: TextData*): F[Unit] = + indexData(logger, Stream.emits(data)) + + /** Push all data to the index, but only update existing entries. No + * new entries are created and values that are given as `None' are + * skipped. + */ + def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] + + def updateIndex(logger: Logger[F], data: TextData*): F[Unit] = + updateIndex(logger, Stream.emits(data)) + + def updateItemName( + logger: Logger[F], + itemId: Ident, + collective: Ident, + name: String + ): F[Unit] = + updateIndex(logger, TextData.item(itemId, collective, Some(name), None)) + + def updateItemNotes( + logger: Logger[F], + itemId: Ident, + collective: Ident, + notes: Option[String] + ): F[Unit] = + updateIndex( + logger, + TextData.item(itemId, collective, None, Some(notes.getOrElse(""))) + ) + + def updateAttachmentName( + logger: Logger[F], + itemId: Ident, + attachId: Ident, + collective: Ident, + name: Option[String] + ): F[Unit] = + updateIndex( + logger, + TextData.attachment( + itemId, + attachId, + collective, + Language.English, + Some(name.getOrElse("")), + None + ) + ) + + def removeItem(logger: Logger[F], itemId: Ident): F[Unit] + + def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] + + /** Clears the index – removes everything. */ + def clearAll(logger: Logger[F]): F[Unit] + + /** Clears the index from all data belonging to the given collective. */ + def clear(logger: Logger[F], collective: Ident): F[Unit] + +} + +object FtsClient { + + def none[F[_]: Sync] = + new FtsClient[F] { + private[this] val logger = Logger.log4s[F](getLogger) + + def initialize: F[Unit] = + logger.info("Full-text search is disabled!") + + def search(q: FtsQuery): F[FtsResult] = + logger.warn("Full-text search is disabled!") *> FtsResult.empty.pure[F] + + def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = + logger.warn("Full-text search is disabled!") + + def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = + logger.warn("Full-text search is disabled!") + + def removeItem(logger: Logger[F], itemId: Ident): F[Unit] = + logger.warn("Full-text search is disabled!") + + def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] = + logger.warn("Full-text search is disabled!") + + def clearAll(logger: Logger[F]): F[Unit] = + logger.warn("Full-text search is disabled!") + + def clear(logger: Logger[F], collective: Ident): F[Unit] = + logger.warn("Full-text search is disabled!") + } +} diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala new file mode 100644 index 00000000..785d2e20 --- /dev/null +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala @@ -0,0 +1,34 @@ +package docspell.ftsclient + +import docspell.common._ + +/** A fulltext query. + * + * The query itself is a raw string. Each implementation may + * interpret it according to the system in use. + * + * Searches must only look for given collective and in the given list + * of item ids, if it is non-empty. If the item set is empty, then + * don't restrict the result in this way. + */ +final case class FtsQuery( + q: String, + collective: Ident, + items: Set[Ident], + limit: Int, + offset: Int, + highlight: FtsQuery.HighlightSetting +) { + + def nextPage: FtsQuery = + copy(offset = limit + offset) +} + +object FtsQuery { + + case class HighlightSetting(pre: String, post: String) + + object HighlightSetting { + val default = HighlightSetting("**", "**") + } +} diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsResult.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsResult.scala new file mode 100644 index 00000000..df1c43ac --- /dev/null +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsResult.scala @@ -0,0 +1,31 @@ +package docspell.ftsclient + +import docspell.common._ + +import FtsResult.ItemMatch + +final case class FtsResult( + qtime: Duration, + count: Int, + maxScore: Double, + highlight: Map[Ident, List[String]], + results: List[ItemMatch] +) {} + +object FtsResult { + + val empty = + FtsResult(Duration.millis(0), 0, 0.0, Map.empty, Nil) + + sealed trait MatchData + case class AttachmentData(attachId: Ident, attachName: String) extends MatchData + case object ItemData extends MatchData + + case class ItemMatch( + id: Ident, + itemId: Ident, + collectiveId: Ident, + score: Double, + data: MatchData + ) +} diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala new file mode 100644 index 00000000..625411ad --- /dev/null +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala @@ -0,0 +1,63 @@ +package docspell.ftsclient + +import docspell.common._ + +sealed trait TextData { + + def id: Ident + + def item: Ident + + def collective: Ident + + final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A = + this match { + case a: TextData.Attachment => f(a) + case a: TextData.Item => g(a) + } +} + +object TextData { + + final case class Attachment( + item: Ident, + attachId: Ident, + collective: Ident, + lang: Language, + name: Option[String], + text: Option[String] + ) extends TextData { + + val id = item / attachId + + } + + def attachment( + item: Ident, + attachId: Ident, + collective: Ident, + lang: Language, + name: Option[String], + text: Option[String] + ): TextData = + Attachment(item, attachId, collective, lang, name, text) + + final case class Item( + item: Ident, + collective: Ident, + name: Option[String], + notes: Option[String] + ) extends TextData { + + val id = Ident.unsafe("item") / item + + } + + def item( + item: Ident, + collective: Ident, + name: Option[String], + notes: Option[String] + ): TextData = + Item(item, collective, name, notes) +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala new file mode 100644 index 00000000..256844ee --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala @@ -0,0 +1,38 @@ +package docspell.ftssolr + +import io.circe._ +import docspell.common._ + +final class Field(val name: String) extends AnyVal { + + override def toString(): String = s"Field($name)" +} + +object Field { + + def apply(name: String): Field = + new Field(name) + + val id = Field("id") + val itemId = Field("itemId") + val collectiveId = Field("collectiveId") + val attachmentId = Field("attachmentId") + val discriminator = Field("discriminator") + val attachmentName = Field("attachmentName") + val content = Field("content") + val content_de = Field("content_de") + val content_en = Field("content_en") + val itemName = Field("itemName") + val itemNotes = Field("itemNotes") + + def contentField(lang: Language): Field = + lang match { + case Language.German => + Field.content_de + case Language.English => + Field.content_en + } + + implicit val jsonEncoder: Encoder[Field] = + Encoder.encodeString.contramap(_.name) +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala new file mode 100644 index 00000000..96ee969d --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala @@ -0,0 +1,136 @@ +package docspell.ftssolr + +import io.circe._ +import io.circe.syntax._ +import docspell.common._ +import docspell.ftsclient._ + +trait JsonCodec { + + implicit def attachmentEncoder(implicit + enc: Encoder[Ident] + ): Encoder[TextData.Attachment] = + new Encoder[TextData.Attachment] { + final def apply(td: TextData.Attachment): Json = { + val cnt = + (Field.contentField(td.lang).name, Json.fromString(td.text.getOrElse(""))) + + Json.fromFields( + cnt :: List( + (Field.id.name, enc(td.id)), + (Field.itemId.name, enc(td.item)), + (Field.collectiveId.name, enc(td.collective)), + (Field.attachmentId.name, enc(td.attachId)), + (Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))), + (Field.discriminator.name, Json.fromString("attachment")) + ) + ) + + } + } + + implicit def itemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] = + new Encoder[TextData.Item] { + final def apply(td: TextData.Item): Json = + Json.obj( + (Field.id.name, enc(td.id)), + (Field.itemId.name, enc(td.item)), + (Field.collectiveId.name, enc(td.collective)), + (Field.itemName.name, Json.fromString(td.name.getOrElse(""))), + (Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))), + (Field.discriminator.name, Json.fromString("item")) + ) + } + + implicit def textDataEncoder(implicit + ae: Encoder[TextData.Attachment], + ie: Encoder[TextData.Item] + ): Encoder[TextData] = + Encoder(_.fold(ae.apply, ie.apply)) + + implicit def ftsResultDecoder: Decoder[FtsResult] = + new Decoder[FtsResult] { + final def apply(c: HCursor): Decoder.Result[FtsResult] = + for { + qtime <- c.downField("responseHeader").get[Duration]("QTime") + count <- c.downField("response").get[Int]("numFound") + maxScore <- c.downField("response").get[Double]("maxScore") + results <- c.downField("response").get[List[FtsResult.ItemMatch]]("docs") + highlightng <- c.get[Map[Ident, Map[String, List[String]]]]("highlighting") + highlight = highlightng.map(kv => kv._1 -> kv._2.values.flatten.toList) + } yield FtsResult(qtime, count, maxScore, highlight, results) + } + + implicit def decodeItemMatch: Decoder[FtsResult.ItemMatch] = + new Decoder[FtsResult.ItemMatch] { + final def apply(c: HCursor): Decoder.Result[FtsResult.ItemMatch] = + for { + itemId <- c.get[Ident](Field.itemId.name) + id <- c.get[Ident](Field.id.name) + coll <- c.get[Ident](Field.collectiveId.name) + score <- c.get[Double]("score") + md <- decodeMatchData(c) + } yield FtsResult.ItemMatch(id, itemId, coll, score, md) + } + + def decodeMatchData: Decoder[FtsResult.MatchData] = + new Decoder[FtsResult.MatchData] { + final def apply(c: HCursor): Decoder.Result[FtsResult.MatchData] = + for { + disc <- c.get[String]("discriminator") + md <- + if ("attachment" == disc) + for { + aId <- c.get[Ident](Field.attachmentId.name) + aName <- c.get[String](Field.attachmentName.name) + } yield FtsResult.AttachmentData(aId, aName) + else Right(FtsResult.ItemData) + } yield md + } + + implicit def identKeyEncoder: KeyEncoder[Ident] = + new KeyEncoder[Ident] { + override def apply(ident: Ident): String = ident.id + } + implicit def identKeyDecoder: KeyDecoder[Ident] = + new KeyDecoder[Ident] { + override def apply(ident: String): Option[Ident] = Ident(ident).toOption + } + + def setAttachmentEncoder(implicit + enc: Encoder[Ident] + ): Encoder[TextData.Attachment] = + new Encoder[TextData.Attachment] { + final def apply(td: TextData.Attachment): Json = { + val setter = List( + td.name.map(n => (Field.attachmentName.name, Map("set" -> n.asJson).asJson)), + td.text.map(txt => + (Field.contentField(td.lang).name, Map("set" -> txt.asJson).asJson) + ) + ).flatten + Json.fromFields( + (Field.id.name, enc(td.id)) :: setter + ) + } + } + + def setItemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] = + new Encoder[TextData.Item] { + final def apply(td: TextData.Item): Json = { + val setter = List( + td.name.map(n => (Field.itemName.name, Map("set" -> n.asJson).asJson)), + td.notes.map(n => (Field.itemNotes.name, Map("set" -> n.asJson).asJson)) + ).flatten + + Json.fromFields( + (Field.id.name, enc(td.id)) :: setter + ) + } + } + + implicit def textDataEncoder: Encoder[SetFields] = + Encoder(_.td.fold(setAttachmentEncoder.apply, setItemEncoder.apply)) + +} + +object JsonCodec extends JsonCodec diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala new file mode 100644 index 00000000..063f3048 --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala @@ -0,0 +1,68 @@ +package docspell.ftssolr + +import io.circe._ +import io.circe.generic.semiauto._ +import docspell.ftsclient.FtsQuery + +final case class QueryData( + query: String, + filter: String, + limit: Int, + offset: Int, + fields: List[Field], + params: Map[String, String] +) { + + def nextPage: QueryData = + copy(offset = offset + limit) + + def withHighLight(fields: List[Field], pre: String, post: String): QueryData = + copy(params = + params ++ Map( + "hl" -> "on", + "hl.requireFieldMatch" -> "true", + "hl.fl" -> fields.map(_.name).mkString(","), + "hl.simple.pre" -> pre, + "hl.simple.post" -> post + ) + ) +} + +object QueryData { + + implicit val jsonEncoder: Encoder[QueryData] = + deriveEncoder[QueryData] + + def apply( + cfg: SolrConfig, + search: List[Field], + fields: List[Field], + fq: FtsQuery + ): QueryData = { + val q = sanitize(fq.q) + val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ") + val items = fq.items.map(_.id).mkString(" ") + val collQ = s"""${Field.collectiveId.name}:"${fq.collective.id}"""" + val filterQ = fq.items match { + case s if s.isEmpty => + collQ + case _ => + (collQ :: List(s"""${Field.itemId.name}:($items)""")).mkString(" AND ") + } + QueryData( + extQ, + filterQ, + fq.limit, + fq.offset, + fields, + Map("defType" -> cfg.defType, "q.op" -> cfg.qOp) + ).withHighLight( + search, + fq.highlight.pre, + fq.highlight.post + ) + } + + private def sanitize(q: String): String = + q.replaceAll("[\\(,\\)]+", " ") +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SetFields.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SetFields.scala new file mode 100644 index 00000000..af9b370e --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SetFields.scala @@ -0,0 +1,5 @@ +package docspell.ftssolr + +import docspell.ftsclient._ + +final case class SetFields(td: TextData) diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrConfig.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrConfig.scala new file mode 100644 index 00000000..639ae424 --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrConfig.scala @@ -0,0 +1,13 @@ +package docspell.ftssolr + +import docspell.common._ + +final case class SolrConfig( + url: LenientUri, + commitWithin: Int, + logVerbose: Boolean, + defType: String, + qOp: String +) + +object SolrConfig {} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala new file mode 100644 index 00000000..4bdf3fda --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala @@ -0,0 +1,89 @@ +package docspell.ftssolr + +import fs2.Stream +import cats.effect._ +import cats.implicits._ +import org.http4s.client.Client +import org.http4s.client.middleware.Logger +import org.log4s.getLogger + +import docspell.common._ +import docspell.ftsclient._ + +final class SolrFtsClient[F[_]: Effect]( + solrUpdate: SolrUpdate[F], + solrSetup: SolrSetup[F], + solrQuery: SolrQuery[F] +) extends FtsClient[F] { + + def initialize: F[Unit] = + solrSetup.setupSchema + + def search(q: FtsQuery): F[FtsResult] = + solrQuery.query(q) + + def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = + modifyIndex(logger, data)(solrUpdate.add) + + def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = + modifyIndex(logger, data)(solrUpdate.update) + + def modifyIndex(logger: Logger[F], data: Stream[F, TextData])( + f: List[TextData] => F[Unit] + ): F[Unit] = + (for { + _ <- Stream.eval(logger.debug("Updating SOLR index")) + chunks <- data.chunks + res <- Stream.eval(f(chunks.toList).attempt) + _ <- res match { + case Right(()) => Stream.emit(()) + case Left(ex) => + Stream.eval(logger.error(ex)("Error updating with chunk of data")) + } + } yield ()).compile.drain + + def removeItem(logger: Logger[F], itemId: Ident): F[Unit] = + logger.debug(s"Remove item '${itemId.id}' from index") *> + solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}", None) + + def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] = + logger.debug(s"Remove attachment '${attachId.id}' from index") *> + solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}", None) + + def clearAll(logger: Logger[F]): F[Unit] = + logger.info("Deleting complete full-text index!") *> + solrUpdate.delete("*:*", Option(0)) + + def clear(logger: Logger[F], collective: Ident): F[Unit] = + logger.info(s"Deleting full-text index for collective ${collective.id}") *> + solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}", Option(0)) +} + +object SolrFtsClient { + private[this] val logger = getLogger + + def apply[F[_]: ConcurrentEffect]( + cfg: SolrConfig, + httpClient: Client[F] + ): Resource[F, FtsClient[F]] = { + val client = loggingMiddleware(cfg, httpClient) + Resource.pure[F, FtsClient[F]]( + new SolrFtsClient( + SolrUpdate(cfg, client), + SolrSetup(cfg, client), + SolrQuery(cfg, client) + ) + ) + } + + private def loggingMiddleware[F[_]: Concurrent]( + cfg: SolrConfig, + client: Client[F] + ): Client[F] = + Logger( + logHeaders = true, + logBody = cfg.logVerbose, + logAction = Some((msg: String) => Sync[F].delay(logger.trace(msg))) + )(client) + +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala new file mode 100644 index 00000000..c557344d --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrQuery.scala @@ -0,0 +1,60 @@ +package docspell.ftssolr + +import cats.effect._ +import org.http4s._ +import org.http4s.client.Client +import org.http4s.circe._ +import org.http4s.circe.CirceEntityDecoder._ +import org.http4s.client.dsl.Http4sClientDsl +import _root_.io.circe.syntax._ + +import docspell.ftsclient._ +import JsonCodec._ + +trait SolrQuery[F[_]] { + + def query(q: QueryData): F[FtsResult] + + def query(q: FtsQuery): F[FtsResult] +} + +object SolrQuery { + def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = { + val dsl = new Http4sClientDsl[F] {} + import dsl._ + + new SolrQuery[F] { + val url = Uri.unsafeFromString(cfg.url.asString) / "query" + + def query(q: QueryData): F[FtsResult] = { + val req = Method.POST(q.asJson, url) + client.expect[FtsResult](req) + } + + def query(q: FtsQuery): F[FtsResult] = { + val fq = QueryData( + cfg, + List( + Field.content, + Field.content_de, + Field.content_en, + Field.itemName, + Field.itemNotes, + Field.attachmentName + ), + List( + Field.id, + Field.itemId, + Field.collectiveId, + Field("score"), + Field.attachmentId, + Field.attachmentName, + Field.discriminator + ), + q + ) + query(fq) + } + } + } +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala new file mode 100644 index 00000000..c25431a8 --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala @@ -0,0 +1,121 @@ +package docspell.ftssolr + +import cats.effect._ +import org.http4s._ +import cats.implicits._ +import org.http4s.client.Client +import org.http4s.circe._ +import org.http4s.client.dsl.Http4sClientDsl +import _root_.io.circe.syntax._ +import _root_.io.circe._ +import _root_.io.circe.generic.semiauto._ +import docspell.common._ + +trait SolrSetup[F[_]] { + + def setupSchema: F[Unit] + +} + +object SolrSetup { + + def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = { + val dsl = new Http4sClientDsl[F] {} + import dsl._ + + new SolrSetup[F] { + val url = (Uri.unsafeFromString(cfg.url.asString) / "schema") + .withQueryParam("commitWithin", cfg.commitWithin.toString) + + def setupSchema: F[Unit] = { + val cmds0 = + List( + Field.id, + Field.itemId, + Field.collectiveId, + Field.discriminator, + Field.attachmentId + ) + .traverse(addStringField) + val cmds1 = List( + Field.attachmentName, + Field.content, + Field.itemName, + Field.itemNotes + ) + .traverse(addTextField(None)) + + val cntLang = Language.all.traverse { + case l @ Language.German => + addTextField(l.some)(Field.content_de) + case l @ Language.English => + addTextField(l.some)(Field.content_en) + } + + cmds0 *> cmds1 *> cntLang *> ().pure[F] + } + + private def run(cmd: Json): F[Unit] = { + val req = Method.POST(cmd, url) + client.expect[Unit](req) + } + + private def addStringField(field: Field): F[Unit] = + run(DeleteField.command(DeleteField(field))).attempt *> + run(AddField.command(AddField.string(field))) + + private def addTextField(lang: Option[Language])(field: Field): F[Unit] = + lang match { + case None => + run(DeleteField.command(DeleteField(field))).attempt *> + run(AddField.command(AddField.text(field))) + case Some(Language.German) => + run(DeleteField.command(DeleteField(field))).attempt *> + run(AddField.command(AddField.textDE(field))) + case Some(Language.English) => + run(DeleteField.command(DeleteField(field))).attempt *> + run(AddField.command(AddField.textEN(field))) + } + } + } + + // Schema Commands: The structure is for conveniently creating the + // solr json. All fields must be stored, because of highlighting and + // single-updates only work when all fields are stored. + + case class AddField( + name: Field, + `type`: String, + stored: Boolean, + indexed: Boolean, + multiValued: Boolean + ) + object AddField { + implicit val encoder: Encoder[AddField] = + deriveEncoder[AddField] + + def command(body: AddField): Json = + Map("add-field" -> body.asJson).asJson + + def string(field: Field): AddField = + AddField(field, "string", true, true, false) + + def text(field: Field): AddField = + AddField(field, "text_general", true, true, false) + + def textDE(field: Field): AddField = + AddField(field, "text_de", true, true, false) + + def textEN(field: Field): AddField = + AddField(field, "text_en", true, true, false) + } + + case class DeleteField(name: Field) + object DeleteField { + implicit val encoder: Encoder[DeleteField] = + deriveEncoder[DeleteField] + + def command(body: DeleteField): Json = + Map("delete-field" -> body.asJson).asJson + } +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala new file mode 100644 index 00000000..fcfe1151 --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala @@ -0,0 +1,78 @@ +package docspell.ftssolr + +import cats.effect._ +import org.http4s._ +import org.http4s.client.Client +import org.http4s.circe._ +import org.http4s.client.dsl.Http4sClientDsl +import _root_.io.circe._ +import _root_.io.circe.syntax._ + +import docspell.ftsclient._ +import JsonCodec._ + +trait SolrUpdate[F[_]] { + + def add(tds: List[TextData]): F[Unit] + + def update(tds: List[TextData]): F[Unit] + + def delete(q: String, commitWithin: Option[Int]): F[Unit] +} + +object SolrUpdate { + + def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrUpdate[F] = { + val dsl = new Http4sClientDsl[F] {} + import dsl._ + + new SolrUpdate[F] { + val url = (Uri.unsafeFromString(cfg.url.asString) / "update") + .withQueryParam("commitWithin", cfg.commitWithin.toString) + .withQueryParam("overwrite", "true") + .withQueryParam("wt", "json") + + def add(tds: List[TextData]): F[Unit] = { + val req = Method.POST(tds.asJson, url) + client.expect[Unit](req) + } + + def update(tds: List[TextData]): F[Unit] = { + val req = Method.POST(tds.filter(minOneChange).map(SetFields).asJson, url) + client.expect[Unit](req) + } + + def delete(q: String, commitWithin: Option[Int]): F[Unit] = { + val uri = commitWithin match { + case Some(n) => + if (n <= 0) + url.removeQueryParam("commitWithin").withQueryParam("commit", "true") + else url.withQueryParam("commitWithin", n.toString) + case None => + url + } + val req = Method.POST(Delete(q).asJson, uri) + client.expect[Unit](req) + } + + private val minOneChange: TextData => Boolean = + _ match { + case td: TextData.Attachment => + td.name.isDefined || td.text.isDefined + case td: TextData.Item => + td.name.isDefined || td.notes.isDefined + } + } + } + + case class Delete(query: String) + object Delete { + implicit val jsonEncoder: Encoder[Delete] = + new Encoder[Delete] { + def apply(d: Delete): Json = + Json.obj( + ("delete", Json.obj("query" -> d.query.asJson)) + ) + } + } +} diff --git a/modules/joex/src/main/resources/reference.conf b/modules/joex/src/main/resources/reference.conf index 1958070b..16bc791a 100644 --- a/modules/joex/src/main/resources/reference.conf +++ b/modules/joex/src/main/resources/reference.conf @@ -364,4 +364,38 @@ docspell.joex { # By default all files are allowed. valid-mime-types = [ ] } + + # Configuration of the full-text search engine. + full-text-search { + # The full-text search feature can be disabled. It requires an + # additional index server which needs additional memory and disk + # space. It can be enabled later any time. + # + # Currently the SOLR search platform is supported. + enabled = false + + # Configuration for the SOLR backend. + solr = { + # The URL to solr + url = "http://localhost:8983/solr/docspell" + # Used to tell solr when to commit the data + commit-within = 1000 + # If true, logs request and response bodies + log-verbose = false + # The defType parameter to lucene that defines the parser to + # use. You might want to try "edismax" or look here: + # https://lucene.apache.org/solr/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing + def-type = "lucene" + # The default combiner for tokens. One of {AND, OR}. + q-op = "OR" + } + + # Settings for running the index migration tasks + migration = { + # Chunk size to use when indexing data from the database. This + # many attachments are loaded into memory and pushed to the + # full-text index. + index-all-chunk = 10 + } + } } \ No newline at end of file diff --git a/modules/joex/src/main/scala/docspell/joex/Config.scala b/modules/joex/src/main/scala/docspell/joex/Config.scala index 27817d69..c9c54528 100644 --- a/modules/joex/src/main/scala/docspell/joex/Config.scala +++ b/modules/joex/src/main/scala/docspell/joex/Config.scala @@ -8,6 +8,7 @@ import docspell.convert.ConvertConfig import docspell.extract.ExtractConfig import docspell.joex.hk.HouseKeepingConfig import docspell.backend.Config.Files +import docspell.ftssolr.SolrConfig case class Config( appId: Ident, @@ -23,7 +24,8 @@ case class Config( convert: ConvertConfig, sendMail: MailSendConfig, files: Files, - mailDebug: Boolean + mailDebug: Boolean, + fullTextSearch: Config.FullTextSearch ) object Config { @@ -34,4 +36,15 @@ object Config { math.min(mailChunkSize, maxMails) } case class UserTasks(scanMailbox: ScanMailbox) + + case class FullTextSearch( + enabled: Boolean, + migration: FullTextSearch.Migration, + solr: SolrConfig + ) + + object FullTextSearch { + + final case class Migration(indexAllChunk: Int) + } } diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index f2d3cd91..fe3fc586 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -3,10 +3,15 @@ package docspell.joex import cats.implicits._ import cats.effect._ import emil.javamail._ +import fs2.concurrent.SignallingRef +import scala.concurrent.ExecutionContext +import org.http4s.client.Client +import org.http4s.client.blaze.BlazeClientBuilder import docspell.common._ import docspell.backend.ops._ import docspell.joex.hk._ import docspell.joex.notify._ +import docspell.joex.fts.{MigrationTask, ReIndexTask} import docspell.joex.scanmailbox._ import docspell.joex.process.ItemHandler import docspell.joex.scheduler._ @@ -14,13 +19,14 @@ import docspell.joexapi.client.JoexClient import docspell.store.Store import docspell.store.queue._ import docspell.store.records.RJobLog -import fs2.concurrent.SignallingRef -import scala.concurrent.ExecutionContext +import docspell.ftsclient.FtsClient +import docspell.ftssolr.SolrFtsClient final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer]( cfg: Config, nodeOps: ONode[F], store: Store[F], + queue: JobQueue[F], pstore: PeriodicTaskStore[F], termSignal: SignallingRef[F, Boolean], val scheduler: Scheduler[F], @@ -50,7 +56,10 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer]( periodicScheduler.shutdown *> scheduler.shutdown(false) *> termSignal.set(true) private def scheduleBackgroundTasks: F[Unit] = - HouseKeepingTask.periodicTask[F](cfg.houseKeeping.schedule).flatMap(pstore.insert) + HouseKeepingTask + .periodicTask[F](cfg.houseKeeping.schedule) + .flatMap(pstore.insert) *> + MigrationTask.job.flatMap(queue.insertIfNew) } object JoexAppImpl { @@ -63,13 +72,15 @@ object JoexAppImpl { blocker: Blocker ): Resource[F, JoexApp[F]] = for { - client <- JoexClient.resource(clientEC) + httpClient <- BlazeClientBuilder[F](clientEC).resource + client = JoexClient(httpClient) store <- Store.create(cfg.jdbc, connectEC, blocker) queue <- JobQueue(store) pstore <- PeriodicTaskStore.create(store) nodeOps <- ONode(store) joex <- OJoex(client, store) upload <- OUpload(store, queue, cfg.files, joex) + fts <- createFtsClient(cfg)(httpClient) javaEmil = JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug)) sch <- SchedulerBuilder(cfg.scheduler, blocker, store) @@ -77,7 +88,7 @@ object JoexAppImpl { .withTask( JobTask.json( ProcessItemArgs.taskName, - ItemHandler.newItem[F](cfg), + ItemHandler.newItem[F](cfg, fts), ItemHandler.onCancel[F] ) ) @@ -95,6 +106,20 @@ object JoexAppImpl { ScanMailboxTask.onCancel[F] ) ) + .withTask( + JobTask.json( + MigrationTask.taskName, + MigrationTask[F](cfg.fullTextSearch, fts), + MigrationTask.onCancel[F] + ) + ) + .withTask( + JobTask.json( + ReIndexTask.taskName, + ReIndexTask[F](cfg.fullTextSearch, fts), + ReIndexTask.onCancel[F] + ) + ) .withTask( JobTask.json( HouseKeepingTask.taskName, @@ -111,7 +136,13 @@ object JoexAppImpl { client, Timer[F] ) - app = new JoexAppImpl(cfg, nodeOps, store, pstore, termSignal, sch, psch) + app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch) appR <- Resource.make(app.init.map(_ => app))(_.shutdown) } yield appR + + private def createFtsClient[F[_]: ConcurrentEffect: ContextShift]( + cfg: Config + )(client: Client[F]): Resource[F, FtsClient[F]] = + if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client) + else Resource.pure[F, FtsClient[F]](FtsClient.none[F]) } diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala new file mode 100644 index 00000000..ac1267e6 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala @@ -0,0 +1,24 @@ +package docspell.joex.fts + +import docspell.common.Logger +import docspell.joex.Config +import docspell.joex.scheduler.Context +import docspell.store.Store +import docspell.ftsclient.FtsClient + +case class FtsContext[F[_]]( + cfg: Config.FullTextSearch, + store: Store[F], + fts: FtsClient[F], + logger: Logger[F] +) + +object FtsContext { + + def apply[F[_]]( + cfg: Config.FullTextSearch, + fts: FtsClient[F], + ctx: Context[F, _] + ): FtsContext[F] = + FtsContext(cfg, ctx.store, fts, ctx.logger) +} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala new file mode 100644 index 00000000..5e861b8d --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala @@ -0,0 +1,93 @@ +package docspell.joex.fts + +import cats.effect._ +import cats.data.{Kleisli, NonEmptyList} +import cats.{ApplicativeError, FlatMap, Semigroup} +import cats.implicits._ +import docspell.common._ +import docspell.ftsclient._ +import docspell.joex.scheduler.Context +import docspell.joex.Config +import docspell.store.queries.{QAttachment, QItem} + +object FtsWork { + def apply[F[_]](f: FtsContext[F] => F[Unit]): FtsWork[F] = + Kleisli(f) + + def all[F[_]: FlatMap]( + m0: FtsWork[F], + mn: FtsWork[F]* + ): FtsWork[F] = + NonEmptyList.of(m0, mn: _*).reduce(semigroup[F]) + + implicit def semigroup[F[_]: FlatMap]: Semigroup[FtsWork[F]] = + Semigroup.instance((mt1, mt2) => mt1.flatMap(_ => mt2)) + + // some tasks + + def log[F[_]](f: Logger[F] => F[Unit]): FtsWork[F] = + FtsWork(ctx => f(ctx.logger)) + + def initialize[F[_]]: FtsWork[F] = + FtsWork(_.fts.initialize) + + def clearIndex[F[_]](coll: Option[Ident]): FtsWork[F] = + coll match { + case Some(cid) => + FtsWork(ctx => ctx.fts.clear(ctx.logger, cid)) + case None => + FtsWork(ctx => ctx.fts.clearAll(ctx.logger)) + } + + def insertAll[F[_]: Effect](coll: Option[Ident]): FtsWork[F] = + FtsWork + .all( + FtsWork(ctx => + ctx.fts.indexData( + ctx.logger, + ctx.store + .transact( + QAttachment + .allAttachmentMetaAndName(coll, ctx.cfg.migration.indexAllChunk) + ) + .map(caa => + TextData + .attachment( + caa.item, + caa.id, + caa.collective, + caa.lang, + caa.name, + caa.content + ) + ) + ) + ), + FtsWork(ctx => + ctx.fts.indexData( + ctx.logger, + ctx.store + .transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5)) + .map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes)) + ) + ) + ) + + object syntax { + implicit final class FtsWorkOps[F[_]](mt: FtsWork[F]) { + def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] = + all(mt, mn) + + def recoverWith( + other: FtsWork[F] + )(implicit ev: ApplicativeError[F, Throwable]): FtsWork[F] = + Kleisli(ctx => mt.run(ctx).onError({ case _ => other.run(ctx) })) + + def forContext( + cfg: Config.FullTextSearch, + fts: FtsClient[F] + ): Kleisli[F, Context[F, _], Unit] = + mt.local(ctx => FtsContext(cfg, fts, ctx)) + } + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala b/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala new file mode 100644 index 00000000..cfc63940 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala @@ -0,0 +1,66 @@ +package docspell.joex.fts + +import cats.effect._ +import cats.implicits._ +import cats.data.{Kleisli, OptionT} +import cats.Traverse +import docspell.common._ +import docspell.joex.Config +import docspell.store.{AddResult, Store} +import docspell.store.records.RFtsMigration +import docspell.ftsclient._ + +case class Migration[F[_]]( + version: Int, + engine: Ident, + description: String, + task: FtsWork[F] +) + +object Migration { + + def apply[F[_]: Effect]( + cfg: Config.FullTextSearch, + fts: FtsClient[F], + store: Store[F], + logger: Logger[F] + ): Kleisli[F, List[Migration[F]], Unit] = { + val ctx = FtsContext(cfg, store, fts, logger) + Kleisli(migs => Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ())) + } + + def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] = { + val insertRecord: F[Option[RFtsMigration]] = + for { + rec <- RFtsMigration.create(m.version, m.engine, m.description) + res <- ctx.store.add( + RFtsMigration.insert(rec), + RFtsMigration.exists(m.version, m.engine) + ) + ret <- res match { + case AddResult.Success => rec.some.pure[F] + case AddResult.EntityExists(_) => None.pure[F] + case AddResult.Failure(ex) => Effect[F].raiseError(ex) + } + } yield ret + + (for { + _ <- OptionT.liftF(ctx.logger.info(s"Apply ${m.version}/${m.description}")) + rec <- OptionT(insertRecord) + res <- OptionT.liftF(m.task.run(ctx).attempt) + ret <- OptionT.liftF(res match { + case Right(()) => ().pure[F] + case Left(ex) => + ctx.logger.error(ex)( + s"Applying index migration ${m.version}/${m.description} failed" + ) *> + ctx.store.transact(RFtsMigration.deleteById(rec.id)) *> Effect[F] + .raiseError[Unit]( + ex + ) + }) + } yield ret).getOrElseF( + ctx.logger.info(s"Migration ${m.version}/${m.description} already applied.") + ) + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala new file mode 100644 index 00000000..f7d1b980 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala @@ -0,0 +1,53 @@ +package docspell.joex.fts + +import cats.effect._ +import cats.implicits._ +import docspell.common._ +import docspell.joex.Config +import docspell.joex.scheduler.Task +import docspell.ftsclient._ +import docspell.store.records.RJob + +object MigrationTask { + val taskName = Ident.unsafe("full-text-index") + + def apply[F[_]: ConcurrentEffect]( + cfg: Config.FullTextSearch, + fts: FtsClient[F] + ): Task[F, Unit, Unit] = + Task + .log[F, Unit](_.info(s"Running full-text-index migrations now")) + .flatMap(_ => + Task(ctx => + Migration[F](cfg, fts, ctx.store, ctx.logger) + .run(migrationTasks[F]) + ) + ) + + def onCancel[F[_]: Sync]: Task[F, Unit, Unit] = + Task.log[F, Unit](_.warn("Cancelling full-text-index task")) + + def job[F[_]: Sync]: F[RJob] = + for { + id <- Ident.randomId[F] + now <- Timestamp.current[F] + } yield RJob.newJob( + id, + taskName, + DocspellSystem.taskGroup, + (), + "Create full-text index", + now, + DocspellSystem.taskGroup, + Priority.Low, + Some(DocspellSystem.migrationTaskTracker) + ) + + private val solrEngine = Ident.unsafe("solr") + def migrationTasks[F[_]: Effect]: List[Migration[F]] = + List( + Migration[F](1, solrEngine, "initialize", FtsWork.initialize[F]), + Migration[F](2, solrEngine, "Index all from database", FtsWork.insertAll[F](None)) + ) + +} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala new file mode 100644 index 00000000..edb1cc0d --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala @@ -0,0 +1,54 @@ +package docspell.joex.fts + +import cats.effect._ +import docspell.common._ +import docspell.joex.Config +import docspell.joex.scheduler.Task +import docspell.ftsclient._ +import FtsWork.syntax._ + +object ReIndexTask { + type Args = ReIndexTaskArgs + + val taskName = ReIndexTaskArgs.taskName + val tracker = DocspellSystem.migrationTaskTracker + + def apply[F[_]: ConcurrentEffect]( + cfg: Config.FullTextSearch, + fts: FtsClient[F] + ): Task[F, Args, Unit] = + Task + .log[F, Args](_.info(s"Running full-text re-index now")) + .flatMap(_ => + Task(ctx => + (clearData[F](ctx.args.collective) ++ + FtsWork.log[F](_.info("Inserting data from database")) ++ + FtsWork.insertAll[F]( + ctx.args.collective + )).forContext(cfg, fts).run(ctx) + ) + ) + + def onCancel[F[_]: Sync]: Task[F, Args, Unit] = + Task.log[F, Args](_.warn("Cancelling full-text re-index task")) + + private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] = + FtsWork.log[F](_.info("Clearing index data")) ++ + (collective match { + case Some(_) => + FtsWork + .clearIndex(collective) + .recoverWith( + FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) + ) + + case None => + FtsWork + .clearIndex(None) + .recoverWith( + FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) + ) ++ + FtsWork.log[F](_.info("Running index initialize")) ++ + FtsWork.initialize[F] + }) +} diff --git a/modules/joex/src/main/scala/docspell/joex/fts/package.scala b/modules/joex/src/main/scala/docspell/joex/fts/package.scala new file mode 100644 index 00000000..784754ab --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/fts/package.scala @@ -0,0 +1,9 @@ +package docspell.joex + +import cats.data.Kleisli + +package object fts { + + type FtsWork[F[_]] = Kleisli[F, FtsContext[F], Unit] + +} diff --git a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala index 909f14e7..1ebcaa96 100644 --- a/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/hk/HouseKeepingTask.scala @@ -11,7 +11,6 @@ import docspell.store.records._ object HouseKeepingTask { private val periodicId = Ident.unsafe("docspell-houskeeping") - val systemGroup: Ident = Ident.unsafe("docspell-system") val taskName: Ident = Ident.unsafe("housekeeping") @@ -29,10 +28,10 @@ object HouseKeepingTask { .createJson( true, taskName, - systemGroup, + DocspellSystem.taskGroup, (), "Docspell house-keeping", - systemGroup, + DocspellSystem.taskGroup, Priority.Low, ce ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index dbc0f70a..5b7e9552 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -8,6 +8,7 @@ import docspell.joex.Config import docspell.joex.scheduler.Task import docspell.store.queries.QItem import docspell.store.records.RItem +import docspell.ftsclient.FtsClient object ItemHandler { def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] = @@ -16,11 +17,12 @@ object ItemHandler { ) def newItem[F[_]: ConcurrentEffect: ContextShift]( - cfg: Config + cfg: Config, + fts: FtsClient[F] ): Task[F, ProcessItemArgs, Unit] = CreateItem[F] .flatMap(itemStateTask(ItemState.Processing)) - .flatMap(safeProcess[F](cfg)) + .flatMap(safeProcess[F](cfg, fts)) .map(_ => ()) def itemStateTask[F[_]: Sync, A]( @@ -36,11 +38,12 @@ object ItemHandler { Task(_.isLastRetry) def safeProcess[F[_]: ConcurrentEffect: ContextShift]( - cfg: Config + cfg: Config, + fts: FtsClient[F] )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = isLastRetry[F].flatMap { case true => - ProcessItem[F](cfg)(data).attempt.flatMap({ + ProcessItem[F](cfg, fts)(data).attempt.flatMap({ case Right(d) => Task.pure(d) case Left(ex) => @@ -50,7 +53,7 @@ object ItemHandler { .andThen(_ => Sync[F].raiseError(ex)) }) case false => - ProcessItem[F](cfg)(data).flatMap(itemStateTask(ItemState.Created)) + ProcessItem[F](cfg, fts)(data).flatMap(itemStateTask(ItemState.Created)) } def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index b667d894..de5de412 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -5,17 +5,19 @@ import docspell.common.ProcessItemArgs import docspell.analysis.TextAnalysisConfig import docspell.joex.scheduler.Task import docspell.joex.Config +import docspell.ftsclient.FtsClient object ProcessItem { def apply[F[_]: ConcurrentEffect: ContextShift]( - cfg: Config + cfg: Config, + fts: FtsClient[F] )(item: ItemData): Task[F, ProcessItemArgs, ItemData] = ExtractArchive(item) .flatMap(Task.setProgress(20)) .flatMap(ConvertPdf(cfg.convert, _)) .flatMap(Task.setProgress(40)) - .flatMap(TextExtraction(cfg.extraction, _)) + .flatMap(TextExtraction(cfg.extraction, fts)) .flatMap(Task.setProgress(60)) .flatMap(analysisOnly[F](cfg.textAnalysis)) .flatMap(Task.setProgress(80)) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index e1bb4de1..8bfa250b 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -3,17 +3,17 @@ package docspell.joex.process import bitpeace.{Mimetype, RangeDef} import cats.data.OptionT import cats.implicits._ -import cats.effect.{ContextShift, Sync} +import cats.effect._ import docspell.common._ import docspell.extract.{ExtractConfig, ExtractResult, Extraction} import docspell.joex.scheduler.{Context, Task} import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta} import docspell.store.syntax.MimeTypes._ +import docspell.ftsclient.{FtsClient, TextData} object TextExtraction { - def apply[F[_]: Sync: ContextShift]( - cfg: ExtractConfig, + def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])( item: ItemData ): Task[F, ProcessItemArgs, ItemData] = Task { ctx => @@ -21,28 +21,52 @@ object TextExtraction { _ <- ctx.logger.info("Starting text extraction") start <- Duration.stopTime[F] txt <- item.attachments.traverse( - extractTextIfEmpty(ctx, cfg, ctx.args.meta.language, item) + extractTextIfEmpty( + ctx, + cfg, + ctx.args.meta.language, + ctx.args.meta.collective, + item + ) ) - _ <- ctx.logger.debug("Storing extracted texts") - _ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm))) + _ <- ctx.logger.debug("Storing extracted texts") + _ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1))) + idxItem = + TextData.item(item.item.id, ctx.args.meta.collective, item.item.name.some, None) + _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*) dur <- start _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}") - } yield item.copy(metas = txt) + } yield item.copy(metas = txt.map(_._1)) } def extractTextIfEmpty[F[_]: Sync: ContextShift]( ctx: Context[F, _], cfg: ExtractConfig, lang: Language, + collective: Ident, item: ItemData - )(ra: RAttachment): F[RAttachmentMeta] = { + )(ra: RAttachment): F[(RAttachmentMeta, TextData)] = { + def makeTextData(rm: RAttachmentMeta): (RAttachmentMeta, TextData) = + ( + rm, + TextData.attachment( + item.item.id, + ra.id, + collective, + lang, + ra.name, + rm.content + ) + ) + val rm = item.findOrCreate(ra.id) rm.content match { case Some(_) => ctx.logger.info("TextExtraction skipped, since text is already available.") *> - rm.pure[F] + makeTextData(rm).pure[F] case None => extractTextToMeta[F](ctx, cfg, lang, item)(ra) + .map(makeTextData) } } diff --git a/modules/microsite/docs/demo.md b/modules/microsite/docs/demo.md index 3b8c237b..234b60f3 100644 --- a/modules/microsite/docs/demo.md +++ b/modules/microsite/docs/demo.md @@ -6,6 +6,17 @@ permalink: demo # {{ page.title }} +## Finding Items + + - +## Basic Idea (First Version) + + diff --git a/modules/microsite/docs/dev/adr/0014_fulltext_search_engine.md b/modules/microsite/docs/dev/adr/0014_fulltext_search_engine.md new file mode 100644 index 00000000..1719c7fc --- /dev/null +++ b/modules/microsite/docs/dev/adr/0014_fulltext_search_engine.md @@ -0,0 +1,49 @@ +--- +layout: docs +title: Fulltext Search Engine +--- + +# Choose a Fulltext Search Engine + +It should be possible to search the contents of all documents. + +## Context and Problem Statement + +To allow searching the documents contents efficiently, a separate +index is necessary. The "defacto standard" for fulltext search on the +JVM is something backed by [Lucene](https://lucene.apache.org). +Another option is to use a RDBMS that supports fulltext search. + +This adds another component to the mix, which increases the complexity +of the setup and the software. Since docspell works great without this +feature, it shouldn't have a huge impact on the application, i.e. if +the fulltext search component is down or broken, docspell should still +work (just the fulltext search is then not working). + +## Considered Options + +* [Apache SOLR](https://lucene.apache.org/solr) +* [ElasticSearch](https://www.elastic.co/elasticsearch/) +* [PostgreSQL](https://www.postgresql.org/docs/12/textsearch.html) +* All of them or a subset + +## Decision Outcome + +If docspell is running on PostgreSQL, it would be nice to also use it +for fulltext search to save the cost of running another component. But +I don't want to lock the database to PostgreSQL *only* because of the +fulltext search feature. + +ElasticSearch and Apache SOLR are quite similiar in features. SOLR is +part of Lucene and therefore lives in the Apache ecosystem. I would +choose SOLR over ElasticSearch, because I used it before. + +The last option (supporting all) is interesting, since it would enable +to use PostgreSQL for fulltext search for those that use PostgreSQL as +the database for docspell. + +In a first step, identify what docspell needs from a fulltext search +component and create this interface and an implementation for Apache +SOLR. This enables all users to use the fulltext search feature. As a +later step, an implementation based on PostgreSQL and/or ElasticSearch +could be provided, too. diff --git a/modules/microsite/docs/doc/configure.md b/modules/microsite/docs/doc/configure.md index ffcdc553..1f170670 100644 --- a/modules/microsite/docs/doc/configure.md +++ b/modules/microsite/docs/doc/configure.md @@ -72,6 +72,68 @@ H2 url = "jdbc:h2:///path/to/a/file.db;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;AUTO_SERVER=TRUE" ``` + +### Full-Text Search: SOLR + +[Apache SOLR](https://lucene.apache.org/solr) is used to provide the +full-text search. Both docspell components must provide the same +connection setup. This is defined in the `full-text-search.solr` +subsection: + +``` +... + full-text-search { + enabled = true + ... + solr = { + url = "http://localhost:8983/solr/docspell" + } + } +``` + +The default configuration at the end of this page contains more +information about each setting. + +The `solr.url` is the mandatory setting that you need to change to +point to your SOLR instance. Then you need to set the `enabled` flag +to `true`. + +When installing docspell manually, just install solr and create a core +as described in the [solr +documentation](https://lucene.apache.org/solr/guide/8_4/installing-solr.html). +That will provide you with the connection url (the last part is the +core name). + +While the `full-text-search.solr` options are the same for joex and +the restserver, there are some settings that differ. The restserver +has this additional setting, that may be of interest: + +``` +full-text-search { + recreate-key = "test123" +} +``` + +This key is required if you want docspell to drop and re-create the +entire index. This is possible via a REST call: + +``` shell +$ curl -XPOST http://localhost:7880/api/v1/open/fts/reIndexAll/test123 +``` + +Here the `test123` is the key defined with `recreate-key`. If it is +empty (the default), this REST call is disabled. Otherwise, the POST +request will submit a system task that is executed by a joex instance +eventually. + +Using this endpoint, the index will be re-created. This is sometimes +necessary, for example if you upgrade SOLR or delete the core to +provide a new one (see +[here](https://lucene.apache.org/solr/guide/8_4/reindexing.html) for +details). Note that a collective can also re-index their data using a +similiar endpoint; but this is only deleting their data and doesn't do +a full re-index. + ### Bind The host and port the http server binds to. This applies to both diff --git a/modules/microsite/docs/doc/curate.md b/modules/microsite/docs/doc/curate.md index 766fde1d..feb52c9e 100644 --- a/modules/microsite/docs/doc/curate.md +++ b/modules/microsite/docs/doc/curate.md @@ -1,6 +1,6 @@ --- layout: docs -title: Find and Review +title: Curate Metadata permalink: doc/curate --- diff --git a/modules/microsite/docs/doc/finding.md b/modules/microsite/docs/doc/finding.md index 99414ffa..69e4985e 100644 --- a/modules/microsite/docs/doc/finding.md +++ b/modules/microsite/docs/doc/finding.md @@ -6,9 +6,9 @@ permalink: doc/finding # {{ page.title }} -Items can be searched by their annotated meta data. The landing page -shows a list of current items. Items are displayed sorted by their -date, newest first. +Items can be searched by their annotated meta data and their contents +using full text search. The landing page shows a list of current +items. Items are displayed sorted by their date, newest first. Docspell has two modes for searching: a simple search bar and a search menu with many options. Both are active at the same time, but only one @@ -19,32 +19,51 @@ is visible. You can switch between them without affecting the results. -By default, the search bar is shown. It searches in the name -properties of the following meta data: +By default, the search bar is shown. It provides a refined view of the +search menu. The dropdown contains different options to do a quick +search. -- the item name -- the notes -- correspondent organization and person -- concerning person and equipment +### *All Names* and *Contents* -A wildcard `*` can be used at the start or end of a search term to do -a substring match. A `*` means "everything". So a term `*company` -matches all names ending in `company` and `*company*` matches all -names containing the word `company`. The matching is case insensitive. +These two options correspond to the same named field in the search +menu. If you switch between search menu and search bar (by clicking +the icon on the left), you'll see that they are the same fields. +Typing in the search bar also fills the corresponding field in the +search menu (and vice versa). -Docspell adds a `*` to the front and end of a term automatically, -unless one of the following is true: +- The *All Names* searches in the item name, item notes, names of + correspondent organization and person, and names of concering person + and equipment. It uses a simple substring search. +- The option *Contents* searches the contents of all attachments + (documents), attachment names, the item name and item notes. It uses + full text search. However, it does not search the names of attached + meta data. -- The term already has a wildcard. -- The term is enclosed in quotes `"`. +When searching with one of these fields active, it simply submits the +(hidden) search menu. So if the menu has other fields filled out, they +will affect the result, too. Using one of these fields, the bar is +just a reduced view of the search menu. -You can go to the search menu by clicking the left icon in the search -bar. +So you can choose tags or correspondents in the search menu and +further restrict the results using full text search. The results will +be returned sorted by the item date, newest first. -If the search bar shows a little blue bubble, it means that there are -more search fields filled out in the search menu. In this case the -results are not only restricted by the search term given in the -search-bar, but also by what is specified in the search menu. +If the left button in the search bar shows a little blue bubble, it +means that there are more search fields filled out in the search menu +that you currently can't see. In this case the results are not only +restricted by the search term given in the search-bar, but also by +what is specified in the search menu. + + +### *Contents Only* + +This option has no corresponding part in the search menu. Searching +with this option active, there is only a full text search done in the +attachments contents, attachment names, item name and item notes. + +The results are not ordered by item date, but by relevance with +respect to the search term. This ordering is returned from the full +text search engine and is simply transfered unmodified. ## Search Menu @@ -104,9 +123,61 @@ within this range. Items without a due date are not shown. Specify whether to show only incoming, only outgoing or all items. +## Customize Substring Search + +The substring search of the *All Names* and *Name* field can be +customized in the following way: A wildcard `*` can be used at the +start or end of a search term to do a substring match. A `*` means +"everything". So a term `*company` matches all names ending in +`company` and `*company*` matches all names containing the word +`company`. The matching is case insensitive. + +Docspell adds a `*` to the front and end of a term automatically, +unless one of the following is true: + +- The term already has a wildcard. +- The term is enclosed in quotes `"`. + + +## Full Text Search + + +### The Query + +The query string for full text search is very powerful. Docspell +currently supports [Apache SOLR](https://lucene.apache.org/solr/) as +full text search backend, so you may want to have a look at their +[documentation on query +syntax](https://lucene.apache.org/solr/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing) +for a in depth guide. + +- Wildcards: `?` matches any single character, `*` matches zero or + more characters +- Fuzzy search: Appending a `~` to a term, results in a fuzzy search + (search this term and similiar spelled ones) +- Proximity Search: Search for terms that "near" each other, again + using `~` appended to a search phrase. Example: `"cheese cake"~5`. +- Boosting: apply more weight to a term with `^`. Example: `cheese^4 + cake` – cheese is 4x more important. + +Docspell will preprocess the search query to prepare a query for SOLR. +It will by default search all indexed fields, which are: attachment +contents, attachment names, item name and item notes. + + +### The Results + +When using full text search, each item in the result list is annotated +with the highlighted occurrence of the match. + +
+ +
+ + ## Screencast diff --git a/modules/microsite/docs/features.md b/modules/microsite/docs/features.md index 61694b2c..0adbe905 100644 --- a/modules/microsite/docs/features.md +++ b/modules/microsite/docs/features.md @@ -11,13 +11,18 @@ permalink: features account) - Handle multiple documents as one unit - OCR using [tesseract](https://github.com/tesseract-ocr/tesseract) +- [Full-Text Search](doc/finding#full-text-search) based on [Apache + SOLR](https://lucene.apache.org/solr) - Conversion to PDF: all files are converted into a PDF file - Non-destructive: all your uploaded files are never modified and can always be downloaded untouched - Text is analysed to find and attach meta data automatically - [Manage document processing](doc/processing): cancel jobs, set priorities -- Everything available via a documented [REST Api](api) +- Everything available via a [documented](https://www.openapis.org/) + [REST Api](api); allows to [generate + clients](https://openapi-generator.tech/docs/generators) for + (almost) any language - mobile-friendly Web-UI - [Create “share-urls”](doc/uploading#anonymous-upload) to upload files anonymously @@ -28,6 +33,8 @@ permalink: features - REST server and document processing are separate applications which can be scaled-out independently - Everything stored in a SQL database: PostgreSQL, MariaDB or H2 + - H2 is embedded, a "one-file-only" database, avoids installing db + servers - Files supported: - Documents: - PDF @@ -55,8 +62,7 @@ permalink: features # Limitations These are current known limitations that may be of interest for -considering docspell at the moment. Hopefully they will be resolved -eventually…. +considering docspell at the moment. -- No fulltext search implemented. This is now planned for an upcoming - release. +- Documents cannot be modified. +- You can remove and add documents but there is no versioning. diff --git a/modules/microsite/src/main/resources/microsite/data/menu.yml b/modules/microsite/src/main/resources/microsite/data/menu.yml index 8608ad37..e56dc416 100644 --- a/modules/microsite/src/main/resources/microsite/data/menu.yml +++ b/modules/microsite/src/main/resources/microsite/data/menu.yml @@ -33,15 +33,15 @@ options: - title: Finding Items url: doc/finding + - title: Curate Metadata + url: doc/curate + - title: Uploads url: doc/uploading - title: Processing Queue url: doc/processing - - title: Find and Review - url: doc/curate - - title: E-Mail Settings url: doc/emailsettings diff --git a/modules/microsite/src/main/resources/microsite/img/search-bar.png b/modules/microsite/src/main/resources/microsite/img/search-bar.png index 97ca1429..5ac9dc0b 100644 Binary files a/modules/microsite/src/main/resources/microsite/img/search-bar.png and b/modules/microsite/src/main/resources/microsite/img/search-bar.png differ diff --git a/modules/microsite/src/main/resources/microsite/img/search-content-results.png b/modules/microsite/src/main/resources/microsite/img/search-content-results.png new file mode 100644 index 00000000..754074f0 Binary files /dev/null and b/modules/microsite/src/main/resources/microsite/img/search-content-results.png differ diff --git a/modules/microsite/src/main/resources/microsite/img/search-menu.png b/modules/microsite/src/main/resources/microsite/img/search-menu.png index 0d23a880..3cab49cd 100644 Binary files a/modules/microsite/src/main/resources/microsite/img/search-menu.png and b/modules/microsite/src/main/resources/microsite/img/search-menu.png differ diff --git a/modules/microsite/src/main/resources/microsite/static/docspell-demo.webm b/modules/microsite/src/main/resources/microsite/static/docspell-demo.webm new file mode 100644 index 00000000..59f6230c Binary files /dev/null and b/modules/microsite/src/main/resources/microsite/static/docspell-demo.webm differ diff --git a/modules/microsite/src/main/resources/microsite/static/docspell-search-2020-06-24.webm b/modules/microsite/src/main/resources/microsite/static/docspell-search-2020-06-24.webm new file mode 100644 index 00000000..016a6076 Binary files /dev/null and b/modules/microsite/src/main/resources/microsite/static/docspell-search-2020-06-24.webm differ diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index 7159cd10..aaf52fb5 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -151,6 +151,46 @@ paths: application/json: schema: $ref: "#/components/schemas/BasicResult" + /open/fts/reIndexAll/{id}: + post: + tags: [Full-Text Index] + summary: Re-creates the full-text index. + description: | + Clears the full-text index and inserts all data from the + database. This migh take a while to complete. The response + returns immediately. A task is submitted that will be executed + by a job executor. Note that this affects all data of all + collectives. + + The `id` is required and refers to the key given in the config + file to ensure that only admins can call this route. + parameters: + - $ref: "#/components/parameters/id" + responses: + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/BasicResult" + + /sec/fts/reIndex: + post: + tags: [Full-Text Index] + summary: Re-creates the full-text index for the current collective + description: | + Clears the full-text index for all data belonging to the + current collective and inserts the data from the database. The + response is immediately returned and a task is submitted that + will be executed by a job executor. + responses: + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/BasicResult" + /sec/checkfile/{checksum}: get: tags: [ Upload ] @@ -987,9 +1027,13 @@ paths: summary: Search for items. description: | Search for items given a search form. The results are grouped - by month by default. Tags are *not* resolved! The results will - always contain an empty list for item tags. Use - `/searchWithTags` to also retrieve all tags of an item. + by month and are sorted by item date (newest first). Tags are + *not* resolved. The results will always contain an empty list + for item tags. Use `/searchWithTags` to also retrieve all tags + of an item. + + The `fulltext` field can be used to restrict the results by + using full-text search in the documents contents. security: - authTokenHeader: [] requestBody: @@ -1011,7 +1055,11 @@ paths: description: | Search for items given a search form. The results are grouped by month by default. For each item, its tags are also - returned. This uses more queries and is therefore slower. + returned. This uses more queries and is therefore slower, but + returns all tags to an item. + + The `fulltext` field can be used to restrict the results by + using full-text search in the documents contents. security: - authTokenHeader: [] requestBody: @@ -1026,6 +1074,37 @@ paths: application/json: schema: $ref: "#/components/schemas/ItemLightList" + /sec/item/searchIndex: + post: + tags: [ Item ] + summary: Search for items using full-text search only. + description: | + Search for items by only using the full-text search index. + + Unlike the other search routes, this one only asks the + full-text search index and returns only one group that + contains the results in the same order as given from the + index. Most full-text search engines use an ordering that + reflect the relevance wrt the search term. + + The other search routes always order the results by some + property (the item date) and thus the relevance ordering is + destroyed when using the full-text search. + security: + - authTokenHeader: [] + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/ItemFtsSearch" + responses: + 200: + description: Ok + content: + application/json: + schema: + $ref: "#/components/schemas/ItemLightList" + /sec/item/{id}: get: tags: [ Item ] @@ -2255,6 +2334,28 @@ paths: components: schemas: + ItemFtsSearch: + description: | + Query description for a full-text only search. + required: + - query + - offset + - limit + properties: + offset: + type: integer + format: int32 + limit: + type: integer + format: int32 + description: | + The maximum number of results to return. Note that this + limit is a soft limit, there is some hard limit on the + server, too. + query: + type: string + description: | + A query searching the contents of documents. MoveAttachment: description: | Data to move an attachment to another position. @@ -3386,6 +3487,10 @@ components: description: | Search in item names, correspondents, concerned entities and notes. + fullText: + type: string + description: | + A query searching the contents of documents. corrOrg: type: string format: ident @@ -3458,6 +3563,29 @@ components: type: array items: $ref: "#/components/schemas/Tag" + highlighting: + description: | + Optional contextual information of a search query. Each + item refers to some field where a search match was found + (e.g. the name of an attachment or the item notes) and a + list of lines giving surrounding context of the macth. + type: array + items: + $ref: "#/components/schemas/HighlightEntry" + HighlightEntry: + description: | + Highlighting information for a single field (maybe attachment + name or item notes). + required: + - name + - lines + properties: + name: + type: string + lines: + type: array + items: + type: string IdName: description: | The identifier and a human readable name of some entity. diff --git a/modules/restserver/src/main/resources/reference.conf b/modules/restserver/src/main/resources/reference.conf index 4b174649..1142c2cb 100644 --- a/modules/restserver/src/main/resources/reference.conf +++ b/modules/restserver/src/main/resources/reference.conf @@ -22,7 +22,7 @@ docspell.server { # within the client config, but it is restricted by the server to # the number defined here. An admin might choose a lower number # depending on the available resources. - max-item-page-size = 500 + max-item-page-size = 200 # Authentication. auth { @@ -84,6 +84,40 @@ docspell.server { } } + # Configuration of the full-text search engine. + full-text-search { + # The full-text search feature can be disabled. It requires an + # additional index server which needs additional memory and disk + # space. It can be enabled later any time. + # + # Currently the SOLR search platform is supported. + enabled = false + + # When re-creating the complete index via a REST call, this key + # is required. If left empty (the default), recreating the index + # is disabled. + # + # Example curl command: + # curl -XPOST http://localhost:7880/api/v1/open/fts/reIndexAll/test123 + recreate-key = "" + + # Configuration for the SOLR backend. + solr = { + # The URL to solr + url = "http://localhost:8983/solr/docspell" + # Used to tell solr when to commit the data + commit-within = 1000 + # If true, logs request and response bodies + log-verbose = false + # The defType parameter to lucene that defines the parser to + # use. You might want to try "edismax" or look here: + # https://lucene.apache.org/solr/guide/8_4/query-syntax-and-parsing.html#query-syntax-and-parsing + def-type = "lucene" + # The default combiner for tokens. One of {AND, OR}. + q-op = "OR" + } + } + # Configuration for the backend. backend { # Enable or disable debugging for e-mail related functionality. This diff --git a/modules/restserver/src/main/scala/docspell/restserver/Config.scala b/modules/restserver/src/main/scala/docspell/restserver/Config.scala index f971cc63..165f6822 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/Config.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/Config.scala @@ -1,9 +1,10 @@ package docspell.restserver import java.net.InetAddress +import docspell.common._ import docspell.backend.auth.Login import docspell.backend.{Config => BackendConfig} -import docspell.common._ +import docspell.ftssolr.SolrConfig case class Config( appName: String, @@ -13,7 +14,8 @@ case class Config( backend: BackendConfig, auth: Login.Config, integrationEndpoint: Config.IntegrationEndpoint, - maxItemPageSize: Int + maxItemPageSize: Int, + fullTextSearch: Config.FullTextSearch ) object Config { @@ -50,4 +52,9 @@ object Config { } } } + + case class FullTextSearch(enabled: Boolean, recreateKey: Ident, solr: SolrConfig) + + object FullTextSearch {} + } diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala b/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala index b2662cd3..220bf066 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/RestAppImpl.scala @@ -2,8 +2,11 @@ package docspell.restserver import cats.implicits._ import cats.effect._ +import org.http4s.client.Client import docspell.backend.BackendApp import docspell.common.NodeType +import docspell.ftsclient.FtsClient +import docspell.ftssolr.SolrFtsClient import scala.concurrent.ExecutionContext @@ -26,9 +29,16 @@ object RestAppImpl { blocker: Blocker ): Resource[F, RestApp[F]] = for { - backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker) + backend <- BackendApp(cfg.backend, connectEC, httpClientEc, blocker)( + createFtsClient[F](cfg) + ) app = new RestAppImpl[F](cfg, backend) appR <- Resource.make(app.init.map(_ => app))(_.shutdown) } yield appR + private def createFtsClient[F[_]: ConcurrentEffect: ContextShift]( + cfg: Config + )(client: Client[F]): Resource[F, FtsClient[F]] = + if (cfg.fullTextSearch.enabled) SolrFtsClient(cfg.fullTextSearch.solr, client) + else Resource.pure[F, FtsClient[F]](FtsClient.none[F]) } diff --git a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala index e0495298..b8f0a700 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/RestServer.scala @@ -78,7 +78,8 @@ object RestServer { "email/sent" -> SentMailRoutes(restApp.backend, token), "usertask/notifydueitems" -> NotifyDueItemsRoutes(cfg, restApp.backend, token), "usertask/scanmailbox" -> ScanMailboxRoutes(restApp.backend, token), - "calevent/check" -> CalEventCheckRoutes() + "calevent/check" -> CalEventCheckRoutes(), + "fts" -> FullTextIndexRoutes.secured(cfg, restApp.backend, token) ) def openRoutes[F[_]: Effect](cfg: Config, restApp: RestApp[F]): HttpRoutes[F] = @@ -87,7 +88,8 @@ object RestServer { "signup" -> RegisterRoutes(restApp.backend, cfg), "upload" -> UploadRoutes.open(restApp.backend, cfg), "checkfile" -> CheckFileRoutes.open(restApp.backend), - "integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg) + "integration" -> IntegrationEndpointRoutes.open(restApp.backend, cfg), + "fts" -> FullTextIndexRoutes.open(cfg, restApp.backend) ) def redirectTo[F[_]: Effect](path: String): HttpRoutes[F] = { diff --git a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala index e43300f0..86296a15 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/conv/Conversions.scala @@ -14,8 +14,9 @@ import bitpeace.FileMeta import docspell.backend.ops.OCollective.{InsightData, PassChangeResult} import docspell.backend.ops.OJob.JobCancelResult import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult} -import docspell.backend.ops.{OItemSearch, OJob, OOrganization, OUpload} +import docspell.backend.ops.{OFulltext, OItemSearch, OJob, OOrganization, OUpload} import docspell.store.AddResult +import docspell.ftsclient.FtsResult import org.http4s.multipart.Multipart import org.http4s.headers.`Content-Type` import org.log4s.Logger @@ -124,6 +125,7 @@ trait Conversions { m.dueDateFrom, m.dueDateUntil, m.allNames, + None, None ) @@ -138,6 +140,17 @@ trait Conversions { ItemLightList(gs) } + def mkItemListFts(v: Vector[OFulltext.FtsItem]): ItemLightList = { + val groups = v.groupBy(item => item.item.date.toUtcDate.toString.substring(0, 7)) + + def mkGroup(g: (String, Vector[OFulltext.FtsItem])): ItemLightGroup = + ItemLightGroup(g._1, g._2.map(mkItemLight).toList) + + val gs = + groups.map(mkGroup _).toList.sortWith((g1, g2) => g1.name.compareTo(g2.name) >= 0) + ItemLightList(gs) + } + def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = { val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7)) @@ -149,6 +162,21 @@ trait Conversions { ItemLightList(gs) } + def mkItemListWithTagsFts(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = { + val groups = v.groupBy(ti => ti.item.item.date.toUtcDate.toString.substring(0, 7)) + + def mkGroup(g: (String, Vector[OFulltext.FtsItemWithTags])): ItemLightGroup = + ItemLightGroup(g._1, g._2.map(mkItemLightWithTags).toList) + + val gs = + groups.map(mkGroup _).toList.sortWith((g1, g2) => g1.name.compareTo(g2.name) >= 0) + ItemLightList(gs) + } + + def mkItemListWithTagsFtsPlain(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = + if (v.isEmpty) ItemLightList(Nil) + else ItemLightList(List(ItemLightGroup("Results", v.map(mkItemLightWithTags).toList))) + def mkItemLight(i: OItemSearch.ListItem): ItemLight = ItemLight( i.id, @@ -163,12 +191,35 @@ trait Conversions { i.concPerson.map(mkIdName), i.concEquip.map(mkIdName), i.fileCount, + Nil, Nil ) + def mkItemLight(i: OFulltext.FtsItem): ItemLight = { + val il = mkItemLight(i.item) + val highlight = mkHighlight(i.ftsData) + il.copy(highlighting = highlight) + } + def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight = mkItemLight(i.item).copy(tags = i.tags.map(mkTag)) + def mkItemLightWithTags(i: OFulltext.FtsItemWithTags): ItemLight = { + val il = mkItemLightWithTags(i.item) + val highlight = mkHighlight(i.ftsData) + il.copy(highlighting = highlight) + } + + private def mkHighlight(ftsData: OFulltext.FtsData): List[HighlightEntry] = + ftsData.items.filter(_.context.nonEmpty).sortBy(-_.score).map { fdi => + fdi.matchData match { + case FtsResult.AttachmentData(_, aName) => + HighlightEntry(aName, fdi.context) + case FtsResult.ItemData => + HighlightEntry("Item", fdi.context) + } + } + // job def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = { def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = { diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/FullTextIndexRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/FullTextIndexRoutes.scala new file mode 100644 index 00000000..9a8d2c7a --- /dev/null +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/FullTextIndexRoutes.scala @@ -0,0 +1,60 @@ +package docspell.restserver.routes + +import cats.effect._ +import cats.implicits._ +import cats.data.OptionT +import org.http4s._ +//import org.http4s.circe.CirceEntityDecoder._ +import org.http4s.circe.CirceEntityEncoder._ +import org.http4s.dsl.Http4sDsl + +import docspell.common._ +import docspell.backend.BackendApp +import docspell.backend.auth.AuthToken +import docspell.restserver.Config +import docspell.restserver.conv.Conversions + +object FullTextIndexRoutes { + + def secured[F[_]: Effect]( + cfg: Config, + backend: BackendApp[F], + user: AuthToken + ): HttpRoutes[F] = + if (!cfg.fullTextSearch.enabled) notFound[F] + else { + val dsl = Http4sDsl[F] + import dsl._ + + HttpRoutes.of { + case POST -> Root / "reIndex" => + for { + res <- backend.fulltext.reindexCollective(user.account).attempt + resp <- + Ok(Conversions.basicResult(res, "Full-text index will be re-created.")) + } yield resp + } + } + + def open[F[_]: Effect](cfg: Config, backend: BackendApp[F]): HttpRoutes[F] = + if (!cfg.fullTextSearch.enabled) notFound[F] + else { + val dsl = Http4sDsl[F] + import dsl._ + + HttpRoutes.of { + case POST -> Root / "reIndexAll" / Ident(id) => + for { + res <- + if (id.nonEmpty && id == cfg.fullTextSearch.recreateKey) + backend.fulltext.reindexAll.attempt + else Left(new Exception("The provided key is invalid.")).pure[F] + resp <- + Ok(Conversions.basicResult(res, "Full-text index will be re-created.")) + } yield resp + } + } + + private def notFound[F[_]: Effect]: HttpRoutes[F] = + HttpRoutes(_ => OptionT.pure(Response.notFound[F])) +} diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala index a33017fb..03e8d7ae 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/ItemRoutes.scala @@ -5,6 +5,7 @@ import cats.implicits._ import docspell.backend.BackendApp import docspell.backend.auth.AuthToken import docspell.backend.ops.OItemSearch.Batch +import docspell.backend.ops.OFulltext import docspell.common.{Ident, ItemState} import org.http4s.HttpRoutes import org.http4s.dsl.Http4sDsl @@ -34,11 +35,25 @@ object ItemRoutes { _ <- logger.ftrace(s"Got search mask: $mask") query = Conversions.mkQuery(mask, user.account.collective) _ <- logger.ftrace(s"Running query: $query") - items <- backend.itemSearch.findItems( - query, - Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) - ) - resp <- Ok(Conversions.mkItemList(items)) + resp <- mask.fullText match { + case Some(fq) if cfg.fullTextSearch.enabled => + for { + items <- backend.fulltext.findItems( + query, + OFulltext.FtsInput(fq), + Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) + ) + ok <- Ok(Conversions.mkItemListFts(items)) + } yield ok + case _ => + for { + items <- backend.itemSearch.findItems( + query, + Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) + ) + ok <- Ok(Conversions.mkItemList(items)) + } yield ok + } } yield resp case req @ POST -> Root / "searchWithTags" => @@ -47,11 +62,45 @@ object ItemRoutes { _ <- logger.ftrace(s"Got search mask: $mask") query = Conversions.mkQuery(mask, user.account.collective) _ <- logger.ftrace(s"Running query: $query") - items <- backend.itemSearch.findItemsWithTags( - query, - Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) - ) - resp <- Ok(Conversions.mkItemListWithTags(items)) + resp <- mask.fullText match { + case Some(fq) if cfg.fullTextSearch.enabled => + for { + items <- backend.fulltext.findItemsWithTags( + query, + OFulltext.FtsInput(fq), + Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) + ) + ok <- Ok(Conversions.mkItemListWithTagsFts(items)) + } yield ok + case _ => + for { + items <- backend.itemSearch.findItemsWithTags( + query, + Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) + ) + ok <- Ok(Conversions.mkItemListWithTags(items)) + } yield ok + } + } yield resp + + case req @ POST -> Root / "searchIndex" => + for { + mask <- req.as[ItemFtsSearch] + resp <- mask.query match { + case q if q.length > 1 => + val ftsIn = OFulltext.FtsInput(q) + for { + items <- backend.fulltext.findIndexOnly( + ftsIn, + user.account.collective, + Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize) + ) + ok <- Ok(Conversions.mkItemListWithTagsFtsPlain(items)) + } yield ok + + case _ => + BadRequest(BasicResult(false, "Query string too short")) + } } yield resp case GET -> Root / Ident(id) => diff --git a/modules/restserver/src/main/scala/docspell/restserver/webapp/Flags.scala b/modules/restserver/src/main/scala/docspell/restserver/webapp/Flags.scala index 0dea6b6f..0ceb466c 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/webapp/Flags.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/webapp/Flags.scala @@ -13,7 +13,8 @@ case class Flags( baseUrl: LenientUri, signupMode: SignupConfig.Mode, docspellAssetPath: String, - integrationEnabled: Boolean + integrationEnabled: Boolean, + fullTextSearchEnabled: Boolean ) object Flags { @@ -23,7 +24,8 @@ object Flags { cfg.baseUrl, cfg.backend.signup.mode, s"/app/assets/docspell-webapp/${BuildInfo.version}", - cfg.integrationEndpoint.enabled + cfg.integrationEndpoint.enabled, + cfg.fullTextSearch.enabled ) implicit val jsonEncoder: Encoder[Flags] = diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.7.0__fts-migration.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.7.0__fts-migration.sql new file mode 100644 index 00000000..fce07b37 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.7.0__fts-migration.sql @@ -0,0 +1,10 @@ +CREATE TABLE `fts_migration` ( + `id` varchar(254) not null primary key, + `version` int not null, + `fts_engine` varchar(254) not null, + `description` varchar(254) not null, + `created` timestamp not null +); + +CREATE UNIQUE INDEX `fts_migration_version_engine_idx` +ON `fts_migration`(`version`, `fts_engine`); diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.7.0__fts-migration.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.7.0__fts-migration.sql new file mode 100644 index 00000000..f78fd7eb --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.7.0__fts-migration.sql @@ -0,0 +1,10 @@ +CREATE TABLE "fts_migration" ( + "id" varchar(254) not null primary key, + "version" int not null, + "fts_engine" varchar(254) not null, + "description" varchar(254) not null, + "created" timestamp not null +); + +CREATE UNIQUE INDEX "fts_migration_version_engine_idx" +ON "fts_migration"("version", "fts_engine"); diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index 56b2fffe..90060fba 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -6,7 +6,7 @@ import cats.effect.Sync import cats.data.OptionT import doobie._ import doobie.implicits._ -import docspell.common.{Ident, MetaProposalList} +import docspell.common._ import docspell.store.Store import docspell.store.impl.Implicits._ import docspell.store.records._ @@ -138,4 +138,39 @@ object QAttachment { q.query[RAttachmentMeta].option } + + case class ContentAndName( + id: Ident, + item: Ident, + collective: Ident, + lang: Language, + name: Option[String], + content: Option[String] + ) + def allAttachmentMetaAndName( + coll: Option[Ident], + chunkSize: Int + ): Stream[ConnectionIO, ContentAndName] = { + val aId = RAttachment.Columns.id.prefix("a") + val aItem = RAttachment.Columns.itemId.prefix("a") + val aName = RAttachment.Columns.name.prefix("a") + val mId = RAttachmentMeta.Columns.id.prefix("m") + val mContent = RAttachmentMeta.Columns.content.prefix("m") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + val cId = RCollective.Columns.id.prefix("c") + val cLang = RCollective.Columns.language.prefix("c") + + val cols = Seq(aId, aItem, iColl, cLang, aName, mContent) + val from = RAttachment.table ++ fr"a INNER JOIN" ++ + RAttachmentMeta.table ++ fr"m ON" ++ aId.is(mId) ++ + fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) ++ + fr"INNER JOIN" ++ RCollective.table ++ fr"c ON" ++ cId.is(iColl) + + val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty) + + selectSimple(cols, from, where) + .query[ContentAndName] + .streamWithChunkSize(chunkSize) + } } diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index bad0aa59..a5a72ff2 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -3,6 +3,7 @@ package docspell.store.queries import bitpeace.FileMeta import cats.effect.Sync import cats.data.OptionT +import cats.data.NonEmptyList import cats.implicits._ import cats.effect.concurrent.Ref import fs2.Stream @@ -165,6 +166,7 @@ object QItem { dueDateFrom: Option[Timestamp], dueDateTo: Option[Timestamp], allNames: Option[String], + itemIds: Option[Set[Ident]], orderAsc: Option[RItem.Columns.type => Column] ) @@ -186,6 +188,7 @@ object QItem { None, None, None, + None, None ) } @@ -193,6 +196,12 @@ object QItem { case class Batch(offset: Int, limit: Int) { def restrictLimitTo(n: Int): Batch = Batch(offset, math.min(n, limit)) + + def next: Batch = + Batch(offset + limit, limit) + + def first: Batch = + Batch(0, limit) } object Batch { @@ -205,7 +214,12 @@ object QItem { Batch(0, c) } - def findItems(q: Query, batch: Batch): Stream[ConnectionIO, ListItem] = { + private def findItemsBase( + q: Query, + distinct: Boolean, + moreCols: Seq[Fragment], + ctes: (String, Fragment)* + ): Fragment = { val IC = RItem.Columns val AC = RAttachment.Columns val PC = RPerson.Columns @@ -217,29 +231,31 @@ object QItem { val equipCols = List(REquipment.Columns.eid, REquipment.Columns.name) val finalCols = commas( - IC.id.prefix("i").f, - IC.name.prefix("i").f, - IC.state.prefix("i").f, - coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f), - IC.dueDate.prefix("i").f, - IC.source.prefix("i").f, - IC.incoming.prefix("i").f, - IC.created.prefix("i").f, - fr"COALESCE(a.num, 0)", - OC.oid.prefix("o0").f, - OC.name.prefix("o0").f, - PC.pid.prefix("p0").f, - PC.name.prefix("p0").f, - PC.pid.prefix("p1").f, - PC.name.prefix("p1").f, - EC.eid.prefix("e1").f, - EC.name.prefix("e1").f, - q.orderAsc match { - case Some(co) => - coalesce(co(IC).prefix("i").f, IC.created.prefix("i").f) - case None => - IC.created.prefix("i").f - } + Seq( + IC.id.prefix("i").f, + IC.name.prefix("i").f, + IC.state.prefix("i").f, + coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f), + IC.dueDate.prefix("i").f, + IC.source.prefix("i").f, + IC.incoming.prefix("i").f, + IC.created.prefix("i").f, + fr"COALESCE(a.num, 0)", + OC.oid.prefix("o0").f, + OC.name.prefix("o0").f, + PC.pid.prefix("p0").f, + PC.name.prefix("p0").f, + PC.pid.prefix("p1").f, + PC.name.prefix("p1").f, + EC.eid.prefix("e1").f, + EC.name.prefix("e1").f, + q.orderAsc match { + case Some(co) => + coalesce(co(IC).prefix("i").f, IC.created.prefix("i").f) + case None => + IC.created.prefix("i").f + } + ) ++ moreCols ) val withItem = selectSimple(itemCols, RItem.table, IC.cid.is(q.collective)) @@ -249,19 +265,32 @@ object QItem { val withAttach = fr"SELECT COUNT(" ++ AC.id.f ++ fr") as num, " ++ AC.itemId.f ++ fr"from" ++ RAttachment.table ++ fr"GROUP BY (" ++ AC.itemId.f ++ fr")" + val selectKW = if (distinct) fr"SELECT DISTINCT" else fr"SELECT" val query = withCTE( - "items" -> withItem, - "persons" -> withPerson, - "orgs" -> withOrgs, - "equips" -> withEquips, - "attachs" -> withAttach + (Seq( + "items" -> withItem, + "persons" -> withPerson, + "orgs" -> withOrgs, + "equips" -> withEquips, + "attachs" -> withAttach + ) ++ ctes): _* ) ++ - fr"SELECT DISTINCT" ++ finalCols ++ fr" FROM items i" ++ + selectKW ++ finalCols ++ fr" FROM items i" ++ fr"LEFT JOIN attachs a ON" ++ IC.id.prefix("i").is(AC.itemId.prefix("a")) ++ fr"LEFT JOIN persons p0 ON" ++ IC.corrPerson.prefix("i").is(PC.pid.prefix("p0")) ++ fr"LEFT JOIN orgs o0 ON" ++ IC.corrOrg.prefix("i").is(OC.oid.prefix("o0")) ++ fr"LEFT JOIN persons p1 ON" ++ IC.concPerson.prefix("i").is(PC.pid.prefix("p1")) ++ fr"LEFT JOIN equips e1 ON" ++ IC.concEquipment.prefix("i").is(EC.eid.prefix("e1")) + query + } + + def findItems(q: Query, batch: Batch): Stream[ConnectionIO, ListItem] = { + val IC = RItem.Columns + val PC = RPerson.Columns + val OC = ROrganization.Columns + val EC = REquipment.Columns + + val query = findItemsBase(q, true, Seq.empty) // inclusive tags are AND-ed val tagSelectsIncl = q.tagsInclude @@ -326,7 +355,15 @@ object QItem { ) .getOrElse(Fragment.empty), q.dueDateFrom.map(d => IC.dueDate.prefix("i").isGt(d)).getOrElse(Fragment.empty), - q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty) + q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty), + q.itemIds + .map(ids => + NonEmptyList + .fromList(ids.toList) + .map(nel => IC.id.prefix("i").isIn(nel)) + .getOrElse(IC.id.prefix("i").is("")) + ) + .getOrElse(Fragment.empty) ) val order = q.orderAsc match { @@ -347,14 +384,39 @@ object QItem { frag.query[ListItem].stream } + case class SelectedItem(itemId: Ident, weight: Double) + def findSelectedItems( + q: Query, + items: Set[SelectedItem] + ): Stream[ConnectionIO, ListItem] = + if (items.isEmpty) Stream.empty + else { + val IC = RItem.Columns + val values = items + .map(it => fr"(${it.itemId}, ${it.weight})") + .reduce((r, e) => r ++ fr"," ++ e) + + val from = findItemsBase( + q, + true, + Seq(fr"tids.weight"), + ("tids(item_id, weight)", fr"(VALUES" ++ values ++ fr")") + ) ++ + fr"INNER JOIN tids ON" ++ IC.id.prefix("i").f ++ fr" = tids.item_id" ++ + fr"ORDER BY tids.weight DESC" + + logger.trace(s"fts query: $from") + from.query[ListItem].stream + } + case class ListItemWithTags(item: ListItem, tags: List[RTag]) /** Same as `findItems` but resolves the tags for each item. Note that * this is implemented by running an additional query per item. */ def findItemsWithTags( - q: Query, - batch: Batch + collective: Ident, + search: Stream[ConnectionIO, ListItem] ): Stream[ConnectionIO, ListItemWithTags] = { def findTag( cache: Ref[ConnectionIO, Map[Ident, RTag]], @@ -377,19 +439,20 @@ object QItem { for { resolvedTags <- Stream.eval(Ref.of[ConnectionIO, Map[Ident, RTag]](Map.empty)) - item <- findItems(q, batch) + item <- search tagItems <- Stream.eval(RTagItem.findByItem(item.id)) tags <- Stream.eval(tagItems.traverse(ti => findTag(resolvedTags, ti))) - ftags = tags.flatten.filter(t => t.collective == q.collective) + ftags = tags.flatten.filter(t => t.collective == collective) } yield ListItemWithTags(item, ftags.toList.sortBy(_.name)) } def delete[F[_]: Sync](store: Store[F])(itemId: Ident, collective: Ident): F[Int] = for { - tn <- store.transact(RTagItem.deleteItemTags(itemId)) rn <- QAttachment.deleteItemAttachments(store)(itemId, collective) + tn <- store.transact(RTagItem.deleteItemTags(itemId)) + mn <- store.transact(RSentMail.deleteByItem(itemId)) n <- store.transact(RItem.deleteByIdAndCollective(itemId, collective)) - } yield tn + rn + n + } yield tn + rn + n + mn def findByFileIds(fileMetaIds: Seq[Ident]): ConnectionIO[Vector[RItem]] = { val IC = RItem.Columns @@ -455,4 +518,25 @@ object QItem { prefix(suffix(value)) } + final case class NameAndNotes( + id: Ident, + collective: Ident, + name: String, + notes: Option[String] + ) + def allNameAndNotes( + coll: Option[Ident], + chunkSize: Int + ): Stream[ConnectionIO, NameAndNotes] = { + val iId = RItem.Columns.id + val iColl = RItem.Columns.cid + val iName = RItem.Columns.name + val iNotes = RItem.Columns.notes + + val cols = Seq(iId, iColl, iName, iNotes) + val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty) + selectSimple(cols, RItem.table, where) + .query[NameAndNotes] + .streamWithChunkSize(chunkSize) + } } diff --git a/modules/store/src/main/scala/docspell/store/queries/QPeriodicTask.scala b/modules/store/src/main/scala/docspell/store/queries/QPeriodicTask.scala index e52c27cc..21624f7a 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QPeriodicTask.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QPeriodicTask.scala @@ -51,15 +51,4 @@ object QPeriodicTask { selectSimple(RPeriodicTask.Columns.all, RPeriodicTask.table, where) ++ order sql.query[RPeriodicTask].streamWithChunkSize(2).take(1).compile.last } - - def findNonFinal(pid: Ident): ConnectionIO[Option[RJob]] = - selectSimple( - RJob.Columns.all, - RJob.table, - and( - RJob.Columns.tracker.is(pid), - RJob.Columns.state.isOneOf(JobState.all.diff(JobState.done).toSeq) - ) - ).query[RJob].option - } diff --git a/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala b/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala index e77fa755..2fa60d91 100644 --- a/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala +++ b/modules/store/src/main/scala/docspell/store/queue/JobQueue.scala @@ -11,8 +11,19 @@ import org.log4s._ trait JobQueue[F[_]] { + /** Inserts the job into the queue to get picked up as soon as + * possible. The job must have a new unique id. + */ def insert(job: RJob): F[Unit] + /** Inserts the job into the queue only, if there is no job with the + * same tracker-id running at the moment. The job id must be a new + * unique id. + * + * If the job has no tracker defined, it is simply inserted. + */ + def insertIfNew(job: RJob): F[Unit] + def insertAll(jobs: Seq[RJob]): F[Unit] def nextJob( @@ -46,6 +57,19 @@ object JobQueue { else ().pure[F] } + def insertIfNew(job: RJob): F[Unit] = + for { + rj <- job.tracker match { + case Some(tid) => + store.transact(RJob.findNonFinalByTracker(tid)) + case None => + None.pure[F] + } + ret <- + if (rj.isDefined) ().pure[F] + else insert(job) + } yield ret + def insertAll(jobs: Seq[RJob]): F[Unit] = jobs.toList .traverse(j => insert(j).attempt) diff --git a/modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala b/modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala index c94458bb..16350602 100644 --- a/modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala +++ b/modules/store/src/main/scala/docspell/store/queue/PeriodicTaskStore.scala @@ -100,7 +100,7 @@ object PeriodicTaskStore { } def findNonFinalJob(pjobId: Ident): F[Option[RJob]] = - store.transact(QPeriodicTask.findNonFinal(pjobId)) + store.transact(RJob.findNonFinalByTracker(pjobId)) def insert(task: RPeriodicTask): F[Unit] = { val update = store.transact(RPeriodicTask.update(task)) diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala index bc1fa8f4..e11c76b7 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala @@ -183,4 +183,6 @@ object RAttachment { n2 <- deleteFrom(table, id.is(attachId)).update.run } yield n0 + n1 + n2 + def findItemId(attachId: Ident): ConnectionIO[Option[Ident]] = + selectSimple(Seq(itemId), table, id.is(attachId)).query[Ident].option } diff --git a/modules/store/src/main/scala/docspell/store/records/RFtsMigration.scala b/modules/store/src/main/scala/docspell/store/records/RFtsMigration.scala new file mode 100644 index 00000000..edd47c1c --- /dev/null +++ b/modules/store/src/main/scala/docspell/store/records/RFtsMigration.scala @@ -0,0 +1,59 @@ +package docspell.store.records + +import cats.implicits._ +import cats.effect._ +import doobie._ +import doobie.implicits._ +import docspell.common._ +import docspell.store.impl._ +import docspell.store.impl.Implicits._ + +final case class RFtsMigration( + id: Ident, + version: Int, + ftsEngine: Ident, + description: String, + created: Timestamp +) + +object RFtsMigration { + + def create[F[_]: Sync]( + version: Int, + ftsEngine: Ident, + description: String + ): F[RFtsMigration] = + for { + newId <- Ident.randomId[F] + now <- Timestamp.current[F] + } yield RFtsMigration(newId, version, ftsEngine, description, now) + + val table = fr"fts_migration" + + object Columns { + val id = Column("id") + val version = Column("version") + val ftsEngine = Column("fts_engine") + val description = Column("description") + val created = Column("created") + + val all = List(id, version, ftsEngine, description, created) + } + import Columns._ + + def insert(v: RFtsMigration): ConnectionIO[Int] = + insertRow( + table, + all, + fr"${v.id},${v.version},${v.ftsEngine},${v.description},${v.created}" + ).updateWithLogHandler(LogHandler.nop).run + + def exists(vers: Int, engine: Ident): ConnectionIO[Boolean] = + selectCount(id, table, and(version.is(vers), ftsEngine.is(engine))) + .query[Int] + .unique + .map(_ > 0) + + def deleteById(rId: Ident): ConnectionIO[Int] = + deleteFrom(table, id.is(rId)).update.run +} diff --git a/modules/store/src/main/scala/docspell/store/records/RJob.scala b/modules/store/src/main/scala/docspell/store/records/RJob.scala index 9a286743..9f607c5c 100644 --- a/modules/store/src/main/scala/docspell/store/records/RJob.scala +++ b/modules/store/src/main/scala/docspell/store/records/RJob.scala @@ -255,4 +255,12 @@ object RJob { .map(_ => 1) .compile .foldMonoid + + def findNonFinalByTracker(trackerId: Ident): ConnectionIO[Option[RJob]] = + selectSimple( + all, + table, + and(tracker.is(trackerId), state.isOneOf(JobState.all.diff(JobState.done).toSeq)) + ).query[RJob].option + } diff --git a/modules/store/src/main/scala/docspell/store/records/RSentMail.scala b/modules/store/src/main/scala/docspell/store/records/RSentMail.scala index 4d02a8bd..461fdc23 100644 --- a/modules/store/src/main/scala/docspell/store/records/RSentMail.scala +++ b/modules/store/src/main/scala/docspell/store/records/RSentMail.scala @@ -3,6 +3,7 @@ package docspell.store.records import fs2.Stream import cats.effect._ import cats.implicits._ +import cats.data.NonEmptyList import doobie._ import doobie.implicits._ import docspell.common._ @@ -115,4 +116,15 @@ object RSentMail { def delete(mailId: Ident): ConnectionIO[Int] = deleteFrom(table, id.is(mailId)).update.run + + def deleteByItem(item: Ident): ConnectionIO[Int] = + for { + list <- RSentMailItem.findSentMailIdsByItem(item) + n1 <- RSentMailItem.deleteAllByItem(item) + n0 <- NonEmptyList.fromList(list.toList) match { + case Some(nel) => deleteFrom(table, id.isIn(nel)).update.run + case None => 0.pure[ConnectionIO] + } + } yield n0 + n1 + } diff --git a/modules/store/src/main/scala/docspell/store/records/RSentMailItem.scala b/modules/store/src/main/scala/docspell/store/records/RSentMailItem.scala index 5f796c6e..41fbcf90 100644 --- a/modules/store/src/main/scala/docspell/store/records/RSentMailItem.scala +++ b/modules/store/src/main/scala/docspell/store/records/RSentMailItem.scala @@ -54,4 +54,10 @@ object RSentMailItem { def deleteMail(mailId: Ident): ConnectionIO[Int] = deleteFrom(table, sentMailId.is(mailId)).update.run + + def findSentMailIdsByItem(item: Ident): ConnectionIO[Set[Ident]] = + selectSimple(Seq(sentMailId), table, itemId.is(item)).query[Ident].to[Set] + + def deleteAllByItem(item: Ident): ConnectionIO[Int] = + deleteFrom(table, itemId.is(item)).update.run } diff --git a/modules/webapp/src/main/elm/Api.elm b/modules/webapp/src/main/elm/Api.elm index edd76b96..b84880b0 100644 --- a/modules/webapp/src/main/elm/Api.elm +++ b/modules/webapp/src/main/elm/Api.elm @@ -45,6 +45,7 @@ module Api exposing , getTags , getUsers , itemDetail + , itemIndexSearch , itemSearch , login , loginSession @@ -77,6 +78,7 @@ module Api exposing , setUnconfirmed , startOnceNotifyDueItems , startOnceScanMailbox + , startReIndex , submitNotifyDueItems , updateNotifyDueItems , updateScanMailbox @@ -104,6 +106,7 @@ import Api.Model.ImapSettings exposing (ImapSettings) import Api.Model.ImapSettingsList exposing (ImapSettingsList) import Api.Model.InviteResult exposing (InviteResult) import Api.Model.ItemDetail exposing (ItemDetail) +import Api.Model.ItemFtsSearch exposing (ItemFtsSearch) import Api.Model.ItemInsights exposing (ItemInsights) import Api.Model.ItemLightList exposing (ItemLightList) import Api.Model.ItemProposals exposing (ItemProposals) @@ -147,6 +150,20 @@ import Util.Http as Http2 +--- Full-Text + + +startReIndex : Flags -> (Result Http.Error BasicResult -> msg) -> Cmd msg +startReIndex flags receive = + Http2.authPost + { url = flags.config.baseUrl ++ "/api/v1/sec/fts/reIndex" + , account = getAccount flags + , body = Http.emptyBody + , expect = Http.expectJson receive Api.Model.BasicResult.decoder + } + + + --- Scan Mailboxes @@ -1092,6 +1109,20 @@ moveAttachmentBefore flags itemId data receive = } +itemIndexSearch : + Flags + -> ItemFtsSearch + -> (Result Http.Error ItemLightList -> msg) + -> Cmd msg +itemIndexSearch flags query receive = + Http2.authPost + { url = flags.config.baseUrl ++ "/api/v1/sec/item/searchIndex" + , account = getAccount flags + , body = Http.jsonBody (Api.Model.ItemFtsSearch.encode query) + , expect = Http.expectJson receive Api.Model.ItemLightList.decoder + } + + itemSearch : Flags -> ItemSearch -> (Result Http.Error ItemLightList -> msg) -> Cmd msg itemSearch flags search receive = Http2.authPost diff --git a/modules/webapp/src/main/elm/App/View.elm b/modules/webapp/src/main/elm/App/View.elm index bc6a460b..bbf90084 100644 --- a/modules/webapp/src/main/elm/App/View.elm +++ b/modules/webapp/src/main/elm/App/View.elm @@ -170,7 +170,7 @@ viewLogin model = viewHome : Model -> Html Msg viewHome model = - Html.map HomeMsg (Page.Home.View.view model.uiSettings model.homeModel) + Html.map HomeMsg (Page.Home.View.view model.flags model.uiSettings model.homeModel) menuEntry : Model -> Page -> List (Html Msg) -> Html Msg diff --git a/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm b/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm index 74a7540a..04fbe4de 100644 --- a/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm +++ b/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm @@ -7,6 +7,8 @@ module Comp.CollectiveSettingsForm exposing , view ) +import Api +import Api.Model.BasicResult exposing (BasicResult) import Api.Model.CollectiveSettings exposing (CollectiveSettings) import Comp.Dropdown import Data.Flags exposing (Flags) @@ -14,13 +16,17 @@ import Data.Language exposing (Language) import Data.UiSettings exposing (UiSettings) import Html exposing (..) import Html.Attributes exposing (..) -import Html.Events exposing (onCheck) +import Html.Events exposing (onCheck, onClick, onInput) +import Http +import Util.Http type alias Model = { langModel : Comp.Dropdown.Model Language , intEnabled : Bool , initSettings : CollectiveSettings + , fullTextConfirmText : String + , fullTextReIndexResult : Maybe BasicResult } @@ -44,6 +50,8 @@ init settings = } , intEnabled = settings.integrationEnabled , initSettings = settings + , fullTextConfirmText = "" + , fullTextReIndexResult = Nothing } @@ -61,10 +69,13 @@ getSettings model = type Msg = LangDropdownMsg (Comp.Dropdown.Msg Language) | ToggleIntegrationEndpoint + | SetFullTextConfirm String + | TriggerReIndex + | TriggerReIndexResult (Result Http.Error BasicResult) update : Flags -> Msg -> Model -> ( Model, Cmd Msg, Maybe CollectiveSettings ) -update _ msg model = +update flags msg model = case msg of LangDropdownMsg m -> let @@ -90,17 +101,70 @@ update _ msg model = in ( nextModel, Cmd.none, Just (getSettings nextModel) ) + SetFullTextConfirm str -> + ( { model | fullTextConfirmText = str }, Cmd.none, Nothing ) + + TriggerReIndex -> + case String.toLower model.fullTextConfirmText of + "ok" -> + ( { model | fullTextReIndexResult = Nothing } + , Api.startReIndex flags TriggerReIndexResult + , Nothing + ) + + _ -> + ( { model + | fullTextReIndexResult = + Just + (BasicResult False <| + "Please type OK in the field if you really " + ++ "want to start re-indexing your data." + ) + } + , Cmd.none + , Nothing + ) + + TriggerReIndexResult (Ok br) -> + ( { model | fullTextReIndexResult = Just br }, Cmd.none, Nothing ) + + TriggerReIndexResult (Err err) -> + ( { model + | fullTextReIndexResult = + Just (BasicResult False (Util.Http.errorToString err)) + } + , Cmd.none + , Nothing + ) + view : Flags -> UiSettings -> Model -> Html Msg view flags settings model = - div [ class "ui form" ] - [ div [ class "field" ] + div + [ classList + [ ( "ui form", True ) + , ( "error", Maybe.map .success model.fullTextReIndexResult == Just False ) + , ( "success", Maybe.map .success model.fullTextReIndexResult == Just True ) + ] + ] + [ h3 [ class "ui dividing header" ] + [ text "Document Language" + ] + , div [ class "field" ] [ label [] [ text "Document Language" ] , Html.map LangDropdownMsg (Comp.Dropdown.view settings model.langModel) , span [ class "small-info" ] [ text "The language of your documents. This helps text recognition (OCR) and text analysis." ] ] + , h3 + [ classList + [ ( "ui dividing header", True ) + , ( "invisible hidden", not flags.config.integrationEnabled ) + ] + ] + [ text "Integration Endpoint" + ] , div [ classList [ ( "field", True ) @@ -121,4 +185,50 @@ view flags settings model = ] ] ] + , h3 + [ classList + [ ( "ui dividing header", True ) + , ( "invisible hidden", not flags.config.fullTextSearchEnabled ) + ] + ] + [ text "Full-Text Search" + ] + , div + [ classList + [ ( "inline field", True ) + , ( "invisible hidden", not flags.config.fullTextSearchEnabled ) + ] + ] + [ div [ class "ui action input" ] + [ input + [ type_ "text" + , value model.fullTextConfirmText + , onInput SetFullTextConfirm + ] + [] + , button + [ class "ui primary right labeled icon button" + , onClick TriggerReIndex + ] + [ i [ class "refresh icon" ] [] + , text "Re-Index All Data" + ] + ] + , div [ class "small-info" ] + [ text "This starts a task that clears the full-text index and re-indexes all your data again." + , text "You must type OK before clicking the button to avoid accidental re-indexing." + ] + , div + [ classList + [ ( "ui message", True ) + , ( "error", Maybe.map .success model.fullTextReIndexResult == Just False ) + , ( "success", Maybe.map .success model.fullTextReIndexResult == Just True ) + , ( "hidden invisible", model.fullTextReIndexResult == Nothing ) + ] + ] + [ Maybe.map .message model.fullTextReIndexResult + |> Maybe.withDefault "" + |> text + ] + ] ] diff --git a/modules/webapp/src/main/elm/Comp/FixedDropdown.elm b/modules/webapp/src/main/elm/Comp/FixedDropdown.elm index 3cc016d6..079545a5 100644 --- a/modules/webapp/src/main/elm/Comp/FixedDropdown.elm +++ b/modules/webapp/src/main/elm/Comp/FixedDropdown.elm @@ -9,6 +9,7 @@ module Comp.FixedDropdown exposing , update , view , viewString + , viewStyled ) import Html exposing (..) @@ -69,11 +70,12 @@ update msg model = ( model, Just item.id ) -view : Maybe (Item a) -> Model a -> Html (Msg a) -view selected model = +viewStyled : String -> Maybe (Item a) -> Model a -> Html (Msg a) +viewStyled classes selected model = div [ classList [ ( "ui selection dropdown", True ) + , ( classes, True ) , ( "open", model.menuOpen ) ] , onClick ToggleMenu @@ -102,6 +104,11 @@ view selected model = ] +view : Maybe (Item a) -> Model a -> Html (Msg a) +view selected model = + viewStyled "" selected model + + viewString : Maybe String -> Model String -> Html (Msg String) viewString selected model = view (Maybe.map (\s -> Item s s) selected) model diff --git a/modules/webapp/src/main/elm/Comp/ItemCardList.elm b/modules/webapp/src/main/elm/Comp/ItemCardList.elm index 1d4e9023..47d2c345 100644 --- a/modules/webapp/src/main/elm/Comp/ItemCardList.elm +++ b/modules/webapp/src/main/elm/Comp/ItemCardList.elm @@ -8,6 +8,7 @@ module Comp.ItemCardList exposing , view ) +import Api.Model.HighlightEntry exposing (HighlightEntry) import Api.Model.ItemLight exposing (ItemLight) import Api.Model.ItemLightGroup exposing (ItemLightGroup) import Api.Model.ItemLightList exposing (ItemLightList) @@ -19,6 +20,7 @@ import Data.UiSettings exposing (UiSettings) import Html exposing (..) import Html.Attributes exposing (..) import Html.Events exposing (onClick) +import Markdown import Ports import Util.List import Util.String @@ -74,21 +76,10 @@ update _ msg model = else let - firstNew = - Data.Items.first list - - scrollCmd = - case firstNew of - Just item -> - Ports.scrollToElem item.id - - Nothing -> - Cmd.none - newModel = { model | results = Data.Items.concat model.results list } in - ( newModel, scrollCmd, Nothing ) + ( newModel, Cmd.none, Nothing ) SelectItem item -> ( model, Cmd.none, Just item ) @@ -247,4 +238,41 @@ viewItem settings item = ] ] ] + , div + [ classList + [ ( "content search-highlight", True ) + , ( "invisible hidden", item.highlighting == [] ) + ] + ] + [ div [ class "ui list" ] + (List.map renderHighlightEntry item.highlighting) + ] + ] + + +renderHighlightEntry : HighlightEntry -> Html Msg +renderHighlightEntry entry = + let + stripWhitespace str = + String.trim str + |> String.replace "```" "" + |> String.replace "\t" " " + |> String.replace "\n\n" "\n" + |> String.lines + |> List.map String.trim + |> String.join "\n" + in + div [ class "item" ] + [ div [ class "content" ] + (div [ class "header" ] + [ i [ class "caret right icon" ] [] + , text (entry.name ++ ":") + ] + :: List.map + (\str -> + Markdown.toHtml [ class "description" ] <| + (stripWhitespace str ++ "…") + ) + entry.lines + ) ] diff --git a/modules/webapp/src/main/elm/Comp/SearchMenu.elm b/modules/webapp/src/main/elm/Comp/SearchMenu.elm index d06de0e0..5e1b93d9 100644 --- a/modules/webapp/src/main/elm/Comp/SearchMenu.elm +++ b/modules/webapp/src/main/elm/Comp/SearchMenu.elm @@ -25,8 +25,9 @@ import Data.UiSettings exposing (UiSettings) import DatePicker exposing (DatePicker) import Html exposing (..) import Html.Attributes exposing (..) -import Html.Events exposing (onCheck, onInput) +import Html.Events exposing (onCheck, onClick, onInput) import Http +import Util.Html exposing (KeyCode(..)) import Util.Maybe import Util.Tag import Util.Update @@ -55,7 +56,9 @@ type alias Model = , untilDueDate : Maybe Int , nameModel : Maybe String , allNameModel : Maybe String + , fulltextModel : Maybe String , datePickerInitialized : Bool + , showNameHelp : Bool } @@ -111,7 +114,9 @@ init = , untilDueDate = Nothing , nameModel = Nothing , allNameModel = Nothing + , fulltextModel = Nothing , datePickerInitialized = False + , showNameHelp = False } @@ -135,7 +140,10 @@ type Msg | GetPersonResp (Result Http.Error ReferenceList) | SetName String | SetAllName String + | SetFulltext String | ResetForm + | KeyUpMsg (Maybe KeyCode) + | ToggleNameHelp getDirection : Model -> Maybe Direction @@ -188,6 +196,7 @@ getItemSearch model = , allNames = model.allNameModel |> Maybe.map amendWildcards + , fullText = model.fulltextModel } @@ -471,7 +480,7 @@ update flags settings msg model = ( { model | nameModel = next } , Cmd.none ) - (model.nameModel /= next) + False SetAllName str -> let @@ -482,15 +491,35 @@ update flags settings msg model = ( { model | allNameModel = next } , Cmd.none ) - (model.allNameModel /= next) + False + + SetFulltext str -> + let + next = + Util.Maybe.fromString str + in + NextState + ( { model | fulltextModel = next } + , Cmd.none + ) + False + + KeyUpMsg (Just Enter) -> + NextState ( model, Cmd.none ) True + + KeyUpMsg _ -> + NextState ( model, Cmd.none ) False + + ToggleNameHelp -> + NextState ( { model | showNameHelp = not model.showNameHelp }, Cmd.none ) False -- View -view : UiSettings -> Model -> Html Msg -view settings model = +view : Flags -> UiSettings -> Model -> Html Msg +view flags settings model = let formHeader icon headline = div [ class "ui small dividing header" ] @@ -500,6 +529,21 @@ view settings model = ] ] + formHeaderHelp icon headline tagger = + div [ class "ui small dividing header" ] + [ a + [ class "right-float" + , href "#" + , onClick tagger + ] + [ i [ class "small grey help link icon" ] [] + ] + , icon + , div [ class "content" ] + [ text headline + ] + ] + nameIcon = i [ class "left align icon" ] [] in @@ -517,17 +561,54 @@ view settings model = ] ] ] - , formHeader nameIcon "Names" + , div + [ classList + [ ( "field", True ) + , ( "invisible hidden", not flags.config.fullTextSearchEnabled ) + ] + ] + [ label [] [ text "Content Search" ] + , input + [ type_ "text" + , onInput SetFulltext + , Util.Html.onKeyUpCode KeyUpMsg + , model.fulltextModel |> Maybe.withDefault "" |> value + ] + [] + , span [ class "small-info" ] + [ text "Fulltext search in document contents." + ] + ] + , formHeaderHelp nameIcon "Names" ToggleNameHelp + , span + [ classList + [ ( "small-info", True ) + , ( "invisible hidden", not model.showNameHelp ) + ] + ] + [ text "Use wildcards " + , code [] [ text "*" ] + , text " at beginning or end. Added automatically if not " + , text "present and not quoted. Press " + , em [] [ text "Enter" ] + , text " to start searching." + ] , div [ class "field" ] [ label [] [ text "All Names" ] , input [ type_ "text" , onInput SetAllName + , Util.Html.onKeyUpCode KeyUpMsg , model.allNameModel |> Maybe.withDefault "" |> value ] [] - , span [ class "small-info" ] - [ text "Looks in correspondents, concerned, item name and notes." + , span + [ classList + [ ( "small-info", True ) + , ( "invisible hidden", not model.showNameHelp ) + ] + ] + [ text "Looks in correspondents, concerned entities, item name and notes." ] ] , div [ class "field" ] @@ -535,18 +616,18 @@ view settings model = , input [ type_ "text" , onInput SetName + , Util.Html.onKeyUpCode KeyUpMsg , model.nameModel |> Maybe.withDefault "" |> value ] [] - , span [ class "small-info" ] - [ text "Looks in item name." + , span + [ classList + [ ( "small-info", True ) + , ( "invisible hidden", not model.showNameHelp ) + ] + ] + [ text "Looks in item name only." ] - ] - , span [ class "small-info" ] - [ text "Use wildcards " - , code [] [ text "*" ] - , text " at beginning or end. Added automatically if not " - , text "present and not quoted." ] , formHeader (Icons.tagsIcon "") "Tags" , div [ class "field" ] diff --git a/modules/webapp/src/main/elm/Data/Flags.elm b/modules/webapp/src/main/elm/Data/Flags.elm index 2be9c862..16fd0122 100644 --- a/modules/webapp/src/main/elm/Data/Flags.elm +++ b/modules/webapp/src/main/elm/Data/Flags.elm @@ -15,6 +15,7 @@ type alias Config = , signupMode : String , docspellAssetPath : String , integrationEnabled : Bool + , fullTextSearchEnabled : Bool } diff --git a/modules/webapp/src/main/elm/Page/Home/Data.elm b/modules/webapp/src/main/elm/Page/Home/Data.elm index 047cd2b1..ad60e3db 100644 --- a/modules/webapp/src/main/elm/Page/Home/Data.elm +++ b/modules/webapp/src/main/elm/Page/Home/Data.elm @@ -1,15 +1,19 @@ module Page.Home.Data exposing ( Model , Msg(..) + , SearchType(..) , ViewMode(..) + , defaultSearchType , doSearchCmd , init , itemNav , resultsBelowLimit + , searchTypeString ) import Api import Api.Model.ItemLightList exposing (ItemLightList) +import Comp.FixedDropdown import Comp.ItemCardList import Comp.SearchMenu import Data.Flags exposing (Flags) @@ -17,6 +21,7 @@ import Data.Items import Data.UiSettings exposing (UiSettings) import Http import Throttle exposing (Throttle) +import Util.Html exposing (KeyCode(..)) type alias Model = @@ -29,11 +34,22 @@ type alias Model = , moreAvailable : Bool , moreInProgress : Bool , throttle : Throttle Msg + , searchTypeDropdown : Comp.FixedDropdown.Model SearchType + , searchType : SearchType + , contentOnlySearch : Maybe String } init : Flags -> Model -init _ = +init flags = + let + searchTypeOptions = + if flags.config.fullTextSearchEnabled then + [ BasicSearch, ContentSearch, ContentOnlySearch ] + + else + [ BasicSearch ] + in { searchMenuModel = Comp.SearchMenu.init , itemListModel = Comp.ItemCardList.init , searchInProgress = False @@ -43,9 +59,23 @@ init _ = , moreAvailable = True , moreInProgress = False , throttle = Throttle.create 1 + , searchTypeDropdown = + Comp.FixedDropdown.initMap searchTypeString + searchTypeOptions + , searchType = defaultSearchType flags + , contentOnlySearch = Nothing } +defaultSearchType : Flags -> SearchType +defaultSearchType flags = + if flags.config.fullTextSearchEnabled then + ContentSearch + + else + BasicSearch + + type Msg = Init | SearchMenuMsg Comp.SearchMenu.Msg @@ -58,6 +88,28 @@ type Msg | LoadMore | UpdateThrottle | SetBasicSearch String + | SearchTypeMsg (Comp.FixedDropdown.Msg SearchType) + | KeyUpMsg (Maybe KeyCode) + | SetContentOnly String + + +type SearchType + = BasicSearch + | ContentSearch + | ContentOnlySearch + + +searchTypeString : SearchType -> String +searchTypeString st = + case st of + BasicSearch -> + "All Names" + + ContentSearch -> + "Contents" + + ContentOnlySearch -> + "Contents Only" type ViewMode @@ -81,6 +133,19 @@ itemNav id model = doSearchCmd : Flags -> UiSettings -> Int -> Model -> Cmd Msg doSearchCmd flags settings offset model = + case model.searchType of + BasicSearch -> + doSearchDefaultCmd flags settings offset model + + ContentSearch -> + doSearchDefaultCmd flags settings offset model + + ContentOnlySearch -> + doSearchIndexCmd flags settings offset model + + +doSearchDefaultCmd : Flags -> UiSettings -> Int -> Model -> Cmd Msg +doSearchDefaultCmd flags settings offset model = let smask = Comp.SearchMenu.getItemSearch model.searchMenuModel @@ -98,6 +163,27 @@ doSearchCmd flags settings offset model = Api.itemSearch flags mask ItemSearchAddResp +doSearchIndexCmd : Flags -> UiSettings -> Int -> Model -> Cmd Msg +doSearchIndexCmd flags settings offset model = + case model.contentOnlySearch of + Just q -> + let + mask = + { query = q + , limit = settings.itemSearchPageSize + , offset = offset + } + in + if offset == 0 then + Api.itemIndexSearch flags mask ItemSearchResp + + else + Api.itemIndexSearch flags mask ItemSearchAddResp + + Nothing -> + Cmd.none + + resultsBelowLimit : UiSettings -> Model -> Bool resultsBelowLimit settings model = let diff --git a/modules/webapp/src/main/elm/Page/Home/Update.elm b/modules/webapp/src/main/elm/Page/Home/Update.elm index 2dfe2b01..ceeef3e1 100644 --- a/modules/webapp/src/main/elm/Page/Home/Update.elm +++ b/modules/webapp/src/main/elm/Page/Home/Update.elm @@ -1,6 +1,7 @@ module Page.Home.Update exposing (update) import Browser.Navigation as Nav +import Comp.FixedDropdown import Comp.ItemCardList import Comp.SearchMenu import Data.Flags exposing (Flags) @@ -9,6 +10,8 @@ import Page exposing (Page(..)) import Page.Home.Data exposing (..) import Throttle import Time +import Util.Html exposing (KeyCode(..)) +import Util.Maybe import Util.Update @@ -25,7 +28,10 @@ update key flags settings msg model = ResetSearch -> let nm = - { model | searchOffset = 0 } + { model + | searchOffset = 0 + , searchType = defaultSearchType flags + } in update key flags settings (SearchMenuMsg Comp.SearchMenu.ResetForm) nm @@ -150,10 +156,43 @@ update key flags settings msg model = SetBasicSearch str -> let - m = - SearchMenuMsg (Comp.SearchMenu.SetAllName str) + smMsg = + case model.searchType of + BasicSearch -> + SearchMenuMsg (Comp.SearchMenu.SetAllName str) + + ContentSearch -> + SearchMenuMsg (Comp.SearchMenu.SetFulltext str) + + ContentOnlySearch -> + SetContentOnly str in - update key flags settings m model + update key flags settings smMsg model + + SetContentOnly str -> + withSub + ( { model | contentOnlySearch = Util.Maybe.fromString str } + , Cmd.none + ) + + SearchTypeMsg lm -> + let + ( sm, mv ) = + Comp.FixedDropdown.update lm model.searchTypeDropdown + in + withSub + ( { model + | searchTypeDropdown = sm + , searchType = Maybe.withDefault model.searchType mv + } + , Cmd.none + ) + + KeyUpMsg (Just Enter) -> + update key flags settings DoSearch model + + KeyUpMsg _ -> + withSub ( model, Cmd.none ) @@ -196,6 +235,6 @@ withSub ( m, c ) = ( m , c , Throttle.ifNeeded - (Time.every 150 (\_ -> UpdateThrottle)) + (Time.every 500 (\_ -> UpdateThrottle)) m.throttle ) diff --git a/modules/webapp/src/main/elm/Page/Home/View.elm b/modules/webapp/src/main/elm/Page/Home/View.elm index 8d7628e9..2c569d1a 100644 --- a/modules/webapp/src/main/elm/Page/Home/View.elm +++ b/modules/webapp/src/main/elm/Page/Home/View.elm @@ -1,18 +1,21 @@ module Page.Home.View exposing (view) import Api.Model.ItemSearch +import Comp.FixedDropdown import Comp.ItemCardList import Comp.SearchMenu +import Data.Flags exposing (Flags) import Data.UiSettings exposing (UiSettings) import Html exposing (..) import Html.Attributes exposing (..) import Html.Events exposing (onClick, onInput) import Page exposing (Page(..)) import Page.Home.Data exposing (..) +import Util.Html -view : UiSettings -> Model -> Html Msg -view settings model = +view : Flags -> UiSettings -> Model -> Html Msg +view flags settings model = div [ class "home-page ui padded grid" ] [ div [ classList @@ -23,20 +26,19 @@ view settings model = ] ] [ div - [ class "ui top attached ablue-comp menu" + [ class "ui top attached ablue-comp icon menu" ] [ a - [ class "item" + [ class "borderless item" , href "#" , onClick ToggleSearchMenu , title "Hide menu" ] [ i [ class "ui angle down icon" ] [] - , text "Search" ] , div [ class "right floated menu" ] [ a - [ class "icon item" + [ class "borderless item" , onClick ResetSearch , title "Reset form" , href "#" @@ -44,7 +46,7 @@ view settings model = [ i [ class "undo icon" ] [] ] , a - [ class "icon item" + [ class "borderless item" , onClick DoSearch , title "Run search query" , href "" @@ -61,7 +63,7 @@ view settings model = ] ] , div [ class "ui attached fluid segment" ] - [ Html.map SearchMenuMsg (Comp.SearchMenu.view settings model.searchMenuModel) + [ Html.map SearchMenuMsg (Comp.SearchMenu.view flags settings model.searchMenuModel) ] ] , div @@ -73,49 +75,7 @@ view settings model = , ( "item-card-list", True ) ] ] - [ div - [ classList - [ ( "invisible hidden", not model.menuCollapsed ) - , ( "ui menu container", True ) - ] - ] - [ a - [ class "item" - , onClick ToggleSearchMenu - , href "#" - , title "Open search menu" - ] - [ i [ class "angle left icon" ] [] - , i [ class "icons" ] - [ i [ class "grey bars icon" ] [] - , i [ class "bottom left corner search icon" ] [] - , if hasMoreSearch model then - i [ class "top right blue corner circle icon" ] [] - - else - span [ class "hidden invisible" ] [] - ] - ] - , div [ class "ui category search item" ] - [ div [ class "ui transparent icon input" ] - [ input - [ type_ "text" - , placeholder "Basic search…" - , onInput SetBasicSearch - , Maybe.map value model.searchMenuModel.allNameModel - |> Maybe.withDefault (value "") - ] - [] - , i - [ classList - [ ( "search link icon", not model.searchInProgress ) - , ( "loading spinner icon", model.searchInProgress ) - ] - ] - [] - ] - ] - ] + [ viewSearchBar flags model , case model.viewMode of Listing -> Html.map ItemCardListMsg @@ -157,6 +117,92 @@ view settings model = ] +viewSearchBar : Flags -> Model -> Html Msg +viewSearchBar flags model = + let + searchTypeItem = + Comp.FixedDropdown.Item + model.searchType + (searchTypeString model.searchType) + + searchInput = + case model.searchType of + BasicSearch -> + model.searchMenuModel.allNameModel + + ContentSearch -> + model.searchMenuModel.fulltextModel + + ContentOnlySearch -> + model.contentOnlySearch + + searchTypeClass = + if flags.config.fullTextSearchEnabled then + "compact" + + else + "hidden invisible" + in + div + [ classList + [ ( "invisible hidden", not model.menuCollapsed ) + , ( "ui secondary stackable menu container", True ) + ] + ] + [ a + [ class "item" + , onClick ToggleSearchMenu + , href "#" + , if model.searchType == ContentOnlySearch then + title "Search menu disabled" + + else + title "Open search menu" + ] + [ i [ class "angle left icon" ] [] + , i [ class "icons" ] + [ i [ class "grey bars icon" ] [] + , i [ class "bottom left corner search icon" ] [] + , if model.searchType == ContentOnlySearch then + i [ class "top right red corner delete icon" ] [] + + else if hasMoreSearch model then + i [ class "top right blue corner circle icon" ] [] + + else + span [ class "hidden invisible" ] [] + ] + ] + , div [ class "item" ] + [ div [ class "ui left icon right action input" ] + [ i + [ classList + [ ( "search link icon", not model.searchInProgress ) + , ( "loading spinner icon", model.searchInProgress ) + ] + , href "#" + , onClick DoSearch + ] + [] + , input + [ type_ "text" + , placeholder "Quick Search …" + , onInput SetBasicSearch + , Util.Html.onKeyUpCode KeyUpMsg + , Maybe.map value searchInput + |> Maybe.withDefault (value "") + ] + [] + , Html.map SearchTypeMsg + (Comp.FixedDropdown.viewStyled searchTypeClass + (Just searchTypeItem) + model.searchTypeDropdown + ) + ] + ] + ] + + hasMoreSearch : Model -> Bool hasMoreSearch model = let @@ -164,6 +210,14 @@ hasMoreSearch model = Comp.SearchMenu.getItemSearch model.searchMenuModel is_ = - { is | allNames = Nothing } + case model.searchType of + BasicSearch -> + { is | allNames = Nothing } + + ContentSearch -> + { is | fullText = Nothing } + + ContentOnlySearch -> + Api.Model.ItemSearch.empty in is_ /= Api.Model.ItemSearch.empty diff --git a/modules/webapp/src/main/elm/Util/Html.elm b/modules/webapp/src/main/elm/Util/Html.elm index a25e7ad0..7649fa46 100644 --- a/modules/webapp/src/main/elm/Util/Html.elm +++ b/modules/webapp/src/main/elm/Util/Html.elm @@ -9,6 +9,7 @@ module Util.Html exposing , onDragOver , onDropFiles , onKeyUp + , onKeyUpCode ) import File exposing (File) @@ -76,6 +77,11 @@ onKeyUp tagger = on "keyup" (D.map tagger keyCode) +onKeyUpCode : (Maybe KeyCode -> msg) -> Attribute msg +onKeyUpCode tagger = + onKeyUp (intToKeyCode >> tagger) + + onClickk : msg -> Attribute msg onClickk msg = Html.Events.preventDefaultOn "click" (D.map alwaysPreventDefault (D.succeed msg)) diff --git a/modules/webapp/src/main/webjar/docspell.css b/modules/webapp/src/main/webjar/docspell.css index a96aadf8..c09ba8cb 100644 --- a/modules/webapp/src/main/webjar/docspell.css +++ b/modules/webapp/src/main/webjar/docspell.css @@ -88,6 +88,22 @@ right: -8px; } +.default-layout .ui.cards .ui.card .content.search-highlight { + background: rgba(246, 255, 158, 0.1); + font-size: smaller; + max-height: 25em; + overflow: auto; +} +.default-layout .ui.cards .ui.card .content.search-highlight .ui.list .item .content .header { +} +.default-layout .ui.cards .ui.card .content.search-highlight .ui.list .item .content .description { + color: rgba(0,0,0,.6); + margin-left: 0.75em; +} +.default-layout .ui.cards .ui.card .content.search-highlight .ui.list .item .content .description strong > em { + background: rgba(220, 255, 71, 0.6); +} + .markdown-preview { overflow: auto; max-height: 300px; diff --git a/modules/webapp/src/main/webjar/docspell.js b/modules/webapp/src/main/webjar/docspell.js index 229eecbb..6c1dacc5 100644 --- a/modules/webapp/src/main/webjar/docspell.js +++ b/modules/webapp/src/main/webjar/docspell.js @@ -32,20 +32,20 @@ elmApp.ports.setAllProgress.subscribe(function(input) { }, 100); }); -elmApp.ports.scrollToElem.subscribe(function(id) { - if (id && id != "") { - window.setTimeout(function() { - var el = document.getElementById(id); - if (el) { - if (el["scrollIntoViewIfNeeded"]) { - el.scrollIntoViewIfNeeded(); - } else { - el.scrollIntoView(); - } - } - }, 20); - } -}); +// elmApp.ports.scrollToElem.subscribe(function(id) { +// if (id && id != "") { +// window.setTimeout(function() { +// var el = document.getElementById(id); +// if (el) { +// if (el["scrollIntoViewIfNeeded"]) { +// el.scrollIntoViewIfNeeded(); +// } else { +// el.scrollIntoView(); +// } +// } +// }, 20); +// } +// }); elmApp.ports.saveUiSettings.subscribe(function(args) { if (Array.isArray(args) && args.length == 2) { diff --git a/nix/module-server.nix b/nix/module-server.nix index c1e7dca0..6c40a7ae 100644 --- a/nix/module-server.nix +++ b/nix/module-server.nix @@ -13,7 +13,7 @@ let app-name = "Docspell"; app-id = "rest1"; base-url = "http://localhost:7880"; - max-item-page-size = 500; + max-item-page-size = 200; bind = { address = "localhost"; port = 7880; diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 9b35e722..240cc4fd 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -159,16 +159,22 @@ object Dependencies { "co.fs2" %% "fs2-io" % Fs2Version ) - val http4s = Seq( - "org.http4s" %% "http4s-blaze-server" % Http4sVersion, - "org.http4s" %% "http4s-circe" % Http4sVersion, - "org.http4s" %% "http4s-dsl" % Http4sVersion - ) - val http4sClient = Seq( "org.http4s" %% "http4s-blaze-client" % Http4sVersion ) + val http4sCirce = Seq( + "org.http4s" %% "http4s-circe" % Http4sVersion + ) + + val http4sDsl = Seq( + "org.http4s" %% "http4s-dsl" % Http4sVersion + ) + + val http4sServer = Seq( + "org.http4s" %% "http4s-blaze-server" % Http4sVersion + ) + val circe = Seq( "io.circe" %% "circe-generic" % CirceVersion, "io.circe" %% "circe-parser" % CirceVersion