From 14e4a8f7929732a2da55c7a941d6132d95af2c26 Mon Sep 17 00:00:00 2001 From: eikek Date: Sun, 15 Aug 2021 15:40:33 +0200 Subject: [PATCH] Fixup for deleting items First, when checking for existence of a file, deleted items are not conisdered. The working with fulltext search has been changed: deleted items are removed from fulltext index and are re-added when they are restored. The fulltext index currently doesn't hold the item state and it would mean much more work to introduce it into the index (or, worse, to reprocess the results from the index). Thus, deleted items can only be searched using database queries. It is probably a very rare use case when fulltext search should be applied to deleted items. They have been deleted for a reason and the most likely case is that they are simply removed. Refs: #347 --- .../scala/docspell/backend/BackendApp.scala | 4 +- .../backend/fulltext/CreateIndex.scala | 71 +++++++++++++++++++ .../scala/docspell/backend/ops/OItem.scala | 14 ++-- .../docspell/backend/ops/OSimpleSearch.scala | 17 +++-- .../scala/docspell/ftsclient/TextData.scala | 1 + .../scala/docspell/joex/JoexAppImpl.scala | 8 ++- .../scala/docspell/joex/fts/FtsContext.scala | 5 +- .../scala/docspell/joex/fts/FtsWork.scala | 39 ++-------- .../scala/docspell/joex/fts/Migration.scala | 4 +- .../docspell/joex/fts/MigrationTask.scala | 6 +- .../scala/docspell/joex/fts/ReIndexTask.scala | 8 ++- .../docspell/store/queries/QAttachment.scala | 9 ++- .../scala/docspell/store/queries/QItem.scala | 9 ++- 13 files changed, 137 insertions(+), 58 deletions(-) create mode 100644 modules/backend/src/main/scala/docspell/backend/fulltext/CreateIndex.scala diff --git a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala index 59d27658..b94fa742 100644 --- a/modules/backend/src/main/scala/docspell/backend/BackendApp.scala +++ b/modules/backend/src/main/scala/docspell/backend/BackendApp.scala @@ -11,6 +11,7 @@ import scala.concurrent.ExecutionContext import cats.effect._ import docspell.backend.auth.Login +import docspell.backend.fulltext.CreateIndex import docspell.backend.ops._ import docspell.backend.signup.OSignup import docspell.ftsclient.FtsClient @@ -69,7 +70,8 @@ object BackendApp { uploadImpl <- OUpload(store, queue, cfg.files, joexImpl) nodeImpl <- ONode(store) jobImpl <- OJob(store, joexImpl) - itemImpl <- OItem(store, ftsClient, queue, joexImpl) + createIndex <- CreateIndex.resource(ftsClient, store) + itemImpl <- OItem(store, ftsClient, createIndex, queue, joexImpl) itemSearchImpl <- OItemSearch(store) fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl) javaEmil = diff --git a/modules/backend/src/main/scala/docspell/backend/fulltext/CreateIndex.scala b/modules/backend/src/main/scala/docspell/backend/fulltext/CreateIndex.scala new file mode 100644 index 00000000..e0865cf1 --- /dev/null +++ b/modules/backend/src/main/scala/docspell/backend/fulltext/CreateIndex.scala @@ -0,0 +1,71 @@ +/* + * Copyright 2020 Docspell Contributors + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +package docspell.backend.fulltext + +import cats.data.NonEmptyList +import cats.effect._ + +import docspell.common._ +import docspell.ftsclient.FtsClient +import docspell.ftsclient.TextData +import docspell.store.Store +import docspell.store.queries.QAttachment +import docspell.store.queries.QItem + +trait CreateIndex[F[_]] { + + /** Low-level function to re-index data. It is not submitted as a job, + * but invoked on the current machine. + */ + def reIndexData( + logger: Logger[F], + collective: Option[Ident], + itemIds: Option[NonEmptyList[Ident]], + chunkSize: Int + ): F[Unit] + +} + +object CreateIndex { + + def resource[F[_]](fts: FtsClient[F], store: Store[F]): Resource[F, CreateIndex[F]] = + Resource.pure(apply(fts, store)) + + def apply[F[_]](fts: FtsClient[F], store: Store[F]): CreateIndex[F] = + new CreateIndex[F] { + def reIndexData( + logger: Logger[F], + collective: Option[Ident], + itemIds: Option[NonEmptyList[Ident]], + chunkSize: Int + ): F[Unit] = { + val attachs = store + .transact(QAttachment.allAttachmentMetaAndName(collective, itemIds, chunkSize)) + .map(caa => + TextData + .attachment( + caa.item, + caa.id, + caa.collective, + caa.folder, + caa.lang, + caa.name, + caa.content + ) + ) + + val items = store + .transact(QItem.allNameAndNotes(collective, itemIds, chunkSize)) + .map(nn => + TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes) + ) + + fts.indexData(logger, attachs ++ items) + } + } + +} diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala index cc2ebfed..eb82a641 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala @@ -11,6 +11,7 @@ import cats.effect.{Async, Resource} import cats.implicits._ import docspell.backend.JobFactory +import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient.FtsClient import docspell.store.queries.{QAttachment, QItem, QMoveAttachment} @@ -212,6 +213,7 @@ object OItem { def apply[F[_]: Async]( store: Store[F], fts: FtsClient[F], + createIndex: CreateIndex[F], queue: JobQueue[F], joex: OJoex[F] ): Resource[F, OItem[F]] = @@ -588,12 +590,13 @@ object OItem { items: NonEmptyList[Ident], collective: Ident ): F[UpdateResult] = - UpdateResult.fromUpdate( - store + UpdateResult.fromUpdate(for { + n <- store .transact( RItem.restoreStateForCollective(items, ItemState.Created, collective) ) - ) + _ <- createIndex.reIndexData(logger, collective.some, items.some, 10) + } yield n) def setItemDate( items: NonEmptyList[Ident], @@ -628,7 +631,10 @@ object OItem { } yield n def setDeletedState(items: NonEmptyList[Ident], collective: Ident): F[Int] = - store.transact(RItem.setState(items, collective, ItemState.Deleted)) + for { + n <- store.transact(RItem.setState(items, collective, ItemState.Deleted)) + _ <- items.traverse(id => fts.removeItem(logger, id)) + } yield n def getProposals(item: Ident, collective: Ident): F[MetaProposalList] = store.transact(QAttachment.getMetaProposals(item, collective)) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OSimpleSearch.scala b/modules/backend/src/main/scala/docspell/backend/ops/OSimpleSearch.scala index 2ae4cd14..a5fab21e 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OSimpleSearch.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OSimpleSearch.scala @@ -231,13 +231,16 @@ object OSimpleSearch { case Some(ftq) if settings.useFTS => if (q.isEmpty) { logger.debug(s"Using index only search: $fulltextQuery") - fts - .findIndexOnly(settings.maxNoteLen)( - OFulltext.FtsInput(ftq), - q.fix.account, - settings.batch - ) - .map(Items.ftsItemsFull(true)) + if (settings.searchMode == SearchMode.Trashed) + Items.ftsItemsFull(true)(Vector.empty).pure[F] + else + fts + .findIndexOnly(settings.maxNoteLen)( + OFulltext.FtsInput(ftq), + q.fix.account, + settings.batch + ) + .map(Items.ftsItemsFull(true)) } else if (settings.resolveDetails) { logger.debug( s"Using index+sql search with tags: $validItemQuery / $fulltextQuery" diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala index cb92e095..16de2515 100644 --- a/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala @@ -72,4 +72,5 @@ object TextData { notes: Option[String] ): TextData = Item(item, collective, folder, name, notes) + } diff --git a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala index 37113d5b..e03a05aa 100644 --- a/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala +++ b/modules/joex/src/main/scala/docspell/joex/JoexAppImpl.scala @@ -13,6 +13,7 @@ import cats.implicits._ import fs2.concurrent.SignallingRef import docspell.analysis.TextAnalyser +import docspell.backend.fulltext.CreateIndex import docspell.backend.ops._ import docspell.common._ import docspell.ftsclient.FtsClient @@ -116,7 +117,8 @@ object JoexAppImpl { joex <- OJoex(client, store) upload <- OUpload(store, queue, cfg.files, joex) fts <- createFtsClient(cfg)(httpClient) - itemOps <- OItem(store, fts, queue, joex) + createIndex <- CreateIndex.resource(fts, store) + itemOps <- OItem(store, fts, createIndex, queue, joex) itemSearchOps <- OItemSearch(store) analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig) regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store) @@ -155,14 +157,14 @@ object JoexAppImpl { .withTask( JobTask.json( MigrationTask.taskName, - MigrationTask[F](cfg.fullTextSearch, fts), + MigrationTask[F](cfg.fullTextSearch, fts, createIndex), MigrationTask.onCancel[F] ) ) .withTask( JobTask.json( ReIndexTask.taskName, - ReIndexTask[F](cfg.fullTextSearch, fts), + ReIndexTask[F](cfg.fullTextSearch, fts, createIndex), ReIndexTask.onCancel[F] ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala index 97012dfc..201dbaf7 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsContext.scala @@ -6,6 +6,7 @@ package docspell.joex.fts +import docspell.backend.fulltext.CreateIndex import docspell.common.Logger import docspell.ftsclient.FtsClient import docspell.joex.Config @@ -15,6 +16,7 @@ import docspell.store.Store case class FtsContext[F[_]]( cfg: Config.FullTextSearch, store: Store[F], + fulltext: CreateIndex[F], fts: FtsClient[F], logger: Logger[F] ) @@ -24,7 +26,8 @@ object FtsContext { def apply[F[_]]( cfg: Config.FullTextSearch, fts: FtsClient[F], + fulltext: CreateIndex[F], ctx: Context[F, _] ): FtsContext[F] = - FtsContext(cfg, ctx.store, fts, ctx.logger) + FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger) } diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala index da7fad95..9ce88c28 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala @@ -10,11 +10,11 @@ import cats._ import cats.data.{Kleisli, NonEmptyList} import cats.implicits._ +import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config import docspell.joex.scheduler.Context -import docspell.store.queries.{QAttachment, QItem} object FtsWork { import syntax._ @@ -88,36 +88,8 @@ object FtsWork { log[F](_.info("Inserting all data to index")) ++ FtsWork .all( FtsWork(ctx => - ctx.fts.indexData( - ctx.logger, - ctx.store - .transact( - QAttachment - .allAttachmentMetaAndName(coll, ctx.cfg.migration.indexAllChunk) - ) - .map(caa => - TextData - .attachment( - caa.item, - caa.id, - caa.collective, - caa.folder, - caa.lang, - caa.name, - caa.content - ) - ) - ) - ), - FtsWork(ctx => - ctx.fts.indexData( - ctx.logger, - ctx.store - .transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5)) - .map(nn => - TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes) - ) - ) + ctx.fulltext + .reIndexData(ctx.logger, coll, None, ctx.cfg.migration.indexAllChunk) ) ) @@ -133,9 +105,10 @@ object FtsWork { def forContext( cfg: Config.FullTextSearch, - fts: FtsClient[F] + fts: FtsClient[F], + fulltext: CreateIndex[F] ): Kleisli[F, Context[F, _], Unit] = - mt.local(ctx => FtsContext(cfg, fts, ctx)) + mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx)) } } } diff --git a/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala b/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala index a3c64382..d23ab072 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/Migration.scala @@ -11,6 +11,7 @@ import cats.effect._ import cats.implicits._ import cats.{Applicative, FlatMap, Traverse} +import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config @@ -38,9 +39,10 @@ object Migration { cfg: Config.FullTextSearch, fts: FtsClient[F], store: Store[F], + createIndex: CreateIndex[F], logger: Logger[F] ): Kleisli[F, List[Migration[F]], Unit] = { - val ctx = FtsContext(cfg, store, fts, logger) + val ctx = FtsContext(cfg, store, createIndex, fts, logger) Kleisli { migs => if (migs.isEmpty) logger.info("No fulltext search migrations to run.") else Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ()) diff --git a/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala index c303118f..c56e57e2 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/MigrationTask.scala @@ -9,6 +9,7 @@ package docspell.joex.fts import cats.effect._ import cats.implicits._ +import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config @@ -20,7 +21,8 @@ object MigrationTask { def apply[F[_]: Async]( cfg: Config.FullTextSearch, - fts: FtsClient[F] + fts: FtsClient[F], + createIndex: CreateIndex[F] ): Task[F, Unit, Unit] = Task .log[F, Unit](_.info(s"Running full-text-index migrations now")) @@ -28,7 +30,7 @@ object MigrationTask { Task(ctx => for { migs <- migrationTasks[F](fts) - res <- Migration[F](cfg, fts, ctx.store, ctx.logger).run(migs) + res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs) } yield res ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala index e5e7cf48..42a9e9ad 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala @@ -8,6 +8,7 @@ package docspell.joex.fts import cats.effect._ +import docspell.backend.fulltext.CreateIndex import docspell.common._ import docspell.ftsclient._ import docspell.joex.Config @@ -22,12 +23,15 @@ object ReIndexTask { def apply[F[_]: Async]( cfg: Config.FullTextSearch, - fts: FtsClient[F] + fts: FtsClient[F], + fulltext: CreateIndex[F] ): Task[F, Args, Unit] = Task .log[F, Args](_.info(s"Running full-text re-index now")) .flatMap(_ => - Task(ctx => clearData[F](ctx.args.collective).forContext(cfg, fts).run(ctx)) + Task(ctx => + clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx) + ) ) def onCancel[F[_]]: Task[F, Args, Unit] = diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index 2085b62e..81674e8c 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -7,6 +7,7 @@ package docspell.store.queries import cats.data.OptionT +import cats.data.{NonEmptyList => Nel} import cats.effect.Sync import cats.implicits._ import fs2.Stream @@ -174,6 +175,7 @@ object QAttachment { ) def allAttachmentMetaAndName( coll: Option[Ident], + itemIds: Option[Nel[Ident]], chunkSize: Int ): Stream[ConnectionIO, ContentAndName] = Select( @@ -190,8 +192,11 @@ object QAttachment { .innerJoin(am, am.id === a.id) .innerJoin(item, item.id === a.itemId) .innerJoin(c, c.id === item.cid) - ).where(coll.map(cid => item.cid === cid)) - .build + ).where( + item.state.in(ItemState.validStates) &&? + itemIds.map(ids => item.id.in(ids)) &&? + coll.map(cid => item.cid === cid) + ).build .query[ContentAndName] .streamWithChunkSize(chunkSize) diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index 0aefa0db..402c9612 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -502,6 +502,7 @@ object QItem { .leftJoin(m3, m3.id === r.fileId), where( i.cid === collective && + i.state.in(ItemState.validStates) && Condition.Or(fms.map(m => m.checksum === checksum)) &&? Nel .fromList(excludeFileMeta.toList) @@ -519,6 +520,7 @@ object QItem { ) def allNameAndNotes( coll: Option[Ident], + itemIds: Option[Nel[Ident]], chunkSize: Int ): Stream[ConnectionIO, NameAndNotes] = { val i = RItem.as("i") @@ -526,8 +528,11 @@ object QItem { Select( select(i.id, i.cid, i.folder, i.name, i.notes), from(i) - ).where(coll.map(cid => i.cid === cid)) - .build + ).where( + i.state.in(ItemState.validStates) &&? + itemIds.map(ids => i.id.in(ids)) &&? + coll.map(cid => i.cid === cid) + ).build .query[NameAndNotes] .streamWithChunkSize(chunkSize) }