Fixup for deleting items

First, when checking for the existence of a file, deleted items are not
considered.

The handling of fulltext search has been changed: deleted items are
removed from fulltext index and are re-added when they are restored.
The fulltext index currently doesn't hold the item state and it would
mean much more work to introduce it into the index (or, worse, to
reprocess the results from the index). Thus, deleted items can only be
searched using database queries. It is probably a very rare use case
when fulltext search should be applied to deleted items. They have
been deleted for a reason and the most likely case is that they are
simply removed.

Refs: #347
This commit is contained in:
eikek 2021-08-15 15:40:33 +02:00
parent 50387cd378
commit 14e4a8f792
13 changed files with 137 additions and 58 deletions

View File

@ -11,6 +11,7 @@ import scala.concurrent.ExecutionContext
import cats.effect._ import cats.effect._
import docspell.backend.auth.Login import docspell.backend.auth.Login
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._ import docspell.backend.ops._
import docspell.backend.signup.OSignup import docspell.backend.signup.OSignup
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
@ -69,7 +70,8 @@ object BackendApp {
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl) uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
nodeImpl <- ONode(store) nodeImpl <- ONode(store)
jobImpl <- OJob(store, joexImpl) jobImpl <- OJob(store, joexImpl)
itemImpl <- OItem(store, ftsClient, queue, joexImpl) createIndex <- CreateIndex.resource(ftsClient, store)
itemImpl <- OItem(store, ftsClient, createIndex, queue, joexImpl)
itemSearchImpl <- OItemSearch(store) itemSearchImpl <- OItemSearch(store)
fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl) fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
javaEmil = javaEmil =

View File

@ -0,0 +1,71 @@
/*
* Copyright 2020 Docspell Contributors
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
package docspell.backend.fulltext
import cats.data.NonEmptyList
import cats.effect._
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.ftsclient.TextData
import docspell.store.Store
import docspell.store.queries.QAttachment
import docspell.store.queries.QItem
/** Operations for (re-)creating entries in the fulltext index from
  * data stored in the database. Implementations stream attachment and
  * item data out of the store and push it into the index via a
  * `FtsClient`.
  */
trait CreateIndex[F[_]] {

  /** Low-level function to re-index data. It is not submitted as a job,
    * but invoked on the current machine.
    *
    * @param logger     logger used to report indexing progress
    * @param collective restrict to a single collective, or `None` for all
    * @param itemIds    restrict to the given items, or `None` for all
    * @param chunkSize  number of rows fetched per database chunk
    */
  def reIndexData(
      logger: Logger[F],
      collective: Option[Ident],
      itemIds: Option[NonEmptyList[Ident]],
      chunkSize: Int
  ): F[Unit]
}
object CreateIndex {

  /** Wrap a [[CreateIndex]] in a `Resource` for uniform composition
    * with other resource-managed components.
    */
  def resource[F[_]](fts: FtsClient[F], store: Store[F]): Resource[F, CreateIndex[F]] =
    Resource.pure(apply(fts, store))

  /** Build a [[CreateIndex]] backed by the given fulltext client and store. */
  def apply[F[_]](fts: FtsClient[F], store: Store[F]): CreateIndex[F] =
    new CreateIndex[F] {
      def reIndexData(
          logger: Logger[F],
          collective: Option[Ident],
          itemIds: Option[NonEmptyList[Ident]],
          chunkSize: Int
      ): F[Unit] = {
        // Stream attachment meta+name rows and turn each into index text data.
        val attachTexts = store
          .transact(QAttachment.allAttachmentMetaAndName(collective, itemIds, chunkSize))
          .map { row =>
            TextData.attachment(
              row.item,
              row.id,
              row.collective,
              row.folder,
              row.lang,
              row.name,
              row.content
            )
          }

        // Stream item name/notes rows and turn each into index text data.
        val itemTexts = store
          .transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
          .map { row =>
            TextData.item(row.id, row.collective, row.folder, Option(row.name), row.notes)
          }

        // Index attachment data first, then item data, as one stream.
        fts.indexData(logger, attachTexts ++ itemTexts)
      }
    }
}

View File

@ -11,6 +11,7 @@ import cats.effect.{Async, Resource}
import cats.implicits._ import cats.implicits._
import docspell.backend.JobFactory import docspell.backend.JobFactory
import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.store.queries.{QAttachment, QItem, QMoveAttachment} import docspell.store.queries.{QAttachment, QItem, QMoveAttachment}
@ -212,6 +213,7 @@ object OItem {
def apply[F[_]: Async]( def apply[F[_]: Async](
store: Store[F], store: Store[F],
fts: FtsClient[F], fts: FtsClient[F],
createIndex: CreateIndex[F],
queue: JobQueue[F], queue: JobQueue[F],
joex: OJoex[F] joex: OJoex[F]
): Resource[F, OItem[F]] = ): Resource[F, OItem[F]] =
@ -588,12 +590,13 @@ object OItem {
items: NonEmptyList[Ident], items: NonEmptyList[Ident],
collective: Ident collective: Ident
): F[UpdateResult] = ): F[UpdateResult] =
UpdateResult.fromUpdate( UpdateResult.fromUpdate(for {
store n <- store
.transact( .transact(
RItem.restoreStateForCollective(items, ItemState.Created, collective) RItem.restoreStateForCollective(items, ItemState.Created, collective)
) )
) _ <- createIndex.reIndexData(logger, collective.some, items.some, 10)
} yield n)
def setItemDate( def setItemDate(
items: NonEmptyList[Ident], items: NonEmptyList[Ident],
@ -628,7 +631,10 @@ object OItem {
} yield n } yield n
def setDeletedState(items: NonEmptyList[Ident], collective: Ident): F[Int] = def setDeletedState(items: NonEmptyList[Ident], collective: Ident): F[Int] =
store.transact(RItem.setState(items, collective, ItemState.Deleted)) for {
n <- store.transact(RItem.setState(items, collective, ItemState.Deleted))
_ <- items.traverse(id => fts.removeItem(logger, id))
} yield n
def getProposals(item: Ident, collective: Ident): F[MetaProposalList] = def getProposals(item: Ident, collective: Ident): F[MetaProposalList] =
store.transact(QAttachment.getMetaProposals(item, collective)) store.transact(QAttachment.getMetaProposals(item, collective))

View File

@ -231,13 +231,16 @@ object OSimpleSearch {
case Some(ftq) if settings.useFTS => case Some(ftq) if settings.useFTS =>
if (q.isEmpty) { if (q.isEmpty) {
logger.debug(s"Using index only search: $fulltextQuery") logger.debug(s"Using index only search: $fulltextQuery")
fts if (settings.searchMode == SearchMode.Trashed)
.findIndexOnly(settings.maxNoteLen)( Items.ftsItemsFull(true)(Vector.empty).pure[F]
OFulltext.FtsInput(ftq), else
q.fix.account, fts
settings.batch .findIndexOnly(settings.maxNoteLen)(
) OFulltext.FtsInput(ftq),
.map(Items.ftsItemsFull(true)) q.fix.account,
settings.batch
)
.map(Items.ftsItemsFull(true))
} else if (settings.resolveDetails) { } else if (settings.resolveDetails) {
logger.debug( logger.debug(
s"Using index+sql search with tags: $validItemQuery / $fulltextQuery" s"Using index+sql search with tags: $validItemQuery / $fulltextQuery"

View File

@ -72,4 +72,5 @@ object TextData {
notes: Option[String] notes: Option[String]
): TextData = ): TextData =
Item(item, collective, folder, name, notes) Item(item, collective, folder, name, notes)
} }

View File

@ -13,6 +13,7 @@ import cats.implicits._
import fs2.concurrent.SignallingRef import fs2.concurrent.SignallingRef
import docspell.analysis.TextAnalyser import docspell.analysis.TextAnalyser
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._ import docspell.backend.ops._
import docspell.common._ import docspell.common._
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
@ -116,7 +117,8 @@ object JoexAppImpl {
joex <- OJoex(client, store) joex <- OJoex(client, store)
upload <- OUpload(store, queue, cfg.files, joex) upload <- OUpload(store, queue, cfg.files, joex)
fts <- createFtsClient(cfg)(httpClient) fts <- createFtsClient(cfg)(httpClient)
itemOps <- OItem(store, fts, queue, joex) createIndex <- CreateIndex.resource(fts, store)
itemOps <- OItem(store, fts, createIndex, queue, joex)
itemSearchOps <- OItemSearch(store) itemSearchOps <- OItemSearch(store)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig) analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store) regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
@ -155,14 +157,14 @@ object JoexAppImpl {
.withTask( .withTask(
JobTask.json( JobTask.json(
MigrationTask.taskName, MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, fts), MigrationTask[F](cfg.fullTextSearch, fts, createIndex),
MigrationTask.onCancel[F] MigrationTask.onCancel[F]
) )
) )
.withTask( .withTask(
JobTask.json( JobTask.json(
ReIndexTask.taskName, ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, fts), ReIndexTask[F](cfg.fullTextSearch, fts, createIndex),
ReIndexTask.onCancel[F] ReIndexTask.onCancel[F]
) )
) )

View File

@ -6,6 +6,7 @@
package docspell.joex.fts package docspell.joex.fts
import docspell.backend.fulltext.CreateIndex
import docspell.common.Logger import docspell.common.Logger
import docspell.ftsclient.FtsClient import docspell.ftsclient.FtsClient
import docspell.joex.Config import docspell.joex.Config
@ -15,6 +16,7 @@ import docspell.store.Store
case class FtsContext[F[_]]( case class FtsContext[F[_]](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
store: Store[F], store: Store[F],
fulltext: CreateIndex[F],
fts: FtsClient[F], fts: FtsClient[F],
logger: Logger[F] logger: Logger[F]
) )
@ -24,7 +26,8 @@ object FtsContext {
def apply[F[_]]( def apply[F[_]](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F], fts: FtsClient[F],
fulltext: CreateIndex[F],
ctx: Context[F, _] ctx: Context[F, _]
): FtsContext[F] = ): FtsContext[F] =
FtsContext(cfg, ctx.store, fts, ctx.logger) FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger)
} }

View File

@ -10,11 +10,11 @@ import cats._
import cats.data.{Kleisli, NonEmptyList} import cats.data.{Kleisli, NonEmptyList}
import cats.implicits._ import cats.implicits._
import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler.Context import docspell.joex.scheduler.Context
import docspell.store.queries.{QAttachment, QItem}
object FtsWork { object FtsWork {
import syntax._ import syntax._
@ -88,36 +88,8 @@ object FtsWork {
log[F](_.info("Inserting all data to index")) ++ FtsWork log[F](_.info("Inserting all data to index")) ++ FtsWork
.all( .all(
FtsWork(ctx => FtsWork(ctx =>
ctx.fts.indexData( ctx.fulltext
ctx.logger, .reIndexData(ctx.logger, coll, None, ctx.cfg.migration.indexAllChunk)
ctx.store
.transact(
QAttachment
.allAttachmentMetaAndName(coll, ctx.cfg.migration.indexAllChunk)
)
.map(caa =>
TextData
.attachment(
caa.item,
caa.id,
caa.collective,
caa.folder,
caa.lang,
caa.name,
caa.content
)
)
)
),
FtsWork(ctx =>
ctx.fts.indexData(
ctx.logger,
ctx.store
.transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5))
.map(nn =>
TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
)
)
) )
) )
@ -133,9 +105,10 @@ object FtsWork {
def forContext( def forContext(
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F] fts: FtsClient[F],
fulltext: CreateIndex[F]
): Kleisli[F, Context[F, _], Unit] = ): Kleisli[F, Context[F, _], Unit] =
mt.local(ctx => FtsContext(cfg, fts, ctx)) mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx))
} }
} }
} }

View File

@ -11,6 +11,7 @@ import cats.effect._
import cats.implicits._ import cats.implicits._
import cats.{Applicative, FlatMap, Traverse} import cats.{Applicative, FlatMap, Traverse}
import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
@ -38,9 +39,10 @@ object Migration {
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F], fts: FtsClient[F],
store: Store[F], store: Store[F],
createIndex: CreateIndex[F],
logger: Logger[F] logger: Logger[F]
): Kleisli[F, List[Migration[F]], Unit] = { ): Kleisli[F, List[Migration[F]], Unit] = {
val ctx = FtsContext(cfg, store, fts, logger) val ctx = FtsContext(cfg, store, createIndex, fts, logger)
Kleisli { migs => Kleisli { migs =>
if (migs.isEmpty) logger.info("No fulltext search migrations to run.") if (migs.isEmpty) logger.info("No fulltext search migrations to run.")
else Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ()) else Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ())

View File

@ -9,6 +9,7 @@ package docspell.joex.fts
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
@ -20,7 +21,8 @@ object MigrationTask {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F] fts: FtsClient[F],
createIndex: CreateIndex[F]
): Task[F, Unit, Unit] = ): Task[F, Unit, Unit] =
Task Task
.log[F, Unit](_.info(s"Running full-text-index migrations now")) .log[F, Unit](_.info(s"Running full-text-index migrations now"))
@ -28,7 +30,7 @@ object MigrationTask {
Task(ctx => Task(ctx =>
for { for {
migs <- migrationTasks[F](fts) migs <- migrationTasks[F](fts)
res <- Migration[F](cfg, fts, ctx.store, ctx.logger).run(migs) res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs)
} yield res } yield res
) )
) )

View File

@ -8,6 +8,7 @@ package docspell.joex.fts
import cats.effect._ import cats.effect._
import docspell.backend.fulltext.CreateIndex
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.Config import docspell.joex.Config
@ -22,12 +23,15 @@ object ReIndexTask {
def apply[F[_]: Async]( def apply[F[_]: Async](
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F] fts: FtsClient[F],
fulltext: CreateIndex[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
Task Task
.log[F, Args](_.info(s"Running full-text re-index now")) .log[F, Args](_.info(s"Running full-text re-index now"))
.flatMap(_ => .flatMap(_ =>
Task(ctx => clearData[F](ctx.args.collective).forContext(cfg, fts).run(ctx)) Task(ctx =>
clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx)
)
) )
def onCancel[F[_]]: Task[F, Args, Unit] = def onCancel[F[_]]: Task[F, Args, Unit] =

View File

@ -7,6 +7,7 @@
package docspell.store.queries package docspell.store.queries
import cats.data.OptionT import cats.data.OptionT
import cats.data.{NonEmptyList => Nel}
import cats.effect.Sync import cats.effect.Sync
import cats.implicits._ import cats.implicits._
import fs2.Stream import fs2.Stream
@ -174,6 +175,7 @@ object QAttachment {
) )
def allAttachmentMetaAndName( def allAttachmentMetaAndName(
coll: Option[Ident], coll: Option[Ident],
itemIds: Option[Nel[Ident]],
chunkSize: Int chunkSize: Int
): Stream[ConnectionIO, ContentAndName] = ): Stream[ConnectionIO, ContentAndName] =
Select( Select(
@ -190,8 +192,11 @@ object QAttachment {
.innerJoin(am, am.id === a.id) .innerJoin(am, am.id === a.id)
.innerJoin(item, item.id === a.itemId) .innerJoin(item, item.id === a.itemId)
.innerJoin(c, c.id === item.cid) .innerJoin(c, c.id === item.cid)
).where(coll.map(cid => item.cid === cid)) ).where(
.build item.state.in(ItemState.validStates) &&?
itemIds.map(ids => item.id.in(ids)) &&?
coll.map(cid => item.cid === cid)
).build
.query[ContentAndName] .query[ContentAndName]
.streamWithChunkSize(chunkSize) .streamWithChunkSize(chunkSize)

View File

@ -502,6 +502,7 @@ object QItem {
.leftJoin(m3, m3.id === r.fileId), .leftJoin(m3, m3.id === r.fileId),
where( where(
i.cid === collective && i.cid === collective &&
i.state.in(ItemState.validStates) &&
Condition.Or(fms.map(m => m.checksum === checksum)) &&? Condition.Or(fms.map(m => m.checksum === checksum)) &&?
Nel Nel
.fromList(excludeFileMeta.toList) .fromList(excludeFileMeta.toList)
@ -519,6 +520,7 @@ object QItem {
) )
def allNameAndNotes( def allNameAndNotes(
coll: Option[Ident], coll: Option[Ident],
itemIds: Option[Nel[Ident]],
chunkSize: Int chunkSize: Int
): Stream[ConnectionIO, NameAndNotes] = { ): Stream[ConnectionIO, NameAndNotes] = {
val i = RItem.as("i") val i = RItem.as("i")
@ -526,8 +528,11 @@ object QItem {
Select( Select(
select(i.id, i.cid, i.folder, i.name, i.notes), select(i.id, i.cid, i.folder, i.name, i.notes),
from(i) from(i)
).where(coll.map(cid => i.cid === cid)) ).where(
.build i.state.in(ItemState.validStates) &&?
itemIds.map(ids => i.id.in(ids)) &&?
coll.map(cid => i.cid === cid)
).build
.query[NameAndNotes] .query[NameAndNotes]
.streamWithChunkSize(chunkSize) .streamWithChunkSize(chunkSize)
} }