Fixup for deleting items

First, when checking for existence of a file, deleted items are not
conisdered.

The working with fulltext search has been changed: deleted items are
removed from fulltext index and are re-added when they are restored.
The fulltext index currently doesn't hold the item state and it would
mean much more work to introduce it into the index (or, worse, to
reprocess the results from the index). Thus, deleted items can only be
searched using database queries. It is probably a very rare use case
when fulltext search should be applied to deleted items. They have
been deleted for a reason and the most likely case is that they are
simply removed.

Refs: #347
This commit is contained in:
eikek 2021-08-15 15:40:33 +02:00
parent 50387cd378
commit 14e4a8f792
13 changed files with 137 additions and 58 deletions

View File

@ -11,6 +11,7 @@ import scala.concurrent.ExecutionContext
import cats.effect._
import docspell.backend.auth.Login
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._
import docspell.backend.signup.OSignup
import docspell.ftsclient.FtsClient
@ -69,7 +70,8 @@ object BackendApp {
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
nodeImpl <- ONode(store)
jobImpl <- OJob(store, joexImpl)
itemImpl <- OItem(store, ftsClient, queue, joexImpl)
createIndex <- CreateIndex.resource(ftsClient, store)
itemImpl <- OItem(store, ftsClient, createIndex, queue, joexImpl)
itemSearchImpl <- OItemSearch(store)
fulltextImpl <- OFulltext(itemSearchImpl, ftsClient, store, queue, joexImpl)
javaEmil =

View File

@ -0,0 +1,71 @@
/*
* Copyright 2020 Docspell Contributors
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
package docspell.backend.fulltext
import cats.data.NonEmptyList
import cats.effect._
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.ftsclient.TextData
import docspell.store.Store
import docspell.store.queries.QAttachment
import docspell.store.queries.QItem
trait CreateIndex[F[_]] {
/** Low-level function to re-index data. It is not submitted as a job,
* but invoked on the current machine.
*/
def reIndexData(
logger: Logger[F],
collective: Option[Ident],
itemIds: Option[NonEmptyList[Ident]],
chunkSize: Int
): F[Unit]
}
object CreateIndex {
def resource[F[_]](fts: FtsClient[F], store: Store[F]): Resource[F, CreateIndex[F]] =
Resource.pure(apply(fts, store))
def apply[F[_]](fts: FtsClient[F], store: Store[F]): CreateIndex[F] =
new CreateIndex[F] {
def reIndexData(
logger: Logger[F],
collective: Option[Ident],
itemIds: Option[NonEmptyList[Ident]],
chunkSize: Int
): F[Unit] = {
val attachs = store
.transact(QAttachment.allAttachmentMetaAndName(collective, itemIds, chunkSize))
.map(caa =>
TextData
.attachment(
caa.item,
caa.id,
caa.collective,
caa.folder,
caa.lang,
caa.name,
caa.content
)
)
val items = store
.transact(QItem.allNameAndNotes(collective, itemIds, chunkSize))
.map(nn =>
TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
)
fts.indexData(logger, attachs ++ items)
}
}
}

View File

@ -11,6 +11,7 @@ import cats.effect.{Async, Resource}
import cats.implicits._
import docspell.backend.JobFactory
import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient.FtsClient
import docspell.store.queries.{QAttachment, QItem, QMoveAttachment}
@ -212,6 +213,7 @@ object OItem {
def apply[F[_]: Async](
store: Store[F],
fts: FtsClient[F],
createIndex: CreateIndex[F],
queue: JobQueue[F],
joex: OJoex[F]
): Resource[F, OItem[F]] =
@ -588,12 +590,13 @@ object OItem {
items: NonEmptyList[Ident],
collective: Ident
): F[UpdateResult] =
UpdateResult.fromUpdate(
store
UpdateResult.fromUpdate(for {
n <- store
.transact(
RItem.restoreStateForCollective(items, ItemState.Created, collective)
)
)
_ <- createIndex.reIndexData(logger, collective.some, items.some, 10)
} yield n)
def setItemDate(
items: NonEmptyList[Ident],
@ -628,7 +631,10 @@ object OItem {
} yield n
def setDeletedState(items: NonEmptyList[Ident], collective: Ident): F[Int] =
store.transact(RItem.setState(items, collective, ItemState.Deleted))
for {
n <- store.transact(RItem.setState(items, collective, ItemState.Deleted))
_ <- items.traverse(id => fts.removeItem(logger, id))
} yield n
def getProposals(item: Ident, collective: Ident): F[MetaProposalList] =
store.transact(QAttachment.getMetaProposals(item, collective))

View File

@ -231,6 +231,9 @@ object OSimpleSearch {
case Some(ftq) if settings.useFTS =>
if (q.isEmpty) {
logger.debug(s"Using index only search: $fulltextQuery")
if (settings.searchMode == SearchMode.Trashed)
Items.ftsItemsFull(true)(Vector.empty).pure[F]
else
fts
.findIndexOnly(settings.maxNoteLen)(
OFulltext.FtsInput(ftq),

View File

@ -72,4 +72,5 @@ object TextData {
notes: Option[String]
): TextData =
Item(item, collective, folder, name, notes)
}

View File

@ -13,6 +13,7 @@ import cats.implicits._
import fs2.concurrent.SignallingRef
import docspell.analysis.TextAnalyser
import docspell.backend.fulltext.CreateIndex
import docspell.backend.ops._
import docspell.common._
import docspell.ftsclient.FtsClient
@ -116,7 +117,8 @@ object JoexAppImpl {
joex <- OJoex(client, store)
upload <- OUpload(store, queue, cfg.files, joex)
fts <- createFtsClient(cfg)(httpClient)
itemOps <- OItem(store, fts, queue, joex)
createIndex <- CreateIndex.resource(fts, store)
itemOps <- OItem(store, fts, createIndex, queue, joex)
itemSearchOps <- OItemSearch(store)
analyser <- TextAnalyser.create[F](cfg.textAnalysis.textAnalysisConfig)
regexNer <- RegexNerFile(cfg.textAnalysis.regexNerFileConfig, store)
@ -155,14 +157,14 @@ object JoexAppImpl {
.withTask(
JobTask.json(
MigrationTask.taskName,
MigrationTask[F](cfg.fullTextSearch, fts),
MigrationTask[F](cfg.fullTextSearch, fts, createIndex),
MigrationTask.onCancel[F]
)
)
.withTask(
JobTask.json(
ReIndexTask.taskName,
ReIndexTask[F](cfg.fullTextSearch, fts),
ReIndexTask[F](cfg.fullTextSearch, fts, createIndex),
ReIndexTask.onCancel[F]
)
)

View File

@ -6,6 +6,7 @@
package docspell.joex.fts
import docspell.backend.fulltext.CreateIndex
import docspell.common.Logger
import docspell.ftsclient.FtsClient
import docspell.joex.Config
@ -15,6 +16,7 @@ import docspell.store.Store
case class FtsContext[F[_]](
cfg: Config.FullTextSearch,
store: Store[F],
fulltext: CreateIndex[F],
fts: FtsClient[F],
logger: Logger[F]
)
@ -24,7 +26,8 @@ object FtsContext {
def apply[F[_]](
cfg: Config.FullTextSearch,
fts: FtsClient[F],
fulltext: CreateIndex[F],
ctx: Context[F, _]
): FtsContext[F] =
FtsContext(cfg, ctx.store, fts, ctx.logger)
FtsContext(cfg, ctx.store, fulltext, fts, ctx.logger)
}

View File

@ -10,11 +10,11 @@ import cats._
import cats.data.{Kleisli, NonEmptyList}
import cats.implicits._
import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
import docspell.joex.scheduler.Context
import docspell.store.queries.{QAttachment, QItem}
object FtsWork {
import syntax._
@ -88,36 +88,8 @@ object FtsWork {
log[F](_.info("Inserting all data to index")) ++ FtsWork
.all(
FtsWork(ctx =>
ctx.fts.indexData(
ctx.logger,
ctx.store
.transact(
QAttachment
.allAttachmentMetaAndName(coll, ctx.cfg.migration.indexAllChunk)
)
.map(caa =>
TextData
.attachment(
caa.item,
caa.id,
caa.collective,
caa.folder,
caa.lang,
caa.name,
caa.content
)
)
)
),
FtsWork(ctx =>
ctx.fts.indexData(
ctx.logger,
ctx.store
.transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5))
.map(nn =>
TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
)
)
ctx.fulltext
.reIndexData(ctx.logger, coll, None, ctx.cfg.migration.indexAllChunk)
)
)
@ -133,9 +105,10 @@ object FtsWork {
def forContext(
cfg: Config.FullTextSearch,
fts: FtsClient[F]
fts: FtsClient[F],
fulltext: CreateIndex[F]
): Kleisli[F, Context[F, _], Unit] =
mt.local(ctx => FtsContext(cfg, fts, ctx))
mt.local(ctx => FtsContext(cfg, fts, fulltext, ctx))
}
}
}

View File

@ -11,6 +11,7 @@ import cats.effect._
import cats.implicits._
import cats.{Applicative, FlatMap, Traverse}
import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
@ -38,9 +39,10 @@ object Migration {
cfg: Config.FullTextSearch,
fts: FtsClient[F],
store: Store[F],
createIndex: CreateIndex[F],
logger: Logger[F]
): Kleisli[F, List[Migration[F]], Unit] = {
val ctx = FtsContext(cfg, store, fts, logger)
val ctx = FtsContext(cfg, store, createIndex, fts, logger)
Kleisli { migs =>
if (migs.isEmpty) logger.info("No fulltext search migrations to run.")
else Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ())

View File

@ -9,6 +9,7 @@ package docspell.joex.fts
import cats.effect._
import cats.implicits._
import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
@ -20,7 +21,8 @@ object MigrationTask {
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
fts: FtsClient[F]
fts: FtsClient[F],
createIndex: CreateIndex[F]
): Task[F, Unit, Unit] =
Task
.log[F, Unit](_.info(s"Running full-text-index migrations now"))
@ -28,7 +30,7 @@ object MigrationTask {
Task(ctx =>
for {
migs <- migrationTasks[F](fts)
res <- Migration[F](cfg, fts, ctx.store, ctx.logger).run(migs)
res <- Migration[F](cfg, fts, ctx.store, createIndex, ctx.logger).run(migs)
} yield res
)
)

View File

@ -8,6 +8,7 @@ package docspell.joex.fts
import cats.effect._
import docspell.backend.fulltext.CreateIndex
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
@ -22,12 +23,15 @@ object ReIndexTask {
def apply[F[_]: Async](
cfg: Config.FullTextSearch,
fts: FtsClient[F]
fts: FtsClient[F],
fulltext: CreateIndex[F]
): Task[F, Args, Unit] =
Task
.log[F, Args](_.info(s"Running full-text re-index now"))
.flatMap(_ =>
Task(ctx => clearData[F](ctx.args.collective).forContext(cfg, fts).run(ctx))
Task(ctx =>
clearData[F](ctx.args.collective).forContext(cfg, fts, fulltext).run(ctx)
)
)
def onCancel[F[_]]: Task[F, Args, Unit] =

View File

@ -7,6 +7,7 @@
package docspell.store.queries
import cats.data.OptionT
import cats.data.{NonEmptyList => Nel}
import cats.effect.Sync
import cats.implicits._
import fs2.Stream
@ -174,6 +175,7 @@ object QAttachment {
)
def allAttachmentMetaAndName(
coll: Option[Ident],
itemIds: Option[Nel[Ident]],
chunkSize: Int
): Stream[ConnectionIO, ContentAndName] =
Select(
@ -190,8 +192,11 @@ object QAttachment {
.innerJoin(am, am.id === a.id)
.innerJoin(item, item.id === a.itemId)
.innerJoin(c, c.id === item.cid)
).where(coll.map(cid => item.cid === cid))
.build
).where(
item.state.in(ItemState.validStates) &&?
itemIds.map(ids => item.id.in(ids)) &&?
coll.map(cid => item.cid === cid)
).build
.query[ContentAndName]
.streamWithChunkSize(chunkSize)

View File

@ -502,6 +502,7 @@ object QItem {
.leftJoin(m3, m3.id === r.fileId),
where(
i.cid === collective &&
i.state.in(ItemState.validStates) &&
Condition.Or(fms.map(m => m.checksum === checksum)) &&?
Nel
.fromList(excludeFileMeta.toList)
@ -519,6 +520,7 @@ object QItem {
)
def allNameAndNotes(
coll: Option[Ident],
itemIds: Option[Nel[Ident]],
chunkSize: Int
): Stream[ConnectionIO, NameAndNotes] = {
val i = RItem.as("i")
@ -526,8 +528,11 @@ object QItem {
Select(
select(i.id, i.cid, i.folder, i.name, i.notes),
from(i)
).where(coll.map(cid => i.cid === cid))
.build
).where(
i.state.in(ItemState.validStates) &&?
itemIds.map(ids => i.id.in(ids)) &&?
coll.map(cid => i.cid === cid)
).build
.query[NameAndNotes]
.streamWithChunkSize(chunkSize)
}