Add task to index current database state
@@ -368,6 +368,9 @@ docspell.joex {
  # Configuration of the full-text search engine.
  full-text-search {
    enabled = true
    migration = {
      index-all-chunk = 10
    }
    solr = {
      url = "http://localhost:8983/solr/docspell_core"
    }
@@ -37,5 +37,14 @@ object Config {
  }

  case class UserTasks(scanMailbox: ScanMailbox)

  case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
  case class FullTextSearch(
      enabled: Boolean,
      migration: FullTextSearch.Migration,
      solr: SolrConfig
  )

  object FullTextSearch {

    final case class Migration(indexAllChunk: Int)
  }
}
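Read together with the config hunk above, the new `migration.index-all-chunk` key lands in `Config.FullTextSearch.Migration` and, as the new Migration.scala further below shows, sizes the chunks in which existing attachments and items are read from the database for indexing. A minimal sketch of the loaded value, assuming the nesting shown above (the unchanged `SolrConfig` part is left out):

    import docspell.joex.Config

    // corresponds to the default `index-all-chunk = 10` from the config hunk;
    // only this sub-object is constructed here, since SolrConfig's fields are
    // not part of this diff
    val migrationCfg: Config.FullTextSearch.Migration =
      Config.FullTextSearch.Migration(indexAllChunk = 10)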
@@ -7,6 +7,7 @@ import docspell.common._
import docspell.backend.ops._
import docspell.joex.hk._
import docspell.joex.notify._
import docspell.joex.fts.IndexTask
import docspell.joex.scanmailbox._
import docspell.joex.process.ItemHandler
import docspell.joex.scheduler._
@@ -23,6 +24,7 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
    cfg: Config,
    nodeOps: ONode[F],
    store: Store[F],
    queue: JobQueue[F],
    pstore: PeriodicTaskStore[F],
    termSignal: SignallingRef[F, Boolean],
    val scheduler: Scheduler[F],
@@ -52,7 +54,9 @@ final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
    periodicScheduler.shutdown *> scheduler.shutdown(false) *> termSignal.set(true)

  private def scheduleBackgroundTasks: F[Unit] =
    HouseKeepingTask.periodicTask[F](cfg.houseKeeping.schedule).flatMap(pstore.insert)
    HouseKeepingTask
      .periodicTask[F](cfg.houseKeeping.schedule)
      .flatMap(pstore.insert) *> IndexTask.job.flatMap(queue.insert)
}

object JoexAppImpl {
@@ -99,6 +103,13 @@ object JoexAppImpl {
            ScanMailboxTask.onCancel[F]
          )
        )
        .withTask(
          JobTask.json(
            IndexTask.taskName,
            IndexTask[F](cfg.fullTextSearch, fts),
            IndexTask.onCancel[F]
          )
        )
        .withTask(
          JobTask.json(
            HouseKeepingTask.taskName,
@@ -115,7 +126,7 @@ object JoexAppImpl {
        client,
        Timer[F]
      )
      app = new JoexAppImpl(cfg, nodeOps, store, pstore, termSignal, sch, psch)
      app = new JoexAppImpl(cfg, nodeOps, store, queue, pstore, termSignal, sch, psch)
      appR <- Resource.make(app.init.map(_ => app))(_.shutdown)
    } yield appR
  }
@@ -0,0 +1,48 @@
package docspell.joex.fts

import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.ftsclient._
import docspell.store.records.RJob
import docspell.joex.hk.HouseKeepingTask

object IndexTask {
  val taskName: Ident = Ident.unsafe("full-text-index")
  val systemGroup     = HouseKeepingTask.systemGroup

  def apply[F[_]: ConcurrentEffect](
      cfg: Config.FullTextSearch,
      fts: FtsClient[F]
  ): Task[F, Unit, Unit] =
    Task
      .log[F, Unit](_.info(s"Running full-text-index task now"))
      .flatMap(_ =>
        Task(ctx =>
          Migration[F](cfg, ctx.store, fts, ctx.logger)
            .run(Migration.migrationTasks[F])
        )
      )

  def onCancel[F[_]: Sync]: Task[F, Unit, Unit] =
    Task.log[F, Unit](_.warn("Cancelling full-text-index task"))

  def job[F[_]: Sync]: F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
    } yield RJob.newJob(
      id,
      taskName,
      systemGroup,
      (),
      "Create full-text index",
      now,
      systemGroup,
      Priority.Low,
      None
    )

}
modules/joex/src/main/scala/docspell/joex/fts/Migration.scala (new file, 110 lines)
@@ -0,0 +1,110 @@
package docspell.joex.fts

import cats.effect._
import cats.implicits._
import cats.data.{Kleisli, OptionT}
import cats.Traverse
import docspell.common._
import docspell.joex.Config
import docspell.store.{AddResult, Store}
import docspell.store.records.RFtsMigration
import docspell.store.queries.{QAttachment, QItem}
import docspell.ftsclient._

object Migration {
  private val solrEngine = Ident.unsafe("solr")

  case class MigrateCtx[F[_]](
      cfg: Config.FullTextSearch,
      store: Store[F],
      fts: FtsClient[F],
      logger: Logger[F]
  )

  case class Migration[F[_]](
      version: Int,
      engine: Ident,
      description: String,
      task: Kleisli[F, MigrateCtx[F], Unit]
  )

  def apply[F[_]: Effect](
      cfg: Config.FullTextSearch,
      store: Store[F],
      fts: FtsClient[F],
      logger: Logger[F]
  ): Kleisli[F, List[Migration[F]], Unit] = {
    val ctx = MigrateCtx(cfg, store, fts, logger)
    Kleisli(migs => Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ()))
  }

  def applySingle[F[_]: Effect](ctx: MigrateCtx[F])(m: Migration[F]): F[Unit] = {
    val insertRecord: F[Option[RFtsMigration]] =
      for {
        rec <- RFtsMigration.create(m.version, m.engine, m.description)
        res <- ctx.store.add(
          RFtsMigration.insert(rec),
          RFtsMigration.exists(m.version, m.engine)
        )
        ret <- res match {
          case AddResult.Success         => rec.some.pure[F]
          case AddResult.EntityExists(_) => None.pure[F]
          case AddResult.Failure(ex)     => Effect[F].raiseError(ex)
        }
      } yield ret

    (for {
      _   <- OptionT.liftF(ctx.logger.info(s"Apply ${m.version}/${m.description}"))
      rec <- OptionT(insertRecord)
      res <- OptionT.liftF(m.task.run(ctx).attempt)
      _ <- OptionT.liftF(res match {
        case Right(()) => ().pure[F]
        case Left(ex) =>
          ctx.logger.error(ex)(
            s"Applying index migration ${m.version}/${m.description} failed"
          ) *>
            ctx.store.transact(RFtsMigration.deleteById(rec.id)) *> Effect[F].raiseError(
              ex
            )
      })
    } yield ()).getOrElseF(
      ctx.logger.info(s"Migration ${m.version}/${m.description} already applied.")
    )
  }

  def migrationTasks[F[_]]: List[Migration[F]] =
    List(
      Migration[F](1, solrEngine, "initialize", Kleisli(ctx => ctx.fts.initialize)),
      Migration[F](
        2,
        solrEngine,
        "Index all attachments from database",
        Kleisli(ctx =>
          ctx.fts.indexData(
            ctx.logger,
            ctx.store
              .transact(
                QAttachment.allAttachmentMetaAndName(ctx.cfg.migration.indexAllChunk)
              )
              .map(caa =>
                TextData
                  .attachment(caa.item, caa.id, caa.collective, caa.name, caa.content)
              )
          )
        )
      ),
      Migration[F](
        3,
        solrEngine,
        "Index all items from database",
        Kleisli(ctx =>
          ctx.fts.indexData(
            ctx.logger,
            ctx.store
              .transact(QItem.allNameAndNotes(ctx.cfg.migration.indexAllChunk * 5))
              .map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes))
          )
        )
      )
    )
}
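How a further index step would be added can be read off `migrationTasks` above: append another `Migration` value with a fresh version number, and `applySingle` records it in `RFtsMigration` so it runs at most once per engine. A minimal sketch with a hypothetical step 4 that only writes a log line (version, description and message are invented for this example):

    import cats.data.Kleisli
    import docspell.common.Ident
    import docspell.joex.fts.Migration

    // hypothetical follow-up step, reusing the pattern of steps 1-3 above
    def logOnlyStep[F[_]]: Migration.Migration[F] =
      Migration.Migration[F](
        4,
        Ident.unsafe("solr"),
        "log that the index is up to date",
        Kleisli(ctx => ctx.logger.info("nothing to re-index"))
      )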
@@ -32,7 +32,7 @@ object TextExtraction {
        )
        _   <- ctx.logger.debug("Storing extracted texts")
        _   <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1)))
        _   <- fts.indexData(Stream.emits(txt.map(_._2)))
        _   <- fts.indexData(ctx.logger, Stream.emits(txt.map(_._2)))
        dur <- start
        _   <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
      } yield item.copy(metas = txt.map(_._1))
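The only change in this last hunk is that `fts.indexData` now also receives the task logger, so indexing progress from text extraction ends up in the job log like the migration runs above. Roughly the client shape this implies, inferred only from the call sites in this diff (trait and parameter names are assumptions, not copied from docspell.ftsclient):

    import fs2.Stream
    import docspell.common.Logger
    import docspell.ftsclient.TextData

    // hypothetical outline of the fts client as used in this commit;
    // the real trait lives in docspell.ftsclient and may differ
    trait FtsClientSketch[F[_]] {
      // create the index/schema (migration step 1 calls this)
      def initialize: F[Unit]
      // push a stream of text chunks into the index, logging progress
      def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit]
    }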