mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-05-28 19:19:15 +00:00
Add backend operations for re-creating the full-text index
This commit is contained in:
parent
14ea4091c4
commit
0d8b03fc61
@ -61,7 +61,7 @@ object BackendApp {
|
||||
jobImpl <- OJob(store, joexImpl)
|
||||
itemImpl <- OItem(store, solrFts)
|
||||
itemSearchImpl <- OItemSearch(store)
|
||||
fulltextImpl <- OFulltext(itemSearchImpl, solrFts)
|
||||
fulltextImpl <- OFulltext(itemSearchImpl, solrFts, store, queue)
|
||||
javaEmil =
|
||||
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
||||
mailImpl <- OMail(store, javaEmil)
|
||||
|
@ -0,0 +1,90 @@
|
||||
package docspell.backend
|
||||
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.common._
|
||||
import docspell.store.records.RJob
|
||||
|
||||
/** Builds the [[RJob]] records that backend operations submit to the job
  * queue. Every method only creates the record; nothing is inserted here.
  */
object JobFactory {

  /** Creates one job that runs the item processing task.
    *
    * @param args    the task arguments; also supply the job subject via `makeSubject`
    * @param account its collective and user are handed to `RJob.newJob`
    *                (presumably as job group and submitter – confirm against `RJob`)
    * @param prio    the job priority
    * @param tracker an optional tracker id attached to the job
    */
  def processItem[F[_]: Sync](
      args: ProcessItemArgs,
      account: AccountId,
      prio: Priority,
      tracker: Option[Ident]
  ): F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
    } yield RJob.newJob(
      id,
      ProcessItemArgs.taskName,
      account.collective,
      args,
      args.makeSubject,
      now,
      account.user,
      prio,
      tracker
    )

  /** Creates one item processing job per element of `args`.
    *
    * All jobs share the same timestamp, but each one receives its own
    * random id. Previously a single id was generated and re-used for
    * every job, so multiple jobs could not be told apart by id (and would
    * presumably clash when stored).
    *
    * @return the jobs in the order of `args`; empty if `args` is empty
    */
  def processItems[F[_]: Sync](
      args: Vector[ProcessItemArgs],
      account: AccountId,
      prio: Priority,
      tracker: Option[Ident]
  ): F[Vector[RJob]] = {
    def create(now: Timestamp, arg: ProcessItemArgs): F[RJob] =
      Ident
        .randomId[F]
        .map(id =>
          RJob.newJob(
            id,
            ProcessItemArgs.taskName,
            account.collective,
            arg,
            arg.makeSubject,
            now,
            account.user,
            prio,
            tracker
          )
        )

    for {
      now  <- Timestamp.current[F]
      jobs <- args.traverse(arg => create(now, arg))
    } yield jobs
  }

  /** Creates a low-priority job that re-creates the full-text index
    * without restricting it to a collective (`ReIndexTaskArgs(None)`).
    *
    * The job uses `DocspellSystem.taskGroup` and is tracked with
    * `DocspellSystem.migrationTaskTracker`.
    */
  def reIndexAll[F[_]: Sync]: F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
    } yield RJob.newJob(
      id,
      ReIndexTaskArgs.taskName,
      DocspellSystem.taskGroup,
      ReIndexTaskArgs(None),
      "Recreate full-text index",
      now,
      DocspellSystem.taskGroup,
      Priority.Low,
      Some(DocspellSystem.migrationTaskTracker)
    )

  /** Creates a low-priority job that re-creates the full-text index for
    * the collective of the given account.
    *
    * The tracker is derived from the task arguments via
    * `ReIndexTaskArgs.tracker`.
    */
  def reIndex[F[_]: Sync](account: AccountId): F[RJob] =
    for {
      id  <- Ident.randomId[F]
      now <- Timestamp.current[F]
      args = ReIndexTaskArgs(Some(account.collective))
    } yield RJob.newJob(
      id,
      ReIndexTaskArgs.taskName,
      account.collective,
      args,
      "Recreate full-text index",
      now,
      account.user,
      Priority.Low,
      Some(ReIndexTaskArgs.tracker(args))
    )
}
|
@ -5,6 +5,10 @@ import cats.implicits._
|
||||
import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.ftsclient._
|
||||
import docspell.backend.JobFactory
|
||||
import docspell.store.Store
|
||||
import docspell.store.records.RJob
|
||||
import docspell.store.queue.JobQueue
|
||||
import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
|
||||
|
||||
trait OFulltext[F[_]] {
|
||||
@ -22,7 +26,7 @@ trait OFulltext[F[_]] {
|
||||
/** Clears the full-text index for the given collective and starts a
|
||||
* task indexing all their data.
|
||||
*/
|
||||
def reindexCollective(collective: Ident): F[Unit]
|
||||
def reindexCollective(account: AccountId): F[Unit]
|
||||
}
|
||||
|
||||
object OFulltext {
|
||||
@ -32,12 +36,27 @@ object OFulltext {
|
||||
|
||||
def apply[F[_]: Effect](
|
||||
itemSearch: OItemSearch[F],
|
||||
fts: FtsClient[F]
|
||||
fts: FtsClient[F],
|
||||
store: Store[F],
|
||||
queue: JobQueue[F]
|
||||
): Resource[F, OFulltext[F]] =
|
||||
Resource.pure[F, OFulltext[F]](new OFulltext[F] {
|
||||
def reindexAll: F[Unit] = ???
|
||||
def reindexAll: F[Unit] =
|
||||
for {
|
||||
job <- JobFactory.reIndexAll[F]
|
||||
_ <- queue.insertIfNew(job)
|
||||
} yield ()
|
||||
|
||||
def reindexCollective(collective: Ident): F[Unit] = ???
|
||||
def reindexCollective(account: AccountId): F[Unit] =
|
||||
for {
|
||||
exist <- store.transact(
|
||||
RJob.findNonFinalByTracker(DocspellSystem.migrationTaskTracker)
|
||||
)
|
||||
job <- JobFactory.reIndex(account)
|
||||
_ <-
|
||||
if (exist.isDefined) ().pure[F]
|
||||
else queue.insertIfNew(job)
|
||||
} yield ()
|
||||
|
||||
def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
|
||||
findItemsFts(q, ftsQ, batch, itemSearch.findItems)
|
||||
|
@ -5,7 +5,7 @@ import cats.Functor
|
||||
import cats.data.{EitherT, OptionT}
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.backend.Config
|
||||
import docspell.backend.{Config, JobFactory}
|
||||
import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.common.syntax.all._
|
||||
@ -203,26 +203,7 @@ object OUpload {
|
||||
account: AccountId,
|
||||
prio: Priority,
|
||||
tracker: Option[Ident]
|
||||
): F[Vector[RJob]] = {
|
||||
def create(id: Ident, now: Timestamp, arg: ProcessItemArgs): RJob =
|
||||
RJob.newJob(
|
||||
id,
|
||||
ProcessItemArgs.taskName,
|
||||
account.collective,
|
||||
arg,
|
||||
arg.makeSubject,
|
||||
now,
|
||||
account.user,
|
||||
prio,
|
||||
tracker
|
||||
)
|
||||
|
||||
for {
|
||||
id <- Ident.randomId[F]
|
||||
now <- Timestamp.current[F]
|
||||
jobs = args.map(a => create(id, now, a))
|
||||
} yield jobs
|
||||
|
||||
}
|
||||
): F[Vector[RJob]] =
|
||||
JobFactory.processItems[F](args, account, prio, tracker)
|
||||
})
|
||||
}
|
||||
|
@ -0,0 +1,8 @@
|
||||
package docspell.common
|
||||
|
||||
/** Identifiers shared by jobs that docspell creates on its own behalf. */
object DocspellSystem {

  /** Identifier `docspell-system`, used for system jobs. */
  val taskGroup: Ident =
    Ident.unsafe("docspell-system")

  /** Tracker identifier for full-text index jobs. */
  val migrationTaskTracker: Ident =
    Ident.unsafe("full-text-index-tracker")
}
|
@ -0,0 +1,24 @@
|
||||
package docspell.common
|
||||
|
||||
import io.circe._
|
||||
import io.circe.generic.semiauto._
|
||||
|
||||
/** Arguments of the full-text re-index task.
  *
  * @param collective the collective to restrict the re-index to, if any
  */
final case class ReIndexTaskArgs(collective: Option[Ident])

object ReIndexTaskArgs {

  /** Name under which the re-index task is registered. */
  val taskName = Ident.unsafe("full-text-reindex")

  /** Returns the tracker id for the given arguments: the system-wide
    * migration tracker, prefixed with the collective id when one is set.
    */
  def tracker(args: ReIndexTaskArgs): Ident = {
    val systemTracker = DocspellSystem.migrationTaskTracker
    args.collective.fold(systemTracker)(_ / systemTracker)
  }

  implicit val jsonEncoder: Encoder[ReIndexTaskArgs] =
    deriveEncoder[ReIndexTaskArgs]

  implicit val jsonDecoder: Decoder[ReIndexTaskArgs] =
    deriveDecoder[ReIndexTaskArgs]
}
|
@ -7,7 +7,7 @@ import docspell.common._
|
||||
import docspell.backend.ops._
|
||||
import docspell.joex.hk._
|
||||
import docspell.joex.notify._
|
||||
import docspell.joex.fts.MigrationTask
|
||||
import docspell.joex.fts.{MigrationTask, ReIndexTask}
|
||||
import docspell.joex.scanmailbox._
|
||||
import docspell.joex.process.ItemHandler
|
||||
import docspell.joex.scheduler._
|
||||
@ -111,6 +111,13 @@ object JoexAppImpl {
|
||||
MigrationTask.onCancel[F]
|
||||
)
|
||||
)
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
ReIndexTask.taskName,
|
||||
ReIndexTask[F](cfg.fullTextSearch, fts),
|
||||
ReIndexTask.onCancel[F]
|
||||
)
|
||||
)
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
HouseKeepingTask.taskName,
|
||||
|
@ -3,10 +3,11 @@ package docspell.joex.fts
|
||||
import cats.effect._
|
||||
import cats.data.{Kleisli, NonEmptyList}
|
||||
import cats.{FlatMap, Semigroup}
|
||||
import docspell.store.queries.{QAttachment, QItem}
|
||||
import docspell.common._
|
||||
import docspell.ftsclient._
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.Config
|
||||
import docspell.store.queries.{QAttachment, QItem}
|
||||
|
||||
object FtsWork {
|
||||
def apply[F[_]](f: FtsContext[F] => F[Unit]): FtsWork[F] =
|
||||
@ -21,23 +22,20 @@ object FtsWork {
|
||||
implicit def semigroup[F[_]: FlatMap]: Semigroup[FtsWork[F]] =
|
||||
Semigroup.instance((mt1, mt2) => mt1.flatMap(_ => mt2))
|
||||
|
||||
implicit final class FtsWorkOps[F[_]](mt: FtsWork[F]) {
|
||||
def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] =
|
||||
all(mt, mn)
|
||||
|
||||
def forContext(
|
||||
cfg: Config.FullTextSearch,
|
||||
fts: FtsClient[F]
|
||||
): Kleisli[F, Context[F, _], Unit] =
|
||||
mt.local(ctx => FtsContext(cfg, fts, ctx))
|
||||
}
|
||||
|
||||
// some tasks
|
||||
|
||||
def initialize[F[_]]: FtsWork[F] =
|
||||
FtsWork(_.fts.initialize)
|
||||
|
||||
def insertAll[F[_]: Effect]: FtsWork[F] =
|
||||
def clearIndex[F[_]](coll: Option[Ident]): FtsWork[F] =
|
||||
coll match {
|
||||
case Some(cid) =>
|
||||
FtsWork(ctx => ctx.fts.clear(ctx.logger, cid))
|
||||
case None =>
|
||||
FtsWork(ctx => ctx.fts.clearAll(ctx.logger))
|
||||
}
|
||||
|
||||
def insertAll[F[_]: Effect](coll: Option[Ident]): FtsWork[F] =
|
||||
FtsWork
|
||||
.all(
|
||||
FtsWork(ctx =>
|
||||
@ -45,7 +43,8 @@ object FtsWork {
|
||||
ctx.logger,
|
||||
ctx.store
|
||||
.transact(
|
||||
QAttachment.allAttachmentMetaAndName(ctx.cfg.migration.indexAllChunk)
|
||||
QAttachment
|
||||
.allAttachmentMetaAndName(coll, ctx.cfg.migration.indexAllChunk)
|
||||
)
|
||||
.map(caa =>
|
||||
TextData
|
||||
@ -64,9 +63,22 @@ object FtsWork {
|
||||
ctx.fts.indexData(
|
||||
ctx.logger,
|
||||
ctx.store
|
||||
.transact(QItem.allNameAndNotes(ctx.cfg.migration.indexAllChunk * 5))
|
||||
.transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5))
|
||||
.map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes))
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
object syntax {
|
||||
implicit final class FtsWorkOps[F[_]](mt: FtsWork[F]) {
|
||||
def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] =
|
||||
all(mt, mn)
|
||||
|
||||
def forContext(
|
||||
cfg: Config.FullTextSearch,
|
||||
fts: FtsClient[F]
|
||||
): Kleisli[F, Context[F, _], Unit] =
|
||||
mt.local(ctx => FtsContext(cfg, fts, ctx))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7,12 +7,9 @@ import docspell.joex.Config
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.ftsclient._
|
||||
import docspell.store.records.RJob
|
||||
import docspell.joex.hk.HouseKeepingTask
|
||||
|
||||
object MigrationTask {
|
||||
val taskName = Ident.unsafe("full-text-index")
|
||||
val tracker = Ident.unsafe("full-text-index-tracker")
|
||||
val systemGroup = HouseKeepingTask.systemGroup
|
||||
|
||||
def apply[F[_]: ConcurrentEffect](
|
||||
cfg: Config.FullTextSearch,
|
||||
@ -37,20 +34,20 @@ object MigrationTask {
|
||||
} yield RJob.newJob(
|
||||
id,
|
||||
taskName,
|
||||
systemGroup,
|
||||
DocspellSystem.taskGroup,
|
||||
(),
|
||||
"Create full-text index",
|
||||
now,
|
||||
systemGroup,
|
||||
DocspellSystem.taskGroup,
|
||||
Priority.Low,
|
||||
Some(tracker)
|
||||
Some(DocspellSystem.migrationTaskTracker)
|
||||
)
|
||||
|
||||
private val solrEngine = Ident.unsafe("solr")
|
||||
def migrationTasks[F[_]: Effect]: List[Migration[F]] =
|
||||
List(
|
||||
Migration[F](1, solrEngine, "initialize", FtsWork.initialize[F]),
|
||||
Migration[F](2, solrEngine, "Index all from database", FtsWork.insertAll[F])
|
||||
Migration[F](2, solrEngine, "Index all from database", FtsWork.insertAll[F](None))
|
||||
)
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,33 @@
|
||||
package docspell.joex.fts
|
||||
|
||||
import cats.effect._
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.ftsclient._
|
||||
import FtsWork.syntax._
|
||||
|
||||
/** Job task that clears the full-text index and fills it again. */
object ReIndexTask {
  type Args = ReIndexTaskArgs

  val taskName = ReIndexTaskArgs.taskName
  val tracker  = DocspellSystem.migrationTaskTracker

  /** Builds the task: log an info message, then clear the index and
    * insert all data again, both restricted to `ctx.args.collective`.
    *
    * @param cfg the full-text search configuration given to the work context
    * @param fts the full-text search client the work runs against
    */
  def apply[F[_]: ConcurrentEffect](
      cfg: Config.FullTextSearch,
      fts: FtsClient[F]
  ): Task[F, Args, Unit] = {
    val announce =
      Task.log[F, Args](logger => logger.info("Running full-text re-index now"))

    val reIndex: Task[F, Args, Unit] =
      Task { ctx =>
        val coll = ctx.args.collective
        // clearing must come first; `++` runs the two works in sequence
        val work = FtsWork.clearIndex[F](coll) ++ FtsWork.insertAll[F](coll)
        work.forContext(cfg, fts).run(ctx)
      }

    announce.flatMap(_ => reIndex)
  }

  /** Task to run on cancellation; it only logs a warning. */
  def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
    Task.log[F, Args](logger => logger.warn("Cancelling full-text re-index task"))
}
|
@ -11,7 +11,6 @@ import docspell.store.records._
|
||||
|
||||
object HouseKeepingTask {
|
||||
private val periodicId = Ident.unsafe("docspell-houskeeping")
|
||||
val systemGroup: Ident = Ident.unsafe("docspell-system")
|
||||
|
||||
val taskName: Ident = Ident.unsafe("housekeeping")
|
||||
|
||||
@ -29,10 +28,10 @@ object HouseKeepingTask {
|
||||
.createJson(
|
||||
true,
|
||||
taskName,
|
||||
systemGroup,
|
||||
DocspellSystem.taskGroup,
|
||||
(),
|
||||
"Docspell house-keeping",
|
||||
systemGroup,
|
||||
DocspellSystem.taskGroup,
|
||||
Priority.Low,
|
||||
ce
|
||||
)
|
||||
|
@ -147,7 +147,10 @@ object QAttachment {
|
||||
name: Option[String],
|
||||
content: Option[String]
|
||||
)
|
||||
def allAttachmentMetaAndName(chunkSize: Int): Stream[ConnectionIO, ContentAndName] = {
|
||||
def allAttachmentMetaAndName(
|
||||
coll: Option[Ident],
|
||||
chunkSize: Int
|
||||
): Stream[ConnectionIO, ContentAndName] = {
|
||||
val aId = RAttachment.Columns.id.prefix("a")
|
||||
val aItem = RAttachment.Columns.itemId.prefix("a")
|
||||
val aName = RAttachment.Columns.name.prefix("a")
|
||||
@ -164,7 +167,9 @@ object QAttachment {
|
||||
fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) ++
|
||||
fr"INNER JOIN" ++ RCollective.table ++ fr"c ON" ++ cId.is(iColl)
|
||||
|
||||
selectSimple(cols, from, Fragment.empty)
|
||||
val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty)
|
||||
|
||||
selectSimple(cols, from, where)
|
||||
.query[ContentAndName]
|
||||
.streamWithChunkSize(chunkSize)
|
||||
}
|
||||
|
@ -475,14 +475,18 @@ object QItem {
|
||||
name: String,
|
||||
notes: Option[String]
|
||||
)
|
||||
def allNameAndNotes(chunkSize: Int): Stream[ConnectionIO, NameAndNotes] = {
|
||||
def allNameAndNotes(
|
||||
coll: Option[Ident],
|
||||
chunkSize: Int
|
||||
): Stream[ConnectionIO, NameAndNotes] = {
|
||||
val iId = RItem.Columns.id
|
||||
val iColl = RItem.Columns.cid
|
||||
val iName = RItem.Columns.name
|
||||
val iNotes = RItem.Columns.notes
|
||||
|
||||
val cols = Seq(iId, iColl, iName, iNotes)
|
||||
selectSimple(cols, RItem.table, Fragment.empty)
|
||||
val cols = Seq(iId, iColl, iName, iNotes)
|
||||
val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty)
|
||||
selectSimple(cols, RItem.table, where)
|
||||
.query[NameAndNotes]
|
||||
.streamWithChunkSize(chunkSize)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user