Fix global re-index task to re-create the schema

Otherwise new instances could not be re-indexed.
This commit is contained in:
Eike Kettner 2020-06-25 22:55:23 +02:00
parent 4a41168bbb
commit dc8f1a0387
5 changed files with 56 additions and 11 deletions

View File

@ -44,19 +44,19 @@ final class SolrFtsClient[F[_]: Effect](
def removeItem(logger: Logger[F], itemId: Ident): F[Unit] =
logger.debug(s"Remove item '${itemId.id}' from index") *>
solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}")
solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}", None)
def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] =
logger.debug(s"Remove attachment '${attachId.id}' from index") *>
solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}")
solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}", None)
def clearAll(logger: Logger[F]): F[Unit] =
logger.info("Deleting complete full-text index!") *>
solrUpdate.delete("*:*")
solrUpdate.delete("*:*", Option(0))
def clear(logger: Logger[F], collective: Ident): F[Unit] =
logger.info(s"Deleting full-text index for collective ${collective.id}") *>
solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}")
solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}", Option(0))
}
object SolrFtsClient {

View File

@ -17,7 +17,7 @@ trait SolrUpdate[F[_]] {
def update(tds: List[TextData]): F[Unit]
def delete(q: String): F[Unit]
def delete(q: String, commitWithin: Option[Int]): F[Unit]
}
object SolrUpdate {
@ -42,8 +42,16 @@ object SolrUpdate {
client.expect[Unit](req)
}
def delete(q: String): F[Unit] = {
val req = Method.POST(Delete(q).asJson, url)
/** Posts a delete-by-query request for `q`.
  *
  * `commitWithin` selects the query parameters of the target url:
  *  - `None`: `url` is used as is
  *  - `Some(n)` with `n > 0`: the `commitWithin` parameter is set to `n`
  *  - `Some(n)` with `n <= 0`: the `commitWithin` parameter is removed
  *    and `commit=true` is set instead
  */
def delete(q: String, commitWithin: Option[Int]): F[Unit] = {
  val target = commitWithin.fold(url) { within =>
    if (within > 0) url.withQueryParam("commitWithin", within.toString)
    else url.removeQueryParam("commitWithin").withQueryParam("commit", "true")
  }
  val request = Method.POST(Delete(q).asJson, target)
  client.expect[Unit](request)
}

View File

@ -2,7 +2,8 @@ package docspell.joex.fts
import cats.effect._
import cats.data.{Kleisli, NonEmptyList}
import cats.{FlatMap, Semigroup}
import cats.{ApplicativeError, FlatMap, Semigroup}
import cats.implicits._
import docspell.common._
import docspell.ftsclient._
import docspell.joex.scheduler.Context
@ -24,6 +25,9 @@ object FtsWork {
// some tasks
/** Creates a work item that applies `f` to the logger found in the
  * context it is run with.
  */
def log[F[_]](f: Logger[F] => F[Unit]): FtsWork[F] =
  FtsWork { context =>
    val logger = context.logger
    f(logger)
  }
/** Creates a work item that calls `initialize` on the `fts` member of
  * the context it is run with.
  */
def initialize[F[_]]: FtsWork[F] =
  FtsWork(context => context.fts.initialize)
@ -74,6 +78,11 @@ object FtsWork {
/** Combines this work with `mn` by delegating to `all(mt, mn)`.
  *
  * NOTE(review): presumably `all` runs `mt` first and `mn` afterwards;
  * confirm against the definition of `all`.
  */
def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] =
all(mt, mn)
/** Runs this work and, should it fail, runs `other` with the same
  * context in its place.
  *
  * The error raised by this work is discarded; the result is then
  * whatever `other` produces (including an error raised by `other`).
  *
  * @param other the fallback work to run when this work fails
  */
def recoverWith(
    other: FtsWork[F]
)(implicit ev: ApplicativeError[F, Throwable]): FtsWork[F] =
  // `onError` would run `other` and then re-raise the original error,
  // so nothing would be recovered. `handleErrorWith` replaces the
  // failed result with the outcome of `other`.
  Kleisli(ctx => mt.run(ctx).handleErrorWith(_ => other.run(ctx)))
def forContext(
cfg: Config.FullTextSearch,
fts: FtsClient[F]

View File

@ -21,13 +21,34 @@ object ReIndexTask {
.log[F, Args](_.info(s"Running full-text re-index now"))
.flatMap(_ =>
Task(ctx =>
(FtsWork.clearIndex(ctx.args.collective) ++ FtsWork.insertAll[F](
ctx.args.collective
)).forContext(cfg, fts).run(ctx)
(clearData[F](ctx.args.collective) ++
FtsWork.log[F](_.info("Inserting data from database")) ++
FtsWork.insertAll[F](
ctx.args.collective
)).forContext(cfg, fts).run(ctx)
)
)
/** Task to run when the re-index task is cancelled; it only writes a
  * warning to the task's logger.
  */
def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
  Task.log[F, Args](logger => logger.warn("Cancelling full-text re-index task"))
/** Builds the work that removes index data before a re-index.
  *
  * In both cases "Clearing index data" is logged first and the index is
  * cleared for `collective`; if clearing fails, an info message is
  * logged through `recoverWith`.
  *
  * When no collective is given, the clearing step is additionally
  * followed by a log statement and `FtsWork.initialize`.
  */
private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] = {
  val announce: FtsWork[F] =
    FtsWork.log[F](_.info("Clearing index data"))

  // The clearing step is the same for both cases, it only differs in
  // the value of `collective` handed to `clearIndex`.
  val clearTolerant: FtsWork[F] =
    FtsWork
      .clearIndex(collective)
      .recoverWith(
        FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
      )

  if (collective.isDefined) announce ++ clearTolerant
  else
    announce ++ (clearTolerant ++
      FtsWork.log[F](_.info("Running index initialize")) ++
      FtsWork.initialize[F])
}
}

View File

@ -126,6 +126,13 @@ empty (the default), this REST call is disabled. Otherwise, the POST
request will submit a system task that is executed by a joex instance
eventually.
Using this endpoint, the index will be re-created. This is sometimes
necessary, for example if you upgrade SOLR or delete the core to
provide a new one (see
[here](https://lucene.apache.org/solr/guide/8_4/reindexing.html) for
details). Note that a collective can also re-index its data using a
similar endpoint, but that one only deletes and re-inserts the data of
that collective; it does not re-create the index.
### Bind