Fix global re-index task to re-create the schema

Otherwise new instances could not be re-indexed.
This commit is contained in:
Eike Kettner 2020-06-25 22:55:23 +02:00
parent 4a41168bbb
commit dc8f1a0387
5 changed files with 56 additions and 11 deletions

View File

@ -44,19 +44,19 @@ final class SolrFtsClient[F[_]: Effect](
def removeItem(logger: Logger[F], itemId: Ident): F[Unit] = def removeItem(logger: Logger[F], itemId: Ident): F[Unit] =
logger.debug(s"Remove item '${itemId.id}' from index") *> logger.debug(s"Remove item '${itemId.id}' from index") *>
solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}") solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}", None)
def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] = def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] =
logger.debug(s"Remove attachment '${attachId.id}' from index") *> logger.debug(s"Remove attachment '${attachId.id}' from index") *>
solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}") solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}", None)
def clearAll(logger: Logger[F]): F[Unit] = def clearAll(logger: Logger[F]): F[Unit] =
logger.info("Deleting complete full-text index!") *> logger.info("Deleting complete full-text index!") *>
solrUpdate.delete("*:*") solrUpdate.delete("*:*", Option(0))
def clear(logger: Logger[F], collective: Ident): F[Unit] = def clear(logger: Logger[F], collective: Ident): F[Unit] =
logger.info(s"Deleting full-text index for collective ${collective.id}") *> logger.info(s"Deleting full-text index for collective ${collective.id}") *>
solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}") solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}", Option(0))
} }
object SolrFtsClient { object SolrFtsClient {

View File

@ -17,7 +17,7 @@ trait SolrUpdate[F[_]] {
def update(tds: List[TextData]): F[Unit] def update(tds: List[TextData]): F[Unit]
def delete(q: String): F[Unit] def delete(q: String, commitWithin: Option[Int]): F[Unit]
} }
object SolrUpdate { object SolrUpdate {
@ -42,8 +42,16 @@ object SolrUpdate {
client.expect[Unit](req) client.expect[Unit](req)
} }
def delete(q: String): F[Unit] = { def delete(q: String, commitWithin: Option[Int]): F[Unit] = {
val req = Method.POST(Delete(q).asJson, url) val uri = commitWithin match {
case Some(n) =>
if (n <= 0)
url.removeQueryParam("commitWithin").withQueryParam("commit", "true")
else url.withQueryParam("commitWithin", n.toString)
case None =>
url
}
val req = Method.POST(Delete(q).asJson, uri)
client.expect[Unit](req) client.expect[Unit](req)
} }

View File

@ -2,7 +2,8 @@ package docspell.joex.fts
import cats.effect._ import cats.effect._
import cats.data.{Kleisli, NonEmptyList} import cats.data.{Kleisli, NonEmptyList}
import cats.{FlatMap, Semigroup} import cats.{ApplicativeError, FlatMap, Semigroup}
import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.joex.scheduler.Context import docspell.joex.scheduler.Context
@ -24,6 +25,9 @@ object FtsWork {
// some tasks // some tasks
def log[F[_]](f: Logger[F] => F[Unit]): FtsWork[F] =
FtsWork(ctx => f(ctx.logger))
def initialize[F[_]]: FtsWork[F] = def initialize[F[_]]: FtsWork[F] =
FtsWork(_.fts.initialize) FtsWork(_.fts.initialize)
@ -74,6 +78,11 @@ object FtsWork {
def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] = def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] =
all(mt, mn) all(mt, mn)
def recoverWith(
other: FtsWork[F]
)(implicit ev: ApplicativeError[F, Throwable]): FtsWork[F] =
Kleisli(ctx => mt.run(ctx).onError({ case _ => other.run(ctx) }))
def forContext( def forContext(
cfg: Config.FullTextSearch, cfg: Config.FullTextSearch,
fts: FtsClient[F] fts: FtsClient[F]

View File

@ -21,7 +21,9 @@ object ReIndexTask {
.log[F, Args](_.info(s"Running full-text re-index now")) .log[F, Args](_.info(s"Running full-text re-index now"))
.flatMap(_ => .flatMap(_ =>
Task(ctx => Task(ctx =>
(FtsWork.clearIndex(ctx.args.collective) ++ FtsWork.insertAll[F]( (clearData[F](ctx.args.collective) ++
FtsWork.log[F](_.info("Inserting data from database")) ++
FtsWork.insertAll[F](
ctx.args.collective ctx.args.collective
)).forContext(cfg, fts).run(ctx) )).forContext(cfg, fts).run(ctx)
) )
@ -30,4 +32,23 @@ object ReIndexTask {
def onCancel[F[_]: Sync]: Task[F, Args, Unit] = def onCancel[F[_]: Sync]: Task[F, Args, Unit] =
Task.log[F, Args](_.warn("Cancelling full-text re-index task")) Task.log[F, Args](_.warn("Cancelling full-text re-index task"))
private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] =
FtsWork.log[F](_.info("Clearing index data")) ++
(collective match {
case Some(_) =>
FtsWork
.clearIndex(collective)
.recoverWith(
FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
)
case None =>
FtsWork
.clearIndex(None)
.recoverWith(
FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
) ++
FtsWork.log[F](_.info("Running index initialize")) ++
FtsWork.initialize[F]
})
} }

View File

@ -126,6 +126,13 @@ empty (the default), this REST call is disabled. Otherwise, the POST
request will submit a system task that is executed by a joex instance request will submit a system task that is executed by a joex instance
eventually. eventually.
Using this endpoint, the index will be re-created. This is sometimes
necessary, for example if you upgrade SOLR or delete the core to
provide a new one (see
[here](https://lucene.apache.org/solr/guide/8_4/reindexing.html) for
details). Note that a collective can also re-index their data using a
similar endpoint; but that one only deletes their data and doesn't do
a full re-index.
### Bind ### Bind