From dc8f1a038715bb5b93cdacd0bb7012ba4db6ce5b Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 25 Jun 2020 22:55:23 +0200 Subject: [PATCH] Fix global re-index task to re-create the schema Otherwise new instances could not be re-indexed. --- .../docspell/ftssolr/SolrFtsClient.scala | 8 +++--- .../scala/docspell/ftssolr/SolrUpdate.scala | 14 +++++++--- .../scala/docspell/joex/fts/FtsWork.scala | 11 +++++++- .../scala/docspell/joex/fts/ReIndexTask.scala | 27 ++++++++++++++++--- modules/microsite/docs/doc/configure.md | 7 +++++ 5 files changed, 56 insertions(+), 11 deletions(-) diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala index a950f1d4..4bdf3fda 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala @@ -44,19 +44,19 @@ final class SolrFtsClient[F[_]: Effect]( def removeItem(logger: Logger[F], itemId: Ident): F[Unit] = logger.debug(s"Remove item '${itemId.id}' from index") *> - solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}") + solrUpdate.delete(s"${Field.itemId.name}:${itemId.id}", None) def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] = logger.debug(s"Remove attachment '${attachId.id}' from index") *> - solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}") + solrUpdate.delete(s"${Field.attachmentId.name}:${attachId.id}", None) def clearAll(logger: Logger[F]): F[Unit] = logger.info("Deleting complete full-text index!") *> - solrUpdate.delete("*:*") + solrUpdate.delete("*:*", Option(0)) def clear(logger: Logger[F], collective: Ident): F[Unit] = logger.info(s"Deleting full-text index for collective ${collective.id}") *> - solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}") + solrUpdate.delete(s"${Field.collectiveId.name}:${collective.id}", Option(0)) } object SolrFtsClient { diff --git 
a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala index 7740bf00..fcfe1151 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala @@ -17,7 +17,7 @@ trait SolrUpdate[F[_]] { def update(tds: List[TextData]): F[Unit] - def delete(q: String): F[Unit] + def delete(q: String, commitWithin: Option[Int]): F[Unit] } object SolrUpdate { @@ -42,8 +42,16 @@ object SolrUpdate { client.expect[Unit](req) } - def delete(q: String): F[Unit] = { - val req = Method.POST(Delete(q).asJson, url) + def delete(q: String, commitWithin: Option[Int]): F[Unit] = { + val uri = commitWithin match { + case Some(n) => + if (n <= 0) + url.removeQueryParam("commitWithin").withQueryParam("commit", "true") + else url.withQueryParam("commitWithin", n.toString) + case None => + url + } + val req = Method.POST(Delete(q).asJson, uri) client.expect[Unit](req) } diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala index b90299f8..5e861b8d 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala @@ -2,7 +2,8 @@ package docspell.joex.fts import cats.effect._ import cats.data.{Kleisli, NonEmptyList} -import cats.{FlatMap, Semigroup} +import cats.{ApplicativeError, FlatMap, Semigroup} +import cats.implicits._ import docspell.common._ import docspell.ftsclient._ import docspell.joex.scheduler.Context @@ -24,6 +25,9 @@ object FtsWork { // some tasks + def log[F[_]](f: Logger[F] => F[Unit]): FtsWork[F] = + FtsWork(ctx => f(ctx.logger)) + def initialize[F[_]]: FtsWork[F] = FtsWork(_.fts.initialize) @@ -74,6 +78,11 @@ object FtsWork { def ++(mn: FtsWork[F])(implicit ev: FlatMap[F]): FtsWork[F] = all(mt, mn) + def recoverWith( + other: FtsWork[F] + )(implicit ev:
ApplicativeError[F, Throwable]): FtsWork[F] = + Kleisli(ctx => mt.run(ctx).handleErrorWith(_ => other.run(ctx))) + def forContext( cfg: Config.FullTextSearch, fts: FtsClient[F] diff --git a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala index 65654800..edb1cc0d 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/ReIndexTask.scala @@ -21,13 +21,34 @@ object ReIndexTask { .log[F, Args](_.info(s"Running full-text re-index now")) .flatMap(_ => Task(ctx => - (FtsWork.clearIndex(ctx.args.collective) ++ FtsWork.insertAll[F]( - ctx.args.collective - )).forContext(cfg, fts).run(ctx) + (clearData[F](ctx.args.collective) ++ + FtsWork.log[F](_.info("Inserting data from database")) ++ + FtsWork.insertAll[F]( + ctx.args.collective + )).forContext(cfg, fts).run(ctx) ) ) def onCancel[F[_]: Sync]: Task[F, Args, Unit] = Task.log[F, Args](_.warn("Cancelling full-text re-index task")) + private def clearData[F[_]: ConcurrentEffect](collective: Option[Ident]): FtsWork[F] = + FtsWork.log[F](_.info("Clearing index data")) ++ + (collective match { + case Some(_) => + FtsWork + .clearIndex(collective) + .recoverWith( + FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) + ) + + case None => + FtsWork + .clearIndex(None) + .recoverWith( + FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing.")) + ) ++ + FtsWork.log[F](_.info("Running index initialize")) ++ + FtsWork.initialize[F] + }) } diff --git a/modules/microsite/docs/doc/configure.md b/modules/microsite/docs/doc/configure.md index 80f90b00..1f170670 100644 --- a/modules/microsite/docs/doc/configure.md +++ b/modules/microsite/docs/doc/configure.md @@ -126,6 +126,13 @@ empty (the default), this REST call is disabled. Otherwise, the POST request will submit a system task that is executed by a joex instance eventually.
+Using this endpoint, the index will be re-created. This is sometimes +necessary, for example if you upgrade SOLR or delete the core to +provide a new one (see +[here](https://lucene.apache.org/solr/guide/8_4/reindexing.html) for +details). Note that a collective can also re-index their data using a +similar endpoint; but this is only deleting their data and doesn't do +a full re-index. ### Bind