Merge pull request #857 from eikek/solr-migration

Improve Solr migration
This commit is contained in:
mergify[bot] 2021-06-07 20:10:48 +00:00 committed by GitHub
commit ecc1e44e29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 279 additions and 188 deletions

View File

@ -17,12 +17,25 @@ import org.log4s.getLogger
*/
trait FtsClient[F[_]] {
/** Initialization tasks. This is called exactly once at the very
* beginning when initializing the full-text index and then never
* again (except when re-indexing everything). It may be used to
* setup the database.
/** Initialization tasks. This can be used to setup the fulltext
* search engine. The implementation is expected to keep track of
* run migrations, so that running these is idempotent. For
* example, it may be run on each application start.
*
* Initialization may involve re-indexing all data, therefore it
* must run outside the scope of this client. The migration may
* include a task that applies any work and/or it can return a
* result indicating that after this task a re-index is necessary.
*/
def initialize: List[FtsMigration[F]]
def initialize: F[List[FtsMigration[F]]]
/** A list of initialization tasks that can be run when re-creating
* the index.
*
* This is not run on startup, but only when required, for example
* when re-creating the entire index.
*/
def initializeNew: List[FtsMigration[F]]
/** Run a full-text search. */
def search(q: FtsQuery): F[FtsResult]
@ -116,7 +129,10 @@ object FtsClient {
new FtsClient[F] {
private[this] val logger = Logger.log4s[F](getLogger)
def initialize: List[FtsMigration[F]] =
def initialize: F[List[FtsMigration[F]]] =
Sync[F].pure(Nil)
def initializeNew: List[FtsMigration[F]] =
Nil
def search(q: FtsQuery): F[FtsResult] =

View File

@ -53,6 +53,37 @@ trait JsonCodec {
): Encoder[TextData] =
Encoder(_.fold(ae.apply, ie.apply))
/** Encodes a [[VersionDoc]] as a JSON object with two fields: the id is
  * written as a plain value, the current version is wrapped in an
  * object with the single key `set`.
  *
  * NOTE(review): the `set` wrapper looks like a Solr atomic-update
  * modifier -- confirm against the Solr update API.
  */
implicit def versionDocEncoder: Encoder[VersionDoc] =
  Encoder.instance { doc =>
    val versionUpdate = Json.obj("set" -> doc.currentVersion.asJson)
    Json.obj(
      VersionDoc.Fields.id.name             -> doc.id.asJson,
      VersionDoc.Fields.currentVersion.name -> versionUpdate
    )
  }
/** Decodes a single [[VersionDoc]] from an object carrying the id field
  * (as a string) and the current-version field (as an int). Decoding
  * fails with the first field that cannot be read.
  */
implicit def decoderVersionDoc: Decoder[VersionDoc] =
  Decoder.instance { cursor =>
    cursor.get[String](VersionDoc.Fields.id.name).flatMap { docId =>
      cursor
        .get[Int](VersionDoc.Fields.currentVersion.name)
        .map(current => VersionDoc(docId, current))
    }
  }
/** Decodes a query response into an optional [[VersionDoc]]: reads the
  * list at `response.docs` and yields its first element, or `None` when
  * that list is empty.
  */
implicit def versionDocDecoder: Decoder[Option[VersionDoc]] =
  Decoder.instance { cursor =>
    val docs = cursor.downField("response").get[List[VersionDoc]]("docs")
    docs.map(_.headOption)
  }
implicit def docIdResultsDecoder: Decoder[DocIdResult] =
new Decoder[DocIdResult] {
final def apply(c: HCursor): Decoder.Result[DocIdResult] =

View File

@ -17,8 +17,11 @@ final class SolrFtsClient[F[_]: Effect](
solrQuery: SolrQuery[F]
) extends FtsClient[F] {
def initialize: List[FtsMigration[F]] =
solrSetup.setupSchema
def initialize: F[List[FtsMigration[F]]] =
solrSetup.remainingSetup.map(_.map(_.value))
def initializeNew: List[FtsMigration[F]] =
solrSetup.setupSchema.map(_.value)
def search(q: FtsQuery): F[FtsResult] =
solrQuery.query(q)

View File

@ -0,0 +1,73 @@
package docspell.ftssolr
import cats.implicits._
import cats.{Applicative, Functor}
import docspell.common._
import docspell.ftsclient.FtsMigration
/** A single Solr setup step: the generic [[FtsMigration]] together with
  * a flag telling whether the step is a data-only step.
  *
  * @param value the migration handed on to the fulltext client
  * @param dataChangeOnly `true` for steps created via `indexAll` /
  *   `reIndexAll`, `false` for steps created from an `F[Unit]` task
  */
final case class SolrMigration[F[_]](value: FtsMigration[F], dataChangeOnly: Boolean) {

  /** The negation of `dataChangeOnly`. */
  def isSchemaChange: Boolean = !dataChangeOnly
}

object SolrMigration {
  private val solrEngine = Ident.unsafe("solr")

  /** A step that issues a delete with the query `*:*` and a
    * `commitWithin` of 0 through the given updater.
    */
  def deleteData[F[_]: Functor](
      version: Int,
      solrUpdate: SolrUpdate[F]
  ): SolrMigration[F] =
    apply(version, "Delete all data", solrUpdate.delete("*:*", Some(0)))

  /** A step that stores the given version document. It always uses
    * `Int.MaxValue` as its version number.
    */
  def writeVersion[F[_]: Functor](
      solrUpdate: SolrUpdate[F],
      doc: VersionDoc
  ): SolrMigration[F] =
    apply(
      Int.MaxValue,
      s"Write current version: ${doc.currentVersion}",
      solrUpdate.updateVersionDoc(doc)
    )

  /** A data-only step that performs no work itself and yields
    * `FtsMigration.Result.reIndexAll`.
    */
  def reIndexAll[F[_]: Applicative](
      versionNumber: Int,
      description: String
  ): SolrMigration[F] =
    dataOnly(versionNumber, description, FtsMigration.Result.reIndexAll.pure[F])

  /** A data-only step that performs no work itself and yields
    * `FtsMigration.Result.indexAll`.
    */
  def indexAll[F[_]: Applicative](
      versionNumber: Int,
      description: String
  ): SolrMigration[F] =
    dataOnly(versionNumber, description, FtsMigration.Result.indexAll.pure[F])

  /** A non-data-only step that runs `task` and then reports
    * `FtsMigration.Result.workDone`.
    */
  def apply[F[_]: Functor](
      version: Int,
      description: String,
      task: F[Unit]
  ): SolrMigration[F] =
    SolrMigration(
      FtsMigration(
        version,
        solrEngine,
        description,
        task.map(_ => FtsMigration.Result.workDone)
      ),
      dataChangeOnly = false
    )

  /** Shared construction for the data-only steps (`indexAll`, `reIndexAll`). */
  private def dataOnly[F[_]](
      version: Int,
      description: String,
      result: F[FtsMigration.Result]
  ): SolrMigration[F] =
    SolrMigration(
      FtsMigration(version, solrEngine, description, result),
      dataChangeOnly = true
    )
}

View File

@ -17,6 +17,8 @@ trait SolrQuery[F[_]] {
def query(q: QueryData): F[FtsResult]
def query(q: FtsQuery): F[FtsResult]
def findVersionDoc(id: String): F[Option[VersionDoc]]
}
object SolrQuery {
@ -54,6 +56,16 @@ object SolrQuery {
)
query(fq)
}
/** Looks up the version document with the given id.
  *
  * POSTs a query for `id:<id>` and decodes the response body as
  * `Option[VersionDoc]`.
  *
  * @param id the id of the version document to find
  * @return the decoded document, if the response contained one
  */
def findVersionDoc(id: String): F[Option[VersionDoc]] = {
  // Take the field definitions from VersionDoc so the requested fields
  // cannot drift away from the names the VersionDoc codec reads.
  val fields = List(
    VersionDoc.Fields.id,
    VersionDoc.Fields.currentVersion
  )
  // NOTE(review): the positional `1, 0` are presumably limit and
  // offset -- confirm against QueryData.
  // Named `versionQuery` so it does not shadow the `query` methods.
  val versionQuery = QueryData(s"id:$id", "", 1, 0, fields, Map.empty)
  val req          = Method.POST(versionQuery.asJson, url)
  client.expect[Option[VersionDoc]](req)
}
}
}
}

View File

@ -4,7 +4,6 @@ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.ftsclient.FtsMigration
import _root_.io.circe._
import _root_.io.circe.generic.semiauto._
@ -16,12 +15,14 @@ import org.http4s.client.dsl.Http4sClientDsl
trait SolrSetup[F[_]] {
def setupSchema: List[FtsMigration[F]]
def setupSchema: List[SolrMigration[F]]
def remainingSetup: F[List[SolrMigration[F]]]
}
object SolrSetup {
private val solrEngine = Ident.unsafe("solr")
private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
val dsl = new Http4sClientDsl[F] {}
@ -32,62 +33,76 @@ object SolrSetup {
val url = (Uri.unsafeFromString(cfg.url.asString) / "schema")
.withQueryParam("commitWithin", cfg.commitWithin.toString)
def setupSchema: List[FtsMigration[F]] =
/** Determines the setup steps that still have to run.
  *
  * Queries for the stored version document. Without one, the complete
  * `setupSchema` list is returned. Otherwise all migrations with a
  * version greater than the stored one are returned, followed by a step
  * writing the highest known version; if nothing is pending the result
  * is empty.
  */
def remainingSetup: F[List[SolrMigration[F]]] =
  SolrQuery(cfg, client).findVersionDoc(versionDocId).map {
    case Some(stored) =>
      val latest  = VersionDoc(versionDocId, allMigrations.map(_.value.version).max)
      val updater = SolrUpdate(cfg, client)
      val pending = allMigrations.filter(_.value.version > stored.currentVersion)
      if (pending.nonEmpty) pending :+ SolrMigration.writeVersion(updater, latest)
      else pending
    case None =>
      setupSchema
  }
/** The list of steps for a complete setup: delete all data, then every
  * migration flagged as schema change, then index all data and finally
  * write the highest known version.
  */
def setupSchema: List[SolrMigration[F]] = {
  val latest        = VersionDoc(versionDocId, allMigrations.map(_.value.version).max)
  val updater       = SolrUpdate(cfg, client)
  val storeVersion  = SolrMigration.writeVersion(updater, latest)
  val wipe          = SolrMigration.deleteData(0, updater)
  val fillIndex     = SolrMigration.indexAll[F](Int.MaxValue, "Index all data")
  val schemaChanges = allMigrations.filter(_.isSchemaChange)

  wipe :: schemaChanges ::: List(fillIndex, storeVersion)
}
private def allMigrations: List[SolrMigration[F]] =
List(
FtsMigration[F](
SolrMigration[F](
1,
solrEngine,
"Initialize",
setupCoreSchema.map(_ => FtsMigration.Result.workDone)
setupCoreSchema
),
FtsMigration[F](
3,
solrEngine,
SolrMigration[F](
2,
"Add folder field",
addFolderField.map(_ => FtsMigration.Result.workDone)
addFolderField
),
FtsMigration[F](
SolrMigration.indexAll(3, "Index all from database after adding folder field"),
SolrMigration[F](
4,
solrEngine,
"Index all from database",
FtsMigration.Result.indexAll.pure[F]
),
FtsMigration[F](
5,
solrEngine,
"Add content_fr field",
addContentField(Language.French).map(_ => FtsMigration.Result.workDone)
addContentField(Language.French)
),
FtsMigration[F](
SolrMigration
.indexAll(5, "Index all from database after adding french content field"),
SolrMigration[F](
6,
solrEngine,
"Index all from database",
FtsMigration.Result.indexAll.pure[F]
),
FtsMigration[F](
7,
solrEngine,
"Add content_it field",
addContentField(Language.Italian).map(_ => FtsMigration.Result.reIndexAll)
addContentField(Language.Italian)
),
FtsMigration[F](
SolrMigration.reIndexAll(7, "Re-Index after adding italian content field"),
SolrMigration[F](
8,
solrEngine,
"Add content_es field",
addContentField(Language.Spanish).map(_ => FtsMigration.Result.reIndexAll)
addContentField(Language.Spanish)
),
FtsMigration[F](
9,
solrEngine,
"Add more content fields",
addMoreContentFields.map(_ => FtsMigration.Result.reIndexAll)
),
FtsMigration[F](
SolrMigration.reIndexAll(9, "Re-Index after adding spanish content field"),
SolrMigration[F](
10,
solrEngine,
"Add more content fields",
addMoreContentFields
),
SolrMigration.reIndexAll(11, "Re-Index after adding more content fields"),
SolrMigration[F](
12,
"Add latvian content field",
addContentField(Language.Latvian).map(_ => FtsMigration.Result.reIndexAll)
)
addContentField(Language.Latvian)
),
SolrMigration.reIndexAll(13, "Re-Index after adding latvian content field")
)
def addFolderField: F[Unit] =

View File

@ -23,6 +23,8 @@ trait SolrUpdate[F[_]] {
def updateFolder(itemId: Ident, collective: Ident, folder: Option[Ident]): F[Unit]
def updateVersionDoc(doc: VersionDoc): F[Unit]
def delete(q: String, commitWithin: Option[Int]): F[Unit]
}
@ -48,6 +50,11 @@ object SolrUpdate {
client.expect[Unit](req)
}
/** POSTs the given version document, wrapped in a one-element list, to
  * the update url.
  */
def updateVersionDoc(doc: VersionDoc): F[Unit] = {
  val payload = List(doc).asJson
  client.expect[Unit](Method.POST(payload, url))
}
def updateFolder(
itemId: Ident,
collective: Ident,

View File

@ -0,0 +1,11 @@
package docspell.ftssolr
/** A document holding a version number.
  *
  * @param id the document id
  * @param currentVersion the version number stored in the document
  */
final case class VersionDoc(id: String, currentVersion: Int)
object VersionDoc {
/** The field definitions used for reading and writing a [[VersionDoc]]. */
object Fields {
/** The field holding the document id. */
val id = Field("id")
/** The field holding the version number. */
val currentVersion = Field("current_version_i")
}
}

View File

@ -11,20 +11,23 @@ import docspell.joex.scheduler.Context
import docspell.store.queries.{QAttachment, QItem}
object FtsWork {
import syntax._
def apply[F[_]](f: FtsContext[F] => F[Unit]): FtsWork[F] =
Kleisli(f)
/** Runs all migration tasks unconditionally and inserts all data as last step. */
/** Runs migration tasks to re-create the index. */
def reInitializeTasks[F[_]: Monad]: FtsWork[F] =
FtsWork { ctx =>
val migrations =
ctx.fts.initialize.map(fm => fm.changeResult(_ => FtsMigration.Result.workDone))
val migrations = ctx.fts.initializeNew
NonEmptyList.fromList(migrations) match {
case Some(nel) =>
nel
.map(fm => from[F](fm.task))
.append(insertAll[F](None))
.map(fm =>
log[F](_.debug(s"Apply (${fm.engine.id}): ${fm.description}")) ++ from[F](
fm.task
)
)
.reduce(semigroup[F])
.run(ctx)
case None =>
@ -32,8 +35,6 @@ object FtsWork {
}
}
/**
*/
def from[F[_]: FlatMap: Applicative](t: F[FtsMigration.Result]): FtsWork[F] =
Kleisli.liftF(t).flatMap(transformResult[F])
@ -65,16 +66,20 @@ object FtsWork {
def log[F[_]](f: Logger[F] => F[Unit]): FtsWork[F] =
FtsWork(ctx => f(ctx.logger))
def clearIndex[F[_]](coll: Option[Ident]): FtsWork[F] =
def clearIndex[F[_]: FlatMap](coll: Option[Ident]): FtsWork[F] =
coll match {
case Some(cid) =>
FtsWork(ctx => ctx.fts.clear(ctx.logger, cid))
log[F](_.debug(s"Clearing index data for collective '${cid.id}'")) ++ FtsWork(
ctx => ctx.fts.clear(ctx.logger, cid)
)
case None =>
FtsWork(ctx => ctx.fts.clearAll(ctx.logger))
log[F](_.debug("Clearing all index data!")) ++ FtsWork(ctx =>
ctx.fts.clearAll(ctx.logger)
)
}
def insertAll[F[_]: FlatMap](coll: Option[Ident]): FtsWork[F] =
FtsWork
log[F](_.info("Inserting all data to index")) ++ FtsWork
.all(
FtsWork(ctx =>
ctx.fts.indexData(

View File

@ -1,6 +1,6 @@
package docspell.joex.fts
import cats.data.{Kleisli, OptionT}
import cats.data.Kleisli
import cats.effect._
import cats.implicits._
import cats.{Applicative, FlatMap, Traverse}
@ -8,13 +8,13 @@ import cats.{Applicative, FlatMap, Traverse}
import docspell.common._
import docspell.ftsclient._
import docspell.joex.Config
import docspell.store.records.RFtsMigration
import docspell.store.{AddResult, Store}
import docspell.store.Store
/** Migrating the index from the previous version to this version.
*
* The sql database stores the outcome of a migration task. If this
* task has already been applied, it is skipped.
* The migration asks the fulltext search client for a list of
* migration tasks to run. It may be empty when there is no migration
* required.
*/
case class Migration[F[_]](
version: Int,
@ -35,41 +35,15 @@ object Migration {
logger: Logger[F]
): Kleisli[F, List[Migration[F]], Unit] = {
val ctx = FtsContext(cfg, store, fts, logger)
Kleisli(migs => Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ()))
Kleisli { migs =>
if (migs.isEmpty) logger.info("No fulltext search migrations to run.")
else Traverse[List].sequence(migs.map(applySingle[F](ctx))).map(_ => ())
}
}
def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] = {
val insertRecord: F[Option[RFtsMigration]] =
for {
rec <- RFtsMigration.create(m.version, m.engine, m.description)
res <- ctx.store.add(
RFtsMigration.insert(rec),
RFtsMigration.exists(m.version, m.engine)
)
ret <- res match {
case AddResult.Success => rec.some.pure[F]
case AddResult.EntityExists(_) => None.pure[F]
case AddResult.Failure(ex) => Effect[F].raiseError(ex)
}
} yield ret
(for {
_ <- OptionT.liftF(ctx.logger.info(s"Apply ${m.version}/${m.description}"))
rec <- OptionT(insertRecord)
res <- OptionT.liftF(m.task.run(ctx).attempt)
ret <- OptionT.liftF(res match {
case Right(()) => ().pure[F]
case Left(ex) =>
ctx.logger.error(ex)(
s"Applying index migration ${m.version}/${m.description} failed"
) *>
ctx.store.transact(RFtsMigration.deleteById(rec.id)) *> Effect[F]
.raiseError[Unit](
ex
)
})
} yield ret).getOrElseF(
ctx.logger.info(s"Migration ${m.version}/${m.description} already applied.")
)
}
/** Logs the version and description of the given migration at info
  * level and then runs its task with the supplied context.
  */
def applySingle[F[_]: Effect](ctx: FtsContext[F])(m: Migration[F]): F[Unit] = {
  val announce = ctx.logger.info(s"Apply ${m.version}/${m.description}")
  announce.flatMap(_ => m.task.run(ctx))
}
}

View File

@ -20,8 +20,10 @@ object MigrationTask {
.log[F, Unit](_.info(s"Running full-text-index migrations now"))
.flatMap(_ =>
Task(ctx =>
Migration[F](cfg, fts, ctx.store, ctx.logger)
.run(migrationTasks[F](fts))
for {
migs <- migrationTasks[F](fts)
res <- Migration[F](cfg, fts, ctx.store, ctx.logger).run(migs)
} yield res
)
)
@ -44,7 +46,7 @@ object MigrationTask {
Some(DocspellSystem.migrationTaskTracker)
)
def migrationTasks[F[_]: Effect](fts: FtsClient[F]): List[Migration[F]] =
fts.initialize.map(fm => Migration.from(fm))
/** Asks the fulltext client for its initialization migrations and
  * converts each of them with `Migration.from`.
  */
def migrationTasks[F[_]: Effect](fts: FtsClient[F]): F[List[Migration[F]]] =
  for {
    ftsMigrations <- fts.initialize
  } yield ftsMigrations.map(fm => Migration.from(fm))
}

View File

@ -40,12 +40,7 @@ object ReIndexTask {
FtsWork.insertAll[F](collective)
case None =>
FtsWork
.clearIndex(None)
.recoverWith(
FtsWork.log[F](_.info("Clearing data failed. Continue re-indexing."))
) ++
FtsWork.log[F](_.info("Running index initialize")) ++
FtsWork.log[F](_.info("Running re-create index")) ++
FtsWork.reInitializeTasks[F]
})
}

View File

@ -0,0 +1 @@
DROP TABLE "fts_migration";

View File

@ -0,0 +1 @@
DROP TABLE `fts_migration`;

View File

@ -0,0 +1 @@
DROP TABLE "fts_migration";

View File

@ -1,68 +0,0 @@
package docspell.store.records
import cats.data.NonEmptyList
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import doobie._
import doobie.implicits._
/** A row of the `fts_migration` table: one record per migration
  * version and fulltext engine.
  */
final case class RFtsMigration(
    id: Ident,
    version: Int,
    ftsEngine: Ident,
    description: String,
    created: Timestamp
)

object RFtsMigration {

  /** Builds a new record with a random id and the current timestamp. */
  def create[F[_]: Sync](
      version: Int,
      ftsEngine: Ident,
      description: String
  ): F[RFtsMigration] =
    Ident.randomId[F].flatMap { freshId =>
      Timestamp.current[F].map { createdAt =>
        RFtsMigration(freshId, version, ftsEngine, description, createdAt)
      }
    }

  /** Table and column definitions for `fts_migration`. */
  final case class Table(alias: Option[String]) extends TableDef {
    val tableName = "fts_migration"

    val id          = Column[Ident]("id", this)
    val version     = Column[Int]("version", this)
    val ftsEngine   = Column[Ident]("fts_engine", this)
    val description = Column[String]("description", this)
    val created     = Column[Timestamp]("created", this)

    val all = NonEmptyList.of[Column[_]](id, version, ftsEngine, description, created)
  }

  /** The table without an alias. */
  val T = Table(None)

  /** The table using the given alias. */
  def as(alias: String): Table =
    Table(Some(alias))

  /** Inserts the given record; runs with the no-op log handler. */
  def insert(v: RFtsMigration): ConnectionIO[Int] = {
    val values = fr"${v.id},${v.version},${v.ftsEngine},${v.description},${v.created}"
    DML
      .insertFragment(T, T.all, Seq(values))
      .updateWithLogHandler(LogHandler.nop)
      .run
  }

  /** Whether at least one record exists for the given version and engine. */
  def exists(vers: Int, engine: Ident): ConnectionIO[Boolean] = {
    val countQuery =
      run(select(count(T.id)), from(T), T.version === vers && T.ftsEngine === engine)
    countQuery.query[Int].unique.map(n => n > 0)
  }

  /** Deletes the record with the given id. */
  def deleteById(rId: Ident): ConnectionIO[Int] =
    DML.delete(T, T.id === rId)
}

View File

@ -100,7 +100,7 @@ itemsBar texts flags settings model =
defaultMenuBar : Texts -> Flags -> UiSettings -> Model -> Html Msg
defaultMenuBar texts _ settings model =
defaultMenuBar texts flags settings model =
let
btnStyle =
S.secondaryBasicButton ++ " text-sm"
@ -127,12 +127,20 @@ defaultMenuBar texts _ settings model =
, Maybe.map value searchInput
|> Maybe.withDefault (value "")
, class (String.replace "rounded" "" S.textInput)
, class "py-1 text-sm border-r-0 rounded-l"
, class "py-2 text-sm"
, if flags.config.fullTextSearchEnabled then
class " border-r-0 rounded-l"
else
class "border rounded"
]
[]
, a
[ class S.secondaryBasicButtonPlain
, class "text-sm px-4 py-2 border rounded-r"
, classList
[ ( "hidden", not flags.config.fullTextSearchEnabled )
]
, href "#"
, onClick ToggleSearchType
]

View File

@ -191,13 +191,17 @@ it is empty (the default), this call is disabled (all admin routes).
Otherwise, the POST request will submit a system task that is executed
by a joex instance eventually.
Using this endpoint, the index will be re-created. This is sometimes
necessary, for example if you upgrade SOLR or delete the core to
provide a new one (see
Using this endpoint, the entire index (including the schema) will be
re-created. This is sometimes necessary, for example if you upgrade
SOLR or delete the core to provide a new one (see
[here](https://solr.apache.org/guide/8_4/reindexing.html) for
details). Note that a collective can also re-index their data using a
similar endpoint; but this is only deleting their data and doesn't do
a full re-index.
details). Another way is to restart docspell (while clearing the
index). If docspell detects an empty index at startup, it will submit
a task to build the index automatically.
Note that a collective can also re-index their data using a similar
endpoint; but this is only deleting their data and doesn't do a full
re-index.
The solr index doesn't contain any new information, it can be
regenerated any time using the above REST call. Thus it doesn't need