mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-05 02:49:32 +00:00
Fix solr setup by adding a text_he field
This field is used for Hebrew language. Solr doesn't support it out of the box. The new field type is just a very basic field using the standard tokenizer and lowercase filter. It is very likely not providing good results. Hebrew is really difficult and it requires at least installing plugins for solr - this is out of scope for docspell. Users can setup their solr however they like and run a re-index afterwards.
This commit is contained in:
parent
3520a2ec26
commit
637f11d0f6
@ -18,6 +18,7 @@ import org.http4s._
|
||||
import org.http4s.circe._
|
||||
import org.http4s.client.Client
|
||||
import org.http4s.client.dsl.Http4sClientDsl
|
||||
import org.log4s.getLogger
|
||||
|
||||
trait SolrSetup[F[_]] {
|
||||
|
||||
@ -29,6 +30,7 @@ trait SolrSetup[F[_]] {
|
||||
|
||||
object SolrSetup {
|
||||
private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
|
||||
private[this] val logger = getLogger
|
||||
|
||||
def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
|
||||
val dsl = new Http4sClientDsl[F] {}
|
||||
@ -117,10 +119,15 @@ object SolrSetup {
|
||||
SolrMigration.reIndexAll(15, "Re-Index after adding japanese content field"),
|
||||
SolrMigration[F](
|
||||
16,
|
||||
"Add new field type for hebrew content",
|
||||
addFieldType(AddFieldType.textHe)
|
||||
),
|
||||
SolrMigration[F](
|
||||
17,
|
||||
"Add hebrew content field",
|
||||
addContentField(Language.Hebrew)
|
||||
),
|
||||
SolrMigration.reIndexAll(17, "Re-Index after adding hebrew content field")
|
||||
SolrMigration.reIndexAll(18, "Re-Index after adding hebrew content field")
|
||||
)
|
||||
|
||||
def addFolderField: F[Unit] =
|
||||
@ -194,6 +201,15 @@ object SolrSetup {
|
||||
run(DeleteField.command(DeleteField(field))).attempt *>
|
||||
run(AddField.command(AddField.textLang(field, lang)))
|
||||
}
|
||||
|
||||
private def addFieldType(ft: AddFieldType): F[Unit] =
|
||||
run(AddFieldType.command(ft)).attempt.flatMap {
|
||||
case Right(_) => ().pure[F]
|
||||
case Left(ex) =>
|
||||
Async[F].delay(
|
||||
logger.warn(s"Adding new field type '$ft' failed: ${ex.getMessage()}")
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -234,4 +250,52 @@ object SolrSetup {
|
||||
def command(body: DeleteField): Json =
|
||||
Map("delete-field" -> body.asJson).asJson
|
||||
}
|
||||
|
||||
final case class AddFieldType(
|
||||
name: String,
|
||||
`class`: String,
|
||||
analyzer: AddFieldType.Analyzer
|
||||
)
|
||||
object AddFieldType {
|
||||
|
||||
val textHe = AddFieldType(
|
||||
"text_he",
|
||||
"solr.TextField",
|
||||
Analyzer(
|
||||
Tokenizer("solr.StandardTokenizerFactory", Map.empty),
|
||||
List(
|
||||
Filter("solr.LowerCaseFilterFactory", Map.empty)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
final case class Filter(`class`: String, attr: Map[String, String])
|
||||
final case class Tokenizer(`class`: String, attr: Map[String, String])
|
||||
final case class Analyzer(tokenizer: Tokenizer, filter: List[Filter])
|
||||
|
||||
object Filter {
|
||||
implicit val jsonEncoder: Encoder[Filter] =
|
||||
Encoder.encodeJson.contramap { filter =>
|
||||
val m = filter.attr.updated("class", filter.`class`)
|
||||
m.asJson
|
||||
}
|
||||
}
|
||||
object Tokenizer {
|
||||
implicit val jsonEncoder: Encoder[Tokenizer] =
|
||||
Encoder.encodeJson.contramap { tokenizer =>
|
||||
val m = tokenizer.attr.updated("class", tokenizer.`class`)
|
||||
m.asJson
|
||||
}
|
||||
}
|
||||
object Analyzer {
|
||||
implicit val jsonEncoder: Encoder[Analyzer] =
|
||||
deriveEncoder[Analyzer]
|
||||
}
|
||||
|
||||
def command(body: AddFieldType): Json =
|
||||
Map("add-field-type" -> body.asJson).asJson
|
||||
|
||||
implicit val jsonEncoder: Encoder[AddFieldType] =
|
||||
deriveEncoder[AddFieldType]
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user