From 637f11d0f667204416128f7e741b9a7ffa4be8bb Mon Sep 17 00:00:00 2001
From: eikek <eike.kettner@posteo.de>
Date: Sat, 28 Aug 2021 00:10:36 +0200
Subject: [PATCH] Fix solr setup by adding a text_he field

This field is used for Hebrew language. Solr doesn't support it out of
the box. The new field type is just a very basic field using the
standard tokenizer and lowercase filter. It is very likely not
providing good results. Hebrew is really difficult and it requires at
least installing plugins for solr - this is out of scope for docspell.
Users can setup their solr however they like and run a re-index
afterwards.
---
 .../scala/docspell/ftssolr/SolrSetup.scala    | 66 ++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala
index 8ebd323b..02fe1f15 100644
--- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala
+++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrSetup.scala
@@ -18,6 +18,7 @@ import org.http4s._
 import org.http4s.circe._
 import org.http4s.client.Client
 import org.http4s.client.dsl.Http4sClientDsl
+import org.log4s.getLogger
 
 trait SolrSetup[F[_]] {
 
@@ -29,6 +30,7 @@ trait SolrSetup[F[_]] {
 
 object SolrSetup {
   private val versionDocId = "6d8f09f4-8d7e-4bc9-98b8-7c89223b36dd"
+  private[this] val logger = getLogger
 
   def apply[F[_]: Async](cfg: SolrConfig, client: Client[F]): SolrSetup[F] = {
     val dsl = new Http4sClientDsl[F] {}
@@ -117,10 +119,15 @@ object SolrSetup {
           SolrMigration.reIndexAll(15, "Re-Index after adding japanese content field"),
           SolrMigration[F](
             16,
+            "Add new field type for hebrew content",
+            addFieldType(AddFieldType.textHe)
+          ),
+          SolrMigration[F](
+            17,
             "Add hebrew content field",
             addContentField(Language.Hebrew)
           ),
-          SolrMigration.reIndexAll(17, "Re-Index after adding hebrew content field")
+          SolrMigration.reIndexAll(18, "Re-Index after adding hebrew content field")
         )
 
       def addFolderField: F[Unit] =
@@ -194,6 +201,15 @@ object SolrSetup {
             run(DeleteField.command(DeleteField(field))).attempt *>
               run(AddField.command(AddField.textLang(field, lang)))
         }
+
+      private def addFieldType(ft: AddFieldType): F[Unit] =
+        run(AddFieldType.command(ft)).attempt.flatMap {
+          case Right(_) => ().pure[F]
+          case Left(ex) =>
+            Async[F].delay(
+              logger.warn(s"Adding new field type '$ft' failed: ${ex.getMessage()}")
+            )
+        }
     }
   }
 
@@ -234,4 +250,52 @@ object SolrSetup {
     def command(body: DeleteField): Json =
       Map("delete-field" -> body.asJson).asJson
   }
+
+  final case class AddFieldType(
+      name: String,
+      `class`: String,
+      analyzer: AddFieldType.Analyzer
+  )
+  object AddFieldType {
+
+    val textHe = AddFieldType(
+      "text_he",
+      "solr.TextField",
+      Analyzer(
+        Tokenizer("solr.StandardTokenizerFactory", Map.empty),
+        List(
+          Filter("solr.LowerCaseFilterFactory", Map.empty)
+        )
+      )
+    )
+
+    final case class Filter(`class`: String, attr: Map[String, String])
+    final case class Tokenizer(`class`: String, attr: Map[String, String])
+    final case class Analyzer(tokenizer: Tokenizer, filter: List[Filter])
+
+    object Filter {
+      implicit val jsonEncoder: Encoder[Filter] =
+        Encoder.encodeJson.contramap { filter =>
+          val m = filter.attr.updated("class", filter.`class`)
+          m.asJson
+        }
+    }
+    object Tokenizer {
+      implicit val jsonEncoder: Encoder[Tokenizer] =
+        Encoder.encodeJson.contramap { tokenizer =>
+          val m = tokenizer.attr.updated("class", tokenizer.`class`)
+          m.asJson
+        }
+    }
+    object Analyzer {
+      implicit val jsonEncoder: Encoder[Analyzer] =
+        deriveEncoder[Analyzer]
+    }
+
+    def command(body: AddFieldType): Json =
+      Map("add-field-type" -> body.asJson).asJson
+
+    implicit val jsonEncoder: Encoder[AddFieldType] =
+      deriveEncoder[AddFieldType]
+  }
 }