mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-10-31 17:50:11 +00:00 
			
		
		
		
	First basic working solr search
This commit is contained in:
		| @@ -20,7 +20,6 @@ object OFulltext { | ||||
|   // then run a query | ||||
|   // check if supported by mariadb, postgres and h2. seems like it is supported everywhere | ||||
|  | ||||
|  | ||||
|   def apply[F[_]: Effect]( | ||||
|       itemSearch: OItemSearch[F], | ||||
|       fts: FtsClient[F] | ||||
| @@ -43,21 +42,21 @@ object OFulltext { | ||||
|           .compile | ||||
|           .toVector | ||||
|  | ||||
|  | ||||
|       private def findItemsFts[A]( | ||||
|           q: Query, | ||||
|           ftsQ: String, | ||||
|           batch: Batch, | ||||
|           search: (Query, Batch) => F[Vector[A]] | ||||
|       ): Stream[F, A] = { | ||||
|         val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset) | ||||
|         val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset, Nil) | ||||
|  | ||||
|         val qres = | ||||
|           for { | ||||
|             items <- | ||||
|               fts | ||||
|                 .searchBasic(fq) | ||||
|                 .map(_.item) | ||||
|                 .flatMap(r => Stream.emits(r.results)) | ||||
|                 .map(_.itemId) | ||||
|                 .compile | ||||
|                 .toVector | ||||
|                 .map(_.toSet) | ||||
|   | ||||
| @@ -25,6 +25,9 @@ case class Duration(nanos: Long) { | ||||
|  | ||||
|   def formatExact: String = | ||||
|     s"$millis ms" | ||||
|  | ||||
|   override def toString(): String = | ||||
|     s"Duration(${millis}ms)" | ||||
| } | ||||
|  | ||||
| object Duration { | ||||
|   | ||||
| @@ -17,7 +17,7 @@ case class Ident(id: String) { | ||||
|     !isEmpty | ||||
|  | ||||
|   def / (next: Ident): Ident = | ||||
|     new Ident(id + "/" + next.id) | ||||
|     new Ident(id + "." + next.id) | ||||
| } | ||||
|  | ||||
| object Ident { | ||||
|   | ||||
| @@ -1,19 +0,0 @@ | ||||
| package docspell.ftsclient | ||||
|  | ||||
| import cats.data.NonEmptyList | ||||
| import cats.implicits._ | ||||
| import docspell.common._ | ||||
|  | ||||
| import FtsBasicResult.AttachmentMatch | ||||
|  | ||||
| final case class FtsBasicResult(item: Ident, attachments: NonEmptyList[AttachmentMatch]) { | ||||
|  | ||||
|   def score: Double = | ||||
|     attachments.map(_.score).toList.max | ||||
| } | ||||
|  | ||||
| object FtsBasicResult { | ||||
|  | ||||
|   case class AttachmentMatch(id: Ident, score: Double) | ||||
|  | ||||
| } | ||||
| @@ -19,7 +19,7 @@ trait FtsClient[F[_]] { | ||||
|     */ | ||||
|   def initialize: F[Unit] | ||||
|  | ||||
|   def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] | ||||
|   def searchBasic(q: FtsQuery): Stream[F, FtsResult] | ||||
|  | ||||
|   def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] | ||||
| } | ||||
|   | ||||
| @@ -7,4 +7,4 @@ import docspell.common._ | ||||
|   * The query itself is a raw string. Each implementation may | ||||
|   * interpret it according to the system in use. | ||||
|   */ | ||||
| final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int) | ||||
| final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int, items: List[Ident]) | ||||
|   | ||||
| @@ -0,0 +1,28 @@ | ||||
| package docspell.ftsclient | ||||
|  | ||||
| import docspell.common._ | ||||
|  | ||||
| import FtsResult.ItemMatch | ||||
|  | ||||
/** Result of a fulltext search.
  *
  * The Solr JSON decoder in `docspell.ftssolr.JsonCodec` fills the
  * fields as follows:
  *
  * @param qtime     value of `responseHeader.QTime`
  * @param count     value of `response.numFound`
  * @param maxScore  value of `response.maxScore`
  * @param highlight highlighted text snippets per key of the
  *                  `highlighting` section; snippets of all fields
  *                  are flattened into one list
  *                  (NOTE(review): keys are presumably the ids of the
  *                  matched documents -- confirm against Solr output)
  * @param results   the entries of `response.docs`
  */
final case class FtsResult(
    qtime: Duration,
    count: Int,
    maxScore: Double,
    highlight: Map[Ident, List[String]],
    results: List[ItemMatch]
)

object FtsResult {

  /** Tells what kind of indexed document produced a match. */
  sealed trait MatchData

  /** The match stems from an attachment document.
    *
    * @param attachId the id of the attachment
    */
  final case class AttachmentData(attachId: Ident) extends MatchData

  /** The match stems from the item document itself. */
  case object ItemData extends MatchData

  /** A single matching document.
    *
    * @param id           id of the indexed document
    * @param itemId       id of the item the document belongs to
    * @param collectiveId id of the owning collective
    * @param score        the score reported for this match
    * @param data         whether an attachment or the item matched
    */
  final case class ItemMatch(
      id: Ident,
      itemId: Ident,
      collectiveId: Ident,
      score: Double,
      data: MatchData
  )
}
							
								
								
									
										29
									
								
								modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
| package docspell.ftssolr | ||||
|  | ||||
| import io.circe._ | ||||
|  | ||||
/** The name of a field in the search index, wrapped in a value class
  * so that field names are not passed around as bare strings.
  */
final class Field(val name: String) extends AnyVal {

  override def toString(): String = "Field(" + name + ")"
}

object Field {

  /** Wraps the given name into a `Field`. */
  def apply(name: String): Field =
    new Field(name)

  // Fields that are set on every indexed document.
  val discriminator: Field = Field("discriminator")
  val id: Field            = Field("id")
  val itemId: Field        = Field("itemId")
  val collectiveId: Field  = Field("collectiveId")

  // Fields written for attachment documents.
  val attachmentId: Field   = Field("attachmentId")
  val attachmentName: Field = Field("attachmentName")
  val content: Field        = Field("content")

  // Fields written for item documents.
  val itemName: Field  = Field("itemName")
  val itemNotes: Field = Field("itemNotes")

  /** A field is encoded as a plain JSON string holding its name. */
  implicit val jsonEncoder: Encoder[Field] =
    Encoder.instance(field => Json.fromString(field.name))
}
| @@ -1,19 +0,0 @@ | ||||
| package docspell.ftssolr | ||||
|  | ||||
| object Fields { | ||||
|   val discriminator = "discriminator" | ||||
|   val id = "id" | ||||
|   val itemId = "itemId" | ||||
|   val collectiveId = "collectiveId" | ||||
|  | ||||
|   object Attachment { | ||||
|     val attachmentId = "attachmentId" | ||||
|     val attachmentName = "attachmentName" | ||||
|     val content = "content" | ||||
|   } | ||||
|  | ||||
|   object Item { | ||||
|     val itemName = "itemName" | ||||
|     val itemNotes = "itemNotes" | ||||
|   } | ||||
| } | ||||
| @@ -1,43 +1,92 @@ | ||||
| package docspell.ftssolr | ||||
|  | ||||
| //import cats.implicits._ | ||||
| import io.circe._ | ||||
| import docspell.common._ | ||||
| import docspell.ftsclient._ | ||||
| import io.circe._ | ||||
| import Fields.{Item, Attachment} | ||||
|  | ||||
| trait JsonCodec { | ||||
|  | ||||
|   implicit def attachmentEncoder: Encoder[TextData.Attachment] = | ||||
|   implicit def attachmentEncoder(implicit | ||||
|       enc: Encoder[Ident] | ||||
|   ): Encoder[TextData.Attachment] = | ||||
|     new Encoder[TextData.Attachment] { | ||||
|       final def apply(td: TextData.Attachment): Json = Json.obj( | ||||
|         (Fields.id, Ident.encodeIdent(td.id)), | ||||
|         (Fields.itemId, Ident.encodeIdent(td.item)), | ||||
|         (Fields.collectiveId, Ident.encodeIdent(td.collective)), | ||||
|         (Attachment.attachmentId, Ident.encodeIdent(td.attachId)), | ||||
|         (Attachment.attachmentName, Json.fromString(td.name.getOrElse(""))), | ||||
|         (Attachment.content, Json.fromString(td.text.getOrElse(""))), | ||||
|         (Fields.discriminator, Json.fromString("attachment")) | ||||
|       ) | ||||
|       final def apply(td: TextData.Attachment): Json = | ||||
|         Json.obj( | ||||
|           (Field.id.name, enc(td.id)), | ||||
|           (Field.itemId.name, enc(td.item)), | ||||
|           (Field.collectiveId.name, enc(td.collective)), | ||||
|           (Field.attachmentId.name, enc(td.attachId)), | ||||
|           (Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))), | ||||
|           (Field.content.name, Json.fromString(td.text.getOrElse(""))), | ||||
|           (Field.discriminator.name, Json.fromString("attachment")) | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|   implicit def itemEncoder: Encoder[TextData.Item] = | ||||
|   implicit def itemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] = | ||||
|     new Encoder[TextData.Item] { | ||||
|       final def apply(td: TextData.Item): Json = Json.obj( | ||||
|         (Fields.id, Ident.encodeIdent(td.id)), | ||||
|         (Fields.itemId, Ident.encodeIdent(td.item)), | ||||
|         (Fields.collectiveId, Ident.encodeIdent(td.collective)), | ||||
|         (Item.itemName, Json.fromString(td.name.getOrElse(""))), | ||||
|         (Item.itemNotes, Json.fromString(td.notes.getOrElse(""))), | ||||
|         (Fields.discriminator, Json.fromString("item")) | ||||
|       ) | ||||
|       final def apply(td: TextData.Item): Json = | ||||
|         Json.obj( | ||||
|           (Field.id.name, enc(td.id)), | ||||
|           (Field.itemId.name, enc(td.item)), | ||||
|           (Field.collectiveId.name, enc(td.collective)), | ||||
|           (Field.itemName.name, Json.fromString(td.name.getOrElse(""))), | ||||
|           (Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))), | ||||
|           (Field.discriminator.name, Json.fromString("item")) | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|  | ||||
/** Encodes any `TextData` by delegating to the attachment or the item
  * encoder, selected through `TextData.fold`.
  */
implicit def textDataEncoder(implicit
    ae: Encoder[TextData.Attachment],
    ie: Encoder[TextData.Item]
): Encoder[TextData] =
  Encoder.instance(textData => textData.fold(ae.apply, ie.apply))
|  | ||||
/** Decodes a query response into an [[FtsResult]].
  *
  * Reads `QTime` from `responseHeader`, and `numFound`, `maxScore`
  * and `docs` from `response`. The optional top-level `highlighting`
  * object maps a key to a map of field name -> snippets; the snippets
  * of all fields are flattened into one list per key.
  *
  * A response without a `highlighting` section (e.g. for a
  * [[QueryData]] that was created without `withHighLight`) decodes to
  * an empty highlight map instead of failing.
  */
implicit def ftsResultDecoder: Decoder[FtsResult] =
  new Decoder[FtsResult] {
    final def apply(c: HCursor): Decoder.Result[FtsResult] = {
      val response = c.downField("response")
      for {
        qtime    <- c.downField("responseHeader").get[Duration]("QTime")
        count    <- response.get[Int]("numFound")
        maxScore <- response.get[Double]("maxScore")
        results  <- response.get[List[FtsResult.ItemMatch]]("docs")
        // absent or null highlighting is treated as "no highlights"
        perField <-
          c.getOrElse[Map[Ident, Map[String, List[String]]]]("highlighting")(Map.empty)
        highlight = perField.map { case (key, byField) =>
          key -> byField.values.flatten.toList
        }
      } yield FtsResult(qtime, count, maxScore, highlight, results)
    }
  }
|  | ||||
/** Decodes one entry of the result documents into an `ItemMatch`.
  *
  * Reads `itemId`, `id`, `collectiveId` and `score` from the object
  * and determines the match data via `decodeMatchData`.
  */
implicit def decodeItemMatch: Decoder[FtsResult.ItemMatch] =
  Decoder.instance { cursor =>
    for {
      item       <- cursor.get[Ident]("itemId")
      docId      <- cursor.get[Ident]("id")
      collective <- cursor.get[Ident]("collectiveId")
      docScore   <- cursor.get[Double]("score")
      matchData  <- decodeMatchData(cursor)
    } yield FtsResult.ItemMatch(docId, item, collective, docScore, matchData)
  }
|  | ||||
/** Decodes the match data of a result document.
  *
  * If `discriminator` is `"attachment"`, the `attachmentId` is read
  * and wrapped into `AttachmentData`; every other discriminator value
  * yields `ItemData`.
  */
def decodeMatchData: Decoder[FtsResult.MatchData] =
  Decoder.instance { cursor =>
    def byDiscriminator(disc: String): Decoder.Result[FtsResult.MatchData] =
      disc match {
        case "attachment" =>
          cursor.get[Ident]("attachmentId").map(FtsResult.AttachmentData.apply)
        case _ =>
          Right(FtsResult.ItemData)
      }

    cursor.get[String]("discriminator").flatMap(byDiscriminator)
  }
|  | ||||
/** Allows an `Ident` to be used as JSON object key; the key is its `id`. */
implicit def identKeyEncoder: KeyEncoder[Ident] =
  KeyEncoder.instance(_.id)
/** Reads a JSON object key as `Ident`; a key that `Ident(..)` rejects
  * results in `None`.
  */
implicit def identKeyDecoder: KeyDecoder[Ident] =
  KeyDecoder.instance(key => Ident(key).toOption)
| } | ||||
|  | ||||
| object JsonCodec extends JsonCodec | ||||
|   | ||||
| @@ -0,0 +1,56 @@ | ||||
| package docspell.ftssolr | ||||
|  | ||||
| import io.circe._ | ||||
| import io.circe.generic.semiauto._ | ||||
| import docspell.ftsclient.FtsQuery | ||||
|  | ||||
/** The data of a search request; it is sent JSON-encoded as the body
  * of the query request.
  *
  * @param query  the query string
  * @param filter the filter query
  * @param limit  maximum number of results
  * @param offset number of results to skip
  * @param fields the fields to return for each result
  * @param params additional raw request parameters
  */
final case class QueryData(
    query: String,
    filter: String,
    limit: Int,
    offset: Int,
    fields: List[Field],
    params: Map[String, String]
) {

  /** The same query with the offset advanced by one page (`limit`). */
  def nextPage: QueryData = {
    val nextOffset = offset + limit
    copy(offset = nextOffset)
  }

  /** Adds the highlighting parameters to `params`.
    *
    * @param fields the fields to highlight, joined by comma into `hl.fl`
    * @param pre    text inserted before a highlighted term
    * @param post   text inserted after a highlighted term
    */
  def withHighLight(fields: List[Field], pre: String, post: String): QueryData = {
    val highlightFields = fields.map(_.name).mkString(",")
    val highlightParams = Map(
      "hl"                   -> "on",
      "hl.requireFieldMatch" -> "true",
      "hl.fl"                -> highlightFields,
      "hl.simple.pre"        -> pre,
      "hl.simple.post"       -> post
    )
    copy(params = params ++ highlightParams)
  }
}
|  | ||||
object QueryData {

  implicit val jsonEncoder: Encoder[QueryData] =
    deriveEncoder[QueryData]

  /** Creates the query data for a fulltext query.
    *
    * The sanitized query text is applied to each field in `search`,
    * and the per-field clauses are combined with `OR`. The filter
    * restricts results to the collective of `fq` and, if `fq.items` is
    * non-empty, additionally to these item ids. Highlighting is
    * enabled for the `search` fields using `**` as pre and post
    * marker.
    *
    * @param search the fields to search in (and to highlight)
    * @param fields the fields to return for each result
    * @param fq     the fulltext query
    */
  def apply(search: List[Field], fields: List[Field], fq: FtsQuery): QueryData = {
    val terms     = sanitize(fq.q)
    val mainQuery = search.map(field => field.name + ":(" + terms + ")").mkString(" OR ")

    val collectiveFilter = Field.collectiveId.name + ":\"" + fq.collective.id + "\""
    val filterQuery =
      if (fq.items.isEmpty) collectiveFilter
      else {
        val itemIds = fq.items.map(_.id).mkString(" ")
        collectiveFilter + " AND " + Field.itemId.name + ":(" + itemIds + ")"
      }

    val plain = QueryData(mainQuery, filterQuery, fq.limit, fq.offset, fields, Map.empty)
    plain.withHighLight(search, "**", "**")
  }

  /** Replaces each run of parentheses and commas with a single space,
    * so the text can be placed inside the `field:(...)` group.
    */
  private def sanitize(q: String): String =
    q.replaceAll("[\\(,\\)]+", " ")
}
| @@ -5,32 +5,20 @@ import cats.effect._ | ||||
| import cats.implicits._ | ||||
| import org.http4s.client.Client | ||||
|  | ||||
| import cats.data.NonEmptyList | ||||
| import docspell.common._ | ||||
| import docspell.ftsclient._ | ||||
| import docspell.ftsclient.FtsBasicResult._ | ||||
|  | ||||
| final class SolrFtsClient[F[_]: Effect]( | ||||
|     solrUpdate: SolrUpdate[F], | ||||
|     solrSetup: SolrSetup[F] | ||||
|     solrSetup: SolrSetup[F], | ||||
|     solrQuery: SolrQuery[F] | ||||
| ) extends FtsClient[F] { | ||||
|  | ||||
|   def initialize: F[Unit] = | ||||
|     solrSetup.setupSchema | ||||
|  | ||||
|   def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] = | ||||
|     Stream.emits( | ||||
|       Seq( | ||||
|         FtsBasicResult( | ||||
|           Ident.unsafe("5J4zvCiTE2j-UEznDUsUCsA-5px6ftrSwfs-FpUWCaHh2Ei"), | ||||
|           NonEmptyList.of(AttachmentMatch(Ident.unsafe("a"), 0.2)) | ||||
|         ), | ||||
|         FtsBasicResult( | ||||
|           Ident.unsafe("8B8UNoC1U4y-dqnqjdFG7ue-LG5ktz9pWVt-diFemCLrLAa"), | ||||
|           NonEmptyList.of(AttachmentMatch(Ident.unsafe("b"), 0.5)) | ||||
|         ) | ||||
|       ) | ||||
|     ) | ||||
|   def searchBasic(q: FtsQuery): Stream[F, FtsResult] = | ||||
|     Stream.eval(solrQuery.query(q)) | ||||
|  | ||||
|   def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = | ||||
|     (for { | ||||
| @@ -53,7 +41,11 @@ object SolrFtsClient { | ||||
|       httpClient: Client[F] | ||||
|   ): Resource[F, FtsClient[F]] = | ||||
|     Resource.pure[F, FtsClient[F]]( | ||||
|       new SolrFtsClient(SolrUpdate(cfg, httpClient), SolrSetup(cfg, httpClient)) | ||||
|       new SolrFtsClient( | ||||
|         SolrUpdate(cfg, httpClient), | ||||
|         SolrSetup(cfg, httpClient), | ||||
|         SolrQuery(cfg, httpClient) | ||||
|       ) | ||||
|     ) | ||||
|  | ||||
| } | ||||
|   | ||||
| @@ -0,0 +1,54 @@ | ||||
| package docspell.ftssolr | ||||
|  | ||||
| import cats.effect._ | ||||
| import org.http4s._ | ||||
| import org.http4s.client.Client | ||||
| import org.http4s.circe._ | ||||
| import org.http4s.circe.CirceEntityDecoder._ | ||||
| import org.http4s.client.dsl.Http4sClientDsl | ||||
| import _root_.io.circe.syntax._ | ||||
| import org.log4s.getLogger | ||||
|  | ||||
| import docspell.ftsclient._ | ||||
| import JsonCodec._ | ||||
|  | ||||
/** Runs queries against the search index. */
trait SolrQuery[F[_]] {

  /** Runs the given, already prepared query. */
  def query(q: QueryData): F[FtsResult]

  /** Runs a fulltext query.
    *
    * The query searches the fields `content`, `itemName`, `itemNotes`
    * and `attachmentName` and requests the fields `id`, `itemId`,
    * `collectiveId`, `score`, `attachmentId` and `discriminator` for
    * each result.
    */
  def query(q: FtsQuery): F[FtsResult] = {
    val searchFields =
      List(Field.content, Field.itemName, Field.itemNotes, Field.attachmentName)

    val resultFields =
      List(
        Field.id,
        Field.itemId,
        Field.collectiveId,
        Field("score"),
        Field.attachmentId,
        Field.discriminator
      )

    query(QueryData(searchFields, resultFields, q))
  }
}
|  | ||||
object SolrQuery {
  private[this] val logger = getLogger

  /** Creates a `SolrQuery` that sends its queries via the given http
    * client.
    *
    * @param cfg    the solr configuration; `cfg.url` with `/query`
    *               appended is the request target
    * @param client the http client used to run requests
    */
  def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
    val dsl = new Http4sClientDsl[F] {}
    import dsl._

    new SolrQuery[F] {
      val url = Uri.unsafeFromString(cfg.url.asString) / "query"

      /** POSTs the JSON-encoded query and decodes the response body
        * into an `FtsResult`.
        */
      def query(q: QueryData): F[FtsResult] = {
        // encode once; used for the request body and the log message
        val body = q.asJson
        val req  = Method.POST(body, url)
        // Suspend the logging so it happens each time the returned
        // effect is run, not once when the effect value is created.
        val logQuery = Sync[F].delay(logger.debug(s"Running query: $req : $body"))
        Sync[F].flatMap(logQuery)(_ => client.expect[FtsResult](req))
      }

    }
  }
}
| @@ -11,8 +11,6 @@ import _root_.io.circe.syntax._ | ||||
| import _root_.io.circe._ | ||||
| import _root_.io.circe.generic.semiauto._ | ||||
|  | ||||
| import Fields.{Attachment, Item} | ||||
|  | ||||
| trait SolrSetup[F[_]] { | ||||
|  | ||||
|   def setupSchema: F[Unit] | ||||
| @@ -33,18 +31,18 @@ object SolrSetup { | ||||
|       def setupSchema: F[Unit] = { | ||||
|         val cmds0 = | ||||
|           List( | ||||
|             Fields.id, | ||||
|             Fields.itemId, | ||||
|             Fields.collectiveId, | ||||
|             Fields.discriminator, | ||||
|             Attachment.attachmentId | ||||
|             Field.id, | ||||
|             Field.itemId, | ||||
|             Field.collectiveId, | ||||
|             Field.discriminator, | ||||
|             Field.attachmentId | ||||
|           ) | ||||
|             .traverse(addStringField) | ||||
|         val cmds1 = List( | ||||
|           Attachment.attachmentName, | ||||
|           Attachment.content, | ||||
|           Item.itemName, | ||||
|           Item.itemNotes | ||||
|           Field.attachmentName, | ||||
|           Field.content, | ||||
|           Field.itemName, | ||||
|           Field.itemNotes | ||||
|         ) | ||||
|           .traverse(addTextField) | ||||
|  | ||||
| @@ -57,13 +55,13 @@ object SolrSetup { | ||||
|         client.expect[String](req).map(r => logger.debug(s"Response: $r")) | ||||
|       } | ||||
|  | ||||
|       private def addStringField(name: String): F[Unit] = | ||||
|         run(DeleteField.command(DeleteField(name))).attempt *> | ||||
|           run(AddField.command(AddField.string(name))) | ||||
|       private def addStringField(field: Field): F[Unit] = | ||||
|         run(DeleteField.command(DeleteField(field))).attempt *> | ||||
|           run(AddField.command(AddField.string(field))) | ||||
|  | ||||
|       private def addTextField(name: String): F[Unit] = | ||||
|         run(DeleteField.command(DeleteField(name))).attempt *> | ||||
|           run(AddField.command(AddField.text(name))) | ||||
|       private def addTextField(field: Field): F[Unit] = | ||||
|         run(DeleteField.command(DeleteField(field))).attempt *> | ||||
|           run(AddField.command(AddField.text(field))) | ||||
|  | ||||
|     } | ||||
|   } | ||||
| @@ -71,7 +69,7 @@ object SolrSetup { | ||||
|   // Schema Commands | ||||
|  | ||||
|   case class AddField( | ||||
|       name: String, | ||||
|       name: Field, | ||||
|       `type`: String, | ||||
|       stored: Boolean, | ||||
|       indexed: Boolean, | ||||
| @@ -84,14 +82,14 @@ object SolrSetup { | ||||
|     def command(body: AddField): Json = | ||||
|       Map("add-field" -> body.asJson).asJson | ||||
|  | ||||
|     def string(name: String): AddField = | ||||
|       AddField(name, "string", true, true, false) | ||||
|     def string(field: Field): AddField = | ||||
|       AddField(field, "string", true, true, false) | ||||
|  | ||||
|     def text(name: String): AddField = | ||||
|       AddField(name, "text_general", true, true, false) | ||||
|     def text(field: Field): AddField = | ||||
|       AddField(field, "text_general", true, true, false) | ||||
|   } | ||||
|  | ||||
|   case class DeleteField(name: String) | ||||
|   case class DeleteField(name: Field) | ||||
|   object DeleteField { | ||||
|     implicit val encoder: Encoder[DeleteField] = | ||||
|       deriveEncoder[DeleteField] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user