mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-03 05:52:51 +00:00
First basic working solr search
This commit is contained in:
parent
2a0bf24088
commit
3576c45d1a
@ -20,7 +20,6 @@ object OFulltext {
|
||||
// then run a query
|
||||
// check if supported by mariadb, postgres and h2. seems like it is supported everywhere
|
||||
|
||||
|
||||
def apply[F[_]: Effect](
|
||||
itemSearch: OItemSearch[F],
|
||||
fts: FtsClient[F]
|
||||
@ -43,21 +42,21 @@ object OFulltext {
|
||||
.compile
|
||||
.toVector
|
||||
|
||||
|
||||
private def findItemsFts[A](
|
||||
q: Query,
|
||||
ftsQ: String,
|
||||
batch: Batch,
|
||||
search: (Query, Batch) => F[Vector[A]]
|
||||
): Stream[F, A] = {
|
||||
val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset)
|
||||
val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset, Nil)
|
||||
|
||||
val qres =
|
||||
for {
|
||||
items <-
|
||||
fts
|
||||
.searchBasic(fq)
|
||||
.map(_.item)
|
||||
.flatMap(r => Stream.emits(r.results))
|
||||
.map(_.itemId)
|
||||
.compile
|
||||
.toVector
|
||||
.map(_.toSet)
|
||||
|
@ -25,6 +25,9 @@ case class Duration(nanos: Long) {
|
||||
|
||||
def formatExact: String =
|
||||
s"$millis ms"
|
||||
|
||||
override def toString(): String =
|
||||
s"Duration(${millis}ms)"
|
||||
}
|
||||
|
||||
object Duration {
|
||||
|
@ -17,7 +17,7 @@ case class Ident(id: String) {
|
||||
!isEmpty
|
||||
|
||||
def / (next: Ident): Ident =
|
||||
new Ident(id + "/" + next.id)
|
||||
new Ident(id + "." + next.id)
|
||||
}
|
||||
|
||||
object Ident {
|
||||
|
@ -1,19 +0,0 @@
|
||||
package docspell.ftsclient
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
import docspell.common._
|
||||
|
||||
import FtsBasicResult.AttachmentMatch
|
||||
|
||||
final case class FtsBasicResult(item: Ident, attachments: NonEmptyList[AttachmentMatch]) {
|
||||
|
||||
def score: Double =
|
||||
attachments.map(_.score).toList.max
|
||||
}
|
||||
|
||||
object FtsBasicResult {
|
||||
|
||||
case class AttachmentMatch(id: Ident, score: Double)
|
||||
|
||||
}
|
@ -19,7 +19,7 @@ trait FtsClient[F[_]] {
|
||||
*/
|
||||
def initialize: F[Unit]
|
||||
|
||||
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
|
||||
def searchBasic(q: FtsQuery): Stream[F, FtsResult]
|
||||
|
||||
def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit]
|
||||
}
|
||||
|
@ -7,4 +7,4 @@ import docspell.common._
|
||||
* The query itself is a raw string. Each implementation may
|
||||
* interpret it according to the system in use.
|
||||
*/
|
||||
final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int)
|
||||
final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int, items: List[Ident])
|
||||
|
@ -0,0 +1,28 @@
|
||||
package docspell.ftsclient
|
||||
|
||||
import docspell.common._
|
||||
|
||||
import FtsResult.ItemMatch
|
||||
|
||||
/** The outcome of one full-text search request.
  *
  * @param qtime     the query time reported by the search backend
  * @param count     the number of documents the backend reports as found
  * @param maxScore  the highest score reported for this query
  * @param highlight highlighted text snippets, grouped by an identifier
  *                  (presumably the id of the matching index document --
  *                  TODO confirm against the backend response)
  * @param results   the matches contained in this result page
  */
final case class FtsResult(
    qtime: Duration,
    count: Int,
    maxScore: Double,
    highlight: Map[Ident, List[String]],
    results: List[ItemMatch]
)

object FtsResult {

  /** Tells which kind of index document produced a match. */
  sealed trait MatchData

  /** The match was found in the attachment with the given id. */
  final case class AttachmentData(attachId: Ident) extends MatchData

  /** The match was found in the data of the item itself. */
  case object ItemData extends MatchData

  /** A single matching index document.
    *
    * @param id           the id of the index document
    * @param itemId       the item the document belongs to
    * @param collectiveId the collective owning the item
    * @param score        the score assigned by the search backend
    * @param data         whether an attachment or the item itself matched
    */
  final case class ItemMatch(
      id: Ident,
      itemId: Ident,
      collectiveId: Ident,
      score: Double,
      data: MatchData
  )
}
|
29
modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala
Normal file
29
modules/fts-solr/src/main/scala/docspell/ftssolr/Field.scala
Normal file
@ -0,0 +1,29 @@
|
||||
package docspell.ftssolr
|
||||
|
||||
import io.circe._
|
||||
|
||||
/** A typed wrapper around the name of a field in the search index.
  *
  * It is a value class, so it only wraps the plain field name.
  */
final class Field(val name: String) extends AnyVal {

  override def toString(): String = s"Field($name)"
}

object Field {

  /** Wraps the given name into a `Field`. */
  def apply(name: String): Field =
    new Field(name)

  // Fields written for every index document.
  val discriminator: Field = Field("discriminator")
  val id: Field            = Field("id")
  val itemId: Field        = Field("itemId")
  val collectiveId: Field  = Field("collectiveId")

  // Fields written for attachment documents.
  val attachmentId: Field   = Field("attachmentId")
  val attachmentName: Field = Field("attachmentName")
  val content: Field        = Field("content")

  // Fields written for item documents.
  val itemName: Field  = Field("itemName")
  val itemNotes: Field = Field("itemNotes")

  /** A field is encoded as a JSON string holding its name. */
  implicit val jsonEncoder: Encoder[Field] =
    Encoder.encodeString.contramap(field => field.name)
}
|
@ -1,19 +0,0 @@
|
||||
package docspell.ftssolr
|
||||
|
||||
object Fields {
|
||||
val discriminator = "discriminator"
|
||||
val id = "id"
|
||||
val itemId = "itemId"
|
||||
val collectiveId = "collectiveId"
|
||||
|
||||
object Attachment {
|
||||
val attachmentId = "attachmentId"
|
||||
val attachmentName = "attachmentName"
|
||||
val content = "content"
|
||||
}
|
||||
|
||||
object Item {
|
||||
val itemName = "itemName"
|
||||
val itemNotes = "itemNotes"
|
||||
}
|
||||
}
|
@ -1,43 +1,92 @@
|
||||
package docspell.ftssolr
|
||||
|
||||
//import cats.implicits._
|
||||
import io.circe._
|
||||
import docspell.common._
|
||||
import docspell.ftsclient._
|
||||
import io.circe._
|
||||
import Fields.{Item, Attachment}
|
||||
|
||||
trait JsonCodec {
|
||||
|
||||
implicit def attachmentEncoder: Encoder[TextData.Attachment] =
|
||||
implicit def attachmentEncoder(implicit
|
||||
enc: Encoder[Ident]
|
||||
): Encoder[TextData.Attachment] =
|
||||
new Encoder[TextData.Attachment] {
|
||||
final def apply(td: TextData.Attachment): Json = Json.obj(
|
||||
(Fields.id, Ident.encodeIdent(td.id)),
|
||||
(Fields.itemId, Ident.encodeIdent(td.item)),
|
||||
(Fields.collectiveId, Ident.encodeIdent(td.collective)),
|
||||
(Attachment.attachmentId, Ident.encodeIdent(td.attachId)),
|
||||
(Attachment.attachmentName, Json.fromString(td.name.getOrElse(""))),
|
||||
(Attachment.content, Json.fromString(td.text.getOrElse(""))),
|
||||
(Fields.discriminator, Json.fromString("attachment"))
|
||||
)
|
||||
final def apply(td: TextData.Attachment): Json =
|
||||
Json.obj(
|
||||
(Field.id.name, enc(td.id)),
|
||||
(Field.itemId.name, enc(td.item)),
|
||||
(Field.collectiveId.name, enc(td.collective)),
|
||||
(Field.attachmentId.name, enc(td.attachId)),
|
||||
(Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))),
|
||||
(Field.content.name, Json.fromString(td.text.getOrElse(""))),
|
||||
(Field.discriminator.name, Json.fromString("attachment"))
|
||||
)
|
||||
}
|
||||
|
||||
implicit def itemEncoder: Encoder[TextData.Item] =
|
||||
implicit def itemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] =
|
||||
new Encoder[TextData.Item] {
|
||||
final def apply(td: TextData.Item): Json = Json.obj(
|
||||
(Fields.id, Ident.encodeIdent(td.id)),
|
||||
(Fields.itemId, Ident.encodeIdent(td.item)),
|
||||
(Fields.collectiveId, Ident.encodeIdent(td.collective)),
|
||||
(Item.itemName, Json.fromString(td.name.getOrElse(""))),
|
||||
(Item.itemNotes, Json.fromString(td.notes.getOrElse(""))),
|
||||
(Fields.discriminator, Json.fromString("item"))
|
||||
)
|
||||
final def apply(td: TextData.Item): Json =
|
||||
Json.obj(
|
||||
(Field.id.name, enc(td.id)),
|
||||
(Field.itemId.name, enc(td.item)),
|
||||
(Field.collectiveId.name, enc(td.collective)),
|
||||
(Field.itemName.name, Json.fromString(td.name.getOrElse(""))),
|
||||
(Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))),
|
||||
(Field.discriminator.name, Json.fromString("item"))
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
/** Encodes any `TextData` by delegating to the encoder of its concrete
  * variant (attachment or item).
  */
implicit def textDataEncoder(implicit
    ae: Encoder[TextData.Attachment],
    ie: Encoder[TextData.Item]
): Encoder[TextData] =
  new Encoder[TextData] {
    final def apply(td: TextData): Json =
      td.fold(ae.apply, ie.apply)
  }
|
||||
|
||||
/** Decodes a query response into an [[FtsResult]].
  *
  * Reads `QTime` from `responseHeader`, reads `numFound`, `maxScore` and
  * `docs` from `response`, and reads the top-level `highlighting` object.
  * The highlighting is a two-level map; the snippets of all inner keys are
  * concatenated into one list per outer key.
  */
implicit def ftsResultDecoder: Decoder[FtsResult] =
  new Decoder[FtsResult] {
    final def apply(c: HCursor): Decoder.Result[FtsResult] = {
      val header   = c.downField("responseHeader")
      val response = c.downField("response")
      for {
        qtime     <- header.get[Duration]("QTime")
        count     <- response.get[Int]("numFound")
        maxScore  <- response.get[Double]("maxScore")
        results   <- response.get[List[FtsResult.ItemMatch]]("docs")
        perField  <- c.get[Map[Ident, Map[String, List[String]]]]("highlighting")
        highlight = perField.map {
          case (docId, snippets) => docId -> snippets.values.flatten.toList
        }
      } yield FtsResult(qtime, count, maxScore, highlight, results)
    }
  }
|
||||
|
||||
/** Decodes one entry of the result documents into an `ItemMatch`.
  *
  * The fields are read in the order `itemId`, `id`, `collectiveId`,
  * `score`; the match data is decoded last via `decodeMatchData`.
  */
implicit def decodeItemMatch: Decoder[FtsResult.ItemMatch] =
  Decoder.instance { cursor =>
    for {
      itemId     <- cursor.get[Ident]("itemId")
      docId      <- cursor.get[Ident]("id")
      collective <- cursor.get[Ident]("collectiveId")
      score      <- cursor.get[Double]("score")
      matchData  <- decodeMatchData(cursor)
    } yield FtsResult.ItemMatch(docId, itemId, collective, score, matchData)
  }
|
||||
|
||||
/** Decodes the kind of match from the `discriminator` field.
  *
  * The value `attachment` requires an `attachmentId` field and yields
  * `AttachmentData`; every other discriminator value yields `ItemData`.
  */
def decodeMatchData: Decoder[FtsResult.MatchData] =
  Decoder.instance { cursor =>
    cursor.get[String]("discriminator").flatMap { kind =>
      val matchData: Decoder.Result[FtsResult.MatchData] =
        if (kind == "attachment")
          cursor.get[Ident]("attachmentId").map(FtsResult.AttachmentData.apply)
        else
          Right(FtsResult.ItemData)
      matchData
    }
  }
|
||||
|
||||
/** Uses the plain `id` string of an `Ident` as the JSON object key. */
implicit def identKeyEncoder: KeyEncoder[Ident] =
  KeyEncoder.instance[Ident](ident => ident.id)
|
||||
/** Reads a JSON object key as an `Ident`; a key for which `Ident(..)` yields
  * no value is not decoded.
  */
implicit def identKeyDecoder: KeyDecoder[Ident] =
  KeyDecoder.instance[Ident](key => Ident(key).toOption)
|
||||
}
|
||||
|
||||
object JsonCodec extends JsonCodec
|
||||
|
@ -0,0 +1,56 @@
|
||||
package docspell.ftssolr
|
||||
|
||||
import io.circe._
|
||||
import io.circe.generic.semiauto._
|
||||
import docspell.ftsclient.FtsQuery
|
||||
|
||||
/** The data of a single query request; it is serialised to JSON through the
  * encoder defined in the companion object.
  *
  * @param query  the query string
  * @param filter the filter string
  * @param limit  the maximum number of results to return
  * @param offset the number of results to skip
  * @param fields the fields to return for each result
  * @param params additional request parameters
  */
final case class QueryData(
    query: String,
    filter: String,
    limit: Int,
    offset: Int,
    fields: List[Field],
    params: Map[String, String]
) {

  /** The same query, with the offset advanced by one page (`limit`). */
  def nextPage: QueryData =
    copy(offset = limit + offset)

  /** Adds the highlighting parameters to `params`.
    *
    * Note that the `fields` argument shadows the `fields` member: it names
    * the fields to highlight and does not change the returned fields.
    *
    * @param fields the fields to create highlight snippets for
    * @param pre    the text inserted before a highlighted term
    * @param post   the text inserted after a highlighted term
    */
  def withHighLight(fields: List[Field], pre: String, post: String): QueryData = {
    val highlightFields = fields.map(_.name).mkString(",")
    val highlightParams = Map(
      "hl"                   -> "on",
      "hl.requireFieldMatch" -> "true",
      "hl.fl"                -> highlightFields,
      "hl.simple.pre"        -> pre,
      "hl.simple.post"       -> post
    )
    copy(params = params ++ highlightParams)
  }
}
|
||||
|
||||
object QueryData {

  implicit val jsonEncoder: Encoder[QueryData] =
    deriveEncoder[QueryData]

  /** Builds the query data for a full-text query.
    *
    * The sanitized query text is applied to every field in `search`, with
    * the per-field clauses combined by `OR`. The filter always restricts to
    * the collective of `fq`; if `fq.items` is not empty it additionally
    * restricts to these item ids. Highlighting is enabled for the `search`
    * fields, using `**` as the marker before and after a term.
    *
    * @param search the fields to search in (also the highlighted fields)
    * @param fields the fields to return for each result
    * @param fq     the query to translate
    */
  def apply(search: List[Field], fields: List[Field], fq: FtsQuery): QueryData = {
    val term           = sanitize(fq.q)
    val searchClause   = search.map(f => s"${f.name}:($term)").mkString(" OR ")
    val collectiveOnly = s"""${Field.collectiveId.name}:"${fq.collective.id}""""

    val filterClause =
      if (fq.items.isEmpty) collectiveOnly
      else {
        val items      = fq.items.map(_.id).mkString(" ")
        val itemClause = s"""${Field.itemId.name}:($items)"""
        List(collectiveOnly, itemClause).mkString(" AND ")
      }

    val base = QueryData(searchClause, filterClause, fq.limit, fq.offset, fields, Map.empty)
    base.withHighLight(search, "**", "**")
  }

  /** Replaces every run of parentheses and commas with a single space, so the
    * user text cannot close the `field:(...)` group it is placed into.
    */
  private def sanitize(q: String): String =
    q.replaceAll("[\\(,\\)]+", " ")
}
|
@ -5,32 +5,20 @@ import cats.effect._
|
||||
import cats.implicits._
|
||||
import org.http4s.client.Client
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import docspell.common._
|
||||
import docspell.ftsclient._
|
||||
import docspell.ftsclient.FtsBasicResult._
|
||||
|
||||
final class SolrFtsClient[F[_]: Effect](
|
||||
solrUpdate: SolrUpdate[F],
|
||||
solrSetup: SolrSetup[F]
|
||||
solrSetup: SolrSetup[F],
|
||||
solrQuery: SolrQuery[F]
|
||||
) extends FtsClient[F] {
|
||||
|
||||
def initialize: F[Unit] =
|
||||
solrSetup.setupSchema
|
||||
|
||||
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] =
|
||||
Stream.emits(
|
||||
Seq(
|
||||
FtsBasicResult(
|
||||
Ident.unsafe("5J4zvCiTE2j-UEznDUsUCsA-5px6ftrSwfs-FpUWCaHh2Ei"),
|
||||
NonEmptyList.of(AttachmentMatch(Ident.unsafe("a"), 0.2))
|
||||
),
|
||||
FtsBasicResult(
|
||||
Ident.unsafe("8B8UNoC1U4y-dqnqjdFG7ue-LG5ktz9pWVt-diFemCLrLAa"),
|
||||
NonEmptyList.of(AttachmentMatch(Ident.unsafe("b"), 0.5))
|
||||
)
|
||||
)
|
||||
)
|
||||
def searchBasic(q: FtsQuery): Stream[F, FtsResult] =
|
||||
Stream.eval(solrQuery.query(q))
|
||||
|
||||
def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
|
||||
(for {
|
||||
@ -53,7 +41,11 @@ object SolrFtsClient {
|
||||
httpClient: Client[F]
|
||||
): Resource[F, FtsClient[F]] =
|
||||
Resource.pure[F, FtsClient[F]](
|
||||
new SolrFtsClient(SolrUpdate(cfg, httpClient), SolrSetup(cfg, httpClient))
|
||||
new SolrFtsClient(
|
||||
SolrUpdate(cfg, httpClient),
|
||||
SolrSetup(cfg, httpClient),
|
||||
SolrQuery(cfg, httpClient)
|
||||
)
|
||||
)
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,54 @@
|
||||
package docspell.ftssolr
|
||||
|
||||
import cats.effect._
|
||||
import org.http4s._
|
||||
import org.http4s.client.Client
|
||||
import org.http4s.circe._
|
||||
import org.http4s.circe.CirceEntityDecoder._
|
||||
import org.http4s.client.dsl.Http4sClientDsl
|
||||
import _root_.io.circe.syntax._
|
||||
import org.log4s.getLogger
|
||||
|
||||
import docspell.ftsclient._
|
||||
import JsonCodec._
|
||||
|
||||
/** Runs search queries and decodes the response into an `FtsResult`. */
trait SolrQuery[F[_]] {

  /** Runs the given, fully specified query. */
  def query(q: QueryData): F[FtsResult]

  /** Translates the generic query into a `QueryData` and runs it.
    *
    * The query text is searched in the content, item name, item notes and
    * attachment name fields. The response is requested to contain the id,
    * item id, collective id, score, attachment id and discriminator fields.
    */
  def query(q: FtsQuery): F[FtsResult] = {
    val searchFields =
      List(Field.content, Field.itemName, Field.itemNotes, Field.attachmentName)

    val returnFields =
      List(
        Field.id,
        Field.itemId,
        Field.collectiveId,
        Field("score"),
        Field.attachmentId,
        Field.discriminator
      )

    query(QueryData(searchFields, returnFields, q))
  }
}
|
||||
|
||||
object SolrQuery {
  private[this] val logger = getLogger

  /** Creates a `SolrQuery` that posts queries as JSON to the `query` path
    * below the configured url and decodes the response into an `FtsResult`.
    *
    * @param cfg    the configuration providing the base url
    * @param client the http client used to run the request
    */
  def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
    val dsl = new Http4sClientDsl[F] {}
    import dsl._

    new SolrQuery[F] {
      val url = Uri.unsafeFromString(cfg.url.asString) / "query"

      def query(q: QueryData): F[FtsResult] = {
        // Encode once; the same JSON is used for the request and the log line.
        val body = q.asJson
        val req  = Method.POST(body, url)

        // Logging is a side effect, so it is suspended in F. The line is then
        // written whenever the query actually runs (including re-runs), not
        // at the moment the effect value is merely constructed.
        val logQuery = Sync[F].delay(logger.debug(s"Running query: $req : $body"))
        Sync[F].flatMap(logQuery)(_ => client.expect[FtsResult](req))
      }
    }
  }
}
|
@ -11,8 +11,6 @@ import _root_.io.circe.syntax._
|
||||
import _root_.io.circe._
|
||||
import _root_.io.circe.generic.semiauto._
|
||||
|
||||
import Fields.{Attachment, Item}
|
||||
|
||||
trait SolrSetup[F[_]] {
|
||||
|
||||
def setupSchema: F[Unit]
|
||||
@ -33,18 +31,18 @@ object SolrSetup {
|
||||
def setupSchema: F[Unit] = {
|
||||
val cmds0 =
|
||||
List(
|
||||
Fields.id,
|
||||
Fields.itemId,
|
||||
Fields.collectiveId,
|
||||
Fields.discriminator,
|
||||
Attachment.attachmentId
|
||||
Field.id,
|
||||
Field.itemId,
|
||||
Field.collectiveId,
|
||||
Field.discriminator,
|
||||
Field.attachmentId
|
||||
)
|
||||
.traverse(addStringField)
|
||||
val cmds1 = List(
|
||||
Attachment.attachmentName,
|
||||
Attachment.content,
|
||||
Item.itemName,
|
||||
Item.itemNotes
|
||||
Field.attachmentName,
|
||||
Field.content,
|
||||
Field.itemName,
|
||||
Field.itemNotes
|
||||
)
|
||||
.traverse(addTextField)
|
||||
|
||||
@ -57,13 +55,13 @@ object SolrSetup {
|
||||
client.expect[String](req).map(r => logger.debug(s"Response: $r"))
|
||||
}
|
||||
|
||||
private def addStringField(name: String): F[Unit] =
|
||||
run(DeleteField.command(DeleteField(name))).attempt *>
|
||||
run(AddField.command(AddField.string(name)))
|
||||
private def addStringField(field: Field): F[Unit] =
|
||||
run(DeleteField.command(DeleteField(field))).attempt *>
|
||||
run(AddField.command(AddField.string(field)))
|
||||
|
||||
private def addTextField(name: String): F[Unit] =
|
||||
run(DeleteField.command(DeleteField(name))).attempt *>
|
||||
run(AddField.command(AddField.text(name)))
|
||||
private def addTextField(field: Field): F[Unit] =
|
||||
run(DeleteField.command(DeleteField(field))).attempt *>
|
||||
run(AddField.command(AddField.text(field)))
|
||||
|
||||
}
|
||||
}
|
||||
@ -71,7 +69,7 @@ object SolrSetup {
|
||||
// Schema Commands
|
||||
|
||||
case class AddField(
|
||||
name: String,
|
||||
name: Field,
|
||||
`type`: String,
|
||||
stored: Boolean,
|
||||
indexed: Boolean,
|
||||
@ -84,14 +82,14 @@ object SolrSetup {
|
||||
def command(body: AddField): Json =
|
||||
Map("add-field" -> body.asJson).asJson
|
||||
|
||||
def string(name: String): AddField =
|
||||
AddField(name, "string", true, true, false)
|
||||
def string(field: Field): AddField =
|
||||
AddField(field, "string", true, true, false)
|
||||
|
||||
def text(name: String): AddField =
|
||||
AddField(name, "text_general", true, true, false)
|
||||
def text(field: Field): AddField =
|
||||
AddField(field, "text_general", true, true, false)
|
||||
}
|
||||
|
||||
case class DeleteField(name: String)
|
||||
case class DeleteField(name: Field)
|
||||
object DeleteField {
|
||||
implicit val encoder: Encoder[DeleteField] =
|
||||
deriveEncoder[DeleteField]
|
||||
|
Loading…
x
Reference in New Issue
Block a user