First basic working solr search

This commit is contained in:
Eike Kettner 2020-06-20 01:57:24 +02:00
parent 2a0bf24088
commit 3576c45d1a
14 changed files with 277 additions and 107 deletions

View File

@ -20,7 +20,6 @@ object OFulltext {
// then run a query
// check if supported by MariaDB, PostgreSQL and H2; it seems to be supported everywhere
def apply[F[_]: Effect](
itemSearch: OItemSearch[F],
fts: FtsClient[F]
@ -43,21 +42,21 @@ object OFulltext {
.compile
.toVector
private def findItemsFts[A](
q: Query,
ftsQ: String,
batch: Batch,
search: (Query, Batch) => F[Vector[A]]
): Stream[F, A] = {
val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset)
val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset, Nil)
val qres =
for {
items <-
fts
.searchBasic(fq)
.map(_.item)
.flatMap(r => Stream.emits(r.results))
.map(_.itemId)
.compile
.toVector
.map(_.toSet)

View File

@ -25,6 +25,9 @@ case class Duration(nanos: Long) {
/** Renders the `millis` value of this duration followed by the suffix " ms". */
def formatExact: String = {
  val ms = millis
  s"$ms ms"
}
/** Debug representation of the form `Duration(<millis>ms)`. */
override def toString(): String = {
  val ms = millis
  s"Duration(${ms}ms)"
}
}
object Duration {

View File

@ -17,7 +17,7 @@ case class Ident(id: String) {
!isEmpty
def / (next: Ident): Ident =
new Ident(id + "/" + next.id)
new Ident(id + "." + next.id)
}
object Ident {

View File

@ -1,19 +0,0 @@
package docspell.ftsclient
import cats.data.NonEmptyList
import cats.implicits._
import docspell.common._
import FtsBasicResult.AttachmentMatch
/** A full-text search hit for one item together with its matching attachments.
  *
  * @param item        identifier of the item
  * @param attachments the attachment matches; a `NonEmptyList`, so at least one exists
  */
final case class FtsBasicResult(item: Ident, attachments: NonEmptyList[AttachmentMatch]) {

  /** The largest score found among all attachment matches. */
  def score: Double = {
    val scores = attachments.toList.map(_.score)
    scores.max
  }
}

object FtsBasicResult {

  /** One matching attachment and the score assigned to it. */
  case class AttachmentMatch(id: Ident, score: Double)
}

View File

@ -19,7 +19,7 @@ trait FtsClient[F[_]] {
*/
def initialize: F[Unit]
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
def searchBasic(q: FtsQuery): Stream[F, FtsResult]
def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit]
}

View File

@ -7,4 +7,4 @@ import docspell.common._
* The query itself is a raw string. Each implementation may
* interpret it according to the system in use.
*/
final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int)
final case class FtsQuery(q: String, collective: Ident, limit: Int, offset: Int, items: List[Ident])

View File

@ -0,0 +1,28 @@
package docspell.ftsclient
import docspell.common._
import FtsResult.ItemMatch
/** The result of a full-text search.
  *
  * @param qtime     the query time reported with the result
  * @param count     the number of matches reported with the result
  * @param maxScore  the maximum score reported with the result
  * @param highlight lists of text snippets keyed by an identifier
  *                  (presumably the id of the matching index entry -- TODO confirm)
  * @param results   the individual matches
  */
final case class FtsResult(
    qtime: Duration,
    count: Int,
    maxScore: Double,
    highlight: Map[Ident, List[String]],
    results: List[ItemMatch]
)

object FtsResult {

  /** Describes which kind of entry produced a match. */
  sealed trait MatchData

  /** The match originates from an attachment with the given id. */
  case class AttachmentData(attachId: Ident) extends MatchData

  /** The match originates from the item itself. */
  case object ItemData extends MatchData

  /** A single match.
    *
    * @param id           identifier of the matching entry
    * @param itemId       identifier of the item the entry belongs to
    * @param collectiveId identifier of the collective
    * @param score        the score of this match
    * @param data         whether an attachment or the item itself matched
    */
  case class ItemMatch(id: Ident, itemId: Ident, collectiveId: Ident, score: Double, data: MatchData)
}

View File

@ -0,0 +1,29 @@
package docspell.ftssolr
import io.circe._
/** A typed wrapper around a field name.
  *
  * It is a value class, so it wraps the plain `name` string.
  */
final class Field(val name: String) extends AnyVal {
  override def toString(): String =
    "Field(" + name + ")"
}

object Field {

  /** Wraps the given name in a [[Field]]. */
  def apply(name: String): Field =
    new Field(name)

  // Fields known to this module.
  val discriminator: Field  = Field("discriminator")
  val id: Field             = Field("id")
  val itemId: Field         = Field("itemId")
  val collectiveId: Field   = Field("collectiveId")
  val attachmentId: Field   = Field("attachmentId")
  val attachmentName: Field = Field("attachmentName")
  val content: Field        = Field("content")
  val itemName: Field       = Field("itemName")
  val itemNotes: Field      = Field("itemNotes")

  /** Encodes a field as a JSON string holding its name. */
  implicit val jsonEncoder: Encoder[Field] =
    Encoder.instance(field => Json.fromString(field.name))
}

View File

@ -1,19 +0,0 @@
package docspell.ftssolr
/** String constants naming the fields used by this module. */
object Fields {
  // Fields that are not specific to attachments or items.
  val discriminator: String = "discriminator"
  val id: String            = "id"
  val itemId: String        = "itemId"
  val collectiveId: String  = "collectiveId"

  /** Field names grouped under `Attachment`. */
  object Attachment {
    val attachmentId: String   = "attachmentId"
    val attachmentName: String = "attachmentName"
    val content: String        = "content"
  }

  /** Field names grouped under `Item`. */
  object Item {
    val itemName: String  = "itemName"
    val itemNotes: String = "itemNotes"
  }
}

View File

@ -1,43 +1,92 @@
package docspell.ftssolr
//import cats.implicits._
import io.circe._
import docspell.common._
import docspell.ftsclient._
import io.circe._
import Fields.{Item, Attachment}
trait JsonCodec {
implicit def attachmentEncoder: Encoder[TextData.Attachment] =
implicit def attachmentEncoder(implicit
enc: Encoder[Ident]
): Encoder[TextData.Attachment] =
new Encoder[TextData.Attachment] {
final def apply(td: TextData.Attachment): Json = Json.obj(
(Fields.id, Ident.encodeIdent(td.id)),
(Fields.itemId, Ident.encodeIdent(td.item)),
(Fields.collectiveId, Ident.encodeIdent(td.collective)),
(Attachment.attachmentId, Ident.encodeIdent(td.attachId)),
(Attachment.attachmentName, Json.fromString(td.name.getOrElse(""))),
(Attachment.content, Json.fromString(td.text.getOrElse(""))),
(Fields.discriminator, Json.fromString("attachment"))
)
final def apply(td: TextData.Attachment): Json =
Json.obj(
(Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)),
(Field.attachmentId.name, enc(td.attachId)),
(Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))),
(Field.content.name, Json.fromString(td.text.getOrElse(""))),
(Field.discriminator.name, Json.fromString("attachment"))
)
}
implicit def itemEncoder: Encoder[TextData.Item] =
implicit def itemEncoder(implicit enc: Encoder[Ident]): Encoder[TextData.Item] =
new Encoder[TextData.Item] {
final def apply(td: TextData.Item): Json = Json.obj(
(Fields.id, Ident.encodeIdent(td.id)),
(Fields.itemId, Ident.encodeIdent(td.item)),
(Fields.collectiveId, Ident.encodeIdent(td.collective)),
(Item.itemName, Json.fromString(td.name.getOrElse(""))),
(Item.itemNotes, Json.fromString(td.notes.getOrElse(""))),
(Fields.discriminator, Json.fromString("item"))
)
final def apply(td: TextData.Item): Json =
Json.obj(
(Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)),
(Field.itemName.name, Json.fromString(td.name.getOrElse(""))),
(Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))),
(Field.discriminator.name, Json.fromString("item"))
)
}
/** Encodes any `TextData` by dispatching, via `fold`, to the attachment
  * encoder or the item encoder.
  */
implicit def textDataEncoder(implicit
    ae: Encoder[TextData.Attachment],
    ie: Encoder[TextData.Item]
): Encoder[TextData] =
  new Encoder[TextData] {
    final def apply(td: TextData): Json =
      td.fold(attachment => ae(attachment), item => ie(item))
  }
/** Decodes a search response into an `FtsResult`.
  *
  * Reads `QTime` from `responseHeader`; `numFound`, `maxScore` and `docs`
  * from `response`; and the top-level `highlighting` object. The snippets
  * of all fields of one highlighting entry are merged into a single list.
  */
implicit def ftsResultDecoder: Decoder[FtsResult] =
  Decoder.instance { cursor =>
    val response = cursor.downField("response")
    for {
      qtime      <- cursor.downField("responseHeader").get[Duration]("QTime")
      count      <- response.get[Int]("numFound")
      maxScore   <- response.get[Double]("maxScore")
      results    <- response.get[List[FtsResult.ItemMatch]]("docs")
      highlights <- cursor.get[Map[Ident, Map[String, List[String]]]]("highlighting")
    } yield {
      // drop the per-field grouping and keep only the snippets per entry
      val snippets = highlights.map {
        case (entryId, byField) => entryId -> byField.values.toList.flatten
      }
      FtsResult(qtime, count, maxScore, snippets, results)
    }
  }
/** Decodes one result document into an `FtsResult.ItemMatch`.
  *
  * Reads `itemId`, `id`, `collectiveId` and `score` from the document and
  * uses `decodeMatchData` on the same document for the match data.
  */
implicit def decodeItemMatch: Decoder[FtsResult.ItemMatch] =
  Decoder.instance { cursor =>
    for {
      item       <- cursor.get[Ident]("itemId")
      entry      <- cursor.get[Ident]("id")
      collective <- cursor.get[Ident]("collectiveId")
      score      <- cursor.get[Double]("score")
      matchData  <- decodeMatchData(cursor)
    } yield FtsResult.ItemMatch(entry, item, collective, score, matchData)
  }
/** Decodes the match data of a result document.
  *
  * If `discriminator` is "attachment", `attachmentId` is read and wrapped
  * in `AttachmentData`; every other discriminator value yields `ItemData`.
  */
def decodeMatchData: Decoder[FtsResult.MatchData] =
  Decoder.instance { cursor =>
    cursor.get[String]("discriminator").flatMap {
      case "attachment" =>
        cursor
          .get[Ident]("attachmentId")
          .map(attachId => FtsResult.AttachmentData(attachId): FtsResult.MatchData)
      case _ =>
        Right(FtsResult.ItemData: FtsResult.MatchData)
    }
  }
/** Uses the `id` string of an `Ident` as JSON object key. */
implicit def identKeyEncoder: KeyEncoder[Ident] =
  KeyEncoder.instance(ident => ident.id)
/** Reads a JSON object key as `Ident`; the key is rejected (`None`) when
  * `Ident(...)` does not produce a value.
  */
implicit def identKeyDecoder: KeyDecoder[Ident] =
  KeyDecoder.instance(key => Ident(key).toOption)
}
object JsonCodec extends JsonCodec

View File

@ -0,0 +1,56 @@
package docspell.ftssolr
import io.circe._
import io.circe.generic.semiauto._
import docspell.ftsclient.FtsQuery
/** The data of a query request.
  *
  * @param query  the query string
  * @param filter the filter query string
  * @param limit  the page size
  * @param offset the position of the first result to return
  * @param fields the list of fields
  * @param params additional key/value parameters
  */
final case class QueryData(
    query: String,
    filter: String,
    limit: Int,
    offset: Int,
    fields: List[Field],
    params: Map[String, String]
) {

  /** Returns a copy whose offset is advanced by `limit`. */
  def nextPage: QueryData = {
    val nextOffset = offset + limit
    copy(offset = nextOffset)
  }

  /** Returns a copy with highlighting parameters added to `params`.
    *
    * Note that the `fields` argument shadows the member of the same name;
    * only the argument is used here.
    *
    * @param fields the fields whose names go into `hl.fl`
    * @param pre    value for `hl.simple.pre`
    * @param post   value for `hl.simple.post`
    */
  def withHighLight(fields: List[Field], pre: String, post: String): QueryData = {
    val highlightFields = fields.map(_.name).mkString(",")
    val highlightParams = Map(
      "hl"                   -> "on",
      "hl.requireFieldMatch" -> "true",
      "hl.fl"                -> highlightFields,
      "hl.simple.pre"        -> pre,
      "hl.simple.post"       -> post
    )
    copy(params = params ++ highlightParams)
  }
}
object QueryData {

  /** JSON encoder derived from the case class fields. */
  implicit val jsonEncoder: Encoder[QueryData] =
    deriveEncoder[QueryData]

  /** Builds the query data for a full-text query.
    *
    * The sanitized query text is applied to every field in `search`, and the
    * resulting clauses are joined with `OR`. The filter always restricts to
    * the collective of `fq`; if `fq.items` is non-empty it also restricts to
    * those item ids. Highlighting is enabled for the `search` fields using
    * `**` as both pre and post marker.
    *
    * @param search the fields to search in (and to highlight)
    * @param fields the fields passed on as `fields` of the result
    * @param fq     the full-text query
    */
  def apply(search: List[Field], fields: List[Field], fq: FtsQuery): QueryData = {
    val text             = sanitize(fq.q)
    val searchQuery      = search.map(field => s"${field.name}:($text)").mkString(" OR ")
    val collectiveFilter = s"""${Field.collectiveId.name}:"${fq.collective.id}""""
    val filterQuery =
      if (fq.items.isEmpty) collectiveFilter
      else {
        val itemIds = fq.items.map(_.id).mkString(" ")
        s"""$collectiveFilter AND ${Field.itemId.name}:($itemIds)"""
      }

    val base = QueryData(searchQuery, filterQuery, fq.limit, fq.offset, fields, Map.empty)
    base.withHighLight(search, "**", "**")
  }

  /** Replaces every run of parentheses and commas with a single space. */
  private def sanitize(q: String): String =
    q.replaceAll("[\\(,\\)]+", " ")
}

View File

@ -5,32 +5,20 @@ import cats.effect._
import cats.implicits._
import org.http4s.client.Client
import cats.data.NonEmptyList
import docspell.common._
import docspell.ftsclient._
import docspell.ftsclient.FtsBasicResult._
final class SolrFtsClient[F[_]: Effect](
solrUpdate: SolrUpdate[F],
solrSetup: SolrSetup[F]
solrSetup: SolrSetup[F],
solrQuery: SolrQuery[F]
) extends FtsClient[F] {
def initialize: F[Unit] =
solrSetup.setupSchema
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] =
Stream.emits(
Seq(
FtsBasicResult(
Ident.unsafe("5J4zvCiTE2j-UEznDUsUCsA-5px6ftrSwfs-FpUWCaHh2Ei"),
NonEmptyList.of(AttachmentMatch(Ident.unsafe("a"), 0.2))
),
FtsBasicResult(
Ident.unsafe("8B8UNoC1U4y-dqnqjdFG7ue-LG5ktz9pWVt-diFemCLrLAa"),
NonEmptyList.of(AttachmentMatch(Ident.unsafe("b"), 0.5))
)
)
)
def searchBasic(q: FtsQuery): Stream[F, FtsResult] =
Stream.eval(solrQuery.query(q))
def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
(for {
@ -53,7 +41,11 @@ object SolrFtsClient {
httpClient: Client[F]
): Resource[F, FtsClient[F]] =
Resource.pure[F, FtsClient[F]](
new SolrFtsClient(SolrUpdate(cfg, httpClient), SolrSetup(cfg, httpClient))
new SolrFtsClient(
SolrUpdate(cfg, httpClient),
SolrSetup(cfg, httpClient),
SolrQuery(cfg, httpClient)
)
)
}

View File

@ -0,0 +1,54 @@
package docspell.ftssolr
import cats.effect._
import org.http4s._
import org.http4s.client.Client
import org.http4s.circe._
import org.http4s.circe.CirceEntityDecoder._
import org.http4s.client.dsl.Http4sClientDsl
import _root_.io.circe.syntax._
import org.log4s.getLogger
import docspell.ftsclient._
import JsonCodec._
/** Runs queries and decodes the response into an `FtsResult`. */
trait SolrQuery[F[_]] {

  /** Runs the given, fully prepared query. */
  def query(q: QueryData): F[FtsResult]

  /** Converts the full-text query into a `QueryData` and runs it.
    *
    * The search covers content, item name, item notes and attachment name.
    * The requested fields are id, item id, collective id, score,
    * attachment id and discriminator.
    */
  def query(q: FtsQuery): F[FtsResult] = {
    val searchFields =
      List(Field.content, Field.itemName, Field.itemNotes, Field.attachmentName)
    val returnFields =
      List(
        Field.id,
        Field.itemId,
        Field.collectiveId,
        Field("score"),
        Field.attachmentId,
        Field.discriminator
      )
    query(QueryData(searchFields, returnFields, q))
  }
}
object SolrQuery {
  private[this] val logger = getLogger

  /** Creates a `SolrQuery` that posts queries as JSON to `<cfg.url>/query`
    * using the given http client and decodes the response to an `FtsResult`.
    *
    * @param cfg    the configuration providing the base url
    * @param client the http client used to run the request
    */
  def apply[F[_]: ConcurrentEffect](cfg: SolrConfig, client: Client[F]): SolrQuery[F] = {
    val dsl = new Http4sClientDsl[F] {}
    import dsl._

    new SolrQuery[F] {
      val url = Uri.unsafeFromString(cfg.url.asString) / "query"

      def query(q: QueryData): F[FtsResult] = {
        val eff = ConcurrentEffect[F]
        val req = Method.POST(q.asJson, url)
        // Logging is a side effect: suspend it so it happens each time the
        // returned effect is run, and not once while the effect is being built.
        val logQuery = eff.delay(logger.debug(s"Running query: $req : ${q.asJson}"))
        eff.flatMap(logQuery)(_ => client.expect[FtsResult](req))
      }
    }
  }
}

View File

@ -11,8 +11,6 @@ import _root_.io.circe.syntax._
import _root_.io.circe._
import _root_.io.circe.generic.semiauto._
import Fields.{Attachment, Item}
trait SolrSetup[F[_]] {
def setupSchema: F[Unit]
@ -33,18 +31,18 @@ object SolrSetup {
def setupSchema: F[Unit] = {
val cmds0 =
List(
Fields.id,
Fields.itemId,
Fields.collectiveId,
Fields.discriminator,
Attachment.attachmentId
Field.id,
Field.itemId,
Field.collectiveId,
Field.discriminator,
Field.attachmentId
)
.traverse(addStringField)
val cmds1 = List(
Attachment.attachmentName,
Attachment.content,
Item.itemName,
Item.itemNotes
Field.attachmentName,
Field.content,
Field.itemName,
Field.itemNotes
)
.traverse(addTextField)
@ -57,13 +55,13 @@ object SolrSetup {
client.expect[String](req).map(r => logger.debug(s"Response: $r"))
}
private def addStringField(name: String): F[Unit] =
run(DeleteField.command(DeleteField(name))).attempt *>
run(AddField.command(AddField.string(name)))
private def addStringField(field: Field): F[Unit] =
run(DeleteField.command(DeleteField(field))).attempt *>
run(AddField.command(AddField.string(field)))
private def addTextField(name: String): F[Unit] =
run(DeleteField.command(DeleteField(name))).attempt *>
run(AddField.command(AddField.text(name)))
private def addTextField(field: Field): F[Unit] =
run(DeleteField.command(DeleteField(field))).attempt *>
run(AddField.command(AddField.text(field)))
}
}
@ -71,7 +69,7 @@ object SolrSetup {
// Schema Commands
case class AddField(
name: String,
name: Field,
`type`: String,
stored: Boolean,
indexed: Boolean,
@ -84,14 +82,14 @@ object SolrSetup {
def command(body: AddField): Json =
Map("add-field" -> body.asJson).asJson
def string(name: String): AddField =
AddField(name, "string", true, true, false)
def string(field: Field): AddField =
AddField(field, "string", true, true, false)
def text(name: String): AddField =
AddField(name, "text_general", true, true, false)
def text(field: Field): AddField =
AddField(field, "text_general", true, true, false)
}
case class DeleteField(name: String)
case class DeleteField(name: Field)
object DeleteField {
implicit val encoder: Encoder[DeleteField] =
deriveEncoder[DeleteField]