Transport highlighting information to the client

Eike Kettner 2020-06-22 22:54:39 +02:00
parent a58ffd11e1
commit ffbb16db45
8 changed files with 202 additions and 48 deletions
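In short: the full-text backend now accepts highlight markers together with the query (OFulltext.FtsInput), forwards them to the search index, and returns each hit together with its score and highlighted context lines (FtsData), which the REST layer exposes as a new `highlighting` field on list items. A minimal sketch of the new call, using only names introduced in the diffs below; `backend`, `query` and the effect type come from the surrounding application and are assumed here, not defined by this commit:

import docspell.backend.ops.OFulltext
import docspell.backend.ops.OItemSearch.Batch

// the query string plus the markers to wrap around matches in the context lines
val ftsInput = OFulltext.FtsInput("invoice", highlightPre = "**", highlightPost = "**")

backend.fulltext
  .findItems(query, ftsInput, Batch(0, 20))
  .map(_.foreach { fi =>
    // each FtsItem pairs the sql ListItem with FtsData: max score, count,
    // query time and the highlighted context lines of every match
    fi.ftsData.items.foreach(d => println(d.context.mkString("\n")))
  })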


@@ -13,10 +13,18 @@ import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
 trait OFulltext[F[_]] {

-  def findItems(q: Query, fts: String, batch: Batch): F[Vector[ListItem]]
+  def findItems(
+      q: Query,
+      fts: OFulltext.FtsInput,
+      batch: Batch
+  ): F[Vector[OFulltext.FtsItem]]

   /** Same as `findItems` but does more queries per item to find all tags. */
-  def findItemsWithTags(q: Query, fts: String, batch: Batch): F[Vector[ListItemWithTags]]
+  def findItemsWithTags(
+      q: Query,
+      fts: OFulltext.FtsInput,
+      batch: Batch
+  ): F[Vector[OFulltext.FtsItemWithTags]]

   /** Clears the full-text index completely and launches a task that
     * indexes all data.
@@ -30,9 +38,26 @@ trait OFulltext[F[_]] {
 }

 object OFulltext {

-  // maybe use a temporary table? could run fts and do .take(batch.limit) and store this in sql
-  // then run a query
-  // check if supported by mariadb, postgres and h2. seems like it is supported everywhere
+  case class FtsInput(
+      query: String,
+      highlightPre: String = "***",
+      highlightPost: String = "***"
+  )
+
+  case class FtsDataItem(
+      score: Double,
+      matchData: FtsResult.MatchData,
+      context: List[String]
+  )
+  case class FtsData(
+      maxScore: Double,
+      count: Int,
+      qtime: Duration,
+      items: List[FtsDataItem]
+  )
+  case class FtsItem(item: ListItem, ftsData: FtsData)
+  case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)

   def apply[F[_]: Effect](
       itemSearch: OItemSearch[F],
@@ -59,53 +84,83 @@ object OFulltext {
           else queue.insertIfNew(job) *> joex.notifyAllNodes
         } yield ()

-      def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
-        findItemsFts(q, ftsQ, batch.first, itemSearch.findItems)
+      def findItems(q: Query, ftsQ: FtsInput, batch: Batch): F[Vector[FtsItem]] =
+        findItemsFts(q, ftsQ, batch.first, itemSearch.findItems, convertFtsData[ListItem])
           .drop(batch.offset.toLong)
           .take(batch.limit.toLong)
+          .map({ case (li, fd) => FtsItem(li, fd) })
           .compile
           .toVector

       def findItemsWithTags(
           q: Query,
-          ftsQ: String,
+          ftsQ: FtsInput,
           batch: Batch
-      ): F[Vector[ListItemWithTags]] =
-        findItemsFts(q, ftsQ, batch.first, itemSearch.findItemsWithTags)
+      ): F[Vector[FtsItemWithTags]] =
+        findItemsFts(
+          q,
+          ftsQ,
+          batch.first,
+          itemSearch.findItemsWithTags,
+          convertFtsData[ListItemWithTags]
+        )
           .drop(batch.offset.toLong)
           .take(batch.limit.toLong)
+          .map({ case (li, fd) => FtsItemWithTags(li, fd) })
           .compile
           .toVector

-      private def findItemsFts[A: ItemId](
+      private def findItemsFts[A: ItemId, B](
           q: Query,
-          ftsQ: String,
+          ftsQ: FtsInput,
           batch: Batch,
-          search: (Query, Batch) => F[Vector[A]]
-      ): Stream[F, A] = {
+          search: (Query, Batch) => F[Vector[A]],
+          convert: (
+              FtsResult,
+              Map[Ident, List[FtsResult.ItemMatch]]
+          ) => PartialFunction[A, (A, FtsData)]
+      ): Stream[F, (A, FtsData)] = {
         val sqlResult = search(q, batch)
-        val fq = FtsQuery(ftsQ, q.collective, Set.empty, batch.limit, batch.offset)
+        val fq = FtsQuery(
+          ftsQ.query,
+          q.collective,
+          Set.empty,
+          batch.limit,
+          batch.offset,
+          FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
+        )

         val qres =
           for {
             items <- sqlResult
             ids  = items.map(a => ItemId[A].itemId(a))
             ftsQ = fq.copy(items = ids.toSet)
-            ftsR <-
-              fts
-                .search(ftsQ)
-                .map(_.results.map(_.itemId))
-                .map(_.toSet)
-            res = items.filter(a => ftsR.contains(ItemId[A].itemId(a)))
+            ftsR <- fts.search(ftsQ)
+            ftsItems = ftsR.results.groupBy(_.itemId)
+            res      = items.collect(convert(ftsR, ftsItems))
           } yield res

         Stream.eval(qres).flatMap { v =>
           val results = Stream.emits(v)
           if (v.size < batch.limit) results
-          else results ++ findItemsFts(q, ftsQ, batch.next, search)
+          else results ++ findItemsFts(q, ftsQ, batch.next, search, convert)
         }
       }

+      private def convertFtsData[A: ItemId](
+          ftr: FtsResult,
+          ftrItems: Map[Ident, List[FtsResult.ItemMatch]]
+      ): PartialFunction[A, (A, FtsData)] = {
+        case a if ftrItems.contains(ItemId[A].itemId(a)) =>
+          val ftsDataItems = ftrItems
+            .get(ItemId[A].itemId(a))
+            .getOrElse(Nil)
+            .map(im =>
+              FtsDataItem(im.score, im.data, ftr.highlight.get(im.id).getOrElse(Nil))
+            )
+          (a, FtsData(ftr.maxScore, ftr.count, ftr.qtime, ftsDataItems))
+      }
     })

   trait ItemId[A] {
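The `convert` argument threaded through `findItemsFts` is a PartialFunction: it is only defined for items that appear in the full-text result, so `items.collect(convert(ftsR, ftsItems))` filters out items without a match and attaches the match data in a single pass. A self-contained sketch of that pattern, with simplified stand-in types rather than the real docspell classes:

// stand-ins for ListItem and FtsData
final case class Hit(score: Double, context: List[String])

// defined only for ids that have a full-text hit
def convert(hits: Map[String, Hit]): PartialFunction[String, (String, Hit)] = {
  case id if hits.contains(id) => (id, hits(id))
}

val items = List("a", "b", "c")
val hits  = Map("a" -> Hit(1.5, List("…a **match** in context…")), "c" -> Hit(0.7, Nil))

// keeps only items with a hit and pairs them with the hit data,
// just like items.collect(convert(ftsR, ftsItems)) above
val res: List[(String, Hit)] = items.collect(convert(hits))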


@@ -15,9 +15,19 @@ final case class FtsQuery(
     collective: Ident,
     items: Set[Ident],
     limit: Int,
-    offset: Int
+    offset: Int,
+    highlight: FtsQuery.HighlightSetting
 ) {

   def nextPage: FtsQuery =
     copy(offset = limit + offset)
 }
+
+object FtsQuery {
+
+  case class HighlightSetting(pre: String, post: String)
+
+  object HighlightSetting {
+    val default = HighlightSetting("**", "**")
+  }
+}
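For reference, this is how the extended FtsQuery is constructed (positionally, as in OFulltext above); the `collective` identifier is assumed to come from the caller, and the import path is assumed to match that of FtsResult:

import docspell.ftsclient.FtsQuery

val fq = FtsQuery(
  "invoice",                          // the full-text query string
  collective,                         // Ident of the current collective
  Set.empty,                          // restrict to these item ids (none here)
  20,                                 // limit
  0,                                  // offset
  FtsQuery.HighlightSetting.default   // wrap matches in "**" ... "**"
)

// paging keeps the query and highlight setting, only the offset advances
val nextPage = fq.nextPage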


@@ -46,8 +46,8 @@ object QueryData {
     }
     QueryData(extQ, filterQ, fq.limit, fq.offset, fields, Map.empty).withHighLight(
       search,
-      "**",
-      "**"
+      fq.highlight.pre,
+      fq.highlight.post
     )
   }
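The pre/post markers eventually end up as highlighting parameters of the Solr request. The exact mapping lives in `QueryData.withHighLight` and is not shown in this diff; as a rough illustration only, using the standard Solr highlighter parameter names (an assumption, not taken from this commit):

// Hypothetical helper showing the kind of parameters withHighLight
// presumably sets; the parameter names are standard Solr ones, the
// real mapping is inside QueryData.
def highlightParams(field: String, pre: String, post: String): Map[String, String] =
  Map(
    "hl"             -> "on",
    "hl.fl"          -> field,
    "hl.simple.pre"  -> pre,  // e.g. fq.highlight.pre
    "hl.simple.post" -> post  // e.g. fq.highlight.post
  )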


@@ -367,12 +367,12 @@ docspell.joex {
   # Configuration of the full-text search engine.
   full-text-search {
-    enabled = true
+    enabled = false

     migration = {
       index-all-chunk = 10
     }

     solr = {
-      url = "http://localhost:8983/solr/docspell_core"
+      url = "http://localhost:8983/solr/docspell"
       commit-within = 1000
     }
   }


@@ -3502,6 +3502,29 @@ components:
           type: array
           items:
             $ref: "#/components/schemas/Tag"
+        highlighting:
+          description: |
+            Optional contextual information of a search query. Each
+            item refers to some field where a search match was found
+            (e.g. an attachment name or the item notes) and a list of
+            lines giving the surrounding context of the match.
+          type: array
+          items:
+            $ref: "#/components/schemas/HighlightEntry"
+    HighlightEntry:
+      description: |
+        Highlighting information for a single field (for example an
+        attachment name or the item notes).
+      required:
+        - name
+        - lines
+      properties:
+        name:
+          type: string
+        lines:
+          type: array
+          items:
+            type: string
     IdName:
       description: |
         The identifier and a human readable name of some entity.
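On the Scala side, the model generated for the schema above is a HighlightEntry carrying the field name and its context lines. A made-up example of what a search response item could hold in its new `highlighting` field:

// Illustrative values only; HighlightEntry is the model generated from
// the openapi schema above, constructed as (name, lines).
val highlighting = List(
  HighlightEntry("scan_2020.pdf", List("…page with the **invoice** number…")), // attachment match
  HighlightEntry("Item", List("note mentioning the **invoice**"))              // match in item data
)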


@@ -91,7 +91,7 @@ docspell.server {
     # memory and disk space. It can be enabled later any time.
     #
     # Currently the SOLR search platform is supported.
-    enabled = true
+    enabled = false

     # When re-creating the complete index via a REST call, this key
     # is required. If left empty (the default), recreating the index
@@ -103,7 +103,7 @@ docspell.server {
     # Configuration for the SOLR backend.
     solr = {
-      url = "http://localhost:8983/solr/docspell_core"
+      url = "http://localhost:8983/solr/docspell"
       commit-within = 1000
     }
   }


@@ -14,8 +14,9 @@ import bitpeace.FileMeta
 import docspell.backend.ops.OCollective.{InsightData, PassChangeResult}
 import docspell.backend.ops.OJob.JobCancelResult
 import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult}
-import docspell.backend.ops.{OItemSearch, OJob, OOrganization, OUpload}
+import docspell.backend.ops.{OFulltext, OItemSearch, OJob, OOrganization, OUpload}
 import docspell.store.AddResult
+import docspell.ftsclient.FtsResult
 import org.http4s.multipart.Multipart
 import org.http4s.headers.`Content-Type`
 import org.log4s.Logger
@@ -139,6 +140,17 @@ trait Conversions {
     ItemLightList(gs)
   }

+  def mkItemListFts(v: Vector[OFulltext.FtsItem]): ItemLightList = {
+    val groups = v.groupBy(item => item.item.date.toUtcDate.toString.substring(0, 7))
+
+    def mkGroup(g: (String, Vector[OFulltext.FtsItem])): ItemLightGroup =
+      ItemLightGroup(g._1, g._2.map(mkItemLight).toList)
+
+    val gs =
+      groups.map(mkGroup _).toList.sortWith((g1, g2) => g1.name.compareTo(g2.name) >= 0)
+    ItemLightList(gs)
+  }
+
   def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = {
     val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7))
@@ -150,6 +162,17 @@ trait Conversions {
     ItemLightList(gs)
   }

+  def mkItemListWithTagsFts(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = {
+    val groups = v.groupBy(ti => ti.item.item.date.toUtcDate.toString.substring(0, 7))
+
+    def mkGroup(g: (String, Vector[OFulltext.FtsItemWithTags])): ItemLightGroup =
+      ItemLightGroup(g._1, g._2.map(mkItemLightWithTags).toList)
+
+    val gs =
+      groups.map(mkGroup _).toList.sortWith((g1, g2) => g1.name.compareTo(g2.name) >= 0)
+    ItemLightList(gs)
+  }
+
   def mkItemLight(i: OItemSearch.ListItem): ItemLight =
     ItemLight(
       i.id,
@@ -164,12 +187,35 @@ trait Conversions {
       i.concPerson.map(mkIdName),
       i.concEquip.map(mkIdName),
       i.fileCount,
+      Nil,
       Nil
     )

+  def mkItemLight(i: OFulltext.FtsItem): ItemLight = {
+    val il        = mkItemLight(i.item)
+    val highlight = mkHighlight(i.ftsData)
+    il.copy(highlighting = highlight)
+  }
+
   def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight =
     mkItemLight(i.item).copy(tags = i.tags.map(mkTag))

+  def mkItemLightWithTags(i: OFulltext.FtsItemWithTags): ItemLight = {
+    val il        = mkItemLightWithTags(i.item)
+    val highlight = mkHighlight(i.ftsData)
+    il.copy(highlighting = highlight)
+  }
+
+  private def mkHighlight(ftsData: OFulltext.FtsData): List[HighlightEntry] =
+    ftsData.items.filter(_.context.nonEmpty).sortBy(-_.score).map { fdi =>
+      fdi.matchData match {
+        case FtsResult.AttachmentData(_, aName) =>
+          HighlightEntry(aName, fdi.context)
+        case FtsResult.ItemData =>
+          HighlightEntry("Item", fdi.context)
+      }
+    }
+
   // job
   def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = {
     def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = {
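mkHighlight drops matches without context lines, orders the remaining ones by descending score and labels each entry with the attachment name (or the literal "Item" for matches in the item data itself). A self-contained illustration of that transformation with simplified stand-in types:

// stand-ins for FtsDataItem and HighlightEntry
final case class Match(score: Double, name: String, context: List[String])
final case class Entry(name: String, lines: List[String])

def toEntries(ms: List[Match]): List[Entry] =
  ms.filter(_.context.nonEmpty) // no context lines => nothing to highlight
    .sortBy(-_.score)           // best matches first
    .map(m => Entry(m.name, m.context))

val entries = toEntries(
  List(
    Match(0.4, "Item", List("…**invoice** in the notes…")),
    Match(2.1, "scan.pdf", List("…page with the **invoice**…")),
    Match(1.0, "other.pdf", Nil) // dropped: empty context
  )
)
// => List(Entry("scan.pdf", …), Entry("Item", …))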


@@ -5,6 +5,7 @@ import cats.implicits._
 import docspell.backend.BackendApp
 import docspell.backend.auth.AuthToken
 import docspell.backend.ops.OItemSearch.Batch
+import docspell.backend.ops.OFulltext
 import docspell.common.{Ident, ItemState}
 import org.http4s.HttpRoutes
 import org.http4s.dsl.Http4sDsl
@@ -34,11 +35,25 @@ object ItemRoutes {
           _ <- logger.ftrace(s"Got search mask: $mask")
           query = Conversions.mkQuery(mask, user.account.collective)
           _ <- logger.ftrace(s"Running query: $query")
-          items <- backend.itemSearch.findItems(
-            query,
-            Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
-          )
-          resp <- Ok(Conversions.mkItemList(items))
+          resp <- mask.fullText match {
+            case Some(fq) if cfg.fullTextSearch.enabled =>
+              for {
+                items <- backend.fulltext.findItems(
+                  query,
+                  OFulltext.FtsInput(fq),
+                  Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
+                )
+                ok <- Ok(Conversions.mkItemListFts(items))
+              } yield ok
+            case _ =>
+              for {
+                items <- backend.itemSearch.findItems(
+                  query,
+                  Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
+                )
+                ok <- Ok(Conversions.mkItemList(items))
+              } yield ok
+          }
         } yield resp

       case req @ POST -> Root / "searchWithTags" =>
@@ -47,20 +62,25 @@ object ItemRoutes {
           _ <- logger.ftrace(s"Got search mask: $mask")
           query = Conversions.mkQuery(mask, user.account.collective)
           _ <- logger.ftrace(s"Running query: $query")
-          items <- mask.fullText match {
-            case None =>
-              backend.itemSearch.findItemsWithTags(
-                query,
-                Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
-              )
-            case Some(fq) =>
-              backend.fulltext.findItemsWithTags(
-                query,
-                fq,
-                Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
-              )
+          resp <- mask.fullText match {
+            case Some(fq) if cfg.fullTextSearch.enabled =>
+              for {
+                items <- backend.fulltext.findItemsWithTags(
+                  query,
+                  OFulltext.FtsInput(fq),
+                  Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
+                )
+                ok <- Ok(Conversions.mkItemListWithTagsFts(items))
+              } yield ok
+            case _ =>
+              for {
+                items <- backend.itemSearch.findItemsWithTags(
+                  query,
+                  Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
+                )
+                ok <- Ok(Conversions.mkItemListWithTags(items))
+              } yield ok
           }
-          resp <- Ok(Conversions.mkItemListWithTags(items))
         } yield resp

       case GET -> Root / Ident(id) =>