Transport highlighting information to the client

This commit is contained in:
Eike Kettner 2020-06-22 22:54:39 +02:00
parent a58ffd11e1
commit ffbb16db45
8 changed files with 202 additions and 48 deletions

View File

@ -13,10 +13,18 @@ import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
trait OFulltext[F[_]] {
def findItems(q: Query, fts: String, batch: Batch): F[Vector[ListItem]]
def findItems(
q: Query,
fts: OFulltext.FtsInput,
batch: Batch
): F[Vector[OFulltext.FtsItem]]
/** Same as `findItems` but does more queries per item to find all tags. */
def findItemsWithTags(q: Query, fts: String, batch: Batch): F[Vector[ListItemWithTags]]
def findItemsWithTags(
q: Query,
fts: OFulltext.FtsInput,
batch: Batch
): F[Vector[OFulltext.FtsItemWithTags]]
/** Clears the full-text index completely and launches a task that
* indexes all data.
@ -30,9 +38,26 @@ trait OFulltext[F[_]] {
}
object OFulltext {
// maybe use a temporary table? could run fts and do .take(batch.limit) and store this in sql
// then run a query
// check if supported by mariadb, postgres and h2. seems like it is supported everywhere
/** Input for a full-text search request.
  *
  * `query` is the search string. `highlightPre` and `highlightPost`
  * are forwarded as a `FtsQuery.HighlightSetting` when the full-text
  * query is built; both default to `***`.
  *
  * NOTE(review): presumably these are the markers placed around
  * matched text in the returned context lines -- confirm against the
  * full-text client.
  */
case class FtsInput(
query: String,
highlightPre: String = "***",
highlightPost: String = "***"
)
/** Full-text data of a single match belonging to an item.
  *
  * Built from one `FtsResult.ItemMatch`: its score, its match data
  * and the highlight lines looked up for that match (empty if none
  * were returned).
  */
case class FtsDataItem(
score: Double,
matchData: FtsResult.MatchData,
context: List[String]
)
/** Full-text information attached to one search result item.
  *
  * `maxScore`, `count` and `qtime` are copied from the `FtsResult`
  * the item was found in; `items` holds one entry per match that
  * refers to the item.
  */
case class FtsData(
maxScore: Double,
count: Int,
qtime: Duration,
items: List[FtsDataItem]
)
/** A list item together with its full-text data. */
case class FtsItem(item: ListItem, ftsData: FtsData)
/** A list item including its tags, together with its full-text data. */
case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)
def apply[F[_]: Effect](
itemSearch: OItemSearch[F],
@ -59,53 +84,83 @@ object OFulltext {
else queue.insertIfNew(job) *> joex.notifyAllNodes
} yield ()
def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
findItemsFts(q, ftsQ, batch.first, itemSearch.findItems)
def findItems(q: Query, ftsQ: FtsInput, batch: Batch): F[Vector[FtsItem]] =
findItemsFts(q, ftsQ, batch.first, itemSearch.findItems, convertFtsData[ListItem])
.drop(batch.offset.toLong)
.take(batch.limit.toLong)
.map({ case (li, fd) => FtsItem(li, fd) })
.compile
.toVector
def findItemsWithTags(
q: Query,
ftsQ: String,
ftsQ: FtsInput,
batch: Batch
): F[Vector[ListItemWithTags]] =
findItemsFts(q, ftsQ, batch.first, itemSearch.findItemsWithTags)
): F[Vector[FtsItemWithTags]] =
findItemsFts(
q,
ftsQ,
batch.first,
itemSearch.findItemsWithTags,
convertFtsData[ListItemWithTags]
)
.drop(batch.offset.toLong)
.take(batch.limit.toLong)
.map({ case (li, fd) => FtsItemWithTags(li, fd) })
.compile
.toVector
private def findItemsFts[A: ItemId](
private def findItemsFts[A: ItemId, B](
q: Query,
ftsQ: String,
ftsQ: FtsInput,
batch: Batch,
search: (Query, Batch) => F[Vector[A]]
): Stream[F, A] = {
search: (Query, Batch) => F[Vector[A]],
convert: (
FtsResult,
Map[Ident, List[FtsResult.ItemMatch]]
) => PartialFunction[A, (A, FtsData)]
): Stream[F, (A, FtsData)] = {
val sqlResult = search(q, batch)
val fq = FtsQuery(ftsQ, q.collective, Set.empty, batch.limit, batch.offset)
val fq = FtsQuery(
ftsQ.query,
q.collective,
Set.empty,
batch.limit,
batch.offset,
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
)
val qres =
for {
items <- sqlResult
ids = items.map(a => ItemId[A].itemId(a))
ftsQ = fq.copy(items = ids.toSet)
ftsR <-
fts
.search(ftsQ)
.map(_.results.map(_.itemId))
.map(_.toSet)
res = items.filter(a => ftsR.contains(ItemId[A].itemId(a)))
ftsR <- fts.search(ftsQ)
ftsItems = ftsR.results.groupBy(_.itemId)
res = items.collect(convert(ftsR, ftsItems))
} yield res
Stream.eval(qres).flatMap { v =>
val results = Stream.emits(v)
if (v.size < batch.limit) results
else results ++ findItemsFts(q, ftsQ, batch.next, search)
else results ++ findItemsFts(q, ftsQ, batch.next, search, convert)
}
}
/** Creates the partial function that pairs a search result `A` with
  * its full-text data.
  *
  * The function is only defined for values whose item id occurs in
  * `ftrItems`. For those, every match of the item is turned into a
  * `FtsDataItem` carrying the highlight lines found for the match id
  * (or an empty list if there are none).
  */
private def convertFtsData[A: ItemId](
    ftr: FtsResult,
    ftrItems: Map[Ident, List[FtsResult.ItemMatch]]
): PartialFunction[A, (A, FtsData)] = {
  case a if ftrItems.contains(ItemId[A].itemId(a)) =>
    val matches = ftrItems.getOrElse(ItemId[A].itemId(a), Nil)
    val dataItems = matches.map { m =>
      val lines = ftr.highlight.get(m.id).getOrElse(Nil)
      FtsDataItem(m.score, m.data, lines)
    }
    (a, FtsData(ftr.maxScore, ftr.count, ftr.qtime, dataItems))
}
})
trait ItemId[A] {

View File

@ -15,9 +15,19 @@ final case class FtsQuery(
collective: Ident,
items: Set[Ident],
limit: Int,
offset: Int
offset: Int,
highlight: FtsQuery.HighlightSetting
) {
def nextPage: FtsQuery =
copy(offset = limit + offset)
}
object FtsQuery {

  /** The two strings a full-text query carries as its highlight
    * setting: `pre` and `post`.
    *
    * NOTE(review): presumably inserted before and after each matched
    * term by the search backend -- confirm against the client code.
    */
  case class HighlightSetting(pre: String, post: String)

  object HighlightSetting {

    /** Setting using `**` for both `pre` and `post`. */
    val default: HighlightSetting = HighlightSetting(pre = "**", post = "**")
  }
}

View File

@ -46,8 +46,8 @@ object QueryData {
}
QueryData(extQ, filterQ, fq.limit, fq.offset, fields, Map.empty).withHighLight(
search,
"**",
"**"
fq.highlight.pre,
fq.highlight.post
)
}

View File

@ -367,12 +367,12 @@ docspell.joex {
# Configuration of the full-text search engine.
full-text-search {
enabled = true
enabled = false
migration = {
index-all-chunk = 10
}
solr = {
url = "http://localhost:8983/solr/docspell_core"
url = "http://localhost:8983/solr/docspell"
commit-within = 1000
}
}

View File

@ -3502,6 +3502,29 @@ components:
type: array
items:
$ref: "#/components/schemas/Tag"
highlighting:
description: |
Optional contextual information of a search query. Each
item refers to some field where a search match was found
(e.g. the name of an attachment or the item notes) and a
list of lines giving surrounding context of the match.
type: array
items:
$ref: "#/components/schemas/HighlightEntry"
HighlightEntry:
description: |
Highlighting information for a single field (maybe attachment
name or item notes).
required:
- name
- lines
properties:
name:
type: string
lines:
type: array
items:
type: string
IdName:
description: |
The identifier and a human readable name of some entity.

View File

@ -91,7 +91,7 @@ docspell.server {
# memory and disk space. It can be enabled later any time.
#
# Currently the SOLR search platform is supported.
enabled = true
enabled = false
# When re-creating the complete index via a REST call, this key
# is required. If left empty (the default), recreating the index
@ -103,7 +103,7 @@ docspell.server {
# Configuration for the SOLR backend.
solr = {
url = "http://localhost:8983/solr/docspell_core"
url = "http://localhost:8983/solr/docspell"
commit-within = 1000
}
}

View File

@ -14,8 +14,9 @@ import bitpeace.FileMeta
import docspell.backend.ops.OCollective.{InsightData, PassChangeResult}
import docspell.backend.ops.OJob.JobCancelResult
import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult}
import docspell.backend.ops.{OItemSearch, OJob, OOrganization, OUpload}
import docspell.backend.ops.{OFulltext, OItemSearch, OJob, OOrganization, OUpload}
import docspell.store.AddResult
import docspell.ftsclient.FtsResult
import org.http4s.multipart.Multipart
import org.http4s.headers.`Content-Type`
import org.log4s.Logger
@ -139,6 +140,17 @@ trait Conversions {
ItemLightList(gs)
}
/** Converts full-text search results into an `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC date
  * string and the groups are ordered by name, descending.
  */
def mkItemListFts(v: Vector[OFulltext.FtsItem]): ItemLightList = {
  val byMonth = v.groupBy(_.item.date.toUtcDate.toString.substring(0, 7))
  val gs = byMonth
    .map { case (month, members) =>
      ItemLightGroup(month, members.map(fi => mkItemLight(fi)).toList)
    }
    .toList
    .sortWith((a, b) => a.name.compareTo(b.name) >= 0)
  ItemLightList(gs)
}
def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = {
val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7))
@ -150,6 +162,17 @@ trait Conversions {
ItemLightList(gs)
}
/** Converts full-text search results (with tags) into an
  * `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC date
  * string and the groups are ordered by name, descending.
  */
def mkItemListWithTagsFts(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = {
  val byMonth = v.groupBy(_.item.item.date.toUtcDate.toString.substring(0, 7))
  val gs = byMonth
    .map { case (month, members) =>
      ItemLightGroup(month, members.map(fi => mkItemLightWithTags(fi)).toList)
    }
    .toList
    .sortWith((a, b) => a.name.compareTo(b.name) >= 0)
  ItemLightList(gs)
}
def mkItemLight(i: OItemSearch.ListItem): ItemLight =
ItemLight(
i.id,
@ -164,12 +187,35 @@ trait Conversions {
i.concPerson.map(mkIdName),
i.concEquip.map(mkIdName),
i.fileCount,
Nil,
Nil
)
/** Converts a full-text result into an `ItemLight`, filling its
  * `highlighting` from the attached full-text data.
  */
def mkItemLight(i: OFulltext.FtsItem): ItemLight =
  mkItemLight(i.item).copy(highlighting = mkHighlight(i.ftsData))
def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight =
mkItemLight(i.item).copy(tags = i.tags.map(mkTag))
/** Converts a full-text result with tags into an `ItemLight`, filling
  * its `highlighting` from the attached full-text data.
  */
def mkItemLightWithTags(i: OFulltext.FtsItemWithTags): ItemLight =
  mkItemLightWithTags(i.item).copy(highlighting = mkHighlight(i.ftsData))
/** Turns full-text data into highlight entries.
  *
  * Matches without context lines are dropped; the remaining ones are
  * ordered by score, highest first. An attachment match is labelled
  * with the attachment name, an item match with the fixed name "Item".
  */
private def mkHighlight(ftsData: OFulltext.FtsData): List[HighlightEntry] = {
  val withContext = ftsData.items.filter(fdi => fdi.context.nonEmpty)
  val bestFirst = withContext.sortBy(fdi => -fdi.score)
  bestFirst.map { fdi =>
    val name = fdi.matchData match {
      case FtsResult.AttachmentData(_, aName) => aName
      case FtsResult.ItemData => "Item"
    }
    HighlightEntry(name, fdi.context)
  }
}
// job
def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = {
def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = {

View File

@ -5,6 +5,7 @@ import cats.implicits._
import docspell.backend.BackendApp
import docspell.backend.auth.AuthToken
import docspell.backend.ops.OItemSearch.Batch
import docspell.backend.ops.OFulltext
import docspell.common.{Ident, ItemState}
import org.http4s.HttpRoutes
import org.http4s.dsl.Http4sDsl
@ -34,11 +35,25 @@ object ItemRoutes {
_ <- logger.ftrace(s"Got search mask: $mask")
query = Conversions.mkQuery(mask, user.account.collective)
_ <- logger.ftrace(s"Running query: $query")
items <- backend.itemSearch.findItems(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
resp <- Ok(Conversions.mkItemList(items))
resp <- mask.fullText match {
case Some(fq) if cfg.fullTextSearch.enabled =>
for {
items <- backend.fulltext.findItems(
query,
OFulltext.FtsInput(fq),
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListFts(items))
} yield ok
case _ =>
for {
items <- backend.itemSearch.findItems(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemList(items))
} yield ok
}
} yield resp
case req @ POST -> Root / "searchWithTags" =>
@ -47,20 +62,25 @@ object ItemRoutes {
_ <- logger.ftrace(s"Got search mask: $mask")
query = Conversions.mkQuery(mask, user.account.collective)
_ <- logger.ftrace(s"Running query: $query")
items <- mask.fullText match {
case None =>
backend.itemSearch.findItemsWithTags(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
case Some(fq) =>
backend.fulltext.findItemsWithTags(
query,
fq,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
resp <- mask.fullText match {
case Some(fq) if cfg.fullTextSearch.enabled =>
for {
items <- backend.fulltext.findItemsWithTags(
query,
OFulltext.FtsInput(fq),
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListWithTagsFts(items))
} yield ok
case _ =>
for {
items <- backend.itemSearch.findItemsWithTags(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
ok <- Ok(Conversions.mkItemListWithTags(items))
} yield ok
}
resp <- Ok(Conversions.mkItemListWithTags(items))
} yield resp
case GET -> Root / Ident(id) =>