mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-02-22 14:03:26 +00:00
Transport highlighting information to the client
This commit is contained in:
parent
a58ffd11e1
commit
ffbb16db45
@ -13,10 +13,18 @@ import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
|
||||
|
||||
trait OFulltext[F[_]] {
|
||||
|
||||
def findItems(q: Query, fts: String, batch: Batch): F[Vector[ListItem]]
|
||||
def findItems(
|
||||
q: Query,
|
||||
fts: OFulltext.FtsInput,
|
||||
batch: Batch
|
||||
): F[Vector[OFulltext.FtsItem]]
|
||||
|
||||
/** Same as `findItems` but does more queries per item to find all tags. */
|
||||
def findItemsWithTags(q: Query, fts: String, batch: Batch): F[Vector[ListItemWithTags]]
|
||||
def findItemsWithTags(
|
||||
q: Query,
|
||||
fts: OFulltext.FtsInput,
|
||||
batch: Batch
|
||||
): F[Vector[OFulltext.FtsItemWithTags]]
|
||||
|
||||
/** Clears the full-text index completely and launches a task that
|
||||
* indexes all data.
|
||||
@ -30,9 +38,26 @@ trait OFulltext[F[_]] {
|
||||
}
|
||||
|
||||
object OFulltext {
|
||||
// maybe use a temporary table? could run fts and do .take(batch.limit) and store this in sql
|
||||
// then run a query
|
||||
// check if supported by mariadb, postgres and h2. seems like it is supported everywhere
|
||||
|
||||
/** Input of a full-text search.
  *
  * @param query         the full-text query string
  * @param highlightPre  string handed to `FtsQuery.HighlightSetting` as its
  *                      `pre` value; defaults to `***`
  * @param highlightPost string handed to `FtsQuery.HighlightSetting` as its
  *                      `post` value; defaults to `***`
  */
final case class FtsInput(
    query: String,
    highlightPre: String = "***",
    highlightPost: String = "***"
)

/** Full-text data of one single match that belongs to an item.
  *
  * @param score     the score of this match
  * @param matchData what was matched, as reported by the full-text result
  * @param context   highlight lines found for this match; empty if the
  *                  full-text result has none for it
  */
final case class FtsDataItem(
    score: Double,
    matchData: FtsResult.MatchData,
    context: List[String]
)

/** Full-text data attached to an item of a search result.
  *
  * `maxScore`, `count` and `qtime` are taken from the full-text result
  * as a whole, `items` holds the matches of the one item this value is
  * attached to.
  */
final case class FtsData(
    maxScore: Double,
    count: Int,
    qtime: Duration,
    items: List[FtsDataItem]
)

/** A list item together with its full-text data. */
final case class FtsItem(item: ListItem, ftsData: FtsData)

/** A list item including its tags together with its full-text data. */
final case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)
|
||||
|
||||
def apply[F[_]: Effect](
|
||||
itemSearch: OItemSearch[F],
|
||||
@ -59,53 +84,83 @@ object OFulltext {
|
||||
else queue.insertIfNew(job) *> joex.notifyAllNodes
|
||||
} yield ()
|
||||
|
||||
def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
|
||||
findItemsFts(q, ftsQ, batch.first, itemSearch.findItems)
|
||||
def findItems(q: Query, ftsQ: FtsInput, batch: Batch): F[Vector[FtsItem]] =
|
||||
findItemsFts(q, ftsQ, batch.first, itemSearch.findItems, convertFtsData[ListItem])
|
||||
.drop(batch.offset.toLong)
|
||||
.take(batch.limit.toLong)
|
||||
.map({ case (li, fd) => FtsItem(li, fd) })
|
||||
.compile
|
||||
.toVector
|
||||
|
||||
def findItemsWithTags(
|
||||
q: Query,
|
||||
ftsQ: String,
|
||||
ftsQ: FtsInput,
|
||||
batch: Batch
|
||||
): F[Vector[ListItemWithTags]] =
|
||||
findItemsFts(q, ftsQ, batch.first, itemSearch.findItemsWithTags)
|
||||
): F[Vector[FtsItemWithTags]] =
|
||||
findItemsFts(
|
||||
q,
|
||||
ftsQ,
|
||||
batch.first,
|
||||
itemSearch.findItemsWithTags,
|
||||
convertFtsData[ListItemWithTags]
|
||||
)
|
||||
.drop(batch.offset.toLong)
|
||||
.take(batch.limit.toLong)
|
||||
.map({ case (li, fd) => FtsItemWithTags(li, fd) })
|
||||
.compile
|
||||
.toVector
|
||||
|
||||
private def findItemsFts[A: ItemId](
|
||||
private def findItemsFts[A: ItemId, B](
|
||||
q: Query,
|
||||
ftsQ: String,
|
||||
ftsQ: FtsInput,
|
||||
batch: Batch,
|
||||
search: (Query, Batch) => F[Vector[A]]
|
||||
): Stream[F, A] = {
|
||||
search: (Query, Batch) => F[Vector[A]],
|
||||
convert: (
|
||||
FtsResult,
|
||||
Map[Ident, List[FtsResult.ItemMatch]]
|
||||
) => PartialFunction[A, (A, FtsData)]
|
||||
): Stream[F, (A, FtsData)] = {
|
||||
|
||||
val sqlResult = search(q, batch)
|
||||
val fq = FtsQuery(ftsQ, q.collective, Set.empty, batch.limit, batch.offset)
|
||||
val fq = FtsQuery(
|
||||
ftsQ.query,
|
||||
q.collective,
|
||||
Set.empty,
|
||||
batch.limit,
|
||||
batch.offset,
|
||||
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
|
||||
)
|
||||
|
||||
val qres =
|
||||
for {
|
||||
items <- sqlResult
|
||||
ids = items.map(a => ItemId[A].itemId(a))
|
||||
ftsQ = fq.copy(items = ids.toSet)
|
||||
ftsR <-
|
||||
fts
|
||||
.search(ftsQ)
|
||||
.map(_.results.map(_.itemId))
|
||||
.map(_.toSet)
|
||||
res = items.filter(a => ftsR.contains(ItemId[A].itemId(a)))
|
||||
ftsR <- fts.search(ftsQ)
|
||||
ftsItems = ftsR.results.groupBy(_.itemId)
|
||||
res = items.collect(convert(ftsR, ftsItems))
|
||||
} yield res
|
||||
|
||||
Stream.eval(qres).flatMap { v =>
|
||||
val results = Stream.emits(v)
|
||||
if (v.size < batch.limit) results
|
||||
else results ++ findItemsFts(q, ftsQ, batch.next, search)
|
||||
else results ++ findItemsFts(q, ftsQ, batch.next, search, convert)
|
||||
}
|
||||
}
|
||||
|
||||
/** Creates the partial function that pairs an element of the SQL result
  * with its full-text data.
  *
  * The function is defined only for elements whose item id is a key of
  * `ftrItems`; all other elements are dropped when it is used with
  * `collect`.
  *
  * @param ftr      the complete full-text result; supplies `maxScore`,
  *                 `count`, `qtime` and the highlight lines
  * @param ftrItems the matches of `ftr` grouped by item id
  */
private def convertFtsData[A: ItemId](
    ftr: FtsResult,
    ftrItems: Map[Ident, List[FtsResult.ItemMatch]]
): PartialFunction[A, (A, FtsData)] =
  Function.unlift { (a: A) =>
    // A single lookup decides whether the function is defined for `a`.
    ftrItems.get(ItemId[A].itemId(a)).map { matches =>
      val dataItems = matches.map { m =>
        val context = ftr.highlight.get(m.id).getOrElse(Nil)
        FtsDataItem(m.score, m.data, context)
      }
      (a, FtsData(ftr.maxScore, ftr.count, ftr.qtime, dataItems))
    }
  }
|
||||
})
|
||||
|
||||
trait ItemId[A] {
|
||||
|
@ -15,9 +15,19 @@ final case class FtsQuery(
|
||||
collective: Ident,
|
||||
items: Set[Ident],
|
||||
limit: Int,
|
||||
offset: Int
|
||||
offset: Int,
|
||||
highlight: FtsQuery.HighlightSetting
|
||||
) {
|
||||
|
||||
def nextPage: FtsQuery =
|
||||
copy(offset = limit + offset)
|
||||
}
|
||||
|
||||
object FtsQuery {

  /** The two strings used to mark a highlighted match.
    *
    * @param pre  the `pre` string of the highlight markup
    * @param post the `post` string of the highlight markup
    */
  final case class HighlightSetting(pre: String, post: String)

  object HighlightSetting {

    /** Default setting, using `**` for both `pre` and `post`. */
    val default: HighlightSetting = HighlightSetting("**", "**")
  }
}
|
||||
|
@ -46,8 +46,8 @@ object QueryData {
|
||||
}
|
||||
QueryData(extQ, filterQ, fq.limit, fq.offset, fields, Map.empty).withHighLight(
|
||||
search,
|
||||
"**",
|
||||
"**"
|
||||
fq.highlight.pre,
|
||||
fq.highlight.post
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -367,12 +367,12 @@ docspell.joex {
|
||||
|
||||
# Configuration of the full-text search engine.
|
||||
full-text-search {
|
||||
enabled = true
|
||||
enabled = false
|
||||
migration = {
|
||||
index-all-chunk = 10
|
||||
}
|
||||
solr = {
|
||||
url = "http://localhost:8983/solr/docspell_core"
|
||||
url = "http://localhost:8983/solr/docspell"
|
||||
commit-within = 1000
|
||||
}
|
||||
}
|
||||
|
@ -3502,6 +3502,29 @@ components:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/Tag"
|
||||
highlighting:
|
||||
description: |
|
||||
Optional contextual information of a search query. Each
|
||||
item refers to some field where a search match was found
|
||||
(e.g. the name of an attachment or the item notes) and a
|
||||
list of lines giving surrounding context of the match.
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/HighlightEntry"
|
||||
HighlightEntry:
|
||||
description: |
|
||||
Highlighting information for a single field (maybe attachment
|
||||
name or item notes).
|
||||
required:
|
||||
- name
|
||||
- lines
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
lines:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
IdName:
|
||||
description: |
|
||||
The identifier and a human readable name of some entity.
|
||||
|
@ -91,7 +91,7 @@ docspell.server {
|
||||
# memory and disk space. It can be enabled later any time.
|
||||
#
|
||||
# Currently the SOLR search platform is supported.
|
||||
enabled = true
|
||||
enabled = false
|
||||
|
||||
# When re-creating the complete index via a REST call, this key
|
||||
# is required. If left empty (the default), recreating the index
|
||||
@ -103,7 +103,7 @@ docspell.server {
|
||||
|
||||
# Configuration for the SOLR backend.
|
||||
solr = {
|
||||
url = "http://localhost:8983/solr/docspell_core"
|
||||
url = "http://localhost:8983/solr/docspell"
|
||||
commit-within = 1000
|
||||
}
|
||||
}
|
||||
|
@ -14,8 +14,9 @@ import bitpeace.FileMeta
|
||||
import docspell.backend.ops.OCollective.{InsightData, PassChangeResult}
|
||||
import docspell.backend.ops.OJob.JobCancelResult
|
||||
import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult}
|
||||
import docspell.backend.ops.{OItemSearch, OJob, OOrganization, OUpload}
|
||||
import docspell.backend.ops.{OFulltext, OItemSearch, OJob, OOrganization, OUpload}
|
||||
import docspell.store.AddResult
|
||||
import docspell.ftsclient.FtsResult
|
||||
import org.http4s.multipart.Multipart
|
||||
import org.http4s.headers.`Content-Type`
|
||||
import org.log4s.Logger
|
||||
@ -139,6 +140,17 @@ trait Conversions {
|
||||
ItemLightList(gs)
|
||||
}
|
||||
|
||||
/** Converts full-text search results into an `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC date
  * string (presumably `yyyy-MM` -- confirm against `toUtcDate`). The
  * groups are ordered descending by that name.
  */
def mkItemListFts(v: Vector[OFulltext.FtsItem]): ItemLightList = {
  val byMonth = v.groupBy(_.item.date.toUtcDate.toString.substring(0, 7))

  val monthGroups = byMonth.toList.map { case (month, members) =>
    ItemLightGroup(month, members.toList.map(m => mkItemLight(m)))
  }

  ItemLightList(monthGroups.sortWith((a, b) => a.name.compareTo(b.name) >= 0))
}
|
||||
|
||||
def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = {
|
||||
val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7))
|
||||
|
||||
@ -150,6 +162,17 @@ trait Conversions {
|
||||
ItemLightList(gs)
|
||||
}
|
||||
|
||||
/** Converts full-text search results that carry tags into an
  * `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC date
  * string (presumably `yyyy-MM` -- confirm against `toUtcDate`). The
  * groups are ordered descending by that name.
  */
def mkItemListWithTagsFts(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = {
  val byMonth = v.groupBy(_.item.item.date.toUtcDate.toString.substring(0, 7))

  val monthGroups = byMonth.toList.map { case (month, members) =>
    ItemLightGroup(month, members.toList.map(m => mkItemLightWithTags(m)))
  }

  ItemLightList(monthGroups.sortWith((a, b) => a.name.compareTo(b.name) >= 0))
}
|
||||
|
||||
def mkItemLight(i: OItemSearch.ListItem): ItemLight =
|
||||
ItemLight(
|
||||
i.id,
|
||||
@ -164,12 +187,35 @@ trait Conversions {
|
||||
i.concPerson.map(mkIdName),
|
||||
i.concEquip.map(mkIdName),
|
||||
i.fileCount,
|
||||
Nil,
|
||||
Nil
|
||||
)
|
||||
|
||||
/** Converts a full-text list item: the plain item conversion with the
  * `highlighting` field filled from the item's full-text data.
  */
def mkItemLight(i: OFulltext.FtsItem): ItemLight =
  mkItemLight(i.item).copy(highlighting = mkHighlight(i.ftsData))
|
||||
|
||||
def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight =
|
||||
mkItemLight(i.item).copy(tags = i.tags.map(mkTag))
|
||||
|
||||
/** Converts a full-text list item with tags: the tagged item conversion
  * with the `highlighting` field filled from the item's full-text data.
  */
def mkItemLightWithTags(i: OFulltext.FtsItemWithTags): ItemLight =
  mkItemLightWithTags(i.item).copy(highlighting = mkHighlight(i.ftsData))
|
||||
|
||||
/** Creates the highlight entries for the given full-text data.
  *
  * Matches without context lines are skipped; the remaining ones are
  * ordered by descending score. An attachment match is labelled with
  * the attachment name, an item match with the fixed name `Item`.
  */
private def mkHighlight(ftsData: OFulltext.FtsData): List[HighlightEntry] = {
  def toEntry(fdi: OFulltext.FtsDataItem): HighlightEntry = {
    val fieldName = fdi.matchData match {
      case FtsResult.AttachmentData(_, aName) => aName
      case FtsResult.ItemData                 => "Item"
    }
    HighlightEntry(fieldName, fdi.context)
  }

  val withContext = ftsData.items.filter(fdi => fdi.context.nonEmpty)
  withContext.sortBy(fdi => -fdi.score).map(toEntry)
}
|
||||
|
||||
// job
|
||||
def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = {
|
||||
def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = {
|
||||
|
@ -5,6 +5,7 @@ import cats.implicits._
|
||||
import docspell.backend.BackendApp
|
||||
import docspell.backend.auth.AuthToken
|
||||
import docspell.backend.ops.OItemSearch.Batch
|
||||
import docspell.backend.ops.OFulltext
|
||||
import docspell.common.{Ident, ItemState}
|
||||
import org.http4s.HttpRoutes
|
||||
import org.http4s.dsl.Http4sDsl
|
||||
@ -34,11 +35,25 @@ object ItemRoutes {
|
||||
_ <- logger.ftrace(s"Got search mask: $mask")
|
||||
query = Conversions.mkQuery(mask, user.account.collective)
|
||||
_ <- logger.ftrace(s"Running query: $query")
|
||||
items <- backend.itemSearch.findItems(
|
||||
query,
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
resp <- Ok(Conversions.mkItemList(items))
|
||||
resp <- mask.fullText match {
|
||||
case Some(fq) if cfg.fullTextSearch.enabled =>
|
||||
for {
|
||||
items <- backend.fulltext.findItems(
|
||||
query,
|
||||
OFulltext.FtsInput(fq),
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
ok <- Ok(Conversions.mkItemListFts(items))
|
||||
} yield ok
|
||||
case _ =>
|
||||
for {
|
||||
items <- backend.itemSearch.findItems(
|
||||
query,
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
ok <- Ok(Conversions.mkItemList(items))
|
||||
} yield ok
|
||||
}
|
||||
} yield resp
|
||||
|
||||
case req @ POST -> Root / "searchWithTags" =>
|
||||
@ -47,20 +62,25 @@ object ItemRoutes {
|
||||
_ <- logger.ftrace(s"Got search mask: $mask")
|
||||
query = Conversions.mkQuery(mask, user.account.collective)
|
||||
_ <- logger.ftrace(s"Running query: $query")
|
||||
items <- mask.fullText match {
|
||||
case None =>
|
||||
backend.itemSearch.findItemsWithTags(
|
||||
query,
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
case Some(fq) =>
|
||||
backend.fulltext.findItemsWithTags(
|
||||
query,
|
||||
fq,
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
resp <- mask.fullText match {
|
||||
case Some(fq) if cfg.fullTextSearch.enabled =>
|
||||
for {
|
||||
items <- backend.fulltext.findItemsWithTags(
|
||||
query,
|
||||
OFulltext.FtsInput(fq),
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
ok <- Ok(Conversions.mkItemListWithTagsFts(items))
|
||||
} yield ok
|
||||
case _ =>
|
||||
for {
|
||||
items <- backend.itemSearch.findItemsWithTags(
|
||||
query,
|
||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||
)
|
||||
ok <- Ok(Conversions.mkItemListWithTags(items))
|
||||
} yield ok
|
||||
}
|
||||
resp <- Ok(Conversions.mkItemListWithTags(items))
|
||||
} yield resp
|
||||
|
||||
case GET -> Root / Ident(id) =>
|
||||
|
Loading…
Reference in New Issue
Block a user