mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Transport highlighting information to the client
This commit is contained in:
parent
a58ffd11e1
commit
ffbb16db45
@ -13,10 +13,18 @@ import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
|
|||||||
|
|
||||||
trait OFulltext[F[_]] {
|
trait OFulltext[F[_]] {
|
||||||
|
|
||||||
def findItems(q: Query, fts: String, batch: Batch): F[Vector[ListItem]]
|
def findItems(
|
||||||
|
q: Query,
|
||||||
|
fts: OFulltext.FtsInput,
|
||||||
|
batch: Batch
|
||||||
|
): F[Vector[OFulltext.FtsItem]]
|
||||||
|
|
||||||
/** Same as `findItems` but does more queries per item to find all tags. */
|
/** Same as `findItems` but does more queries per item to find all tags. */
|
||||||
def findItemsWithTags(q: Query, fts: String, batch: Batch): F[Vector[ListItemWithTags]]
|
def findItemsWithTags(
|
||||||
|
q: Query,
|
||||||
|
fts: OFulltext.FtsInput,
|
||||||
|
batch: Batch
|
||||||
|
): F[Vector[OFulltext.FtsItemWithTags]]
|
||||||
|
|
||||||
/** Clears the full-text index completely and launches a task that
|
/** Clears the full-text index completely and launches a task that
|
||||||
* indexes all data.
|
* indexes all data.
|
||||||
@ -30,9 +38,26 @@ trait OFulltext[F[_]] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
object OFulltext {
|
object OFulltext {
|
||||||
// maybe use a temporary table? could run fts and do .take(batch.limit) and store this in sql
|
|
||||||
// then run a query
|
case class FtsInput(
|
||||||
// check if supported by mariadb, postgres and h2. seems like it is supported everywhere
|
query: String,
|
||||||
|
highlightPre: String = "***",
|
||||||
|
highlightPost: String = "***"
|
||||||
|
)
|
||||||
|
|
||||||
|
/** A single full-text match belonging to one item.
  *
  * @param score the score reported for this match
  * @param matchData the `data` value of the underlying match; it
  *   tells which part of the item was hit
  * @param context highlighted lines looked up for this match; empty
  *   when the result carries no highlighting for it
  */
final case class FtsDataItem(
    score: Double,
    matchData: FtsResult.MatchData,
    context: List[String]
)
|
||||||
|
/** Full-text information attached to one item of a search result.
  *
  * `maxScore`, `count` and `qtime` are copied unchanged from the
  * [[FtsResult]] of the whole full-text query, so they are the same
  * for every item of that query; only `items` is specific to the
  * item this value is attached to.
  *
  * @param maxScore `maxScore` of the full-text result
  * @param count `count` of the full-text result
  * @param qtime `qtime` of the full-text result
  * @param items the matches found for this particular item
  */
final case class FtsData(
    maxScore: Double,
    count: Int,
    qtime: Duration,
    items: List[FtsDataItem]
)
|
||||||
|
/** A list item together with the full-text data found for it. */
final case class FtsItem(item: ListItem, ftsData: FtsData)
|
||||||
|
/** A list item including its tags, together with the full-text data found for it. */
final case class FtsItemWithTags(item: ListItemWithTags, ftsData: FtsData)
|
||||||
|
|
||||||
def apply[F[_]: Effect](
|
def apply[F[_]: Effect](
|
||||||
itemSearch: OItemSearch[F],
|
itemSearch: OItemSearch[F],
|
||||||
@ -59,53 +84,83 @@ object OFulltext {
|
|||||||
else queue.insertIfNew(job) *> joex.notifyAllNodes
|
else queue.insertIfNew(job) *> joex.notifyAllNodes
|
||||||
} yield ()
|
} yield ()
|
||||||
|
|
||||||
def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
|
def findItems(q: Query, ftsQ: FtsInput, batch: Batch): F[Vector[FtsItem]] =
|
||||||
findItemsFts(q, ftsQ, batch.first, itemSearch.findItems)
|
findItemsFts(q, ftsQ, batch.first, itemSearch.findItems, convertFtsData[ListItem])
|
||||||
.drop(batch.offset.toLong)
|
.drop(batch.offset.toLong)
|
||||||
.take(batch.limit.toLong)
|
.take(batch.limit.toLong)
|
||||||
|
.map({ case (li, fd) => FtsItem(li, fd) })
|
||||||
.compile
|
.compile
|
||||||
.toVector
|
.toVector
|
||||||
|
|
||||||
def findItemsWithTags(
|
def findItemsWithTags(
|
||||||
q: Query,
|
q: Query,
|
||||||
ftsQ: String,
|
ftsQ: FtsInput,
|
||||||
batch: Batch
|
batch: Batch
|
||||||
): F[Vector[ListItemWithTags]] =
|
): F[Vector[FtsItemWithTags]] =
|
||||||
findItemsFts(q, ftsQ, batch.first, itemSearch.findItemsWithTags)
|
findItemsFts(
|
||||||
|
q,
|
||||||
|
ftsQ,
|
||||||
|
batch.first,
|
||||||
|
itemSearch.findItemsWithTags,
|
||||||
|
convertFtsData[ListItemWithTags]
|
||||||
|
)
|
||||||
.drop(batch.offset.toLong)
|
.drop(batch.offset.toLong)
|
||||||
.take(batch.limit.toLong)
|
.take(batch.limit.toLong)
|
||||||
|
.map({ case (li, fd) => FtsItemWithTags(li, fd) })
|
||||||
.compile
|
.compile
|
||||||
.toVector
|
.toVector
|
||||||
|
|
||||||
private def findItemsFts[A: ItemId](
|
private def findItemsFts[A: ItemId, B](
|
||||||
q: Query,
|
q: Query,
|
||||||
ftsQ: String,
|
ftsQ: FtsInput,
|
||||||
batch: Batch,
|
batch: Batch,
|
||||||
search: (Query, Batch) => F[Vector[A]]
|
search: (Query, Batch) => F[Vector[A]],
|
||||||
): Stream[F, A] = {
|
convert: (
|
||||||
|
FtsResult,
|
||||||
|
Map[Ident, List[FtsResult.ItemMatch]]
|
||||||
|
) => PartialFunction[A, (A, FtsData)]
|
||||||
|
): Stream[F, (A, FtsData)] = {
|
||||||
|
|
||||||
val sqlResult = search(q, batch)
|
val sqlResult = search(q, batch)
|
||||||
val fq = FtsQuery(ftsQ, q.collective, Set.empty, batch.limit, batch.offset)
|
val fq = FtsQuery(
|
||||||
|
ftsQ.query,
|
||||||
|
q.collective,
|
||||||
|
Set.empty,
|
||||||
|
batch.limit,
|
||||||
|
batch.offset,
|
||||||
|
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
|
||||||
|
)
|
||||||
|
|
||||||
val qres =
|
val qres =
|
||||||
for {
|
for {
|
||||||
items <- sqlResult
|
items <- sqlResult
|
||||||
ids = items.map(a => ItemId[A].itemId(a))
|
ids = items.map(a => ItemId[A].itemId(a))
|
||||||
ftsQ = fq.copy(items = ids.toSet)
|
ftsQ = fq.copy(items = ids.toSet)
|
||||||
ftsR <-
|
ftsR <- fts.search(ftsQ)
|
||||||
fts
|
ftsItems = ftsR.results.groupBy(_.itemId)
|
||||||
.search(ftsQ)
|
res = items.collect(convert(ftsR, ftsItems))
|
||||||
.map(_.results.map(_.itemId))
|
|
||||||
.map(_.toSet)
|
|
||||||
res = items.filter(a => ftsR.contains(ItemId[A].itemId(a)))
|
|
||||||
} yield res
|
} yield res
|
||||||
|
|
||||||
Stream.eval(qres).flatMap { v =>
|
Stream.eval(qres).flatMap { v =>
|
||||||
val results = Stream.emits(v)
|
val results = Stream.emits(v)
|
||||||
if (v.size < batch.limit) results
|
if (v.size < batch.limit) results
|
||||||
else results ++ findItemsFts(q, ftsQ, batch.next, search)
|
else results ++ findItemsFts(q, ftsQ, batch.next, search, convert)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Builds the function used to pair SQL results with full-text data.
  *
  * The returned partial function is defined exactly for those values
  * whose item id is a key of `ftrItems`. For such a value it yields
  * the value itself plus an [[FtsData]] made of the overall numbers
  * of `ftr` and one [[FtsDataItem]] per match of that item.
  *
  * @param ftr the complete full-text result; source of `maxScore`,
  *   `count`, `qtime` and the highlighting lines
  * @param ftrItems the matches of `ftr`, grouped by item id
  */
private def convertFtsData[A: ItemId](
    ftr: FtsResult,
    ftrItems: Map[Ident, List[FtsResult.ItemMatch]]
): PartialFunction[A, (A, FtsData)] =
  Function.unlift { (a: A) =>
    // A missing key means "not part of the full-text result": the
    // partial function is undefined there and `collect` drops it.
    ftrItems.get(ItemId[A].itemId(a)).map { matches =>
      val dataItems = matches.map { im =>
        val lines = ftr.highlight.get(im.id).getOrElse(Nil)
        FtsDataItem(im.score, im.data, lines)
      }
      (a, FtsData(ftr.maxScore, ftr.count, ftr.qtime, dataItems))
    }
  }
|
||||||
})
|
})
|
||||||
|
|
||||||
trait ItemId[A] {
|
trait ItemId[A] {
|
||||||
|
@ -15,9 +15,19 @@ final case class FtsQuery(
|
|||||||
collective: Ident,
|
collective: Ident,
|
||||||
items: Set[Ident],
|
items: Set[Ident],
|
||||||
limit: Int,
|
limit: Int,
|
||||||
offset: Int
|
offset: Int,
|
||||||
|
highlight: FtsQuery.HighlightSetting
|
||||||
) {
|
) {
|
||||||
|
|
||||||
def nextPage: FtsQuery =
|
def nextPage: FtsQuery =
|
||||||
copy(offset = limit + offset)
|
copy(offset = limit + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
object FtsQuery {

  /** The two strings handed to the highlighting of a full-text query.
    *
    * @param pre presumably inserted in front of a matched term
    * @param post presumably inserted after a matched term
    */
  final case class HighlightSetting(pre: String, post: String)

  object HighlightSetting {

    /** Setting that uses `**` on both sides of a match. */
    val default: HighlightSetting = HighlightSetting("**", "**")
  }
}
|
||||||
|
@ -46,8 +46,8 @@ object QueryData {
|
|||||||
}
|
}
|
||||||
QueryData(extQ, filterQ, fq.limit, fq.offset, fields, Map.empty).withHighLight(
|
QueryData(extQ, filterQ, fq.limit, fq.offset, fields, Map.empty).withHighLight(
|
||||||
search,
|
search,
|
||||||
"**",
|
fq.highlight.pre,
|
||||||
"**"
|
fq.highlight.post
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,12 +367,12 @@ docspell.joex {
|
|||||||
|
|
||||||
# Configuration of the full-text search engine.
|
# Configuration of the full-text search engine.
|
||||||
full-text-search {
|
full-text-search {
|
||||||
enabled = true
|
enabled = false
|
||||||
migration = {
|
migration = {
|
||||||
index-all-chunk = 10
|
index-all-chunk = 10
|
||||||
}
|
}
|
||||||
solr = {
|
solr = {
|
||||||
url = "http://localhost:8983/solr/docspell_core"
|
url = "http://localhost:8983/solr/docspell"
|
||||||
commit-within = 1000
|
commit-within = 1000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3502,6 +3502,29 @@ components:
|
|||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: "#/components/schemas/Tag"
|
$ref: "#/components/schemas/Tag"
|
||||||
|
highlighting:
|
||||||
|
description: |
|
||||||
|
Optional contextual information of a search query. Each
|
||||||
|
item refers to some field where a search match was found
|
||||||
|
(e.g. the name of an attachment or the item notes) and a
|
||||||
|
list of lines giving surrounding context of the match.
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: "#/components/schemas/HighlightEntry"
|
||||||
|
HighlightEntry:
|
||||||
|
description: |
|
||||||
|
Highlighting information for a single field (maybe attachment
|
||||||
|
name or item notes).
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
- lines
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
lines:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
IdName:
|
IdName:
|
||||||
description: |
|
description: |
|
||||||
The identifier and a human readable name of some entity.
|
The identifier and a human readable name of some entity.
|
||||||
|
@ -91,7 +91,7 @@ docspell.server {
|
|||||||
# memory and disk space. It can be enabled later any time.
|
# memory and disk space. It can be enabled later any time.
|
||||||
#
|
#
|
||||||
# Currently the SOLR search platform is supported.
|
# Currently the SOLR search platform is supported.
|
||||||
enabled = true
|
enabled = false
|
||||||
|
|
||||||
# When re-creating the complete index via a REST call, this key
|
# When re-creating the complete index via a REST call, this key
|
||||||
# is required. If left empty (the default), recreating the index
|
# is required. If left empty (the default), recreating the index
|
||||||
@ -103,7 +103,7 @@ docspell.server {
|
|||||||
|
|
||||||
# Configuration for the SOLR backend.
|
# Configuration for the SOLR backend.
|
||||||
solr = {
|
solr = {
|
||||||
url = "http://localhost:8983/solr/docspell_core"
|
url = "http://localhost:8983/solr/docspell"
|
||||||
commit-within = 1000
|
commit-within = 1000
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,8 +14,9 @@ import bitpeace.FileMeta
|
|||||||
import docspell.backend.ops.OCollective.{InsightData, PassChangeResult}
|
import docspell.backend.ops.OCollective.{InsightData, PassChangeResult}
|
||||||
import docspell.backend.ops.OJob.JobCancelResult
|
import docspell.backend.ops.OJob.JobCancelResult
|
||||||
import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult}
|
import docspell.backend.ops.OUpload.{UploadData, UploadMeta, UploadResult}
|
||||||
import docspell.backend.ops.{OItemSearch, OJob, OOrganization, OUpload}
|
import docspell.backend.ops.{OFulltext, OItemSearch, OJob, OOrganization, OUpload}
|
||||||
import docspell.store.AddResult
|
import docspell.store.AddResult
|
||||||
|
import docspell.ftsclient.FtsResult
|
||||||
import org.http4s.multipart.Multipart
|
import org.http4s.multipart.Multipart
|
||||||
import org.http4s.headers.`Content-Type`
|
import org.http4s.headers.`Content-Type`
|
||||||
import org.log4s.Logger
|
import org.log4s.Logger
|
||||||
@ -139,6 +140,17 @@ trait Conversions {
|
|||||||
ItemLightList(gs)
|
ItemLightList(gs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Converts full-text search results into an `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC date
  * string (presumably `yyyy-MM`); the groups are returned in
  * descending order of that key.
  */
def mkItemListFts(v: Vector[OFulltext.FtsItem]): ItemLightList = {
  val groups = v.groupBy(item => item.item.date.toUtcDate.toString.substring(0, 7))

  def mkGroup(g: (String, Vector[OFulltext.FtsItem])): ItemLightGroup =
    ItemLightGroup(g._1, g._2.map(mkItemLight).toList)

  // `sortWith` expects a strict "comes before" relation, so the
  // comparison must not hold for equal names.
  val gs =
    groups.map(mkGroup _).toList.sortWith((g1, g2) => g1.name.compareTo(g2.name) > 0)
  ItemLightList(gs)
}
|
||||||
|
|
||||||
def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = {
|
def mkItemListWithTags(v: Vector[OItemSearch.ListItemWithTags]): ItemLightList = {
|
||||||
val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7))
|
val groups = v.groupBy(ti => ti.item.date.toUtcDate.toString.substring(0, 7))
|
||||||
|
|
||||||
@ -150,6 +162,17 @@ trait Conversions {
|
|||||||
ItemLightList(gs)
|
ItemLightList(gs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Converts full-text search results that carry tags into an `ItemLightList`.
  *
  * Items are grouped by the first seven characters of their UTC date
  * string (presumably `yyyy-MM`); the groups are returned in
  * descending order of that key.
  */
def mkItemListWithTagsFts(v: Vector[OFulltext.FtsItemWithTags]): ItemLightList = {
  val groups = v.groupBy(ti => ti.item.item.date.toUtcDate.toString.substring(0, 7))

  def mkGroup(g: (String, Vector[OFulltext.FtsItemWithTags])): ItemLightGroup =
    ItemLightGroup(g._1, g._2.map(mkItemLightWithTags).toList)

  // `sortWith` expects a strict "comes before" relation, so the
  // comparison must not hold for equal names.
  val gs =
    groups.map(mkGroup _).toList.sortWith((g1, g2) => g1.name.compareTo(g2.name) > 0)
  ItemLightList(gs)
}
|
||||||
|
|
||||||
def mkItemLight(i: OItemSearch.ListItem): ItemLight =
|
def mkItemLight(i: OItemSearch.ListItem): ItemLight =
|
||||||
ItemLight(
|
ItemLight(
|
||||||
i.id,
|
i.id,
|
||||||
@ -164,12 +187,35 @@ trait Conversions {
|
|||||||
i.concPerson.map(mkIdName),
|
i.concPerson.map(mkIdName),
|
||||||
i.concEquip.map(mkIdName),
|
i.concEquip.map(mkIdName),
|
||||||
i.fileCount,
|
i.fileCount,
|
||||||
|
Nil,
|
||||||
Nil
|
Nil
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/** Converts a full-text hit: the plain item conversion with the
  * `highlighting` field filled from the item's full-text data.
  */
def mkItemLight(i: OFulltext.FtsItem): ItemLight =
  mkItemLight(i.item).copy(highlighting = mkHighlight(i.ftsData))
|
||||||
|
|
||||||
def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight =
|
def mkItemLightWithTags(i: OItemSearch.ListItemWithTags): ItemLight =
|
||||||
mkItemLight(i.item).copy(tags = i.tags.map(mkTag))
|
mkItemLight(i.item).copy(tags = i.tags.map(mkTag))
|
||||||
|
|
||||||
|
/** Converts a full-text hit with tags: the tag-aware item conversion
  * with the `highlighting` field filled from the item's full-text data.
  */
def mkItemLightWithTags(i: OFulltext.FtsItemWithTags): ItemLight =
  mkItemLightWithTags(i.item).copy(highlighting = mkHighlight(i.ftsData))
|
||||||
|
|
||||||
|
/** Turns the matches of one item into highlight entries.
  *
  * Matches without context lines are left out; the remaining ones
  * are ordered by descending score. An attachment match is labelled
  * with the attachment name, a match on the item itself with `"Item"`.
  */
private def mkHighlight(ftsData: OFulltext.FtsData): List[HighlightEntry] = {
  def label(md: FtsResult.MatchData): String =
    md match {
      case FtsResult.AttachmentData(_, aName) => aName
      case FtsResult.ItemData                 => "Item"
    }

  val withContext = ftsData.items.filter(_.context.nonEmpty)
  val bestFirst   = withContext.sortBy(-_.score)
  bestFirst.map(fdi => HighlightEntry(label(fdi.matchData), fdi.context))
}
|
||||||
|
|
||||||
// job
|
// job
|
||||||
def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = {
|
def mkJobQueueState(state: OJob.CollectiveQueueState): JobQueueState = {
|
||||||
def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = {
|
def desc(f: JobDetail => Option[Timestamp])(j1: JobDetail, j2: JobDetail): Boolean = {
|
||||||
|
@ -5,6 +5,7 @@ import cats.implicits._
|
|||||||
import docspell.backend.BackendApp
|
import docspell.backend.BackendApp
|
||||||
import docspell.backend.auth.AuthToken
|
import docspell.backend.auth.AuthToken
|
||||||
import docspell.backend.ops.OItemSearch.Batch
|
import docspell.backend.ops.OItemSearch.Batch
|
||||||
|
import docspell.backend.ops.OFulltext
|
||||||
import docspell.common.{Ident, ItemState}
|
import docspell.common.{Ident, ItemState}
|
||||||
import org.http4s.HttpRoutes
|
import org.http4s.HttpRoutes
|
||||||
import org.http4s.dsl.Http4sDsl
|
import org.http4s.dsl.Http4sDsl
|
||||||
@ -34,11 +35,25 @@ object ItemRoutes {
|
|||||||
_ <- logger.ftrace(s"Got search mask: $mask")
|
_ <- logger.ftrace(s"Got search mask: $mask")
|
||||||
query = Conversions.mkQuery(mask, user.account.collective)
|
query = Conversions.mkQuery(mask, user.account.collective)
|
||||||
_ <- logger.ftrace(s"Running query: $query")
|
_ <- logger.ftrace(s"Running query: $query")
|
||||||
items <- backend.itemSearch.findItems(
|
resp <- mask.fullText match {
|
||||||
query,
|
case Some(fq) if cfg.fullTextSearch.enabled =>
|
||||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
for {
|
||||||
)
|
items <- backend.fulltext.findItems(
|
||||||
resp <- Ok(Conversions.mkItemList(items))
|
query,
|
||||||
|
OFulltext.FtsInput(fq),
|
||||||
|
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||||
|
)
|
||||||
|
ok <- Ok(Conversions.mkItemListFts(items))
|
||||||
|
} yield ok
|
||||||
|
case _ =>
|
||||||
|
for {
|
||||||
|
items <- backend.itemSearch.findItems(
|
||||||
|
query,
|
||||||
|
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||||
|
)
|
||||||
|
ok <- Ok(Conversions.mkItemList(items))
|
||||||
|
} yield ok
|
||||||
|
}
|
||||||
} yield resp
|
} yield resp
|
||||||
|
|
||||||
case req @ POST -> Root / "searchWithTags" =>
|
case req @ POST -> Root / "searchWithTags" =>
|
||||||
@ -47,20 +62,25 @@ object ItemRoutes {
|
|||||||
_ <- logger.ftrace(s"Got search mask: $mask")
|
_ <- logger.ftrace(s"Got search mask: $mask")
|
||||||
query = Conversions.mkQuery(mask, user.account.collective)
|
query = Conversions.mkQuery(mask, user.account.collective)
|
||||||
_ <- logger.ftrace(s"Running query: $query")
|
_ <- logger.ftrace(s"Running query: $query")
|
||||||
items <- mask.fullText match {
|
resp <- mask.fullText match {
|
||||||
case None =>
|
case Some(fq) if cfg.fullTextSearch.enabled =>
|
||||||
backend.itemSearch.findItemsWithTags(
|
for {
|
||||||
query,
|
items <- backend.fulltext.findItemsWithTags(
|
||||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
query,
|
||||||
)
|
OFulltext.FtsInput(fq),
|
||||||
case Some(fq) =>
|
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||||
backend.fulltext.findItemsWithTags(
|
)
|
||||||
query,
|
ok <- Ok(Conversions.mkItemListWithTagsFts(items))
|
||||||
fq,
|
} yield ok
|
||||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
case _ =>
|
||||||
)
|
for {
|
||||||
|
items <- backend.itemSearch.findItemsWithTags(
|
||||||
|
query,
|
||||||
|
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||||
|
)
|
||||||
|
ok <- Ok(Conversions.mkItemListWithTags(items))
|
||||||
|
} yield ok
|
||||||
}
|
}
|
||||||
resp <- Ok(Conversions.mkItemListWithTags(items))
|
|
||||||
} yield resp
|
} yield resp
|
||||||
|
|
||||||
case GET -> Root / Ident(id) =>
|
case GET -> Root / Ident(id) =>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user