Merge pull request #824 from eikek/fix/faster-search

Fix/faster search
This commit is contained in:
mergify[bot] 2021-05-23 12:25:01 +00:00 committed by GitHub
commit b445f5cd8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 203 additions and 115 deletions

View File

@ -1,5 +1,6 @@
package docspell.backend.ops
import cats.Applicative
import cats.effect.Sync
import cats.implicits._
@ -10,24 +11,53 @@ import docspell.store.qb.Batch
import docspell.store.queries.Query
import docspell.store.queries.SearchSummary
import org.log4s.getLogger
/** A "porcelain" api on top of OFulltext and OItemSearch. */
trait OSimpleSearch[F[_]] {
/** Search for items using the given query and optional fulltext
* search.
*
* When using fulltext search only (the query is empty), only the
* index is searched. It is assumed that the index doesn't contain
* "invalid" items. When using a query, then a condition to select
* only valid items is added to it.
*/
def search(settings: Settings)(q: Query, fulltextQuery: Option[String]): F[Items]
/** Using the same arguments as in `search`, this returns a summary
* and not the results.
*/
def searchSummary(
useFTS: Boolean
)(q: Query, fulltextQuery: Option[String]): F[SearchSummary]
def searchByString(
/** Calls `search` by parsing the given query string into a query that
* is then amended wtih the given `fix` query.
*/
final def searchByString(
settings: Settings
)(fix: Query.Fix, q: ItemQueryString): F[StringSearchResult[Items]]
def searchSummaryByString(
useFTS: Boolean
)(fix: Query.Fix, q: ItemQueryString): F[StringSearchResult[SearchSummary]]
)(fix: Query.Fix, q: ItemQueryString)(implicit
F: Applicative[F]
): F[StringSearchResult[Items]] =
OSimpleSearch.applySearch[F, Items](fix, q)((iq, fts) => search(settings)(iq, fts))
/** Same as `searchByString` but returning a summary instead of the
* results.
*/
final def searchSummaryByString(
useFTS: Boolean
)(fix: Query.Fix, q: ItemQueryString)(implicit
F: Applicative[F]
): F[StringSearchResult[SearchSummary]] =
OSimpleSearch.applySearch[F, SearchSummary](fix, q)((iq, fts) =>
searchSummary(useFTS)(iq, fts)
)
}
object OSimpleSearch {
private[this] val logger = getLogger
sealed trait StringSearchResult[+A]
object StringSearchResult {
@ -118,51 +148,115 @@ object OSimpleSearch {
def apply[F[_]: Sync](fts: OFulltext[F], is: OItemSearch[F]): OSimpleSearch[F] =
new Impl(fts, is)
final class Impl[F[_]: Sync](fts: OFulltext[F], is: OItemSearch[F])
extends OSimpleSearch[F] {
def searchByString(
settings: Settings
)(fix: Query.Fix, q: ItemQueryString): F[StringSearchResult[Items]] = {
val parsed: Either[StringSearchResult[Items], ItemQuery] =
ItemQueryParser.parse(q.query).leftMap(StringSearchResult.parseFailed)
/** Parses the query and calls `run` with the result, which searches items. */
private def applySearch[F[_]: Applicative, A](fix: Query.Fix, q: ItemQueryString)(
run: (Query, Option[String]) => F[A]
): F[StringSearchResult[A]] = {
val parsed: Either[StringSearchResult[A], Option[ItemQuery]] =
if (q.isEmpty) Right(None)
else
ItemQueryParser
.parse(q.query)
.leftMap(StringSearchResult.parseFailed)
.map(_.some)
def makeQuery(iq: ItemQuery): F[StringSearchResult[Items]] =
iq.findFulltext match {
case FulltextExtract.Result.Success(expr, ftq) =>
search(settings)(Query(fix, Query.QueryExpr(expr.some)), ftq)
.map(StringSearchResult.Success.apply)
case other: FulltextExtract.FailureResult =>
StringSearchResult.fulltextMismatch[Items](other).pure[F]
}
parsed match {
case Right(iq) =>
makeQuery(iq)
case Left(err) =>
err.pure[F]
def makeQuery(itemQuery: Option[ItemQuery]): F[StringSearchResult[A]] =
runQuery[F, A](itemQuery) {
case Some(s) =>
run(Query(fix, Query.QueryExpr(s.getExprPart)), s.getFulltextPart)
case None =>
run(Query(fix), None)
}
parsed match {
case Right(iq) =>
makeQuery(iq)
case Left(err) =>
err.pure[F]
}
}
/** Calls `run` with one of the success results when extracting the
* fulltext search node from the query.
*/
private def runQuery[F[_]: Applicative, A](
itemQuery: Option[ItemQuery]
)(run: Option[FulltextExtract.SuccessResult] => F[A]): F[StringSearchResult[A]] =
itemQuery match {
case Some(iq) =>
iq.findFulltext match {
case s: FulltextExtract.SuccessResult =>
run(Some(s)).map(StringSearchResult.Success.apply)
case other: FulltextExtract.FailureResult =>
StringSearchResult.fulltextMismatch[A](other).pure[F]
}
case None =>
run(None).map(StringSearchResult.Success.apply)
}
def searchSummaryByString(
useFTS: Boolean
)(fix: Query.Fix, q: ItemQueryString): F[StringSearchResult[SearchSummary]] = {
val parsed: Either[StringSearchResult[SearchSummary], ItemQuery] =
ItemQueryParser.parse(q.query).leftMap(StringSearchResult.parseFailed)
final class Impl[F[_]: Sync](fts: OFulltext[F], is: OItemSearch[F])
extends OSimpleSearch[F] {
def makeQuery(iq: ItemQuery): F[StringSearchResult[SearchSummary]] =
iq.findFulltext match {
case FulltextExtract.Result.Success(expr, ftq) =>
searchSummary(useFTS)(Query(fix, Query.QueryExpr(expr.some)), ftq)
.map(StringSearchResult.Success.apply)
case other: FulltextExtract.FailureResult =>
StringSearchResult.fulltextMismatch[SearchSummary](other).pure[F]
}
parsed match {
case Right(iq) =>
makeQuery(iq)
case Left(err) =>
err.pure[F]
/** Implements searching like this: it exploits the fact that teh
* fulltext index only contains valid items. When searching via
* sql the query expression selecting only valid items is added
* here.
*/
def search(
settings: Settings
)(q: Query, fulltextQuery: Option[String]): F[Items] = {
// 1. fulltext only if fulltextQuery.isDefined && q.isEmpty && useFTS
// 2. sql+fulltext if fulltextQuery.isDefined && q.nonEmpty && useFTS
// 3. sql-only else (if fulltextQuery.isEmpty || !useFTS)
val validItemQuery = q.withFix(_.andQuery(ItemQuery.Expr.ValidItemStates))
fulltextQuery match {
case Some(ftq) if settings.useFTS =>
if (q.isEmpty) {
logger.debug(s"Using index only search: $fulltextQuery")
fts
.findIndexOnly(settings.maxNoteLen)(
OFulltext.FtsInput(ftq),
q.fix.account,
settings.batch
)
.map(Items.ftsItemsFull(true))
} else if (settings.resolveDetails) {
logger.debug(
s"Using index+sql search with tags: $validItemQuery / $fulltextQuery"
)
fts
.findItemsWithTags(settings.maxNoteLen)(
validItemQuery,
OFulltext.FtsInput(ftq),
settings.batch
)
.map(Items.ftsItemsFull(false))
} else {
logger.debug(
s"Using index+sql search no tags: $validItemQuery / $fulltextQuery"
)
fts
.findItems(settings.maxNoteLen)(
validItemQuery,
OFulltext.FtsInput(ftq),
settings.batch
)
.map(Items.ftsItems(false))
}
case _ =>
if (settings.resolveDetails) {
logger.debug(
s"Using sql only search with tags: $validItemQuery / $fulltextQuery"
)
is.findItemsWithTags(settings.maxNoteLen)(validItemQuery, settings.batch)
.map(Items.itemsFull)
} else {
logger.debug(
s"Using sql only search no tags: $validItemQuery / $fulltextQuery"
)
is.findItems(settings.maxNoteLen)(validItemQuery, settings.batch)
.map(Items.itemsPlain)
}
}
}
@ -180,42 +274,5 @@ object OSimpleSearch {
case _ =>
is.findItemsSummary(q)
}
def search(settings: Settings)(q: Query, fulltextQuery: Option[String]): F[Items] =
// 1. fulltext only if fulltextQuery.isDefined && q.isEmpty && useFTS
// 2. sql+fulltext if fulltextQuery.isDefined && q.nonEmpty && useFTS
// 3. sql-only else (if fulltextQuery.isEmpty || !useFTS)
fulltextQuery match {
case Some(ftq) if settings.useFTS =>
if (q.isEmpty)
fts
.findIndexOnly(settings.maxNoteLen)(
OFulltext.FtsInput(ftq),
q.fix.account,
settings.batch
)
.map(Items.ftsItemsFull(true))
else if (settings.resolveDetails)
fts
.findItemsWithTags(settings.maxNoteLen)(
q,
OFulltext.FtsInput(ftq),
settings.batch
)
.map(Items.ftsItemsFull(false))
else
fts
.findItems(settings.maxNoteLen)(q, OFulltext.FtsInput(ftq), settings.batch)
.map(Items.ftsItems(false))
case _ =>
if (settings.resolveDetails)
is.findItemsWithTags(settings.maxNoteLen)(q, settings.batch)
.map(Items.itemsFull)
else
is.findItems(settings.maxNoteLen)(q, settings.batch)
.map(Items.itemsPlain)
}
}
}

View File

@ -1,6 +1,9 @@
package docspell.common
case class ItemQueryString(query: String)
final case class ItemQueryString(query: String) {
def isEmpty: Boolean =
query.isEmpty
}
object ItemQueryString {

View File

@ -14,21 +14,38 @@ import docspell.query.ItemQuery._
object FulltextExtract {
sealed trait Result
sealed trait SuccessResult extends Result
sealed trait SuccessResult extends Result {
def getFulltextPart: Option[String]
def getExprPart: Option[Expr]
}
sealed trait FailureResult extends Result
object Result {
case class Success(query: Expr, fts: Option[String]) extends SuccessResult
case object TooMany extends FailureResult
case object UnsupportedPosition extends FailureResult
final case class SuccessNoFulltext(query: Expr) extends SuccessResult {
val getExprPart = Some(query)
val getFulltextPart = None
}
final case class SuccessNoExpr(fts: String) extends SuccessResult {
val getExprPart = None
val getFulltextPart = Some(fts)
}
final case class SuccessBoth(query: Expr, fts: String) extends SuccessResult {
val getExprPart = Some(query)
val getFulltextPart = Some(fts)
}
final case object TooMany extends FailureResult
final case object UnsupportedPosition extends FailureResult
}
def findFulltext(expr: Expr): Result =
lookForFulltext(expr)
/** Extracts the fulltext node from the given expr and returns it
* together with the expr without that node.
*/
private def lookForFulltext(expr: Expr): Result =
expr match {
case Expr.Fulltext(ftq) =>
Result.Success(ItemQuery.all.expr, ftq.some)
Result.SuccessNoExpr(ftq)
case Expr.AndExpr(inner) =>
inner.collect({ case Expr.Fulltext(fq) => fq }) match {
case Nil =>
@ -36,7 +53,7 @@ object FulltextExtract {
case e :: Nil =>
val c = foldMap(isFulltextExpr)(expr)
if (c > 1) Result.TooMany
else Result.Success(expr, e.some)
else Result.SuccessBoth(expr, e)
case _ =>
Result.TooMany
}
@ -47,7 +64,7 @@ object FulltextExtract {
private def checkPosition(expr: Expr, max: Int): Result = {
val c = foldMap(isFulltextExpr)(expr)
if (c > max) Result.UnsupportedPosition
else Result.Success(expr, None)
else Result.SuccessNoFulltext(expr)
}
private def foldMap[B: Monoid](f: Expr => B)(expr: Expr): B =

View File

@ -17,7 +17,6 @@ final case class ItemQuery(expr: ItemQuery.Expr, raw: Option[String]) {
}
object ItemQuery {
val all = ItemQuery(Expr.Exists(Attr.ItemId), Some(""))
sealed trait Operator
object Operator {

View File

@ -1,12 +1,17 @@
package docspell.query
import cats.data.NonEmptyList
import docspell.query.internal.ExprParser
import docspell.query.internal.ExprUtil
object ItemQueryParser {
def parse(input: String): Either[ParseFailure, ItemQuery] =
if (input.isEmpty) Right(ItemQuery.all)
if (input.isEmpty)
Left(
ParseFailure("", 0, NonEmptyList.of(ParseFailure.SimpleMessage(0, "No input.")))
)
else {
val in = if (input.charAt(0) == '(') input else s"(& $input )"
ExprParser

View File

@ -1,7 +1,5 @@
package docspell.query
import cats.implicits._
import docspell.query.FulltextExtract.Result
import munit._
@ -16,38 +14,43 @@ class FulltextExtractTest extends FunSuite {
def assertFts(qstr: String, expect: Result) =
assertEquals(findFts(qstr), expect)
def assertFtsSuccess(qstr: String, expect: Option[String]) = {
def assertFtsSuccess(qstr: String, expect: String) = {
val q = ItemQueryParser.parseUnsafe(qstr)
assertEquals(findFts(qstr), Result.Success(q.expr, expect))
assertEquals(findFts(qstr), Result.SuccessBoth(q.expr, expect))
}
def assertNoFts(qstr: String) = {
val q = ItemQueryParser.parseUnsafe(qstr)
assertEquals(findFts(qstr), Result.SuccessNoFulltext(q.expr))
}
test("find fulltext as root") {
assertEquals(findFts("content:what"), Result.Success(ItemQuery.all.expr, "what".some))
assertEquals(findFts("content:what"), Result.SuccessNoExpr("what"))
assertEquals(
findFts("content:\"what hello\""),
Result.Success(ItemQuery.all.expr, "what hello".some)
Result.SuccessNoExpr("what hello")
)
assertEquals(
findFts("content:\"what OR hello\""),
Result.Success(ItemQuery.all.expr, "what OR hello".some)
Result.SuccessNoExpr("what OR hello")
)
assertEquals(
findFts("(& content:\"what OR hello\" )"),
Result.Success(ItemQuery.all.expr, "what OR hello".some)
Result.SuccessNoExpr("what OR hello")
)
}
test("find no fulltext") {
assertFtsSuccess("name:test", None)
assertNoFts("name:test")
}
test("find fulltext within and") {
assertFtsSuccess("content:what name:test", "what".some)
assertFtsSuccess("names:marc* content:what name:test", "what".some)
assertFtsSuccess("content:what name:test", "what")
assertFtsSuccess("names:marc* content:what name:test", "what")
assertFtsSuccess(
"names:marc* date:2021-02 content:\"what else\" name:test",
"what else".some
"what else"
)
}
@ -59,6 +62,6 @@ class FulltextExtractTest extends FunSuite {
test("wrong fulltext search position") {
assertFts("name:test (| date:2021-02 content:yes)", Result.UnsupportedPosition)
assertFtsSuccess("name:test (& date:2021-02 content:yes)", "yes".some)
assertFtsSuccess("name:test (& date:2021-02 content:yes)", "yes")
}
}

View File

@ -2,7 +2,6 @@ package docspell.query.internal
import cats.implicits._
import docspell.query.ItemQuery
import docspell.query.ItemQueryParser
import munit._
@ -39,9 +38,9 @@ class ItemQueryParserTest extends FunSuite {
assertEquals(expect, q)
}
test("return all if query is empty") {
val q = ItemQueryParser.parseUnsafe("")
assertEquals(ItemQuery.all, q)
test("throw if query is empty") {
val result = ItemQueryParser.parse("")
assert(result.isLeft)
}
test("splice inner and nodes") {

View File

@ -15,7 +15,6 @@ import docspell.common._
import docspell.common.syntax.all._
import docspell.query.FulltextExtract.Result.TooMany
import docspell.query.FulltextExtract.Result.UnsupportedPosition
import docspell.query.ItemQuery.Expr
import docspell.restapi.model._
import docspell.restserver.Config
import docspell.restserver.conv.Conversions
@ -62,12 +61,12 @@ object ItemRoutes {
detailFlag.getOrElse(false),
cfg.maxNoteLength
)
val fixQuery = Query.Fix(user.account, Some(Expr.ValidItemStates), None)
val fixQuery = Query.Fix(user.account, None, None)
searchItems(backend, dsl)(settings, fixQuery, itemQuery)
case GET -> Root / "searchStats" :? QP.Query(q) =>
val itemQuery = ItemQueryString(q)
val fixQuery = Query.Fix(user.account, Some(Expr.ValidItemStates), None)
val fixQuery = Query.Fix(user.account, None, None)
searchItemStats(backend, dsl)(cfg.fullTextSearch.enabled, fixQuery, itemQuery)
case req @ POST -> Root / "search" =>
@ -86,7 +85,7 @@ object ItemRoutes {
userQuery.withDetails.getOrElse(false),
cfg.maxNoteLength
)
fixQuery = Query.Fix(user.account, Some(Expr.ValidItemStates), None)
fixQuery = Query.Fix(user.account, None, None)
resp <- searchItems(backend, dsl)(settings, fixQuery, itemQuery)
} yield resp
@ -94,7 +93,7 @@ object ItemRoutes {
for {
userQuery <- req.as[ItemQuery]
itemQuery = ItemQueryString(userQuery.query)
fixQuery = Query.Fix(user.account, Some(Expr.ValidItemStates), None)
fixQuery = Query.Fix(user.account, None, None)
resp <- searchItemStats(backend, dsl)(
cfg.fullTextSearch.enabled,
fixQuery,

View File

@ -24,6 +24,9 @@ case class Query(fix: Query.Fix, cond: Query.QueryCond) {
object Query {
def apply(fix: Fix): Query =
Query(fix, QueryExpr(None))
case class Fix(
account: AccountId,
query: Option[ItemQuery.Expr],
@ -32,6 +35,9 @@ object Query {
def isEmpty: Boolean =
query.isEmpty
def andQuery(expr: ItemQuery.Expr): Fix =
copy(query = query.map(e => ItemQuery.Expr.and(e, expr)).orElse(Some(expr)))
}
sealed trait QueryCond {

View File

@ -176,7 +176,7 @@ render q =
"name" ++ attrMatch m ++ quoteStr str
AllNames str ->
"$names:" ++ quoteStr str
"names:" ++ quoteStr str
Contents str ->
"content:" ++ quoteStr str