Some basic tests and config

eikek
2022-03-21 00:10:28 +01:00
parent 029335e607
commit fef00bdfb5
12 changed files with 385 additions and 41 deletions

View File

@@ -8,7 +8,7 @@ trait DoobieMeta {
implicit val sqlLogging: LogHandler = LogHandler {
case e @ Success(_, _, _, _) =>
DoobieMeta.logger.trace("SQL " + e)
DoobieMeta.logger.debug("SQL " + e)
case e =>
DoobieMeta.logger.error(s"SQL Failure: $e")
}
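
The handler above is doobie's LogHandler; the change only raises successful statements from trace to debug. Below is a minimal sketch, not part of this commit, of how such an implicit handler takes effect, assuming the pre-1.0.0-RC4 doobie API used here where Fragment.query/update accept an implicit LogHandler (println stands in for the logger):

import doobie._
import doobie.implicits._
import doobie.util.log.{LogHandler, Success}

object SqlLoggingSketch {
  // Mirrors the handler above: successful statements at debug level,
  // everything else reported as a failure.
  implicit val sqlLogging: LogHandler = LogHandler {
    case e @ Success(_, _, _, _) => println("SQL " + e)
    case e                       => println(s"SQL Failure: $e")
  }

  // The implicit handler is captured when the query is built, so every
  // execution of this statement gets logged.
  def countAll: ConnectionIO[Long] =
    sql"select count(*) from ftspsql_search".query[Long].unique
}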

View File

@@ -5,7 +5,7 @@ import docspell.common.{Ident, Language}
import docspell.ftsclient.TextData
final case class FtsRecord(
id: String,
id: Ident,
itemId: Ident,
collective: Ident,
language: Language,
@@ -30,7 +30,7 @@ object FtsRecord {
text
) =>
FtsRecord(
td.id.id,
td.id,
item,
collective,
language,
@@ -43,7 +43,7 @@ object FtsRecord {
)
case TextData.Item(item, collective, folder, name, notes, language) =>
FtsRecord(
td.id.id,
td.id,
item,
collective,
language,

View File

@@ -10,11 +10,13 @@ import fs2.Chunk
object FtsRepository extends DoobieMeta {
val table = fr"ftspsql_search"
def searchSummary(q: FtsQuery): ConnectionIO[SearchSummary] = {
val selectRank = mkSelectRank
val query = mkQueryPart(q)
def searchSummary(pq: PgQueryParser, rn: RankNormalization)(
q: FtsQuery
): ConnectionIO[SearchSummary] = {
val selectRank = mkSelectRank(rn)
val query = mkQueryPart(pq, q)
sql"""select count(id), max($selectRank)
sql"""select count(id), coalesce(max($selectRank), 0)
|from $table, $query
|where ${mkCondition(q)} AND query @@ text_index
|""".stripMargin
@@ -22,11 +24,11 @@ object FtsRepository extends DoobieMeta {
.unique
}
def search(
def search(pq: PgQueryParser, rn: RankNormalization)(
q: FtsQuery,
withHighlighting: Boolean
): ConnectionIO[Vector[SearchResult]] = {
val selectRank = mkSelectRank
val selectRank = mkSelectRank(rn)
val hlOption =
s"startsel=${q.highlight.pre},stopsel=${q.highlight.post}"
@@ -44,7 +46,7 @@ object FtsRepository extends DoobieMeta {
val select =
fr"id, item_id, collective, lang, attach_id, folder_id, attach_name, item_name, $selectRank as rank, $selectHl"
val query = mkQueryPart(q)
val query = mkQueryPart(pq, q)
sql"""select $select
|from $table, $query
@@ -74,16 +76,22 @@ object FtsRepository extends DoobieMeta {
List(items, folders).flatten.foldLeft(coll)(_ ++ fr"AND" ++ _)
}
private def mkQueryPart(q: FtsQuery): Fragment =
fr"websearch_to_tsquery(fts_config, ${q.q}) query"
private def mkQueryPart(p: PgQueryParser, q: FtsQuery): Fragment = {
val fname = Fragment.const(p.name)
fr"$fname(fts_config, ${q.q}) query"
}
private def mkSelectRank: Fragment =
fr"ts_rank_cd(text_index, query, 4)"
private def mkSelectRank(rn: RankNormalization): Fragment = {
val bits = rn.value.toNonEmptyList.map(n => sql"$n").reduceLeft(_ ++ sql"|" ++ _)
fr"ts_rank_cd(text_index, query, $bits)"
}
def replaceChunk(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(replace).map(_.foldLeft(0)(_ + _))
def replaceChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(replace(pgConfig)).map(_.foldLeft(0)(_ + _))
def replace(r: FtsRecord): ConnectionIO[Int] =
def replace(
pgConfig: Language => String
)(r: FtsRecord): ConnectionIO[Int] =
(fr"INSERT INTO $table (id,item_id,collective,lang,attach_id,folder_id,attach_name,attach_content,item_name,item_notes,fts_config) VALUES (" ++
commas(
sql"${r.id}",
@@ -107,7 +115,7 @@ object FtsRepository extends DoobieMeta {
sql"fts_config = ${pgConfig(r.language)}::regconfig"
)).update.run
def update(r: FtsRecord): ConnectionIO[Int] =
def update(pgConfig: Language => String)(r: FtsRecord): ConnectionIO[Int] =
(fr"UPDATE $table SET" ++ commas(
sql"lang = ${r.language}",
sql"folder_id = ${r.folderId}",
@@ -118,8 +126,8 @@ object FtsRepository extends DoobieMeta {
sql"fts_config = ${pgConfig(r.language)}::regconfig"
) ++ fr"WHERE id = ${r.id}").update.run
def updateChunk(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(update).map(_.foldLeft(0)(_ + _))
def updateChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(update(pgConfig)).map(_.foldLeft(0)(_ + _))
def updateFolder(
itemId: Ident,
@@ -154,7 +162,10 @@ object FtsRepository extends DoobieMeta {
private def commas(fr: Fragment, frn: Fragment*): Fragment =
frn.foldLeft(fr)(_ ++ fr"," ++ _)
def pgConfig(language: Language): String =
def getPgConfig(select: PartialFunction[Language, String])(language: Language): String =
select.applyOrElse(language, defaultPgConfig)
def defaultPgConfig(language: Language): String =
language match {
case Language.English => "english"
case Language.German => "german"
@@ -163,7 +174,6 @@ object FtsRepository extends DoobieMeta {
case Language.Spanish => "spanish"
case Language.Hungarian => "hungarian"
case Language.Portuguese => "portuguese"
case Language.Czech => "simple" // ?
case Language.Danish => "danish"
case Language.Finnish => "finnish"
case Language.Norwegian => "norwegian"
@@ -171,7 +181,8 @@ object FtsRepository extends DoobieMeta {
case Language.Russian => "russian"
case Language.Romanian => "romanian"
case Language.Dutch => "dutch"
case Language.Latvian => "lithuanian" // ?
case Language.Czech => "simple"
case Language.Latvian => "simple"
case Language.Japanese => "simple"
case Language.Hebrew => "simple"
}
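
The repository helpers are now parameterized: mkQueryPart interpolates the configured parser's function name, mkSelectRank OR's the normalization bits into ts_rank_cd, and replace/update and their chunk variants take a Language => String function resolved via getPgConfig, which lets a partial function override defaultPgConfig per language. A small hypothetical usage sketch (the "czech" regconfig override is an assumption for illustration only):

import docspell.common.Language
import docspell.ftspsql.{FtsRepository, PgQueryParser, RankNormalization}

object FtsRepositorySketch {
  // Map Czech to an assumed "czech" text-search configuration; all other
  // languages fall back to defaultPgConfig.
  val pgConfig: Language => String =
    FtsRepository.getPgConfig { case Language.Czech => "czech" }

  // Combine the Mhd (4) and Scale (32) normalization flags.
  val rank: RankNormalization = RankNormalization.Mhd && RankNormalization.Scale

  // Partially apply parser + normalization, mirroring what PsqlFtsClient does.
  val summary = FtsRepository.searchSummary(PgQueryParser.Websearch, rank) _
  val replace = FtsRepository.replaceChunk(pgConfig) _
}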

View File

@@ -0,0 +1,32 @@
package docspell.ftspsql
import cats.data.NonEmptyList
sealed trait PgQueryParser {
def name: String
}
object PgQueryParser {
case object ToTsQuery extends PgQueryParser {
val name = "to_tsquery"
}
case object Plain extends PgQueryParser {
val name = "plainto_tsquery"
}
case object Phrase extends PgQueryParser {
val name = "phraseto_tsquery"
}
case object Websearch extends PgQueryParser {
val name = "websearch_to_tsquery"
}
val all: NonEmptyList[PgQueryParser] =
NonEmptyList.of(ToTsQuery, Plain, Phrase, Websearch)
def fromName(name: String): Either[String, PgQueryParser] =
all.find(_.name.equalsIgnoreCase(name)).toRight(s"Unknown pg query parser: $name")
def unsafeFromName(name: String): PgQueryParser =
fromName(name).fold(sys.error, identity)
}
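
Each case object carries the name of the Postgres function that turns the raw query string into a tsquery, and fromName is a case-insensitive lookup intended for reading the value from configuration. A brief sketch (not part of the commit):

import docspell.ftspsql.PgQueryParser

object PgQueryParserSketch {
  // Case-insensitive lookup from a config string.
  val ok: Either[String, PgQueryParser]  = PgQueryParser.fromName("Websearch_To_Tsquery") // Right(Websearch)
  val bad: Either[String, PgQueryParser] = PgQueryParser.fromName("fuzzy")                // Left("Unknown pg query parser: fuzzy")

  // mkQueryPart (see FtsRepository above) interpolates this name as a constant
  // fragment, e.g. websearch_to_tsquery(fts_config, ?) query.
  val fn: String = PgQueryParser.Websearch.name
}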

View File

@@ -1,5 +1,25 @@
package docspell.ftspsql
import docspell.common.{LenientUri, Password}
import docspell.common._
case class PsqlConfig(url: LenientUri, user: String, password: Password)
final case class PsqlConfig(
url: LenientUri,
user: String,
password: Password,
pgConfigSelect: PartialFunction[Language, String],
pgQueryParser: PgQueryParser,
rankNormalization: RankNormalization
)
object PsqlConfig {
def defaults(url: LenientUri, user: String, password: Password): PsqlConfig =
PsqlConfig(
url,
user,
password,
PartialFunction.empty,
PgQueryParser.Websearch,
RankNormalization.Mhd && RankNormalization.Scale
)
}
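
PsqlConfig now carries the regconfig selection, the query parser, and the rank normalization; defaults picks websearch_to_tsquery, Mhd && Scale, and no per-language overrides. A hypothetical sketch of building and adjusting a config (URL and credentials are placeholders; LenientUri.unsafe and Password are assumed to be the usual docspell.common constructors):

import docspell.common.{LenientUri, Password}
import docspell.ftspsql.{PgQueryParser, PsqlConfig, RankNormalization}

object PsqlConfigSketch {
  // Placeholder connection values; LenientUri.unsafe / Password(...) assumed
  // from docspell.common.
  val base: PsqlConfig =
    PsqlConfig.defaults(
      LenientUri.unsafe("jdbc:postgresql://localhost:5432/docspell"),
      "docspell",
      Password("docspell")
    )

  // Swap in a different parser and ranking behaviour via copy.
  val custom: PsqlConfig =
    base.copy(
      pgQueryParser = PgQueryParser.Plain,
      rankNormalization = RankNormalization.IgnoreDocLength && RankNormalization.Scale
    )
}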

View File

@@ -17,6 +17,19 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
extends FtsClient[F] {
val engine = Ident.unsafe("postgres")
val config = cfg
private[ftspsql] val transactor = xa
private[this] val searchSummary =
FtsRepository.searchSummary(cfg.pgQueryParser, cfg.rankNormalization) _
private[this] val search =
FtsRepository.search(cfg.pgQueryParser, cfg.rankNormalization) _
private[this] val replaceChunk =
FtsRepository.replaceChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
private[this] val updateChunk =
FtsRepository.updateChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
def initialize: F[List[FtsMigration[F]]] =
Sync[F].pure(
List(
@@ -49,8 +62,8 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
def search(q: FtsQuery): F[FtsResult] =
for {
startNanos <- Sync[F].delay(System.nanoTime())
summary <- FtsRepository.searchSummary(q).transact(xa)
results <- FtsRepository.search(q, true).transact(xa)
summary <- searchSummary(q).transact(xa)
results <- search(q, true).transact(xa)
endNanos <- Sync[F].delay(System.nanoTime())
duration = Duration.nanos(endNanos - startNanos)
res = SearchResult
@@ -63,9 +76,8 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
.map(FtsRecord.fromTextData)
.chunkN(50)
.evalMap(chunk =>
logger.debug(s"Update fts index with ${chunk.size} records") *> FtsRepository
.replaceChunk(chunk)
.transact(xa)
logger.debug(s"Add to fts index ${chunk.size} records") *>
replaceChunk(chunk).transact(xa)
)
.compile
.drain
@@ -74,7 +86,10 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
data
.map(FtsRecord.fromTextData)
.chunkN(50)
.evalMap(chunk => FtsRepository.updateChunk(chunk).transact(xa))
.evalMap(chunk =>
logger.debug(s"Update fts index with ${chunk.size} records") *>
updateChunk(chunk).transact(xa)
)
.compile
.drain
@@ -124,8 +139,9 @@ object PsqlFtsClient {
xa = HikariTransactor[F](ds, connectEC)
pc = new PsqlFtsClient[F](cfg, xa)
// _ <- Resource.eval(st.migrate)
} yield pc
}
def fromTransactor[F[_]: Async](cfg: PsqlConfig, xa: Transactor[F]): PsqlFtsClient[F] =
new PsqlFtsClient[F](cfg, xa)
}
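
The client pre-applies the configured parser, rank normalization, and regconfig selection to the repository functions, and the new fromTransactor constructor lets tests supply their own transactor instead of building a Hikari pool. A rough wiring sketch for a test, assuming the four-argument Transactor.fromDriverManager of the doobie version in use and placeholder connection details:

import cats.effect.IO
import doobie.Transactor
import docspell.common.{LenientUri, Password}
import docspell.ftspsql.{PsqlConfig, PsqlFtsClient}

object PsqlFtsClientSketch {
  // Plain driver-manager transactor: fine for a test, not for production use.
  val xa: Transactor[IO] =
    Transactor.fromDriverManager[IO](
      "org.postgresql.Driver",
      "jdbc:postgresql://localhost:5432/docspell_test",
      "dev",
      "dev"
    )

  // Placeholder config pointing at the same test database.
  val cfg: PsqlConfig =
    PsqlConfig.defaults(
      LenientUri.unsafe("jdbc:postgresql://localhost:5432/docspell_test"),
      "dev",
      Password("dev")
    )

  // Uses the partially applied searchSummary/search/replaceChunk/updateChunk
  // from the class body above.
  val client: PsqlFtsClient[IO] = PsqlFtsClient.fromTransactor[IO](cfg, xa)
}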

View File

@@ -0,0 +1,40 @@
package docspell.ftspsql
import cats.Order
import cats.data.NonEmptySet
sealed trait RankNormalization { self =>
def value: NonEmptySet[Int]
def &&(other: RankNormalization): RankNormalization =
new RankNormalization { val value = self.value ++ other.value }
}
object RankNormalization {
// see https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
case object IgnoreDocLength extends RankNormalization { val value = NonEmptySet.one(0) }
case object LogDocLength extends RankNormalization { val value = NonEmptySet.one(1) }
case object DocLength extends RankNormalization { val value = NonEmptySet.one(2) }
case object Mhd extends RankNormalization { val value = NonEmptySet.one(4) }
case object UniqueWords extends RankNormalization { val value = NonEmptySet.one(8) }
case object LogUniqueWords extends RankNormalization { val value = NonEmptySet.one(16) }
case object Scale extends RankNormalization { val value = NonEmptySet.one(32) }
def byNumber(n: Int): Either[String, RankNormalization] =
all.find(_.value.contains(n)).toRight(s"Unknown rank normalization number: $n")
implicit val order: Order[RankNormalization] =
Order.by(_.value.reduce)
val all: NonEmptySet[RankNormalization] =
NonEmptySet.of(
IgnoreDocLength,
LogDocLength,
DocLength,
Mhd,
UniqueWords,
LogUniqueWords,
Scale
)
}
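
RankNormalization models the ts_rank_cd normalization flags from the Postgres documentation linked above as bit values: && unions the underlying sets and byNumber resolves a single flag from its numeric value. A short sketch (not part of the commit):

import docspell.ftspsql.RankNormalization

object RankNormalizationSketch {
  // Union of the bit sets {4} and {32}; FtsRepository.mkSelectRank joins the
  // members with |, so the rank expression effectively becomes
  // ts_rank_cd(text_index, query, 4|32).
  val rn: RankNormalization = RankNormalization.Mhd && RankNormalization.Scale

  val one: Either[String, RankNormalization] = RankNormalization.byNumber(16) // Right(LogUniqueWords)
  val bad: Either[String, RankNormalization] = RankNormalization.byNumber(3)  // Left("Unknown rank normalization number: 3")
}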