mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Some basic tests and config
This commit is contained in:
@ -8,7 +8,7 @@ trait DoobieMeta {
|
||||
|
||||
implicit val sqlLogging: LogHandler = LogHandler {
|
||||
case e @ Success(_, _, _, _) =>
|
||||
DoobieMeta.logger.trace("SQL " + e)
|
||||
DoobieMeta.logger.debug("SQL " + e)
|
||||
case e =>
|
||||
DoobieMeta.logger.error(s"SQL Failure: $e")
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ import docspell.common.{Ident, Language}
|
||||
import docspell.ftsclient.TextData
|
||||
|
||||
final case class FtsRecord(
|
||||
id: String,
|
||||
id: Ident,
|
||||
itemId: Ident,
|
||||
collective: Ident,
|
||||
language: Language,
|
||||
@ -30,7 +30,7 @@ object FtsRecord {
|
||||
text
|
||||
) =>
|
||||
FtsRecord(
|
||||
td.id.id,
|
||||
td.id,
|
||||
item,
|
||||
collective,
|
||||
language,
|
||||
@ -43,7 +43,7 @@ object FtsRecord {
|
||||
)
|
||||
case TextData.Item(item, collective, folder, name, notes, language) =>
|
||||
FtsRecord(
|
||||
td.id.id,
|
||||
td.id,
|
||||
item,
|
||||
collective,
|
||||
language,
|
||||
|
@ -10,11 +10,13 @@ import fs2.Chunk
|
||||
object FtsRepository extends DoobieMeta {
|
||||
val table = fr"ftspsql_search"
|
||||
|
||||
def searchSummary(q: FtsQuery): ConnectionIO[SearchSummary] = {
|
||||
val selectRank = mkSelectRank
|
||||
val query = mkQueryPart(q)
|
||||
def searchSummary(pq: PgQueryParser, rn: RankNormalization)(
|
||||
q: FtsQuery
|
||||
): ConnectionIO[SearchSummary] = {
|
||||
val selectRank = mkSelectRank(rn)
|
||||
val query = mkQueryPart(pq, q)
|
||||
|
||||
sql"""select count(id), max($selectRank)
|
||||
sql"""select count(id), coalesce(max($selectRank), 0)
|
||||
|from $table, $query
|
||||
|where ${mkCondition(q)} AND query @@ text_index
|
||||
|""".stripMargin
|
||||
@ -22,11 +24,11 @@ object FtsRepository extends DoobieMeta {
|
||||
.unique
|
||||
}
|
||||
|
||||
def search(
|
||||
def search(pq: PgQueryParser, rn: RankNormalization)(
|
||||
q: FtsQuery,
|
||||
withHighlighting: Boolean
|
||||
): ConnectionIO[Vector[SearchResult]] = {
|
||||
val selectRank = mkSelectRank
|
||||
val selectRank = mkSelectRank(rn)
|
||||
|
||||
val hlOption =
|
||||
s"startsel=${q.highlight.pre},stopsel=${q.highlight.post}"
|
||||
@ -44,7 +46,7 @@ object FtsRepository extends DoobieMeta {
|
||||
val select =
|
||||
fr"id, item_id, collective, lang, attach_id, folder_id, attach_name, item_name, $selectRank as rank, $selectHl"
|
||||
|
||||
val query = mkQueryPart(q)
|
||||
val query = mkQueryPart(pq, q)
|
||||
|
||||
sql"""select $select
|
||||
|from $table, $query
|
||||
@ -74,16 +76,22 @@ object FtsRepository extends DoobieMeta {
|
||||
List(items, folders).flatten.foldLeft(coll)(_ ++ fr"AND" ++ _)
|
||||
}
|
||||
|
||||
private def mkQueryPart(q: FtsQuery): Fragment =
|
||||
fr"websearch_to_tsquery(fts_config, ${q.q}) query"
|
||||
private def mkQueryPart(p: PgQueryParser, q: FtsQuery): Fragment = {
|
||||
val fname = Fragment.const(p.name)
|
||||
fr"$fname(fts_config, ${q.q}) query"
|
||||
}
|
||||
|
||||
private def mkSelectRank: Fragment =
|
||||
fr"ts_rank_cd(text_index, query, 4)"
|
||||
private def mkSelectRank(rn: RankNormalization): Fragment = {
|
||||
val bits = rn.value.toNonEmptyList.map(n => sql"$n").reduceLeft(_ ++ sql"|" ++ _)
|
||||
fr"ts_rank_cd(text_index, query, $bits)"
|
||||
}
|
||||
|
||||
def replaceChunk(r: Chunk[FtsRecord]): ConnectionIO[Int] =
|
||||
r.traverse(replace).map(_.foldLeft(0)(_ + _))
|
||||
def replaceChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
|
||||
r.traverse(replace(pgConfig)).map(_.foldLeft(0)(_ + _))
|
||||
|
||||
def replace(r: FtsRecord): ConnectionIO[Int] =
|
||||
def replace(
|
||||
pgConfig: Language => String
|
||||
)(r: FtsRecord): ConnectionIO[Int] =
|
||||
(fr"INSERT INTO $table (id,item_id,collective,lang,attach_id,folder_id,attach_name,attach_content,item_name,item_notes,fts_config) VALUES (" ++
|
||||
commas(
|
||||
sql"${r.id}",
|
||||
@ -107,7 +115,7 @@ object FtsRepository extends DoobieMeta {
|
||||
sql"fts_config = ${pgConfig(r.language)}::regconfig"
|
||||
)).update.run
|
||||
|
||||
def update(r: FtsRecord): ConnectionIO[Int] =
|
||||
def update(pgConfig: Language => String)(r: FtsRecord): ConnectionIO[Int] =
|
||||
(fr"UPDATE $table SET" ++ commas(
|
||||
sql"lang = ${r.language}",
|
||||
sql"folder_id = ${r.folderId}",
|
||||
@ -118,8 +126,8 @@ object FtsRepository extends DoobieMeta {
|
||||
sql"fts_config = ${pgConfig(r.language)}::regconfig"
|
||||
) ++ fr"WHERE id = ${r.id}").update.run
|
||||
|
||||
def updateChunk(r: Chunk[FtsRecord]): ConnectionIO[Int] =
|
||||
r.traverse(update).map(_.foldLeft(0)(_ + _))
|
||||
def updateChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
|
||||
r.traverse(update(pgConfig)).map(_.foldLeft(0)(_ + _))
|
||||
|
||||
def updateFolder(
|
||||
itemId: Ident,
|
||||
@ -154,7 +162,10 @@ object FtsRepository extends DoobieMeta {
|
||||
private def commas(fr: Fragment, frn: Fragment*): Fragment =
|
||||
frn.foldLeft(fr)(_ ++ fr"," ++ _)
|
||||
|
||||
def pgConfig(language: Language): String =
|
||||
def getPgConfig(select: PartialFunction[Language, String])(language: Language): String =
|
||||
select.applyOrElse(language, defaultPgConfig)
|
||||
|
||||
def defaultPgConfig(language: Language): String =
|
||||
language match {
|
||||
case Language.English => "english"
|
||||
case Language.German => "german"
|
||||
@ -163,7 +174,6 @@ object FtsRepository extends DoobieMeta {
|
||||
case Language.Spanish => "spanish"
|
||||
case Language.Hungarian => "hungarian"
|
||||
case Language.Portuguese => "portuguese"
|
||||
case Language.Czech => "simple" // ?
|
||||
case Language.Danish => "danish"
|
||||
case Language.Finnish => "finnish"
|
||||
case Language.Norwegian => "norwegian"
|
||||
@ -171,7 +181,8 @@ object FtsRepository extends DoobieMeta {
|
||||
case Language.Russian => "russian"
|
||||
case Language.Romanian => "romanian"
|
||||
case Language.Dutch => "dutch"
|
||||
case Language.Latvian => "lithuanian" // ?
|
||||
case Language.Czech => "simple"
|
||||
case Language.Latvian => "simple"
|
||||
case Language.Japanese => "simple"
|
||||
case Language.Hebrew => "simple"
|
||||
}
|
||||
|
@ -0,0 +1,32 @@
|
||||
package docspell.ftspsql
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
|
||||
/** Identifies the SQL function used to turn a user supplied search string
  * into a `tsquery` value.
  *
  * The set of parsers is closed; all members live in the companion object.
  */
sealed trait PgQueryParser {

  /** The name of the SQL function, exactly as it is written into a query. */
  def name: String

}
|
||||
|
||||
/** The available query parsers and lookup helpers. */
object PgQueryParser {

  /** Selects the SQL function `to_tsquery`. */
  case object ToTsQuery extends PgQueryParser {
    val name: String = "to_tsquery"
  }

  /** Selects the SQL function `plainto_tsquery`. */
  case object Plain extends PgQueryParser {
    val name: String = "plainto_tsquery"
  }

  /** Selects the SQL function `phraseto_tsquery`. */
  case object Phrase extends PgQueryParser {
    val name: String = "phraseto_tsquery"
  }

  /** Selects the SQL function `websearch_to_tsquery`. */
  case object Websearch extends PgQueryParser {
    val name: String = "websearch_to_tsquery"
  }

  /** Every known parser; `fromName` searches this list in order. */
  val all: NonEmptyList[PgQueryParser] =
    NonEmptyList.of(ToTsQuery, Plain, Phrase, Websearch)

  /** Looks up a parser by its SQL function name, ignoring case.
    *
    * @param name
    *   the function name to look for
    * @return
    *   the matching parser, or a `Left` with an error message if no parser
    *   has that name
    */
  def fromName(name: String): Either[String, PgQueryParser] =
    all.find(parser => parser.name.equalsIgnoreCase(name)) match {
      case Some(parser) => Right(parser)
      case None         => Left(s"Unknown pg query parser: $name")
    }

  /** Like [[fromName]], but raises an error via `sys.error` for an unknown
    * name instead of returning a `Left`.
    */
  def unsafeFromName(name: String): PgQueryParser =
    fromName(name) match {
      case Right(parser) => parser
      case Left(message) => sys.error(message)
    }
}
|
@ -1,5 +1,25 @@
|
||||
package docspell.ftspsql
|
||||
|
||||
import docspell.common.{LenientUri, Password}
|
||||
import docspell.common._
|
||||
|
||||
case class PsqlConfig(url: LenientUri, user: String, password: Password)
|
||||
/** Settings for the PostgreSQL based full-text search client.
  *
  * @param url
  *   location of the database (presumably a JDBC url; the code that opens the
  *   connection is not part of this file)
  * @param user
  *   database user name
  * @param password
  *   password of the database user
  * @param pgConfigSelect
  *   overrides for the text search configuration name used per language;
  *   languages not covered here fall back to the built-in defaults
  * @param pgQueryParser
  *   the SQL function used to parse search queries
  * @param rankNormalization
  *   the normalization flags applied when ranking search results
  */
final case class PsqlConfig(
    url: LenientUri,
    user: String,
    password: Password,
    pgConfigSelect: PartialFunction[Language, String],
    pgQueryParser: PgQueryParser,
    rankNormalization: RankNormalization
)
|
||||
|
||||
object PsqlConfig {

  /** Creates a configuration from connection details only.
    *
    * The remaining settings get these defaults: no per-language overrides,
    * the `websearch_to_tsquery` query parser and the rank normalization
    * `Mhd && Scale`.
    */
  def defaults(url: LenientUri, user: String, password: Password): PsqlConfig =
    PsqlConfig(
      url = url,
      user = user,
      password = password,
      pgConfigSelect = PartialFunction.empty,
      pgQueryParser = PgQueryParser.Websearch,
      rankNormalization = RankNormalization.Mhd && RankNormalization.Scale
    )
}
|
||||
|
@ -17,6 +17,19 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
|
||||
extends FtsClient[F] {
|
||||
val engine = Ident.unsafe("postgres")
|
||||
|
||||
val config = cfg
|
||||
private[ftspsql] val transactor = xa
|
||||
|
||||
private[this] val searchSummary =
|
||||
FtsRepository.searchSummary(cfg.pgQueryParser, cfg.rankNormalization) _
|
||||
private[this] val search =
|
||||
FtsRepository.search(cfg.pgQueryParser, cfg.rankNormalization) _
|
||||
|
||||
private[this] val replaceChunk =
|
||||
FtsRepository.replaceChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
|
||||
private[this] val updateChunk =
|
||||
FtsRepository.updateChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
|
||||
|
||||
def initialize: F[List[FtsMigration[F]]] =
|
||||
Sync[F].pure(
|
||||
List(
|
||||
@ -49,8 +62,8 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
|
||||
def search(q: FtsQuery): F[FtsResult] =
|
||||
for {
|
||||
startNanos <- Sync[F].delay(System.nanoTime())
|
||||
summary <- FtsRepository.searchSummary(q).transact(xa)
|
||||
results <- FtsRepository.search(q, true).transact(xa)
|
||||
summary <- searchSummary(q).transact(xa)
|
||||
results <- search(q, true).transact(xa)
|
||||
endNanos <- Sync[F].delay(System.nanoTime())
|
||||
duration = Duration.nanos(endNanos - startNanos)
|
||||
res = SearchResult
|
||||
@ -63,9 +76,8 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
|
||||
.map(FtsRecord.fromTextData)
|
||||
.chunkN(50)
|
||||
.evalMap(chunk =>
|
||||
logger.debug(s"Update fts index with ${chunk.size} records") *> FtsRepository
|
||||
.replaceChunk(chunk)
|
||||
.transact(xa)
|
||||
logger.debug(s"Add to fts index ${chunk.size} records") *>
|
||||
replaceChunk(chunk).transact(xa)
|
||||
)
|
||||
.compile
|
||||
.drain
|
||||
@ -74,7 +86,10 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
|
||||
data
|
||||
.map(FtsRecord.fromTextData)
|
||||
.chunkN(50)
|
||||
.evalMap(chunk => FtsRepository.updateChunk(chunk).transact(xa))
|
||||
.evalMap(chunk =>
|
||||
logger.debug(s"Update fts index with ${chunk.size} records") *>
|
||||
updateChunk(chunk).transact(xa)
|
||||
)
|
||||
.compile
|
||||
.drain
|
||||
|
||||
@ -124,8 +139,9 @@ object PsqlFtsClient {
|
||||
xa = HikariTransactor[F](ds, connectEC)
|
||||
|
||||
pc = new PsqlFtsClient[F](cfg, xa)
|
||||
// _ <- Resource.eval(st.migrate)
|
||||
} yield pc
|
||||
}
|
||||
|
||||
def fromTransactor[F[_]: Async](cfg: PsqlConfig, xa: Transactor[F]): PsqlFtsClient[F] =
|
||||
new PsqlFtsClient[F](cfg, xa)
|
||||
}
|
||||
|
@ -0,0 +1,40 @@
|
||||
package docspell.ftspsql
|
||||
|
||||
import cats.Order
|
||||
import cats.data.NonEmptySet
|
||||
|
||||
/** A non-empty set of rank normalization flags for PostgreSQL's ranking
  * functions.
  *
  * A value is either one of the predefined flags from the companion object or
  * a combination built with `&&`.
  */
sealed trait RankNormalization {

  /** The integer flags that make up this normalization. */
  def value: NonEmptySet[Int]

  /** Combines this normalization with `other`; the result carries the flags
    * of both operands.
    */
  def &&(other: RankNormalization): RankNormalization = {
    val merged = value ++ other.value
    new RankNormalization { val value: NonEmptySet[Int] = merged }
  }
}
|
||||
|
||||
/** The predefined normalization flags.
  *
  * The flag meanings below are taken from the PostgreSQL documentation:
  * https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
  */
object RankNormalization {

  /** Flag 0: ignore the document length. */
  case object IgnoreDocLength extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(0)
  }

  /** Flag 1: divide the rank by 1 + the logarithm of the document length. */
  case object LogDocLength extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(1)
  }

  /** Flag 2: divide the rank by the document length. */
  case object DocLength extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(2)
  }

  /** Flag 4: divide the rank by the mean harmonic distance between extents. */
  case object Mhd extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(4)
  }

  /** Flag 8: divide the rank by the number of unique words in the document. */
  case object UniqueWords extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(8)
  }

  /** Flag 16: divide the rank by 1 + the logarithm of the number of unique
    * words in the document.
    */
  case object LogUniqueWords extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(16)
  }

  /** Flag 32: divide the rank by itself + 1. */
  case object Scale extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(32)
  }

  /** Finds the predefined flag for a single number.
    *
    * Only the members of `all` are searched, so a number that is a
    * combination of several flags is reported as unknown.
    *
    * @return
    *   the matching flag, or a `Left` with an error message
    */
  def byNumber(n: Int): Either[String, RankNormalization] =
    all.find(rn => rn.value.contains(n)) match {
      case Some(rn) => Right(rn)
      case None     => Left(s"Unknown rank normalization number: $n")
    }

  /** Orders normalizations by the combined (reduced) value of their flags.
    *
    * This must be defined before `all`, because building that set needs this
    * instance while the object is being initialised.
    */
  implicit val order: Order[RankNormalization] =
    Order.by(rn => rn.value.reduce)

  /** All predefined single flags. */
  val all: NonEmptySet[RankNormalization] =
    NonEmptySet.of(
      IgnoreDocLength,
      LogDocLength,
      DocLength,
      Mhd,
      UniqueWords,
      LogUniqueWords,
      Scale
    )
}
|
Reference in New Issue
Block a user