Some basic tests and config

This commit is contained in:
eikek 2022-03-21 00:10:28 +01:00
parent 029335e607
commit fef00bdfb5
12 changed files with 385 additions and 41 deletions

View File

@ -8,7 +8,7 @@ trait DoobieMeta {
implicit val sqlLogging: LogHandler = LogHandler {
case e @ Success(_, _, _, _) =>
DoobieMeta.logger.trace("SQL " + e)
DoobieMeta.logger.debug("SQL " + e)
case e =>
DoobieMeta.logger.error(s"SQL Failure: $e")
}

View File

@ -5,7 +5,7 @@ import docspell.common.{Ident, Language}
import docspell.ftsclient.TextData
final case class FtsRecord(
id: String,
id: Ident,
itemId: Ident,
collective: Ident,
language: Language,
@ -30,7 +30,7 @@ object FtsRecord {
text
) =>
FtsRecord(
td.id.id,
td.id,
item,
collective,
language,
@ -43,7 +43,7 @@ object FtsRecord {
)
case TextData.Item(item, collective, folder, name, notes, language) =>
FtsRecord(
td.id.id,
td.id,
item,
collective,
language,

View File

@ -10,11 +10,13 @@ import fs2.Chunk
object FtsRepository extends DoobieMeta {
val table = fr"ftspsql_search"
def searchSummary(q: FtsQuery): ConnectionIO[SearchSummary] = {
val selectRank = mkSelectRank
val query = mkQueryPart(q)
def searchSummary(pq: PgQueryParser, rn: RankNormalization)(
q: FtsQuery
): ConnectionIO[SearchSummary] = {
val selectRank = mkSelectRank(rn)
val query = mkQueryPart(pq, q)
sql"""select count(id), max($selectRank)
sql"""select count(id), coalesce(max($selectRank), 0)
|from $table, $query
|where ${mkCondition(q)} AND query @@ text_index
|""".stripMargin
@ -22,11 +24,11 @@ object FtsRepository extends DoobieMeta {
.unique
}
def search(
def search(pq: PgQueryParser, rn: RankNormalization)(
q: FtsQuery,
withHighlighting: Boolean
): ConnectionIO[Vector[SearchResult]] = {
val selectRank = mkSelectRank
val selectRank = mkSelectRank(rn)
val hlOption =
s"startsel=${q.highlight.pre},stopsel=${q.highlight.post}"
@ -44,7 +46,7 @@ object FtsRepository extends DoobieMeta {
val select =
fr"id, item_id, collective, lang, attach_id, folder_id, attach_name, item_name, $selectRank as rank, $selectHl"
val query = mkQueryPart(q)
val query = mkQueryPart(pq, q)
sql"""select $select
|from $table, $query
@ -74,16 +76,22 @@ object FtsRepository extends DoobieMeta {
List(items, folders).flatten.foldLeft(coll)(_ ++ fr"AND" ++ _)
}
private def mkQueryPart(q: FtsQuery): Fragment =
fr"websearch_to_tsquery(fts_config, ${q.q}) query"
/** Builds the `<parser>(fts_config, <query text>) query` part of the FROM clause.
  *
  * The parser function name is spliced into the SQL verbatim via `Fragment.const`; it
  * comes from `PgQueryParser.name`. The user's query text is passed as a bound parameter.
  */
private def mkQueryPart(p: PgQueryParser, q: FtsQuery): Fragment = {
  val parserFunction = Fragment.const(p.name)
  fr"$parserFunction(fts_config, ${q.q}) query"
}
private def mkSelectRank: Fragment =
fr"ts_rank_cd(text_index, query, 4)"
/** Builds the `ts_rank_cd(text_index, query, <normalization>)` expression.
  *
  * Every number in `rn.value` is bound as its own parameter and the parameters are
  * joined with `|`, so the bitwise OR is evaluated by the database.
  */
private def mkSelectRank(rn: RankNormalization): Fragment = {
  val placeholders = rn.value.toNonEmptyList.map(bit => sql"$bit")
  val bits = placeholders.reduceLeft((acc, next) => acc ++ sql"|" ++ next)
  fr"ts_rank_cd(text_index, query, $bits)"
}
def replaceChunk(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(replace).map(_.foldLeft(0)(_ + _))
def replaceChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(replace(pgConfig)).map(_.foldLeft(0)(_ + _))
def replace(r: FtsRecord): ConnectionIO[Int] =
def replace(
pgConfig: Language => String
)(r: FtsRecord): ConnectionIO[Int] =
(fr"INSERT INTO $table (id,item_id,collective,lang,attach_id,folder_id,attach_name,attach_content,item_name,item_notes,fts_config) VALUES (" ++
commas(
sql"${r.id}",
@ -107,7 +115,7 @@ object FtsRepository extends DoobieMeta {
sql"fts_config = ${pgConfig(r.language)}::regconfig"
)).update.run
def update(r: FtsRecord): ConnectionIO[Int] =
def update(pgConfig: Language => String)(r: FtsRecord): ConnectionIO[Int] =
(fr"UPDATE $table SET" ++ commas(
sql"lang = ${r.language}",
sql"folder_id = ${r.folderId}",
@ -118,8 +126,8 @@ object FtsRepository extends DoobieMeta {
sql"fts_config = ${pgConfig(r.language)}::regconfig"
) ++ fr"WHERE id = ${r.id}").update.run
def updateChunk(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(update).map(_.foldLeft(0)(_ + _))
def updateChunk(pgConfig: Language => String)(r: Chunk[FtsRecord]): ConnectionIO[Int] =
r.traverse(update(pgConfig)).map(_.foldLeft(0)(_ + _))
def updateFolder(
itemId: Ident,
@ -154,7 +162,10 @@ object FtsRepository extends DoobieMeta {
private def commas(fr: Fragment, frn: Fragment*): Fragment =
frn.foldLeft(fr)(_ ++ fr"," ++ _)
def pgConfig(language: Language): String =
/** Resolves the text search configuration name for a language.
  *
  * @param select   overrides; consulted first
  * @param language the language to resolve
  * @return the value from `select` when it is defined for `language`, otherwise the
  *         result of `defaultPgConfig`
  */
def getPgConfig(select: PartialFunction[Language, String])(language: Language): String =
  select.lift(language).getOrElse(defaultPgConfig(language))
def defaultPgConfig(language: Language): String =
language match {
case Language.English => "english"
case Language.German => "german"
@ -163,7 +174,6 @@ object FtsRepository extends DoobieMeta {
case Language.Spanish => "spanish"
case Language.Hungarian => "hungarian"
case Language.Portuguese => "portuguese"
case Language.Czech => "simple" // ?
case Language.Danish => "danish"
case Language.Finnish => "finnish"
case Language.Norwegian => "norwegian"
@ -171,7 +181,8 @@ object FtsRepository extends DoobieMeta {
case Language.Russian => "russian"
case Language.Romanian => "romanian"
case Language.Dutch => "dutch"
case Language.Latvian => "lithuanian" // ?
case Language.Czech => "simple"
case Language.Latvian => "simple"
case Language.Japanese => "simple"
case Language.Hebrew => "simple"
}

View File

@ -0,0 +1,32 @@
package docspell.ftspsql
import cats.data.NonEmptyList
/** Selects the SQL function that turns a user's search text into a `tsquery`. */
sealed trait PgQueryParser {

  /** The name of the SQL function, as written in a query. */
  def name: String
}

object PgQueryParser {
  case object ToTsQuery extends PgQueryParser {
    val name: String = "to_tsquery"
  }

  case object Plain extends PgQueryParser {
    val name: String = "plainto_tsquery"
  }

  case object Phrase extends PgQueryParser {
    val name: String = "phraseto_tsquery"
  }

  case object Websearch extends PgQueryParser {
    val name: String = "websearch_to_tsquery"
  }

  /** Every known parser. */
  val all: NonEmptyList[PgQueryParser] =
    NonEmptyList.of(ToTsQuery, Plain, Phrase, Websearch)

  /** Looks up a parser by its function name, ignoring case.
    *
    * @return the parser, or a `Left` with an error message if the name is unknown
    */
  def fromName(name: String): Either[String, PgQueryParser] =
    all.find(parser => parser.name.equalsIgnoreCase(name)) match {
      case Some(parser) => Right(parser)
      case None         => Left(s"Unknown pg query parser: $name")
    }

  /** Like [[fromName]], but fails via `sys.error` when the name is unknown. */
  def unsafeFromName(name: String): PgQueryParser =
    fromName(name) match {
      case Right(parser) => parser
      case Left(message) => sys.error(message)
    }
}

View File

@ -1,5 +1,25 @@
package docspell.ftspsql
import docspell.common.{LenientUri, Password}
import docspell.common._
case class PsqlConfig(url: LenientUri, user: String, password: Password)
/** Settings for the PostgreSQL based full-text search client.
  *
  * @param url               JDBC url of the database
  * @param user              database user
  * @param password          database password
  * @param pgConfigSelect    per-language overrides for the text search configuration name;
  *                          languages it is not defined for use the built-in defaults
  * @param pgQueryParser     the SQL function used to parse the search text into a query
  * @param rankNormalization normalization options passed to `ts_rank_cd` when ranking
  */
final case class PsqlConfig(
url: LenientUri,
user: String,
password: Password,
pgConfigSelect: PartialFunction[Language, String],
pgQueryParser: PgQueryParser,
rankNormalization: RankNormalization
)
object PsqlConfig {
/** Creates a config from connection settings only: no language overrides, the
  * `websearch_to_tsquery` parser and rank normalization `Mhd && Scale` (numbers 4 and 32).
  */
def defaults(url: LenientUri, user: String, password: Password): PsqlConfig =
PsqlConfig(
url,
user,
password,
// empty partial function: every language falls back to the default configuration
PartialFunction.empty,
PgQueryParser.Websearch,
RankNormalization.Mhd && RankNormalization.Scale
)
}

View File

@ -17,6 +17,19 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
extends FtsClient[F] {
val engine = Ident.unsafe("postgres")
val config = cfg
private[ftspsql] val transactor = xa
private[this] val searchSummary =
FtsRepository.searchSummary(cfg.pgQueryParser, cfg.rankNormalization) _
private[this] val search =
FtsRepository.search(cfg.pgQueryParser, cfg.rankNormalization) _
private[this] val replaceChunk =
FtsRepository.replaceChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
private[this] val updateChunk =
FtsRepository.updateChunk(FtsRepository.getPgConfig(cfg.pgConfigSelect)) _
def initialize: F[List[FtsMigration[F]]] =
Sync[F].pure(
List(
@ -49,8 +62,8 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
def search(q: FtsQuery): F[FtsResult] =
for {
startNanos <- Sync[F].delay(System.nanoTime())
summary <- FtsRepository.searchSummary(q).transact(xa)
results <- FtsRepository.search(q, true).transact(xa)
summary <- searchSummary(q).transact(xa)
results <- search(q, true).transact(xa)
endNanos <- Sync[F].delay(System.nanoTime())
duration = Duration.nanos(endNanos - startNanos)
res = SearchResult
@ -63,9 +76,8 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
.map(FtsRecord.fromTextData)
.chunkN(50)
.evalMap(chunk =>
logger.debug(s"Update fts index with ${chunk.size} records") *> FtsRepository
.replaceChunk(chunk)
.transact(xa)
logger.debug(s"Add to fts index ${chunk.size} records") *>
replaceChunk(chunk).transact(xa)
)
.compile
.drain
@ -74,7 +86,10 @@ final class PsqlFtsClient[F[_]: Sync](cfg: PsqlConfig, xa: Transactor[F])
data
.map(FtsRecord.fromTextData)
.chunkN(50)
.evalMap(chunk => FtsRepository.updateChunk(chunk).transact(xa))
.evalMap(chunk =>
logger.debug(s"Update fts index with ${chunk.size} records") *>
updateChunk(chunk).transact(xa)
)
.compile
.drain
@ -124,8 +139,9 @@ object PsqlFtsClient {
xa = HikariTransactor[F](ds, connectEC)
pc = new PsqlFtsClient[F](cfg, xa)
// _ <- Resource.eval(st.migrate)
} yield pc
}
def fromTransactor[F[_]: Async](cfg: PsqlConfig, xa: Transactor[F]): PsqlFtsClient[F] =
new PsqlFtsClient[F](cfg, xa)
}

View File

@ -0,0 +1,40 @@
package docspell.ftspsql
import cats.Order
import cats.data.NonEmptySet
/** Normalization option(s) for the ranking function, held as a set of option numbers.
  *
  * Single options are the case objects in the companion; they can be combined with `&&`.
  */
sealed trait RankNormalization { self =>

  /** The option numbers contained in this value. */
  def value: NonEmptySet[Int]

  /** Combines this value with `other`; the result contains the numbers of both. */
  def &&(other: RankNormalization): RankNormalization = {
    val combined = self.value ++ other.value
    new RankNormalization { val value: NonEmptySet[Int] = combined }
  }
}

object RankNormalization {
  // see https://www.postgresql.org/docs/14/textsearch-controls.html#TEXTSEARCH-RANKING
  // The per-option notes below paraphrase that page.

  /** 0: ignore the document length. */
  case object IgnoreDocLength extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(0)
  }

  /** 1: divide the rank by 1 + the logarithm of the document length. */
  case object LogDocLength extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(1)
  }

  /** 2: divide the rank by the document length. */
  case object DocLength extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(2)
  }

  /** 4: divide the rank by the mean harmonic distance between extents. */
  case object Mhd extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(4)
  }

  /** 8: divide the rank by the number of unique words in the document. */
  case object UniqueWords extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(8)
  }

  /** 16: divide the rank by 1 + the logarithm of the number of unique words. */
  case object LogUniqueWords extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(16)
  }

  /** 32: divide the rank by itself + 1. */
  case object Scale extends RankNormalization {
    val value: NonEmptySet[Int] = NonEmptySet.one(32)
  }

  /** Finds the single option with the given number.
    *
    * Only the predefined options in `all` are searched, so a combined number
    * (e.g. 36) yields a `Left`.
    */
  def byNumber(n: Int): Either[String, RankNormalization] =
    all.find(option => option.value.contains(n)) match {
      case Some(option) => Right(option)
      case None         => Left(s"Unknown rank normalization number: $n")
    }

  // Orders values by their numbers reduced with the implicit Int semigroup.
  // Keep this above `all`: vals are initialised in declaration order and building
  // the set below needs this instance.
  implicit val order: Order[RankNormalization] =
    Order.by(option => option.value.reduce)

  /** All predefined single options. */
  val all: NonEmptySet[RankNormalization] =
    NonEmptySet.of(
      IgnoreDocLength,
      LogDocLength,
      DocLength,
      Mhd,
      UniqueWords,
      LogUniqueWords,
      Scale
    )
}

View File

@ -1,17 +1,20 @@
package docspell.ftspsql
import cats.effect._
import cats.effect.unsafe.implicits._
import docspell.logging.{Level, LogConfig}
//import cats.implicits._
import munit.CatsEffectSuite
import com.dimafeng.testcontainers.PostgreSQLContainer
import com.dimafeng.testcontainers.munit.TestContainerForAll
import docspell.common._
import docspell.logging.TestLoggingConfig
import munit.FunSuite
import org.testcontainers.utility.DockerImageName
import doobie.implicits._
class MigrationTest extends FunSuite with TestContainerForAll with TestLoggingConfig {
class MigrationTest
extends CatsEffectSuite
with PgFixtures
with TestContainerForAll
with TestLoggingConfig {
override val containerDef: PostgreSQLContainer.Def =
PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))
@ -23,9 +26,19 @@ class MigrationTest extends FunSuite with TestContainerForAll with TestLoggingCo
test("create schema") {
withContainers { cnt =>
val jdbc =
PsqlConfig(LenientUri.unsafe(cnt.jdbcUrl), cnt.username, Password(cnt.password))
PsqlConfig.defaults(
LenientUri.unsafe(cnt.jdbcUrl),
cnt.username,
Password(cnt.password)
)
new DbMigration[IO](jdbc).run.void.unsafeRunSync()
for {
_ <- DbMigration[IO](jdbc).run
n <- runQuery(cnt)(
sql"SELECT count(*) FROM ${FtsRepository.table}".query[Int].unique
)
_ = assertEquals(n, 0)
} yield ()
}
}
}

View File

@ -0,0 +1,69 @@
package docspell.ftspsql
import cats.syntax.all._
import com.dimafeng.testcontainers.PostgreSQLContainer
import docspell.common._
import docspell.store.{JdbcConfig, StoreFixture}
import doobie._
import doobie.implicits._
import cats.effect._
import docspell.ftsclient.TextData
import javax.sql.DataSource
/** Helpers and sample data for tests that run against a PostgreSQL test container. */
trait PgFixtures {

  /** Shorthand for `Ident.unsafe`. */
  def ident(n: String): Ident = Ident.unsafe(n)

  /** Default [[PsqlConfig]] pointing at the given container. */
  def psqlConfig(cnt: PostgreSQLContainer): PsqlConfig = {
    val url = LenientUri.unsafe(cnt.jdbcUrl)
    PsqlConfig.defaults(url, cnt.username, Password(cnt.password))
  }

  /** Store-level JDBC settings pointing at the given container. */
  def jdbcConfig(cnt: PostgreSQLContainer): JdbcConfig = {
    val url = LenientUri.unsafe(cnt.jdbcUrl)
    JdbcConfig(url, cnt.username, cnt.password)
  }

  /** A data source for the container's database, created via `StoreFixture`. */
  def dataSource(cnt: PostgreSQLContainer): Resource[IO, DataSource] =
    StoreFixture.dataSource(jdbcConfig(cnt))

  /** A transactor on top of [[dataSource]]. */
  def transactor(cnt: PostgreSQLContainer): Resource[IO, Transactor[IO]] =
    for {
      ds <- dataSource(cnt)
      xa <- StoreFixture.makeXA(ds)
    } yield xa

  /** A client for the container's database; the schema migration is run before the
    * client is handed out.
    */
  def psqlFtsClient(cnt: PostgreSQLContainer): Resource[IO, PsqlFtsClient[IO]] =
    for {
      xa <- transactor(cnt)
      client = PsqlFtsClient.fromTransactor(psqlConfig(cnt), xa)
      _ <- Resource.eval(DbMigration[IO](client.config).run)
    } yield client

  /** Runs a single query using a transactor that is acquired and released for this call. */
  def runQuery[A](cnt: PostgreSQLContainer)(q: ConnectionIO[A]): IO[A] =
    transactor(cnt).use(xa => q.transact(xa))

  /** Adds `.exec` to run a query with the transactor of an implicit client. */
  implicit class QueryOps[A](query: ConnectionIO[A]) {
    def exec(implicit client: PsqlFtsClient[IO]): IO[A] =
      query.transact(client.transactor)
  }

  val collective1: Ident = ident("coll1")
  val collective2: Ident = ident("coll2")

  /** Sample item record belonging to `collective1`. */
  val itemData: TextData.Item =
    TextData.Item(
      item = ident("item-id-1"),
      collective = collective1,
      folder = None,
      name = Some("mydoc.pdf"),
      notes = "my notes are these".some,
      language = Language.English
    )

  /** Sample attachment record of the same item, belonging to `collective1`. */
  val attachData: TextData.Attachment =
    TextData.Attachment(
      item = ident("item-id-1"),
      attachId = ident("attach-id-1"),
      collective = collective1,
      folder = None,
      language = Language.English,
      name = Some("mydoc.pdf"),
      text = Some("lorem ipsum dolores est")
    )
}

View File

@ -0,0 +1,143 @@
package docspell.ftspsql
import cats.syntax.all._
import com.dimafeng.testcontainers.PostgreSQLContainer
import com.dimafeng.testcontainers.munit.TestContainerForAll
import docspell.logging.{Level, LogConfig, TestLoggingConfig}
import munit.CatsEffectSuite
import org.testcontainers.utility.DockerImageName
import cats.effect._
import docspell.ftsclient.{FtsQuery, TextData}
import doobie.implicits._
/** Integration tests for [[PsqlFtsClient]] against a PostgreSQL 14 test container.
  *
  * The container, and with it the database, is shared by all tests of this suite.
  * Every test therefore starts from an empty index (see `withEmptyIndex`), so its
  * assertions do not depend on what other tests left behind or on execution order.
  */
class PsqlFtsClientTest
    extends CatsEffectSuite
    with PgFixtures
    with TestContainerForAll
    with TestLoggingConfig {
  override val containerDef: PostgreSQLContainer.Def =
    PostgreSQLContainer.Def(DockerImageName.parse("postgres:14"))

  val logger = docspell.logging.getLogger[IO]

  private val table = FtsRepository.table

  override def docspellLogConfig: LogConfig =
    LogConfig(Level.Debug, LogConfig.Format.Fancy)

  override def rootMinimumLevel = Level.Warn

  /** Runs `body` with a client whose index has been cleared first. */
  private def withEmptyIndex[A](cnt: PostgreSQLContainer)(
      body: PsqlFtsClient[IO] => IO[A]
  ): IO[A] =
    psqlFtsClient(cnt).use(client => client.clearAll(logger) *> body(client))

  /** Number of rows currently in the index table. */
  private def countRows(implicit client: PsqlFtsClient[IO]): IO[Int] =
    sql"SELECT count(*) FROM $table".query[Int].unique.exec

  test("insert data into index") {
    withContainers { cnt =>
      withEmptyIndex(cnt) { implicit client =>
        def assertions(id: TextData.Item, ad: TextData.Attachment) =
          for {
            n <- countRows
            _ = assertEquals(n, 2)
            itemStored <-
              sql"select item_name, item_notes from $table WHERE id = ${id.id}"
                .query[(Option[String], Option[String])]
                .unique
                .exec
            _ = assertEquals(itemStored, (id.name, id.notes))
            attachStored <-
              sql"select attach_name, attach_content from $table where id = ${ad.id}"
                .query[(Option[String], Option[String])]
                .unique
                .exec
            _ = assertEquals(attachStored, (ad.name, ad.text))
          } yield ()

        for {
          _ <- client.indexData(logger, itemData, attachData)
          _ <- assertions(itemData, attachData)
          // indexing the same records again must not create duplicates
          _ <- client.indexData(logger, itemData, attachData)
          _ <- assertions(itemData, attachData)
          // indexing changed records must replace the stored values
          _ <- client.indexData(
            logger,
            itemData.copy(notes = None),
            attachData.copy(name = "ha.pdf".some)
          )
          _ <- assertions(
            itemData.copy(notes = None),
            attachData.copy(name = "ha.pdf".some)
          )
        } yield ()
      }
    }
  }

  test("clear index") {
    withContainers { cnt =>
      withEmptyIndex(cnt) { implicit client =>
        for {
          _ <- client.indexData(logger, itemData, attachData)
          _ <- client.clearAll(logger)
          n <- countRows
          _ = assertEquals(n, 0)
        } yield ()
      }
    }
  }

  test("clear index by collective") {
    withContainers { cnt =>
      withEmptyIndex(cnt) { implicit client =>
        for {
          _ <- client.indexData(
            logger,
            itemData,
            attachData,
            itemData.copy(collective = collective2, item = ident("item-id-2")),
            attachData.copy(collective = collective2, item = ident("item-id-2"))
          )
          before <- countRows
          _ = assertEquals(before, 4)
          _ <- client.clear(logger, collective1)
          after <- countRows
          _ = assertEquals(after, 2)
        } yield ()
      }
    }
  }

  test("search by query") {
    def query(s: String): FtsQuery =
      FtsQuery(
        q = s,
        collective = collective1,
        items = Set.empty,
        folders = Set.empty,
        limit = 10,
        offset = 0,
        highlight = FtsQuery.HighlightSetting.default
      )

    withContainers { cnt =>
      withEmptyIndex(cnt) { implicit client =>
        for {
          _ <- client.indexData(
            logger,
            itemData,
            attachData,
            itemData.copy(collective = collective2, item = ident("item-id-2")),
            attachData.copy(collective = collective2, item = ident("item-id-2"))
          )
          res0 <- client.search(query("lorem uiaeduiae"))
          _ = assertEquals(res0.count, 0)
          res1 <- client.search(query("lorem"))
          _ = assertEquals(res1.count, 1)
          // headOption: an empty result fails the assertion instead of throwing
          _ = assertEquals(res1.results.headOption.map(_.id), attachData.id.some)
          res2 <- client.search(query("note"))
          _ = assertEquals(res2.count, 1)
          _ = assertEquals(res2.results.headOption.map(_.id), itemData.id.some)
        } yield ()
      }
    }
  }
}

View File

@ -256,7 +256,7 @@ object JoexTasks {
if (cfg.fullTextSearch.enabled)
Resource.pure[F, FtsClient[F]](
new PsqlFtsClient[F](
PsqlConfig(cfg.jdbc.url, cfg.jdbc.user, Password(cfg.jdbc.password)),
PsqlConfig.defaults(cfg.jdbc.url, cfg.jdbc.user, Password(cfg.jdbc.password)),
store.transactor
)
)

View File

@ -195,7 +195,7 @@ object RestAppImpl {
if (cfg.fullTextSearch.enabled)
Resource.pure[F, FtsClient[F]](
new PsqlFtsClient[F](
PsqlConfig(
PsqlConfig.defaults(
cfg.backend.jdbc.url,
cfg.backend.jdbc.user,
Password(cfg.backend.jdbc.password)