Introducing fts client into codebase

This commit is contained in:
Eike Kettner
2020-06-17 00:24:23 +02:00
parent ee801745a7
commit 522daaf57e
21 changed files with 327 additions and 73 deletions

View File

@ -1,12 +1,16 @@
package docspell.backend
import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource}
import org.http4s.client.blaze.BlazeClientBuilder
import docspell.backend.auth.Login
import docspell.backend.ops._
import docspell.backend.signup.OSignup
import docspell.joexapi.client.JoexClient
import docspell.store.Store
import docspell.store.queue.JobQueue
import docspell.store.usertask.UserTaskStore
import docspell.ftssolr.SolrFtsClient
import scala.concurrent.ExecutionContext
import emil.javamail.{JavaMailEmil, Settings}
@ -25,6 +29,7 @@ trait BackendApp[F[_]] {
def job: OJob[F]
def item: OItem[F]
def itemSearch: OItemSearch[F]
def fulltext: OFulltext[F]
def mail: OMail[F]
def joex: OJoex[F]
def userTask: OUserTask[F]
@ -39,6 +44,7 @@ object BackendApp {
blocker: Blocker
): Resource[F, BackendApp[F]] =
for {
httpClient <- BlazeClientBuilder[F](httpClientEc).resource
utStore <- UserTaskStore(store)
queue <- JobQueue(store)
loginImpl <- Login[F](store)
@ -48,12 +54,14 @@ object BackendApp {
tagImpl <- OTag[F](store)
equipImpl <- OEquipment[F](store)
orgImpl <- OOrganization(store)
joexImpl <- OJoex.create(httpClientEc, store)
joexImpl <- OJoex(JoexClient(httpClient), store)
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
nodeImpl <- ONode(store)
jobImpl <- OJob(store, joexImpl)
itemImpl <- OItem(store)
itemSearchImpl <- OItemSearch(store)
solrFts <- SolrFtsClient(cfg.fullTextSearch.solr, httpClient)
fulltextImpl <- OFulltext(itemSearchImpl, solrFts)
javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
mailImpl <- OMail(store, javaEmil)
@ -71,6 +79,7 @@ object BackendApp {
val job = jobImpl
val item = itemImpl
val itemSearch = itemSearchImpl
val fulltext = fulltextImpl
val mail = mailImpl
val joex = joexImpl
val userTask = userTaskImpl

View File

@ -3,16 +3,19 @@ package docspell.backend
import docspell.backend.signup.{Config => SignupConfig}
import docspell.common._
import docspell.store.JdbcConfig
import docspell.ftssolr.SolrConfig
case class Config(
mailDebug: Boolean,
jdbc: JdbcConfig,
signup: SignupConfig,
files: Config.Files
files: Config.Files,
fullTextSearch: Config.FullTextSearch
) {}
object Config {
case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType])
case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
}

View File

@ -0,0 +1,76 @@
package docspell.backend.ops
import cats.effect._
import cats.implicits._
import fs2.Stream
import docspell.ftsclient._
import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
/** Item search that combines a structured `Query` with a full-text query string.
  *
  * Both methods take the structured query `q`, the full-text query string `fts`
  * and the `batch` (offset/limit) describing the requested window of results.
  */
trait OFulltext[F[_]] {
/** Finds items matching both the structured query `q` and the full-text query `fts`.
  *
  * NOTE(review): how `batch` is applied is up to the implementation; the one in
  * the companion object returns at most `batch.limit` entries.
  */
def findItems(q: Query, fts: String, batch: Batch): F[Vector[ListItem]]
/** Same as `findItems` but does more queries per item to find all tags. */
def findItemsWithTags(q: Query, fts: String, batch: Batch): F[Vector[ListItemWithTags]]
}
object OFulltext {

  // maybe use a temporary table? could run fts and do .take(batch.limit) and store this in sql
  // then run a query
  // check if supported by mariadb, postgres and h2. seems like it is supported everywhere

  /** Creates an [[OFulltext]] backed by the given item search and full-text client.
    *
    * The full-text index is queried page by page. The item ids of each page are
    * added to the structured query (`itemIds`), which is then run through
    * `itemSearch`. Pages are fetched until `batch.limit` results are collected
    * or the full-text index has no more hits.
    *
    * @param itemSearch runs the structured (SQL) part of the search
    * @param fts        the full-text search client
    * @return a resource holding the implementation; nothing is acquired or
    *         released (`Resource.pure`)
    */
  def apply[F[_]: Effect](
      itemSearch: OItemSearch[F],
      fts: FtsClient[F]
  ): Resource[F, OFulltext[F]] =
    Resource.pure[F, OFulltext[F]](new OFulltext[F] {

      def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
        findItemsFts(q, ftsQ, batch, itemSearch.findItems)
          .take(batch.limit.toLong)
          .compile
          .toVector

      def findItemsWithTags(
          q: Query,
          ftsQ: String,
          batch: Batch
      ): F[Vector[ListItemWithTags]] =
        findItemsFts(q, ftsQ, batch, itemSearch.findItemsWithTags)
          .take(batch.limit.toLong)
          .compile
          .toVector

      /** Streams the items that match both the full-text query and `q`.
        *
        * `batch` addresses a page of full-text hits. The ids of that page are
        * passed to `search` through `Query.itemIds`.
        */
      private def findItemsFts[A](
          q: Query,
          ftsQ: String,
          batch: Batch,
          search: (Query, Batch) => F[Vector[A]]
      ): Stream[F, A] = {
        val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset)
        val page =
          for {
            hits <-
              fts
                .searchBasic(fq)
                .map(_.item)
                .compile
                .toVector
            sq = q.copy(itemIds = Some(hits.toSet))
            // The id set is already one page of full-text hits. Applying the
            // page offset again would skip every row for any offset > 0.
            res <- search(sq, batch.copy(offset = 0))
          } yield (hits.size, res)

        Stream.eval(page).flatMap {
          case (hitCount, v) =>
            val results: Stream[F, A] = Stream.emits(v)
            // Continue while the full-text index returns full pages. The
            // structured query may filter hits out of a page, so the size of
            // `v` says nothing about whether later pages contain matches.
            if (hitCount < batch.limit) results
            else results ++ findItemsFts(q, ftsQ, batch.next, search)
        }
      }
    })
}

View File

@ -14,5 +14,5 @@ trait FtsClient[F[_]] {
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
def indexData(data: TextData): F[Unit]
def indexData(data: Stream[F, TextData]): F[Unit]
}

View File

@ -0,0 +1,7 @@
package docspell.ftssolr
import docspell.common._
/** Settings for the Solr full-text search client.
  *
  * @param url the Solr URL; the bundled default configuration sets it to
  *            `http://localhost:8983/solr/docspell_core`
  */
final case class SolrConfig(url: LenientUri)
// Empty companion object; currently holds no members.
object SolrConfig {}

View File

@ -1,12 +1,40 @@
package docspell.ftssolr
import fs2.Stream
import cats.effect._
import org.http4s.client.Client
import cats.data.NonEmptyList
import docspell.common._
import docspell.ftsclient._
import docspell.ftsclient.FtsBasicResult._
final class SolrFtsClient[F[_]] extends FtsClient[F] {
final class SolrFtsClient[F[_]](cfg: SolrConfig, client: Client[F]) extends FtsClient[F] {
println(s"$client $cfg")
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] =
???
def indexData(data: TextData): F[Unit] =
Stream.emits(
Seq(
FtsBasicResult(
Ident.unsafe("5J4zvCiTE2j-UEznDUsUCsA-5px6ftrSwfs-FpUWCaHh2Ei"),
NonEmptyList.of(AttachmentMatch(Ident.unsafe("a"), 0.2))
),
FtsBasicResult(
Ident.unsafe("8B8UNoC1U4y-dqnqjdFG7ue-LG5ktz9pWVt-diFemCLrLAa"),
NonEmptyList.of(AttachmentMatch(Ident.unsafe("b"), 0.5))
)
)
)
def indexData(data: Stream[F, TextData]): F[Unit] =
???
}
object SolrFtsClient {

  /** Builds a [[SolrFtsClient]] and exposes it as an `FtsClient` resource.
    *
    * The client is constructed eagerly and lifted with `Resource.pure`, so no
    * acquisition or release action is attached to the resource.
    *
    * @param cfg        the Solr settings handed to the client
    * @param httpClient the http4s client handed to the client
    */
  def apply[F[_]: ConcurrentEffect](
      cfg: SolrConfig,
      httpClient: Client[F]
  ): Resource[F, FtsClient[F]] = {
    val solr: FtsClient[F] = new SolrFtsClient[F](cfg, httpClient)
    Resource.pure[F, FtsClient[F]](solr)
  }
}

View File

@ -364,4 +364,12 @@ docspell.joex {
# By default all files are allowed.
valid-mime-types = [ ]
}
# Configuration of the full-text search engine.
full-text-search {
enabled = true
solr = {
url = "http://localhost:8983/solr/docspell_core"
}
}
}

View File

@ -8,6 +8,7 @@ import docspell.convert.ConvertConfig
import docspell.extract.ExtractConfig
import docspell.joex.hk.HouseKeepingConfig
import docspell.backend.Config.Files
import docspell.ftssolr.SolrConfig
case class Config(
appId: Ident,
@ -23,7 +24,8 @@ case class Config(
convert: ConvertConfig,
sendMail: MailSendConfig,
files: Files,
mailDebug: Boolean
mailDebug: Boolean,
fullTextSearch: Config.FullTextSearch
)
object Config {
@ -34,4 +36,6 @@ object Config {
math.min(mailChunkSize, maxMails)
}
case class UserTasks(scanMailbox: ScanMailbox)
case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
}

View File

@ -14,8 +14,10 @@ import docspell.joexapi.client.JoexClient
import docspell.store.Store
import docspell.store.queue._
import docspell.store.records.RJobLog
import docspell.ftssolr.SolrFtsClient
import fs2.concurrent.SignallingRef
import scala.concurrent.ExecutionContext
import org.http4s.client.blaze.BlazeClientBuilder
final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
cfg: Config,
@ -63,13 +65,15 @@ object JoexAppImpl {
blocker: Blocker
): Resource[F, JoexApp[F]] =
for {
client <- JoexClient.resource(clientEC)
httpClient <- BlazeClientBuilder[F](clientEC).resource
client = JoexClient(httpClient)
store <- Store.create(cfg.jdbc, connectEC, blocker)
queue <- JobQueue(store)
pstore <- PeriodicTaskStore.create(store)
nodeOps <- ONode(store)
joex <- OJoex(client, store)
upload <- OUpload(store, queue, cfg.files, joex)
fts <- SolrFtsClient(cfg.fullTextSearch.solr, httpClient)
javaEmil =
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
@ -77,7 +81,7 @@ object JoexAppImpl {
.withTask(
JobTask.json(
ProcessItemArgs.taskName,
ItemHandler.newItem[F](cfg),
ItemHandler.newItem[F](cfg, fts),
ItemHandler.onCancel[F]
)
)

View File

@ -8,6 +8,7 @@ import docspell.joex.Config
import docspell.joex.scheduler.Task
import docspell.store.queries.QItem
import docspell.store.records.RItem
import docspell.ftsclient.FtsClient
object ItemHandler {
def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
@ -16,11 +17,11 @@ object ItemHandler {
)
def newItem[F[_]: ConcurrentEffect: ContextShift](
cfg: Config
cfg: Config, fts: FtsClient[F]
): Task[F, ProcessItemArgs, Unit] =
CreateItem[F]
.flatMap(itemStateTask(ItemState.Processing))
.flatMap(safeProcess[F](cfg))
.flatMap(safeProcess[F](cfg, fts))
.map(_ => ())
def itemStateTask[F[_]: Sync, A](
@ -36,11 +37,11 @@ object ItemHandler {
Task(_.isLastRetry)
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
cfg: Config
cfg: Config, fts: FtsClient[F]
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
isLastRetry[F].flatMap {
case true =>
ProcessItem[F](cfg)(data).attempt.flatMap({
ProcessItem[F](cfg, fts)(data).attempt.flatMap({
case Right(d) =>
Task.pure(d)
case Left(ex) =>
@ -50,7 +51,7 @@ object ItemHandler {
.andThen(_ => Sync[F].raiseError(ex))
})
case false =>
ProcessItem[F](cfg)(data).flatMap(itemStateTask(ItemState.Created))
ProcessItem[F](cfg, fts)(data).flatMap(itemStateTask(ItemState.Created))
}
def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =

View File

@ -5,17 +5,18 @@ import docspell.common.ProcessItemArgs
import docspell.analysis.TextAnalysisConfig
import docspell.joex.scheduler.Task
import docspell.joex.Config
import docspell.ftsclient.FtsClient
object ProcessItem {
def apply[F[_]: ConcurrentEffect: ContextShift](
cfg: Config
cfg: Config, fts: FtsClient[F]
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
ExtractArchive(item)
.flatMap(Task.setProgress(20))
.flatMap(ConvertPdf(cfg.convert, _))
.flatMap(Task.setProgress(40))
.flatMap(TextExtraction(cfg.extraction, _))
.flatMap(TextExtraction(cfg.extraction, fts))
.flatMap(Task.setProgress(60))
.flatMap(analysisOnly[F](cfg.textAnalysis))
.flatMap(Task.setProgress(80))

View File

@ -1,19 +1,20 @@
package docspell.joex.process
import fs2.Stream
import bitpeace.{Mimetype, RangeDef}
import cats.data.OptionT
import cats.implicits._
import cats.effect.{ContextShift, Sync}
import cats.effect._
import docspell.common._
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
import docspell.joex.scheduler.{Context, Task}
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
import docspell.store.syntax.MimeTypes._
import docspell.ftsclient.{FtsClient, TextData}
object TextExtraction {
def apply[F[_]: Sync: ContextShift](
cfg: ExtractConfig,
def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])(
item: ItemData
): Task[F, ProcessItemArgs, ItemData] =
Task { ctx =>
@ -23,8 +24,20 @@ object TextExtraction {
txt <- item.attachments.traverse(
extractTextIfEmpty(ctx, cfg, ctx.args.meta.language, item)
)
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm)))
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm)))
_ <- fts.indexData(
Stream
.emits(txt)
.map(a =>
TextData(
item.item.id,
a.id,
ctx.args.meta.collective,
a.content.getOrElse("")
)
)
)
dur <- start
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
} yield item.copy(metas = txt)

View File

@ -84,6 +84,10 @@ docspell.server {
}
}
fulltext-search {
enabled = true
}
# Configuration for the backend.
backend {
# Enable or disable debugging for e-mail related functionality. This
@ -143,5 +147,13 @@ docspell.server {
# By default all files are allowed.
valid-mime-types = [ ]
}
# Configuration of the full-text search engine.
full-text-search {
enabled = true
solr = {
url = "http://localhost:8983/solr/docspell_core"
}
}
}
}

View File

@ -13,7 +13,8 @@ case class Config(
backend: BackendConfig,
auth: Login.Config,
integrationEndpoint: Config.IntegrationEndpoint,
maxItemPageSize: Int
maxItemPageSize: Int,
fulltextSearch: Config.FulltextSearch
)
object Config {
@ -50,4 +51,9 @@ object Config {
}
}
}
case class FulltextSearch(enabled: Boolean)
object FulltextSearch {}
}

View File

@ -124,7 +124,7 @@ trait Conversions {
m.dueDateFrom,
m.dueDateUntil,
m.allNames,
m.fullText,
None,
None
)

View File

@ -47,10 +47,19 @@ object ItemRoutes {
_ <- logger.ftrace(s"Got search mask: $mask")
query = Conversions.mkQuery(mask, user.account.collective)
_ <- logger.ftrace(s"Running query: $query")
items <- backend.itemSearch.findItemsWithTags(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
items <- mask.fullText match {
case None =>
backend.itemSearch.findItemsWithTags(
query,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
case Some(fq) =>
backend.fulltext.findItemsWithTags(
query,
fq,
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
)
}
resp <- Ok(Conversions.mkItemListWithTags(items))
} yield resp

View File

@ -3,6 +3,7 @@ package docspell.store.queries
import bitpeace.FileMeta
import cats.effect.Sync
import cats.data.OptionT
import cats.data.NonEmptyList
import cats.implicits._
import cats.effect.concurrent.Ref
import fs2.Stream
@ -165,6 +166,7 @@ object QItem {
dueDateFrom: Option[Timestamp],
dueDateTo: Option[Timestamp],
allNames: Option[String],
itemIds: Option[Set[Ident]],
orderAsc: Option[RItem.Columns.type => Column]
)
@ -186,6 +188,7 @@ object QItem {
None,
None,
None,
None,
None
)
}
@ -193,6 +196,9 @@ object QItem {
/** An offset/limit pair describing one window of a result list. */
case class Batch(offset: Int, limit: Int) {

  /** Returns a batch with the same offset whose limit is at most `n`. */
  def restrictLimitTo(n: Int): Batch =
    copy(limit = math.min(n, limit))

  /** Returns the batch that starts `limit` entries further on, with the same limit. */
  def next: Batch =
    copy(offset = offset + limit)
}
object Batch {
@ -326,7 +332,15 @@ object QItem {
)
.getOrElse(Fragment.empty),
q.dueDateFrom.map(d => IC.dueDate.prefix("i").isGt(d)).getOrElse(Fragment.empty),
q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty)
q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty),
q.itemIds
.map(ids =>
NonEmptyList
.fromList(ids.toList)
.map(nel => IC.id.prefix("i").isIn(nel))
.getOrElse(IC.id.prefix("i").is(""))
)
.getOrElse(Fragment.empty)
)
val order = q.orderAsc match {

View File

@ -55,6 +55,7 @@ type alias Model =
, untilDueDate : Maybe Int
, nameModel : Maybe String
, allNameModel : Maybe String
, fulltextModel : Maybe String
, datePickerInitialized : Bool
}
@ -111,6 +112,7 @@ init =
, untilDueDate = Nothing
, nameModel = Nothing
, allNameModel = Nothing
, fulltextModel = Nothing
, datePickerInitialized = False
}
@ -135,6 +137,7 @@ type Msg
| GetPersonResp (Result Http.Error ReferenceList)
| SetName String
| SetAllName String
| SetFulltext String
| ResetForm
@ -188,6 +191,7 @@ getItemSearch model =
, allNames =
model.allNameModel
|> Maybe.map amendWildcards
, fullText = model.fulltextModel
}
@ -484,6 +488,17 @@ update flags settings msg model =
)
(model.allNameModel /= next)
SetFulltext str ->
let
next =
Util.Maybe.fromString str
in
NextState
( { model | fulltextModel = next }
, Cmd.none
)
(model.fulltextModel /= next)
-- View
@ -517,6 +532,18 @@ view settings model =
]
]
]
, div [ class "field" ]
[ label [] [ text "Content Search" ]
, input
[ type_ "text"
, onInput SetFulltext
, model.fulltextModel |> Maybe.withDefault "" |> value
]
[]
, span [ class "small-info" ]
[ text "Fulltext search in document contents."
]
]
, formHeader nameIcon "Names"
, div [ class "field" ]
[ label [] [ text "All Names" ]

View File

@ -58,6 +58,7 @@ type Msg
| LoadMore
| UpdateThrottle
| SetBasicSearch String
| SetFulltextSearch String
type ViewMode

View File

@ -155,6 +155,13 @@ update key flags settings msg model =
in
update key flags settings m model
SetFulltextSearch str ->
let
m =
SearchMenuMsg (Comp.SearchMenu.SetFulltext str)
in
update key flags settings m model
--- Helpers

View File

@ -73,49 +73,7 @@ view settings model =
, ( "item-card-list", True )
]
]
[ div
[ classList
[ ( "invisible hidden", not model.menuCollapsed )
, ( "ui menu container", True )
]
]
[ a
[ class "item"
, onClick ToggleSearchMenu
, href "#"
, title "Open search menu"
]
[ i [ class "angle left icon" ] []
, i [ class "icons" ]
[ i [ class "grey bars icon" ] []
, i [ class "bottom left corner search icon" ] []
, if hasMoreSearch model then
i [ class "top right blue corner circle icon" ] []
else
span [ class "hidden invisible" ] []
]
]
, div [ class "ui category search item" ]
[ div [ class "ui transparent icon input" ]
[ input
[ type_ "text"
, placeholder "Basic search"
, onInput SetBasicSearch
, Maybe.map value model.searchMenuModel.allNameModel
|> Maybe.withDefault (value "")
]
[]
, i
[ classList
[ ( "search link icon", not model.searchInProgress )
, ( "loading spinner icon", model.searchInProgress )
]
]
[]
]
]
]
[ viewSearchBar model
, case model.viewMode of
Listing ->
Html.map ItemCardListMsg
@ -157,6 +115,72 @@ view settings model =
]
{-| Renders the search bar shown while the search menu is collapsed: a link
that toggles the search menu, followed by the "Basic search" and
"Fulltext search" text inputs.
-}
viewSearchBar : Model -> Html Msg
viewSearchBar model =
    let
        -- Icon shown next to each input; switches to a spinner while a search runs.
        busyOrSearchIcon =
            i
                [ classList
                    [ ( "search link icon", not model.searchInProgress )
                    , ( "loading spinner icon", model.searchInProgress )
                    ]
                ]
                []

        -- One text input of the bar; an absent value is rendered as the empty string.
        searchInputItem hint toMsg current =
            div [ class "ui category search item" ]
                [ div [ class "ui transparent icon input" ]
                    [ input
                        [ type_ "text"
                        , placeholder hint
                        , onInput toMsg
                        , value (Maybe.withDefault "" current)
                        ]
                        []
                    , busyOrSearchIcon
                    ]
                ]

        -- Marker icon shown when `hasMoreSearch` is true; otherwise a hidden placeholder.
        activeFilterMarker =
            if hasMoreSearch model then
                i [ class "top right blue corner circle icon" ] []

            else
                span [ class "hidden invisible" ] []
    in
    div
        [ classList
            [ ( "invisible hidden", not model.menuCollapsed )
            , ( "ui menu container", True )
            ]
        ]
        [ a
            [ class "item"
            , onClick ToggleSearchMenu
            , href "#"
            , title "Open search menu"
            ]
            [ i [ class "angle left icon" ] []
            , i [ class "icons" ]
                [ i [ class "grey bars icon" ] []
                , i [ class "bottom left corner search icon" ] []
                , activeFilterMarker
                ]
            ]
        , searchInputItem "Basic search" SetBasicSearch model.searchMenuModel.allNameModel
        , searchInputItem "Fulltext search" SetFulltextSearch model.searchMenuModel.fulltextModel
        ]
hasMoreSearch : Model -> Bool
hasMoreSearch model =
let
@ -164,6 +188,6 @@ hasMoreSearch model =
Comp.SearchMenu.getItemSearch model.searchMenuModel
is_ =
{ is | allNames = Nothing }
{ is | allNames = Nothing, fullText = Nothing }
in
is_ /= Api.Model.ItemSearch.empty