mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Introducing fts client into codebase
This commit is contained in:
@ -1,12 +1,16 @@
|
|||||||
package docspell.backend
|
package docspell.backend
|
||||||
|
|
||||||
import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource}
|
import cats.effect.{Blocker, ConcurrentEffect, ContextShift, Resource}
|
||||||
|
import org.http4s.client.blaze.BlazeClientBuilder
|
||||||
|
|
||||||
import docspell.backend.auth.Login
|
import docspell.backend.auth.Login
|
||||||
import docspell.backend.ops._
|
import docspell.backend.ops._
|
||||||
import docspell.backend.signup.OSignup
|
import docspell.backend.signup.OSignup
|
||||||
|
import docspell.joexapi.client.JoexClient
|
||||||
import docspell.store.Store
|
import docspell.store.Store
|
||||||
import docspell.store.queue.JobQueue
|
import docspell.store.queue.JobQueue
|
||||||
import docspell.store.usertask.UserTaskStore
|
import docspell.store.usertask.UserTaskStore
|
||||||
|
import docspell.ftssolr.SolrFtsClient
|
||||||
|
|
||||||
import scala.concurrent.ExecutionContext
|
import scala.concurrent.ExecutionContext
|
||||||
import emil.javamail.{JavaMailEmil, Settings}
|
import emil.javamail.{JavaMailEmil, Settings}
|
||||||
@ -25,6 +29,7 @@ trait BackendApp[F[_]] {
|
|||||||
def job: OJob[F]
|
def job: OJob[F]
|
||||||
def item: OItem[F]
|
def item: OItem[F]
|
||||||
def itemSearch: OItemSearch[F]
|
def itemSearch: OItemSearch[F]
|
||||||
|
def fulltext: OFulltext[F]
|
||||||
def mail: OMail[F]
|
def mail: OMail[F]
|
||||||
def joex: OJoex[F]
|
def joex: OJoex[F]
|
||||||
def userTask: OUserTask[F]
|
def userTask: OUserTask[F]
|
||||||
@ -39,6 +44,7 @@ object BackendApp {
|
|||||||
blocker: Blocker
|
blocker: Blocker
|
||||||
): Resource[F, BackendApp[F]] =
|
): Resource[F, BackendApp[F]] =
|
||||||
for {
|
for {
|
||||||
|
httpClient <- BlazeClientBuilder[F](httpClientEc).resource
|
||||||
utStore <- UserTaskStore(store)
|
utStore <- UserTaskStore(store)
|
||||||
queue <- JobQueue(store)
|
queue <- JobQueue(store)
|
||||||
loginImpl <- Login[F](store)
|
loginImpl <- Login[F](store)
|
||||||
@ -48,12 +54,14 @@ object BackendApp {
|
|||||||
tagImpl <- OTag[F](store)
|
tagImpl <- OTag[F](store)
|
||||||
equipImpl <- OEquipment[F](store)
|
equipImpl <- OEquipment[F](store)
|
||||||
orgImpl <- OOrganization(store)
|
orgImpl <- OOrganization(store)
|
||||||
joexImpl <- OJoex.create(httpClientEc, store)
|
joexImpl <- OJoex(JoexClient(httpClient), store)
|
||||||
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
|
uploadImpl <- OUpload(store, queue, cfg.files, joexImpl)
|
||||||
nodeImpl <- ONode(store)
|
nodeImpl <- ONode(store)
|
||||||
jobImpl <- OJob(store, joexImpl)
|
jobImpl <- OJob(store, joexImpl)
|
||||||
itemImpl <- OItem(store)
|
itemImpl <- OItem(store)
|
||||||
itemSearchImpl <- OItemSearch(store)
|
itemSearchImpl <- OItemSearch(store)
|
||||||
|
solrFts <- SolrFtsClient(cfg.fullTextSearch.solr, httpClient)
|
||||||
|
fulltextImpl <- OFulltext(itemSearchImpl, solrFts)
|
||||||
javaEmil =
|
javaEmil =
|
||||||
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
||||||
mailImpl <- OMail(store, javaEmil)
|
mailImpl <- OMail(store, javaEmil)
|
||||||
@ -71,6 +79,7 @@ object BackendApp {
|
|||||||
val job = jobImpl
|
val job = jobImpl
|
||||||
val item = itemImpl
|
val item = itemImpl
|
||||||
val itemSearch = itemSearchImpl
|
val itemSearch = itemSearchImpl
|
||||||
|
val fulltext = fulltextImpl
|
||||||
val mail = mailImpl
|
val mail = mailImpl
|
||||||
val joex = joexImpl
|
val joex = joexImpl
|
||||||
val userTask = userTaskImpl
|
val userTask = userTaskImpl
|
||||||
|
@ -3,16 +3,19 @@ package docspell.backend
|
|||||||
import docspell.backend.signup.{Config => SignupConfig}
|
import docspell.backend.signup.{Config => SignupConfig}
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.store.JdbcConfig
|
import docspell.store.JdbcConfig
|
||||||
|
import docspell.ftssolr.SolrConfig
|
||||||
|
|
||||||
case class Config(
|
case class Config(
|
||||||
mailDebug: Boolean,
|
mailDebug: Boolean,
|
||||||
jdbc: JdbcConfig,
|
jdbc: JdbcConfig,
|
||||||
signup: SignupConfig,
|
signup: SignupConfig,
|
||||||
files: Config.Files
|
files: Config.Files,
|
||||||
|
fullTextSearch: Config.FullTextSearch
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
object Config {
|
object Config {
|
||||||
|
|
||||||
case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType])
|
case class Files(chunkSize: Int, validMimeTypes: Seq[MimeType])
|
||||||
|
|
||||||
|
case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,76 @@
|
|||||||
|
package docspell.backend.ops
|
||||||
|
|
||||||
|
import cats.effect._
|
||||||
|
import cats.implicits._
|
||||||
|
import fs2.Stream
|
||||||
|
import docspell.ftsclient._
|
||||||
|
import OItemSearch.{Batch, ListItem, ListItemWithTags, Query}
|
||||||
|
|
||||||
|
/** Item searches that are additionally constrained by a full-text query.
  *
  * Both operations take the regular item `Query`, a full-text query
  * string and the `Batch` describing the requested page.
  */
trait OFulltext[F[_]] {

  /** Searches items using the query `q` together with the full-text
    * query string `fts`, returning the results for `batch`.
    */
  def findItems(q: Query, fts: String, batch: Batch): F[Vector[ListItem]]

  /** Same as `findItems` but does more queries per item to find all tags. */
  def findItemsWithTags(q: Query, fts: String, batch: Batch): F[Vector[ListItemWithTags]]

}
|
||||||
|
|
||||||
|
object OFulltext {
  // maybe use a temporary table? could run fts and do .take(batch.limit) and store this in sql
  // then run a query
  // check if supported by mariadb, postgres and h2. seems like it is supported everywhere

  /** Creates an [[OFulltext]] that first asks the full-text client for
    * matching item ids and then runs the regular item search restricted
    * to those ids.
    *
    * @param itemSearch the item search used to load the result rows
    * @param fts the full-text client that is queried for item ids
    */
  def apply[F[_]: Effect](
      itemSearch: OItemSearch[F],
      fts: FtsClient[F]
  ): Resource[F, OFulltext[F]] =
    Resource.pure[F, OFulltext[F]](new OFulltext[F] {

      def findItems(q: Query, ftsQ: String, batch: Batch): F[Vector[ListItem]] =
        findItemsFts(q, ftsQ, batch, itemSearch.findItems)
          .take(batch.limit.toLong)
          .compile
          .toVector

      def findItemsWithTags(
          q: Query,
          ftsQ: String,
          batch: Batch
      ): F[Vector[ListItemWithTags]] =
        findItemsFts(q, ftsQ, batch, itemSearch.findItemsWithTags)
          .take(batch.limit.toLong)
          .compile
          .toVector

      /** Streams results page by page: each step asks the full-text
        * client for the item ids of `batch`, restricts `q` to these ids
        * and runs `search`. Another page is requested while a step
        * yields at least `batch.limit` results.
        */
      private def findItemsFts[A](
          q: Query,
          ftsQ: String,
          batch: Batch,
          search: (Query, Batch) => F[Vector[A]]
      ): Stream[F, A] = {
        // Paging (limit/offset of `batch`) is handed to the full-text query.
        val fq = FtsQuery(ftsQ, q.collective, batch.limit, batch.offset)

        // The ids returned by the full-text query are already the ids of
        // the requested page. Applying the offset a second time to the
        // id-restricted item query would skip exactly those rows, so any
        // page with offset > 0 (including every `batch.next` page) came
        // back empty. The item query therefore always starts at 0.
        // NOTE(review): assumes `search` uses the batch as limit/offset
        // of the item query -- confirm against OItemSearch.
        val pageBatch = batch.copy(offset = 0)

        val qres =
          for {
            items <-
              fts
                .searchBasic(fq)
                .map(_.item)
                .compile
                .toVector
                .map(_.toSet)
            sq = q.copy(itemIds = Some(items))
            res <- search(sq, pageBatch)
          } yield res

        // NOTE(review): the stream ends as soon as one page yields fewer
        // than `batch.limit` rows, even if that is only because `q`
        // filtered out some of the full-text hits; later full-text pages
        // are not consulted in that case.
        Stream.eval(qres).flatMap { v =>
          val results = Stream.emits(v)
          if (v.size < batch.limit) results
          else results ++ findItemsFts(q, ftsQ, batch.next, search)
        }
      }

    })
}
|
@ -14,5 +14,5 @@ trait FtsClient[F[_]] {
|
|||||||
|
|
||||||
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
|
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult]
|
||||||
|
|
||||||
def indexData(data: TextData): F[Unit]
|
def indexData(data: Stream[F, TextData]): F[Unit]
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
package docspell.ftssolr
|
||||||
|
|
||||||
|
import docspell.common._
|
||||||
|
|
||||||
|
/** Settings for the Solr full-text search client.
  *
  * @param url the url used to reach Solr
  */
final case class SolrConfig(url: LenientUri)

/** Companion of [[SolrConfig]]; currently has no members. */
object SolrConfig
|
@ -1,12 +1,40 @@
|
|||||||
package docspell.ftssolr
|
package docspell.ftssolr
|
||||||
|
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
|
import cats.effect._
|
||||||
|
import org.http4s.client.Client
|
||||||
|
|
||||||
|
import cats.data.NonEmptyList
|
||||||
|
import docspell.common._
|
||||||
import docspell.ftsclient._
|
import docspell.ftsclient._
|
||||||
|
import docspell.ftsclient.FtsBasicResult._
|
||||||
|
|
||||||
final class SolrFtsClient[F[_]] extends FtsClient[F] {
|
final class SolrFtsClient[F[_]](cfg: SolrConfig, client: Client[F]) extends FtsClient[F] {
|
||||||
|
println(s"$client $cfg")
|
||||||
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] =
|
def searchBasic(q: FtsQuery): Stream[F, FtsBasicResult] =
|
||||||
???
|
Stream.emits(
|
||||||
def indexData(data: TextData): F[Unit] =
|
Seq(
|
||||||
|
FtsBasicResult(
|
||||||
|
Ident.unsafe("5J4zvCiTE2j-UEznDUsUCsA-5px6ftrSwfs-FpUWCaHh2Ei"),
|
||||||
|
NonEmptyList.of(AttachmentMatch(Ident.unsafe("a"), 0.2))
|
||||||
|
),
|
||||||
|
FtsBasicResult(
|
||||||
|
Ident.unsafe("8B8UNoC1U4y-dqnqjdFG7ue-LG5ktz9pWVt-diFemCLrLAa"),
|
||||||
|
NonEmptyList.of(AttachmentMatch(Ident.unsafe("b"), 0.5))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def indexData(data: Stream[F, TextData]): F[Unit] =
|
||||||
???
|
???
|
||||||
}
|
}
|
||||||
|
|
||||||
|
object SolrFtsClient {

  /** Builds a [[SolrFtsClient]] from the given settings and http client
    * and exposes it as a `Resource` of the `FtsClient` interface.
    *
    * The client is constructed right away; the resource wraps the
    * already created value and registers no finalizer here.
    *
    * @param cfg the solr settings
    * @param httpClient the http client handed to the solr client
    */
  def apply[F[_]: ConcurrentEffect](
      cfg: SolrConfig,
      httpClient: Client[F]
  ): Resource[F, FtsClient[F]] = {
    val solr: FtsClient[F] = new SolrFtsClient(cfg, httpClient)
    Resource.pure[F, FtsClient[F]](solr)
  }

}
|
||||||
|
@ -364,4 +364,12 @@ docspell.joex {
|
|||||||
# By default all files are allowed.
|
# By default all files are allowed.
|
||||||
valid-mime-types = [ ]
|
valid-mime-types = [ ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Configuration of the full-text search engine.
|
||||||
|
full-text-search {
|
||||||
|
enabled = true
|
||||||
|
solr = {
|
||||||
|
url = "http://localhost:8983/solr/docspell_core"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -8,6 +8,7 @@ import docspell.convert.ConvertConfig
|
|||||||
import docspell.extract.ExtractConfig
|
import docspell.extract.ExtractConfig
|
||||||
import docspell.joex.hk.HouseKeepingConfig
|
import docspell.joex.hk.HouseKeepingConfig
|
||||||
import docspell.backend.Config.Files
|
import docspell.backend.Config.Files
|
||||||
|
import docspell.ftssolr.SolrConfig
|
||||||
|
|
||||||
case class Config(
|
case class Config(
|
||||||
appId: Ident,
|
appId: Ident,
|
||||||
@ -23,7 +24,8 @@ case class Config(
|
|||||||
convert: ConvertConfig,
|
convert: ConvertConfig,
|
||||||
sendMail: MailSendConfig,
|
sendMail: MailSendConfig,
|
||||||
files: Files,
|
files: Files,
|
||||||
mailDebug: Boolean
|
mailDebug: Boolean,
|
||||||
|
fullTextSearch: Config.FullTextSearch
|
||||||
)
|
)
|
||||||
|
|
||||||
object Config {
|
object Config {
|
||||||
@ -34,4 +36,6 @@ object Config {
|
|||||||
math.min(mailChunkSize, maxMails)
|
math.min(mailChunkSize, maxMails)
|
||||||
}
|
}
|
||||||
case class UserTasks(scanMailbox: ScanMailbox)
|
case class UserTasks(scanMailbox: ScanMailbox)
|
||||||
|
|
||||||
|
case class FullTextSearch(enabled: Boolean, solr: SolrConfig)
|
||||||
}
|
}
|
||||||
|
@ -14,8 +14,10 @@ import docspell.joexapi.client.JoexClient
|
|||||||
import docspell.store.Store
|
import docspell.store.Store
|
||||||
import docspell.store.queue._
|
import docspell.store.queue._
|
||||||
import docspell.store.records.RJobLog
|
import docspell.store.records.RJobLog
|
||||||
|
import docspell.ftssolr.SolrFtsClient
|
||||||
import fs2.concurrent.SignallingRef
|
import fs2.concurrent.SignallingRef
|
||||||
import scala.concurrent.ExecutionContext
|
import scala.concurrent.ExecutionContext
|
||||||
|
import org.http4s.client.blaze.BlazeClientBuilder
|
||||||
|
|
||||||
final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
|
final class JoexAppImpl[F[_]: ConcurrentEffect: ContextShift: Timer](
|
||||||
cfg: Config,
|
cfg: Config,
|
||||||
@ -63,13 +65,15 @@ object JoexAppImpl {
|
|||||||
blocker: Blocker
|
blocker: Blocker
|
||||||
): Resource[F, JoexApp[F]] =
|
): Resource[F, JoexApp[F]] =
|
||||||
for {
|
for {
|
||||||
client <- JoexClient.resource(clientEC)
|
httpClient <- BlazeClientBuilder[F](clientEC).resource
|
||||||
|
client = JoexClient(httpClient)
|
||||||
store <- Store.create(cfg.jdbc, connectEC, blocker)
|
store <- Store.create(cfg.jdbc, connectEC, blocker)
|
||||||
queue <- JobQueue(store)
|
queue <- JobQueue(store)
|
||||||
pstore <- PeriodicTaskStore.create(store)
|
pstore <- PeriodicTaskStore.create(store)
|
||||||
nodeOps <- ONode(store)
|
nodeOps <- ONode(store)
|
||||||
joex <- OJoex(client, store)
|
joex <- OJoex(client, store)
|
||||||
upload <- OUpload(store, queue, cfg.files, joex)
|
upload <- OUpload(store, queue, cfg.files, joex)
|
||||||
|
fts <- SolrFtsClient(cfg.fullTextSearch.solr, httpClient)
|
||||||
javaEmil =
|
javaEmil =
|
||||||
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
JavaMailEmil(blocker, Settings.defaultSettings.copy(debug = cfg.mailDebug))
|
||||||
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
|
sch <- SchedulerBuilder(cfg.scheduler, blocker, store)
|
||||||
@ -77,7 +81,7 @@ object JoexAppImpl {
|
|||||||
.withTask(
|
.withTask(
|
||||||
JobTask.json(
|
JobTask.json(
|
||||||
ProcessItemArgs.taskName,
|
ProcessItemArgs.taskName,
|
||||||
ItemHandler.newItem[F](cfg),
|
ItemHandler.newItem[F](cfg, fts),
|
||||||
ItemHandler.onCancel[F]
|
ItemHandler.onCancel[F]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@ -8,6 +8,7 @@ import docspell.joex.Config
|
|||||||
import docspell.joex.scheduler.Task
|
import docspell.joex.scheduler.Task
|
||||||
import docspell.store.queries.QItem
|
import docspell.store.queries.QItem
|
||||||
import docspell.store.records.RItem
|
import docspell.store.records.RItem
|
||||||
|
import docspell.ftsclient.FtsClient
|
||||||
|
|
||||||
object ItemHandler {
|
object ItemHandler {
|
||||||
def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
|
def onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
|
||||||
@ -16,11 +17,11 @@ object ItemHandler {
|
|||||||
)
|
)
|
||||||
|
|
||||||
def newItem[F[_]: ConcurrentEffect: ContextShift](
|
def newItem[F[_]: ConcurrentEffect: ContextShift](
|
||||||
cfg: Config
|
cfg: Config, fts: FtsClient[F]
|
||||||
): Task[F, ProcessItemArgs, Unit] =
|
): Task[F, ProcessItemArgs, Unit] =
|
||||||
CreateItem[F]
|
CreateItem[F]
|
||||||
.flatMap(itemStateTask(ItemState.Processing))
|
.flatMap(itemStateTask(ItemState.Processing))
|
||||||
.flatMap(safeProcess[F](cfg))
|
.flatMap(safeProcess[F](cfg, fts))
|
||||||
.map(_ => ())
|
.map(_ => ())
|
||||||
|
|
||||||
def itemStateTask[F[_]: Sync, A](
|
def itemStateTask[F[_]: Sync, A](
|
||||||
@ -36,11 +37,11 @@ object ItemHandler {
|
|||||||
Task(_.isLastRetry)
|
Task(_.isLastRetry)
|
||||||
|
|
||||||
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
|
def safeProcess[F[_]: ConcurrentEffect: ContextShift](
|
||||||
cfg: Config
|
cfg: Config, fts: FtsClient[F]
|
||||||
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
)(data: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||||
isLastRetry[F].flatMap {
|
isLastRetry[F].flatMap {
|
||||||
case true =>
|
case true =>
|
||||||
ProcessItem[F](cfg)(data).attempt.flatMap({
|
ProcessItem[F](cfg, fts)(data).attempt.flatMap({
|
||||||
case Right(d) =>
|
case Right(d) =>
|
||||||
Task.pure(d)
|
Task.pure(d)
|
||||||
case Left(ex) =>
|
case Left(ex) =>
|
||||||
@ -50,7 +51,7 @@ object ItemHandler {
|
|||||||
.andThen(_ => Sync[F].raiseError(ex))
|
.andThen(_ => Sync[F].raiseError(ex))
|
||||||
})
|
})
|
||||||
case false =>
|
case false =>
|
||||||
ProcessItem[F](cfg)(data).flatMap(itemStateTask(ItemState.Created))
|
ProcessItem[F](cfg, fts)(data).flatMap(itemStateTask(ItemState.Created))
|
||||||
}
|
}
|
||||||
|
|
||||||
def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
|
def deleteByFileIds[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] =
|
||||||
|
@ -5,17 +5,18 @@ import docspell.common.ProcessItemArgs
|
|||||||
import docspell.analysis.TextAnalysisConfig
|
import docspell.analysis.TextAnalysisConfig
|
||||||
import docspell.joex.scheduler.Task
|
import docspell.joex.scheduler.Task
|
||||||
import docspell.joex.Config
|
import docspell.joex.Config
|
||||||
|
import docspell.ftsclient.FtsClient
|
||||||
|
|
||||||
object ProcessItem {
|
object ProcessItem {
|
||||||
|
|
||||||
def apply[F[_]: ConcurrentEffect: ContextShift](
|
def apply[F[_]: ConcurrentEffect: ContextShift](
|
||||||
cfg: Config
|
cfg: Config, fts: FtsClient[F]
|
||||||
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
)(item: ItemData): Task[F, ProcessItemArgs, ItemData] =
|
||||||
ExtractArchive(item)
|
ExtractArchive(item)
|
||||||
.flatMap(Task.setProgress(20))
|
.flatMap(Task.setProgress(20))
|
||||||
.flatMap(ConvertPdf(cfg.convert, _))
|
.flatMap(ConvertPdf(cfg.convert, _))
|
||||||
.flatMap(Task.setProgress(40))
|
.flatMap(Task.setProgress(40))
|
||||||
.flatMap(TextExtraction(cfg.extraction, _))
|
.flatMap(TextExtraction(cfg.extraction, fts))
|
||||||
.flatMap(Task.setProgress(60))
|
.flatMap(Task.setProgress(60))
|
||||||
.flatMap(analysisOnly[F](cfg.textAnalysis))
|
.flatMap(analysisOnly[F](cfg.textAnalysis))
|
||||||
.flatMap(Task.setProgress(80))
|
.flatMap(Task.setProgress(80))
|
||||||
|
@ -1,19 +1,20 @@
|
|||||||
package docspell.joex.process
|
package docspell.joex.process
|
||||||
|
|
||||||
|
import fs2.Stream
|
||||||
import bitpeace.{Mimetype, RangeDef}
|
import bitpeace.{Mimetype, RangeDef}
|
||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import cats.effect.{ContextShift, Sync}
|
import cats.effect._
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
|
import docspell.extract.{ExtractConfig, ExtractResult, Extraction}
|
||||||
import docspell.joex.scheduler.{Context, Task}
|
import docspell.joex.scheduler.{Context, Task}
|
||||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
|
import docspell.store.records.{RAttachment, RAttachmentMeta, RFileMeta}
|
||||||
import docspell.store.syntax.MimeTypes._
|
import docspell.store.syntax.MimeTypes._
|
||||||
|
import docspell.ftsclient.{FtsClient, TextData}
|
||||||
|
|
||||||
object TextExtraction {
|
object TextExtraction {
|
||||||
|
|
||||||
def apply[F[_]: Sync: ContextShift](
|
def apply[F[_]: ConcurrentEffect: ContextShift](cfg: ExtractConfig, fts: FtsClient[F])(
|
||||||
cfg: ExtractConfig,
|
|
||||||
item: ItemData
|
item: ItemData
|
||||||
): Task[F, ProcessItemArgs, ItemData] =
|
): Task[F, ProcessItemArgs, ItemData] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
@ -23,8 +24,20 @@ object TextExtraction {
|
|||||||
txt <- item.attachments.traverse(
|
txt <- item.attachments.traverse(
|
||||||
extractTextIfEmpty(ctx, cfg, ctx.args.meta.language, item)
|
extractTextIfEmpty(ctx, cfg, ctx.args.meta.language, item)
|
||||||
)
|
)
|
||||||
_ <- ctx.logger.debug("Storing extracted texts")
|
_ <- ctx.logger.debug("Storing extracted texts")
|
||||||
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm)))
|
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm)))
|
||||||
|
_ <- fts.indexData(
|
||||||
|
Stream
|
||||||
|
.emits(txt)
|
||||||
|
.map(a =>
|
||||||
|
TextData(
|
||||||
|
item.item.id,
|
||||||
|
a.id,
|
||||||
|
ctx.args.meta.collective,
|
||||||
|
a.content.getOrElse("")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
dur <- start
|
dur <- start
|
||||||
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
|
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
|
||||||
} yield item.copy(metas = txt)
|
} yield item.copy(metas = txt)
|
||||||
|
@ -84,6 +84,10 @@ docspell.server {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fulltext-search {
|
||||||
|
enabled = true
|
||||||
|
}
|
||||||
|
|
||||||
# Configuration for the backend.
|
# Configuration for the backend.
|
||||||
backend {
|
backend {
|
||||||
# Enable or disable debugging for e-mail related functionality. This
|
# Enable or disable debugging for e-mail related functionality. This
|
||||||
@ -143,5 +147,13 @@ docspell.server {
|
|||||||
# By default all files are allowed.
|
# By default all files are allowed.
|
||||||
valid-mime-types = [ ]
|
valid-mime-types = [ ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Configuration of the full-text search engine.
|
||||||
|
full-text-search {
|
||||||
|
enabled = true
|
||||||
|
solr = {
|
||||||
|
url = "http://localhost:8983/solr/docspell_core"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -13,7 +13,8 @@ case class Config(
|
|||||||
backend: BackendConfig,
|
backend: BackendConfig,
|
||||||
auth: Login.Config,
|
auth: Login.Config,
|
||||||
integrationEndpoint: Config.IntegrationEndpoint,
|
integrationEndpoint: Config.IntegrationEndpoint,
|
||||||
maxItemPageSize: Int
|
maxItemPageSize: Int,
|
||||||
|
fulltextSearch: Config.FulltextSearch
|
||||||
)
|
)
|
||||||
|
|
||||||
object Config {
|
object Config {
|
||||||
@ -50,4 +51,9 @@ object Config {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case class FulltextSearch(enabled: Boolean)
|
||||||
|
|
||||||
|
object FulltextSearch {}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -124,7 +124,7 @@ trait Conversions {
|
|||||||
m.dueDateFrom,
|
m.dueDateFrom,
|
||||||
m.dueDateUntil,
|
m.dueDateUntil,
|
||||||
m.allNames,
|
m.allNames,
|
||||||
m.fullText,
|
None,
|
||||||
None
|
None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -47,10 +47,19 @@ object ItemRoutes {
|
|||||||
_ <- logger.ftrace(s"Got search mask: $mask")
|
_ <- logger.ftrace(s"Got search mask: $mask")
|
||||||
query = Conversions.mkQuery(mask, user.account.collective)
|
query = Conversions.mkQuery(mask, user.account.collective)
|
||||||
_ <- logger.ftrace(s"Running query: $query")
|
_ <- logger.ftrace(s"Running query: $query")
|
||||||
items <- backend.itemSearch.findItemsWithTags(
|
items <- mask.fullText match {
|
||||||
query,
|
case None =>
|
||||||
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
backend.itemSearch.findItemsWithTags(
|
||||||
)
|
query,
|
||||||
|
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||||
|
)
|
||||||
|
case Some(fq) =>
|
||||||
|
backend.fulltext.findItemsWithTags(
|
||||||
|
query,
|
||||||
|
fq,
|
||||||
|
Batch(mask.offset, mask.limit).restrictLimitTo(cfg.maxItemPageSize)
|
||||||
|
)
|
||||||
|
}
|
||||||
resp <- Ok(Conversions.mkItemListWithTags(items))
|
resp <- Ok(Conversions.mkItemListWithTags(items))
|
||||||
} yield resp
|
} yield resp
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ package docspell.store.queries
|
|||||||
import bitpeace.FileMeta
|
import bitpeace.FileMeta
|
||||||
import cats.effect.Sync
|
import cats.effect.Sync
|
||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
|
import cats.data.NonEmptyList
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
import cats.effect.concurrent.Ref
|
import cats.effect.concurrent.Ref
|
||||||
import fs2.Stream
|
import fs2.Stream
|
||||||
@ -165,6 +166,7 @@ object QItem {
|
|||||||
dueDateFrom: Option[Timestamp],
|
dueDateFrom: Option[Timestamp],
|
||||||
dueDateTo: Option[Timestamp],
|
dueDateTo: Option[Timestamp],
|
||||||
allNames: Option[String],
|
allNames: Option[String],
|
||||||
|
itemIds: Option[Set[Ident]],
|
||||||
orderAsc: Option[RItem.Columns.type => Column]
|
orderAsc: Option[RItem.Columns.type => Column]
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -186,6 +188,7 @@ object QItem {
|
|||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
None
|
None
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -193,6 +196,9 @@ object QItem {
|
|||||||
case class Batch(offset: Int, limit: Int) {
|
case class Batch(offset: Int, limit: Int) {
|
||||||
def restrictLimitTo(n: Int): Batch =
|
def restrictLimitTo(n: Int): Batch =
|
||||||
Batch(offset, math.min(n, limit))
|
Batch(offset, math.min(n, limit))
|
||||||
|
|
||||||
|
def next: Batch =
|
||||||
|
Batch(offset + limit, limit)
|
||||||
}
|
}
|
||||||
|
|
||||||
object Batch {
|
object Batch {
|
||||||
@ -326,7 +332,15 @@ object QItem {
|
|||||||
)
|
)
|
||||||
.getOrElse(Fragment.empty),
|
.getOrElse(Fragment.empty),
|
||||||
q.dueDateFrom.map(d => IC.dueDate.prefix("i").isGt(d)).getOrElse(Fragment.empty),
|
q.dueDateFrom.map(d => IC.dueDate.prefix("i").isGt(d)).getOrElse(Fragment.empty),
|
||||||
q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty)
|
q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty),
|
||||||
|
q.itemIds
|
||||||
|
.map(ids =>
|
||||||
|
NonEmptyList
|
||||||
|
.fromList(ids.toList)
|
||||||
|
.map(nel => IC.id.prefix("i").isIn(nel))
|
||||||
|
.getOrElse(IC.id.prefix("i").is(""))
|
||||||
|
)
|
||||||
|
.getOrElse(Fragment.empty)
|
||||||
)
|
)
|
||||||
|
|
||||||
val order = q.orderAsc match {
|
val order = q.orderAsc match {
|
||||||
|
@ -55,6 +55,7 @@ type alias Model =
|
|||||||
, untilDueDate : Maybe Int
|
, untilDueDate : Maybe Int
|
||||||
, nameModel : Maybe String
|
, nameModel : Maybe String
|
||||||
, allNameModel : Maybe String
|
, allNameModel : Maybe String
|
||||||
|
, fulltextModel : Maybe String
|
||||||
, datePickerInitialized : Bool
|
, datePickerInitialized : Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,6 +112,7 @@ init =
|
|||||||
, untilDueDate = Nothing
|
, untilDueDate = Nothing
|
||||||
, nameModel = Nothing
|
, nameModel = Nothing
|
||||||
, allNameModel = Nothing
|
, allNameModel = Nothing
|
||||||
|
, fulltextModel = Nothing
|
||||||
, datePickerInitialized = False
|
, datePickerInitialized = False
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,6 +137,7 @@ type Msg
|
|||||||
| GetPersonResp (Result Http.Error ReferenceList)
|
| GetPersonResp (Result Http.Error ReferenceList)
|
||||||
| SetName String
|
| SetName String
|
||||||
| SetAllName String
|
| SetAllName String
|
||||||
|
| SetFulltext String
|
||||||
| ResetForm
|
| ResetForm
|
||||||
|
|
||||||
|
|
||||||
@ -188,6 +191,7 @@ getItemSearch model =
|
|||||||
, allNames =
|
, allNames =
|
||||||
model.allNameModel
|
model.allNameModel
|
||||||
|> Maybe.map amendWildcards
|
|> Maybe.map amendWildcards
|
||||||
|
, fullText = model.fulltextModel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -484,6 +488,17 @@ update flags settings msg model =
|
|||||||
)
|
)
|
||||||
(model.allNameModel /= next)
|
(model.allNameModel /= next)
|
||||||
|
|
||||||
|
SetFulltext str ->
|
||||||
|
let
|
||||||
|
next =
|
||||||
|
Util.Maybe.fromString str
|
||||||
|
in
|
||||||
|
NextState
|
||||||
|
( { model | fulltextModel = next }
|
||||||
|
, Cmd.none
|
||||||
|
)
|
||||||
|
(model.fulltextModel /= next)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- View
|
-- View
|
||||||
@ -517,6 +532,18 @@ view settings model =
|
|||||||
]
|
]
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
, div [ class "field" ]
|
||||||
|
[ label [] [ text "Content Search" ]
|
||||||
|
, input
|
||||||
|
[ type_ "text"
|
||||||
|
, onInput SetFulltext
|
||||||
|
, model.fulltextModel |> Maybe.withDefault "" |> value
|
||||||
|
]
|
||||||
|
[]
|
||||||
|
, span [ class "small-info" ]
|
||||||
|
[ text "Fulltext search in document contents."
|
||||||
|
]
|
||||||
|
]
|
||||||
, formHeader nameIcon "Names"
|
, formHeader nameIcon "Names"
|
||||||
, div [ class "field" ]
|
, div [ class "field" ]
|
||||||
[ label [] [ text "All Names" ]
|
[ label [] [ text "All Names" ]
|
||||||
|
@ -58,6 +58,7 @@ type Msg
|
|||||||
| LoadMore
|
| LoadMore
|
||||||
| UpdateThrottle
|
| UpdateThrottle
|
||||||
| SetBasicSearch String
|
| SetBasicSearch String
|
||||||
|
| SetFulltextSearch String
|
||||||
|
|
||||||
|
|
||||||
type ViewMode
|
type ViewMode
|
||||||
|
@ -155,6 +155,13 @@ update key flags settings msg model =
|
|||||||
in
|
in
|
||||||
update key flags settings m model
|
update key flags settings m model
|
||||||
|
|
||||||
|
SetFulltextSearch str ->
|
||||||
|
let
|
||||||
|
m =
|
||||||
|
SearchMenuMsg (Comp.SearchMenu.SetFulltext str)
|
||||||
|
in
|
||||||
|
update key flags settings m model
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--- Helpers
|
--- Helpers
|
||||||
|
@ -73,49 +73,7 @@ view settings model =
|
|||||||
, ( "item-card-list", True )
|
, ( "item-card-list", True )
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
[ div
|
[ viewSearchBar model
|
||||||
[ classList
|
|
||||||
[ ( "invisible hidden", not model.menuCollapsed )
|
|
||||||
, ( "ui menu container", True )
|
|
||||||
]
|
|
||||||
]
|
|
||||||
[ a
|
|
||||||
[ class "item"
|
|
||||||
, onClick ToggleSearchMenu
|
|
||||||
, href "#"
|
|
||||||
, title "Open search menu"
|
|
||||||
]
|
|
||||||
[ i [ class "angle left icon" ] []
|
|
||||||
, i [ class "icons" ]
|
|
||||||
[ i [ class "grey bars icon" ] []
|
|
||||||
, i [ class "bottom left corner search icon" ] []
|
|
||||||
, if hasMoreSearch model then
|
|
||||||
i [ class "top right blue corner circle icon" ] []
|
|
||||||
|
|
||||||
else
|
|
||||||
span [ class "hidden invisible" ] []
|
|
||||||
]
|
|
||||||
]
|
|
||||||
, div [ class "ui category search item" ]
|
|
||||||
[ div [ class "ui transparent icon input" ]
|
|
||||||
[ input
|
|
||||||
[ type_ "text"
|
|
||||||
, placeholder "Basic search…"
|
|
||||||
, onInput SetBasicSearch
|
|
||||||
, Maybe.map value model.searchMenuModel.allNameModel
|
|
||||||
|> Maybe.withDefault (value "")
|
|
||||||
]
|
|
||||||
[]
|
|
||||||
, i
|
|
||||||
[ classList
|
|
||||||
[ ( "search link icon", not model.searchInProgress )
|
|
||||||
, ( "loading spinner icon", model.searchInProgress )
|
|
||||||
]
|
|
||||||
]
|
|
||||||
[]
|
|
||||||
]
|
|
||||||
]
|
|
||||||
]
|
|
||||||
, case model.viewMode of
|
, case model.viewMode of
|
||||||
Listing ->
|
Listing ->
|
||||||
Html.map ItemCardListMsg
|
Html.map ItemCardListMsg
|
||||||
@ -157,6 +115,72 @@ view settings model =
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
{-| Renders the compact search bar: a link that opens the search menu,
followed by the basic search input and the fulltext search input. The
bar carries the "invisible hidden" classes unless the menu is collapsed.
-}
viewSearchBar : Model -> Html Msg
viewSearchBar model =
    let
        -- Blue corner marker when `hasMoreSearch` is true, otherwise a
        -- hidden placeholder element.
        moreSearchMarker =
            if hasMoreSearch model then
                i [ class "top right blue corner circle icon" ] []

            else
                span [ class "hidden invisible" ] []

        -- Search icon that turns into a spinner while a search runs.
        progressIcon =
            i
                [ classList
                    [ ( "search link icon", not model.searchInProgress )
                    , ( "loading spinner icon", model.searchInProgress )
                    ]
                ]
                []

        -- One text input of the bar; `current` is the value to show.
        searchField hint toMsg current =
            div [ class "ui category search item" ]
                [ div [ class "ui transparent icon input" ]
                    [ input
                        [ type_ "text"
                        , placeholder hint
                        , onInput toMsg
                        , value (Maybe.withDefault "" current)
                        ]
                        []
                    , progressIcon
                    ]
                ]

        menuToggle =
            a
                [ class "item"
                , onClick ToggleSearchMenu
                , href "#"
                , title "Open search menu"
                ]
                [ i [ class "angle left icon" ] []
                , i [ class "icons" ]
                    [ i [ class "grey bars icon" ] []
                    , i [ class "bottom left corner search icon" ] []
                    , moreSearchMarker
                    ]
                ]
    in
    div
        [ classList
            [ ( "invisible hidden", not model.menuCollapsed )
            , ( "ui menu container", True )
            ]
        ]
        [ menuToggle
        , searchField "Basic search…" SetBasicSearch model.searchMenuModel.allNameModel
        , searchField "Fulltext search…" SetFulltextSearch model.searchMenuModel.fulltextModel
        ]
|
||||||
|
|
||||||
|
|
||||||
hasMoreSearch : Model -> Bool
|
hasMoreSearch : Model -> Bool
|
||||||
hasMoreSearch model =
|
hasMoreSearch model =
|
||||||
let
|
let
|
||||||
@ -164,6 +188,6 @@ hasMoreSearch model =
|
|||||||
Comp.SearchMenu.getItemSearch model.searchMenuModel
|
Comp.SearchMenu.getItemSearch model.searchMenuModel
|
||||||
|
|
||||||
is_ =
|
is_ =
|
||||||
{ is | allNames = Nothing }
|
{ is | allNames = Nothing, fullText = Nothing }
|
||||||
in
|
in
|
||||||
is_ /= Api.Model.ItemSearch.empty
|
is_ /= Api.Model.ItemSearch.empty
|
||||||
|
Reference in New Issue
Block a user