Apply folder restriction to fulltext-only search

And update index when folder changes.
This commit is contained in:
Eike Kettner 2020-07-12 13:44:11 +02:00
parent aeba4ba913
commit 22fa1dba13
17 changed files with 183 additions and 27 deletions

View File

@ -9,7 +9,7 @@ import docspell.backend.ops.OItemSearch._
import docspell.common._
import docspell.ftsclient._
import docspell.store.Store
import docspell.store.queries.QItem
import docspell.store.queries.{QFolder, QItem}
import docspell.store.queue.JobQueue
import docspell.store.records.RJob
@ -101,12 +101,14 @@ object OFulltext {
ftsQ.query,
account.collective,
Set.empty,
Set.empty,
batch.limit,
batch.offset,
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
)
for {
ftsR <- fts.search(fq)
folders <- store.transact(QFolder.getMemberFolders(account))
ftsR <- fts.search(fq.withFolders(folders))
ftsItems = ftsR.results.groupBy(_.itemId)
select = ftsR.results.map(r => QItem.SelectedItem(r.itemId, r.score)).toSet
itemsWithTags <-
@ -184,6 +186,7 @@ object OFulltext {
ftsQ.query,
q.account.collective,
Set.empty,
Set.empty,
0,
0,
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)

View File

@ -142,6 +142,9 @@ object OItem {
.transact(RItem.updateFolder(item, collective, folder))
.attempt
.map(AddResult.fromUpdate)
.flatTap(
onSuccessIgnoreError(fts.updateFolder(logger, item, collective, folder))
)
def setCorrOrg(item: Ident, org: Option[Ident], collective: Ident): F[AddResult] =
store

View File

@ -58,7 +58,7 @@ trait FtsClient[F[_]] {
collective: Ident,
name: String
): F[Unit] =
updateIndex(logger, TextData.item(itemId, collective, Some(name), None))
updateIndex(logger, TextData.item(itemId, collective, None, Some(name), None))
def updateItemNotes(
logger: Logger[F],
@ -68,7 +68,7 @@ trait FtsClient[F[_]] {
): F[Unit] =
updateIndex(
logger,
TextData.item(itemId, collective, None, Some(notes.getOrElse("")))
TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")))
)
def updateAttachmentName(
@ -84,12 +84,20 @@ trait FtsClient[F[_]] {
itemId,
attachId,
collective,
None,
Language.English,
Some(name.getOrElse("")),
None
)
)
/** Records a changed folder of an item in the full-text index.
  *
  * Unlike the `updateItemName`/`updateItemNotes` style helpers of this
  * trait, this method has no default implementation; every client
  * must provide its own.
  *
  * @param logger     logger handed to the implementation
  * @param itemId     the item whose folder changed
  * @param collective the collective owning the item
  * @param folder     the new folder, `None` if the item has no folder
  */
def updateFolder(
logger: Logger[F],
itemId: Ident,
collective: Ident,
folder: Option[Ident]
): F[Unit]
def removeItem(logger: Logger[F], itemId: Ident): F[Unit]
def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit]
@ -117,6 +125,14 @@ object FtsClient {
// Full-text search is switched off: `data` is never consumed, only a
// warning is written to the given logger.
def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = {
  val disabledMsg = "Full-text search is disabled!"
  logger.warn(disabledMsg)
}
// Full-text search is switched off: none of the arguments besides the
// logger is used, the call only writes a warning.
def updateFolder(
    logger: Logger[F],
    itemId: Ident,
    collective: Ident,
    folder: Option[Ident]
): F[Unit] = {
  val disabledMsg = "Full-text search is disabled!"
  logger.warn(disabledMsg)
}
// Full-text search is switched off: `data` is never consumed, only a
// warning is written to the given logger.
def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = {
  val disabledMsg = "Full-text search is disabled!"
  logger.warn(disabledMsg)
}

View File

@ -10,11 +10,16 @@ import docspell.common._
* Searches must only look for given collective and in the given list
* of item ids, if it is non-empty. If the item set is empty, then
* don't restrict the result in this way.
*
* The set of folders must be used to restrict the results only to
* items that have one of the folders set or no folder set. If the
* set is empty, the restriction does not apply.
*/
final case class FtsQuery(
q: String,
collective: Ident,
items: Set[Ident],
folders: Set[Ident],
limit: Int,
offset: Int,
highlight: FtsQuery.HighlightSetting
@ -22,6 +27,9 @@ final case class FtsQuery(
/** A copy of this query addressing the following page: the offset is
  * advanced by `limit`, everything else stays as it is.
  */
def nextPage: FtsQuery = {
  val followingOffset = limit + offset
  this.copy(offset = followingOffset)
}
/** A copy of this query whose folder set is replaced by `fs`. */
def withFolders(fs: Set[Ident]): FtsQuery = {
  val restricted = this.copy(folders = fs)
  restricted
}
}
object FtsQuery {

View File

@ -10,6 +10,8 @@ sealed trait TextData {
def collective: Ident
def folder: Option[Ident]
final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
this match {
case a: TextData.Attachment => f(a)
@ -23,6 +25,7 @@ object TextData {
item: Ident,
attachId: Ident,
collective: Ident,
folder: Option[Ident],
lang: Language,
name: Option[String],
text: Option[String]
@ -36,15 +39,17 @@ object TextData {
item: Ident,
attachId: Ident,
collective: Ident,
folder: Option[Ident],
lang: Language,
name: Option[String],
text: Option[String]
): TextData =
Attachment(item, attachId, collective, lang, name, text)
Attachment(item, attachId, collective, folder, lang, name, text)
final case class Item(
item: Ident,
collective: Ident,
folder: Option[Ident],
name: Option[String],
notes: Option[String]
) extends TextData {
@ -56,8 +61,9 @@ object TextData {
def item(
item: Ident,
collective: Ident,
folder: Option[Ident],
name: Option[String],
notes: Option[String]
): TextData =
Item(item, collective, name, notes)
Item(item, collective, folder, name, notes)
}

View File

@ -0,0 +1,9 @@
package docspell.ftssolr
import docspell.common._
/** A plain list of document ids, as decoded from a search response. */
final case class DocIdResult(ids: List[Ident]) {

  /** Pairs every id of this result with the same `folder` value,
    * keeping the order of `ids`.
    */
  def toSetFolder(folder: Option[Ident]): List[SetFolder] =
    for (docId <- ids) yield SetFolder(docId, folder)
}

View File

@ -1,5 +1,7 @@
package docspell.ftssolr
import cats.implicits._
import docspell.common._
import docspell.ftsclient._
@ -21,6 +23,7 @@ trait JsonCodec {
(Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)),
(Field.folderId.name, td.folder.getOrElse(Ident.unsafe("")).asJson),
(Field.attachmentId.name, enc(td.attachId)),
(Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))),
(Field.discriminator.name, Json.fromString("attachment"))
@ -37,6 +40,7 @@ trait JsonCodec {
(Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)),
(Field.folderId.name, td.folder.getOrElse(Ident.unsafe("")).asJson),
(Field.itemName.name, Json.fromString(td.name.getOrElse(""))),
(Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))),
(Field.discriminator.name, Json.fromString("item"))
@ -49,6 +53,18 @@ trait JsonCodec {
): Encoder[TextData] =
Encoder(_.fold(ae.apply, ie.apply))
/** Decodes the ids of all documents found under `response.docs`.
  *
  * A missing or non-array `response.docs` yields an empty result; a
  * document whose id field cannot be decoded fails the whole decoding.
  */
implicit def docIdResultsDecoder: Decoder[DocIdResult] =
  Decoder.instance { cursor =>
    val docs =
      cursor.downField("response").downField("docs").values.getOrElse(Nil).toList

    docs
      .traverse(doc => doc.hcursor.get[Ident](Field.id.name))
      .map(found => DocIdResult(found))
  }
implicit def ftsResultDecoder: Decoder[FtsResult] =
new Decoder[FtsResult] {
final def apply(c: HCursor): Decoder.Result[FtsResult] =
@ -89,6 +105,12 @@ trait JsonCodec {
} yield md
}
/** A decoder that never inspects its input: any JSON value decodes
  * successfully to `()`.
  */
implicit def decodeEverythingToUnit: Decoder[Unit] =
  Decoder.instance(_ => Right(()))
implicit def identKeyEncoder: KeyEncoder[Ident] =
new KeyEncoder[Ident] {
override def apply(ident: Ident): String = ident.id
@ -129,9 +151,24 @@ trait JsonCodec {
}
}
implicit def textDataEncoder: Encoder[SetFields] =
implicit def setTextDataFieldsEncoder: Encoder[SetFields] =
Encoder(_.td.fold(setAttachmentEncoder.apply, setItemEncoder.apply))
/** Encodes a [[SetFolder]] as an object with two fields: the document
  * id and the folder field wrapped into a `{"set": <folder>}` object.
  */
implicit def setFolderEncoder(implicit
    enc: Encoder[Option[Ident]]
): Encoder[SetFolder] =
  Encoder.instance { sf =>
    // NOTE(review): looks like a Solr atomic-update "set" modifier - confirm
    val setFolder = Json.obj("set" -> sf.folder.asJson)

    Json.obj(
      Field.id.name       -> sf.docId.asJson,
      Field.folderId.name -> setFolder
    )
  }
}
object JsonCodec extends JsonCodec

View File

@ -40,16 +40,26 @@ object QueryData {
fields: List[Field],
fq: FtsQuery
): QueryData = {
val q = sanitize(fq.q)
val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ")
val items = fq.items.map(_.id).mkString(" ")
val collQ = s"""${Field.collectiveId.name}:"${fq.collective.id}""""
val filterQ = fq.items match {
case s if s.isEmpty =>
collQ
case _ =>
(collQ :: List(s"""${Field.itemId.name}:($items)""")).mkString(" AND ")
}
val q = sanitize(fq.q)
val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ")
val items = fq.items.map(_.id).mkString(" ")
val folders = fq.folders.map(_.id).mkString(" ")
val filterQ = List(
s"""${Field.collectiveId.name}:"${fq.collective.id}"""",
fq.items match {
case s if s.isEmpty =>
""
case _ =>
s"""${Field.itemId.name}:($items)"""
},
fq.folders match {
case s if s.isEmpty =>
""
case _ =>
s"""${Field.folderId.name}:($folders) OR (*:* NOT ${Field.folderId.name}:*)"""
}
).filterNot(_.isEmpty).map(t => s"($t)").mkString(" AND ")
QueryData(
extQ,
filterQ,

View File

@ -0,0 +1,5 @@
package docspell.ftssolr
import docspell.common._
/** A request to set the folder on a single index document.
  *
  * @param docId  id of the document to change
  * @param folder the folder to set; `None` when there is no folder
  */
final case class SetFolder(docId: Ident, folder: Option[Ident])

View File

@ -29,6 +29,17 @@ final class SolrFtsClient[F[_]: Effect](
// Runs the generic index modification, using solr's `update`
// operation for each list of text data.
def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
  modifyIndex(logger, data)(tds => solrUpdate.update(tds))
// Logs the change on debug level, then delegates the folder update to
// the solr update client. The `logger` is only used for that message.
def updateFolder(
    logger: Logger[F],
    itemId: Ident,
    collective: Ident,
    folder: Option[Ident]
): F[Unit] = {
  val announce = logger.debug(
    s"Update folder in solr index for coll/item ${collective.id}/${itemId.id}"
  )
  val applyChange = solrUpdate.updateFolder(itemId, collective, folder)

  announce *> applyChange
}
def modifyIndex(logger: Logger[F], data: Stream[F, TextData])(
f: List[TextData] => F[Unit]
): F[Unit] =

View File

@ -1,7 +1,9 @@
package docspell.ftssolr
import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.ftsclient._
import docspell.ftssolr.JsonCodec._
@ -11,6 +13,7 @@ import org.http4s._
import org.http4s.circe._
import org.http4s.client.Client
import org.http4s.client.dsl.Http4sClientDsl
import org.http4s.circe.CirceEntityDecoder._
trait SolrUpdate[F[_]] {
@ -18,6 +21,8 @@ trait SolrUpdate[F[_]] {
def update(tds: List[TextData]): F[Unit]
/** Sets `folder` on the index documents belonging to the given item
  * of the given collective.
  */
def updateFolder(itemId: Ident, collective: Ident, folder: Option[Ident]): F[Unit]
def delete(q: String, commitWithin: Option[Int]): F[Unit]
}
@ -43,6 +48,29 @@ object SolrUpdate {
client.expect[Unit](req)
}
/** Sets the folder on every index document of an item.
  *
  * Works in two requests: first the ids of all documents matching the
  * item and collective are queried (only the id field is requested,
  * with no practical row limit), then one folder update per found id
  * is posted to the update url.
  */
def updateFolder(
    itemId: Ident,
    collective: Ident,
    folder: Option[Ident]
): F[Unit] = {
  val queryUrl = Uri.unsafeFromString(cfg.url.asString) / "query"
  val docFilter =
    s"${Field.itemId.name}:${itemId.id} AND ${Field.collectiveId.name}:${collective.id}"
  val findDocs =
    QueryData("*:*", docFilter, Int.MaxValue, 0, List(Field.id), Map.empty)

  client
    .expect[DocIdResult](Method.POST(findDocs.asJson, queryUrl))
    .flatMap { found =>
      val changes = found.toSetFolder(folder)
      client.expect[Unit](Method.POST(changes.asJson, url))
    }
}
def delete(q: String, commitWithin: Option[Int]): F[Unit] = {
val uri = commitWithin match {
case Some(n) =>

View File

@ -80,6 +80,7 @@ object FtsWork {
caa.item,
caa.id,
caa.collective,
caa.folder,
caa.lang,
caa.name,
caa.content
@ -92,7 +93,9 @@ object FtsWork {
ctx.logger,
ctx.store
.transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5))
.map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes))
.map(nn =>
TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
)
)
)
)

View File

@ -33,8 +33,13 @@ object TextExtraction {
)
_ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1)))
idxItem =
TextData.item(item.item.id, ctx.args.meta.collective, item.item.name.some, None)
idxItem = TextData.item(
item.item.id,
ctx.args.meta.collective,
None, //folder
item.item.name.some,
None
)
_ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*)
dur <- start
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
@ -55,6 +60,7 @@ object TextExtraction {
item.item.id,
ra.id,
collective,
None, //folder
lang,
ra.name,
rm.content

View File

@ -145,6 +145,7 @@ object QAttachment {
id: Ident,
item: Ident,
collective: Ident,
folder: Option[Ident],
lang: Language,
name: Option[String],
content: Option[String]
@ -160,10 +161,11 @@ object QAttachment {
val mContent = RAttachmentMeta.Columns.content.prefix("m")
val iId = RItem.Columns.id.prefix("i")
val iColl = RItem.Columns.cid.prefix("i")
val iFolder = RItem.Columns.folder.prefix("i")
val cId = RCollective.Columns.id.prefix("c")
val cLang = RCollective.Columns.language.prefix("c")
val cols = Seq(aId, aItem, iColl, cLang, aName, mContent)
val cols = Seq(aId, aItem, iColl, iFolder, cLang, aName, mContent)
val from = RAttachment.table ++ fr"a INNER JOIN" ++
RAttachmentMeta.table ++ fr"m ON" ++ aId.is(mId) ++
fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) ++

View File

@ -270,6 +270,9 @@ object QFolder {
)
}
/** Runs the `findMemberFolderIds` query for the account and collects
  * the resulting folder ids into a set.
  */
def getMemberFolders(account: AccountId): ConnectionIO[Set[Ident]] = {
  val folderIds = findMemberFolderIds(account).query[Ident]
  folderIds.to[Set]
}
// Looks up the user record of the account and keeps only its `uid`;
// the result is empty if no user is found.
private def findUserId(account: AccountId): ConnectionIO[Option[Ident]] =
  for {
    user <- RUser.findByAccount(account)
  } yield user.map(u => u.uid)
}

View File

@ -585,6 +585,7 @@ object QItem {
/** The parts of an item that go into the full-text index.
  *
  * NOTE(review): rows are read into this class via `query[NameAndNotes]`
  * in `allNameAndNotes`; the field order presumably has to line up with
  * the selected columns there - confirm before reordering fields.
  *
  * @param id         the item id
  * @param collective the collective owning the item
  * @param folder     the folder of the item, if any
  * @param name       the item name
  * @param notes      the item notes, if any
  */
final case class NameAndNotes(
id: Ident,
collective: Ident,
folder: Option[Ident],
name: String,
notes: Option[String]
)
@ -592,12 +593,13 @@ object QItem {
coll: Option[Ident],
chunkSize: Int
): Stream[ConnectionIO, NameAndNotes] = {
val iId = RItem.Columns.id
val iColl = RItem.Columns.cid
val iName = RItem.Columns.name
val iNotes = RItem.Columns.notes
val iId = RItem.Columns.id
val iColl = RItem.Columns.cid
val iName = RItem.Columns.name
val iFolder = RItem.Columns.folder
val iNotes = RItem.Columns.notes
val cols = Seq(iId, iColl, iName, iNotes)
val cols = Seq(iId, iColl, iFolder, iName, iNotes)
val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty)
selectSimple(cols, RItem.table, where)
.query[NameAndNotes]

View File

@ -247,7 +247,11 @@ object RItem {
).update.run
} yield n
def updateFolder(itemId: Ident, coll: Ident, folderId: Option[Ident]): ConnectionIO[Int] =
def updateFolder(
itemId: Ident,
coll: Ident,
folderId: Option[Ident]
): ConnectionIO[Int] =
for {
t <- currentTime
n <- updateRow(