Apply folder restriction to fulltext-only search

Also update the full-text index when an item's folder changes.
This commit is contained in:
Eike Kettner
2020-07-12 13:44:11 +02:00
parent aeba4ba913
commit 22fa1dba13
17 changed files with 183 additions and 27 deletions

View File

@ -9,7 +9,7 @@ import docspell.backend.ops.OItemSearch._
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.store.Store import docspell.store.Store
import docspell.store.queries.QItem import docspell.store.queries.{QFolder, QItem}
import docspell.store.queue.JobQueue import docspell.store.queue.JobQueue
import docspell.store.records.RJob import docspell.store.records.RJob
@ -101,12 +101,14 @@ object OFulltext {
ftsQ.query, ftsQ.query,
account.collective, account.collective,
Set.empty, Set.empty,
Set.empty,
batch.limit, batch.limit,
batch.offset, batch.offset,
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost) FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)
) )
for { for {
ftsR <- fts.search(fq) folders <- store.transact(QFolder.getMemberFolders(account))
ftsR <- fts.search(fq.withFolders(folders))
ftsItems = ftsR.results.groupBy(_.itemId) ftsItems = ftsR.results.groupBy(_.itemId)
select = ftsR.results.map(r => QItem.SelectedItem(r.itemId, r.score)).toSet select = ftsR.results.map(r => QItem.SelectedItem(r.itemId, r.score)).toSet
itemsWithTags <- itemsWithTags <-
@ -184,6 +186,7 @@ object OFulltext {
ftsQ.query, ftsQ.query,
q.account.collective, q.account.collective,
Set.empty, Set.empty,
Set.empty,
0, 0,
0, 0,
FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost) FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost)

View File

@ -142,6 +142,9 @@ object OItem {
.transact(RItem.updateFolder(item, collective, folder)) .transact(RItem.updateFolder(item, collective, folder))
.attempt .attempt
.map(AddResult.fromUpdate) .map(AddResult.fromUpdate)
.flatTap(
onSuccessIgnoreError(fts.updateFolder(logger, item, collective, folder))
)
def setCorrOrg(item: Ident, org: Option[Ident], collective: Ident): F[AddResult] = def setCorrOrg(item: Ident, org: Option[Ident], collective: Ident): F[AddResult] =
store store

View File

@ -58,7 +58,7 @@ trait FtsClient[F[_]] {
collective: Ident, collective: Ident,
name: String name: String
): F[Unit] = ): F[Unit] =
updateIndex(logger, TextData.item(itemId, collective, Some(name), None)) updateIndex(logger, TextData.item(itemId, collective, None, Some(name), None))
def updateItemNotes( def updateItemNotes(
logger: Logger[F], logger: Logger[F],
@ -68,7 +68,7 @@ trait FtsClient[F[_]] {
): F[Unit] = ): F[Unit] =
updateIndex( updateIndex(
logger, logger,
TextData.item(itemId, collective, None, Some(notes.getOrElse(""))) TextData.item(itemId, collective, None, None, Some(notes.getOrElse("")))
) )
def updateAttachmentName( def updateAttachmentName(
@ -84,12 +84,20 @@ trait FtsClient[F[_]] {
itemId, itemId,
attachId, attachId,
collective, collective,
None,
Language.English, Language.English,
Some(name.getOrElse("")), Some(name.getOrElse("")),
None None
) )
) )
/** Updates the folder of the given item in the full-text index.
  *
  * The item is identified by `itemId` within `collective`. `folder`
  * is optional; presumably `None` means the item no longer has a
  * folder -- confirm against the concrete implementations.
  */
def updateFolder(
logger: Logger[F],
itemId: Ident,
collective: Ident,
folder: Option[Ident]
): F[Unit]
def removeItem(logger: Logger[F], itemId: Ident): F[Unit] def removeItem(logger: Logger[F], itemId: Ident): F[Unit]
def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit]
@ -117,6 +125,14 @@ object FtsClient {
def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
logger.warn("Full-text search is disabled!") logger.warn("Full-text search is disabled!")
/** Variant for the disabled client: ignores all arguments except
  * the logger and only emits a warning.
  */
def updateFolder(
    logger: Logger[F],
    itemId: Ident,
    collective: Ident,
    folder: Option[Ident]
): F[Unit] = {
  val notice = "Full-text search is disabled!"
  logger.warn(notice)
}
def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
logger.warn("Full-text search is disabled!") logger.warn("Full-text search is disabled!")

View File

@ -10,11 +10,16 @@ import docspell.common._
* Searches must only look for given collective and in the given list * Searches must only look for given collective and in the given list
* of item ids, if it is non-empty. If the item set is empty, then * of item ids, if it is non-empty. If the item set is empty, then
* don't restrict the result in this way. * don't restrict the result in this way.
*
* The set of folders must be used to restrict the results only to
* items that have one of the folders set or no folder set. If the
* set is empty, the restriction does not apply.
*/ */
final case class FtsQuery( final case class FtsQuery(
q: String, q: String,
collective: Ident, collective: Ident,
items: Set[Ident], items: Set[Ident],
folders: Set[Ident],
limit: Int, limit: Int,
offset: Int, offset: Int,
highlight: FtsQuery.HighlightSetting highlight: FtsQuery.HighlightSetting
@ -22,6 +27,9 @@ final case class FtsQuery(
def nextPage: FtsQuery = def nextPage: FtsQuery =
copy(offset = limit + offset) copy(offset = limit + offset)
/** Returns a copy of this query whose `folders` set is replaced by
  * `fs`; all other fields are kept.
  */
def withFolders(fs: Set[Ident]): FtsQuery = {
  val restricted = this.copy(folders = fs)
  restricted
}
} }
object FtsQuery { object FtsQuery {

View File

@ -10,6 +10,8 @@ sealed trait TextData {
def collective: Ident def collective: Ident
def folder: Option[Ident]
final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A = final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A =
this match { this match {
case a: TextData.Attachment => f(a) case a: TextData.Attachment => f(a)
@ -23,6 +25,7 @@ object TextData {
item: Ident, item: Ident,
attachId: Ident, attachId: Ident,
collective: Ident, collective: Ident,
folder: Option[Ident],
lang: Language, lang: Language,
name: Option[String], name: Option[String],
text: Option[String] text: Option[String]
@ -36,15 +39,17 @@ object TextData {
item: Ident, item: Ident,
attachId: Ident, attachId: Ident,
collective: Ident, collective: Ident,
folder: Option[Ident],
lang: Language, lang: Language,
name: Option[String], name: Option[String],
text: Option[String] text: Option[String]
): TextData = ): TextData =
Attachment(item, attachId, collective, lang, name, text) Attachment(item, attachId, collective, folder, lang, name, text)
final case class Item( final case class Item(
item: Ident, item: Ident,
collective: Ident, collective: Ident,
folder: Option[Ident],
name: Option[String], name: Option[String],
notes: Option[String] notes: Option[String]
) extends TextData { ) extends TextData {
@ -56,8 +61,9 @@ object TextData {
def item( def item(
item: Ident, item: Ident,
collective: Ident, collective: Ident,
folder: Option[Ident],
name: Option[String], name: Option[String],
notes: Option[String] notes: Option[String]
): TextData = ): TextData =
Item(item, collective, name, notes) Item(item, collective, folder, name, notes)
} }

View File

@ -0,0 +1,9 @@
package docspell.ftssolr
import docspell.common._
/** A list of document ids. */
final case class DocIdResult(ids: List[Ident]) {

  /** Creates one [[SetFolder]] per contained id, each carrying the
    * same `folder` value.
    */
  def toSetFolder(folder: Option[Ident]): List[SetFolder] =
    for (docId <- ids) yield SetFolder(docId, folder)
}

View File

@ -1,5 +1,7 @@
package docspell.ftssolr package docspell.ftssolr
import cats.implicits._
import docspell.common._ import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
@ -21,6 +23,7 @@ trait JsonCodec {
(Field.id.name, enc(td.id)), (Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)), (Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)), (Field.collectiveId.name, enc(td.collective)),
(Field.folderId.name, td.folder.getOrElse(Ident.unsafe("")).asJson),
(Field.attachmentId.name, enc(td.attachId)), (Field.attachmentId.name, enc(td.attachId)),
(Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))), (Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))),
(Field.discriminator.name, Json.fromString("attachment")) (Field.discriminator.name, Json.fromString("attachment"))
@ -37,6 +40,7 @@ trait JsonCodec {
(Field.id.name, enc(td.id)), (Field.id.name, enc(td.id)),
(Field.itemId.name, enc(td.item)), (Field.itemId.name, enc(td.item)),
(Field.collectiveId.name, enc(td.collective)), (Field.collectiveId.name, enc(td.collective)),
(Field.folderId.name, td.folder.getOrElse(Ident.unsafe("")).asJson),
(Field.itemName.name, Json.fromString(td.name.getOrElse(""))), (Field.itemName.name, Json.fromString(td.name.getOrElse(""))),
(Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))), (Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))),
(Field.discriminator.name, Json.fromString("item")) (Field.discriminator.name, Json.fromString("item"))
@ -49,6 +53,18 @@ trait JsonCodec {
): Encoder[TextData] = ): Encoder[TextData] =
Encoder(_.fold(ae.apply, ie.apply)) Encoder(_.fold(ae.apply, ie.apply))
/** Decodes a [[DocIdResult]] by reading the id field of every
  * element found under `response.docs`.
  *
  * If `response.docs` cannot be read as an array, the result is an
  * empty list. If any element's id fails to decode, the whole
  * decoding fails.
  */
implicit def docIdResultsDecoder: Decoder[DocIdResult] =
  Decoder.instance { c =>
    val docs    = c.downField("response").downField("docs").values
    val entries = docs.getOrElse(Nil).toList
    entries
      .traverse(doc => doc.hcursor.get[Ident](Field.id.name))
      .map(ids => DocIdResult(ids))
  }
implicit def ftsResultDecoder: Decoder[FtsResult] = implicit def ftsResultDecoder: Decoder[FtsResult] =
new Decoder[FtsResult] { new Decoder[FtsResult] {
final def apply(c: HCursor): Decoder.Result[FtsResult] = final def apply(c: HCursor): Decoder.Result[FtsResult] =
@ -89,6 +105,12 @@ trait JsonCodec {
} yield md } yield md
} }
/** A decoder that never inspects its input and always succeeds
  * with `()`.
  */
implicit def decodeEverythingToUnit: Decoder[Unit] =
  Decoder.instance(_ => Right(()))
implicit def identKeyEncoder: KeyEncoder[Ident] = implicit def identKeyEncoder: KeyEncoder[Ident] =
new KeyEncoder[Ident] { new KeyEncoder[Ident] {
override def apply(ident: Ident): String = ident.id override def apply(ident: Ident): String = ident.id
@ -129,9 +151,24 @@ trait JsonCodec {
} }
} }
implicit def textDataEncoder: Encoder[SetFields] = implicit def setTextDataFieldsEncoder: Encoder[SetFields] =
Encoder(_.td.fold(setAttachmentEncoder.apply, setItemEncoder.apply)) Encoder(_.td.fold(setAttachmentEncoder.apply, setItemEncoder.apply))
/** Encodes a [[SetFolder]] as a JSON object with two fields: the
  * document id, and the folder id wrapped in an object under the
  * key `set`.
  *
  * NOTE(review): the `set` wrapper looks like Solr's atomic-update
  * modifier -- confirm against the Solr documentation.
  */
implicit def setFolderEncoder(implicit
    enc: Encoder[Option[Ident]]
): Encoder[SetFolder] =
  Encoder.instance { sf =>
    val setFolder = Json.obj("set" -> sf.folder.asJson)
    Json.obj(
      Field.id.name       -> sf.docId.asJson,
      Field.folderId.name -> setFolder
    )
  }
} }
object JsonCodec extends JsonCodec object JsonCodec extends JsonCodec

View File

@ -40,16 +40,26 @@ object QueryData {
fields: List[Field], fields: List[Field],
fq: FtsQuery fq: FtsQuery
): QueryData = { ): QueryData = {
val q = sanitize(fq.q) val q = sanitize(fq.q)
val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ") val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ")
val items = fq.items.map(_.id).mkString(" ") val items = fq.items.map(_.id).mkString(" ")
val collQ = s"""${Field.collectiveId.name}:"${fq.collective.id}"""" val folders = fq.folders.map(_.id).mkString(" ")
val filterQ = fq.items match { val filterQ = List(
case s if s.isEmpty => s"""${Field.collectiveId.name}:"${fq.collective.id}"""",
collQ fq.items match {
case _ => case s if s.isEmpty =>
(collQ :: List(s"""${Field.itemId.name}:($items)""")).mkString(" AND ") ""
} case _ =>
s"""${Field.itemId.name}:($items)"""
},
fq.folders match {
case s if s.isEmpty =>
""
case _ =>
s"""${Field.folderId.name}:($folders) OR (*:* NOT ${Field.folderId.name}:*)"""
}
).filterNot(_.isEmpty).map(t => s"($t)").mkString(" AND ")
QueryData( QueryData(
extQ, extQ,
filterQ, filterQ,

View File

@ -0,0 +1,5 @@
package docspell.ftssolr
import docspell.common._
/** Pairs a document id with the (optional) folder to set on that
  * document.
  */
final case class SetFolder(
    docId: Ident,
    folder: Option[Ident]
)

View File

@ -29,6 +29,17 @@ final class SolrFtsClient[F[_]: Effect](
def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] =
modifyIndex(logger, data)(solrUpdate.update) modifyIndex(logger, data)(solrUpdate.update)
/** Writes a debug message naming the collective and item, then
  * delegates to `solrUpdate.updateFolder`. The given `logger` is
  * used only for that message.
  */
def updateFolder(
    logger: Logger[F],
    itemId: Ident,
    collective: Ident,
    folder: Option[Ident]
): F[Unit] = {
  val announce = logger.debug(
    s"Update folder in solr index for coll/item ${collective.id}/${itemId.id}"
  )
  val applyChange = solrUpdate.updateFolder(itemId, collective, folder)
  announce *> applyChange
}
def modifyIndex(logger: Logger[F], data: Stream[F, TextData])( def modifyIndex(logger: Logger[F], data: Stream[F, TextData])(
f: List[TextData] => F[Unit] f: List[TextData] => F[Unit]
): F[Unit] = ): F[Unit] =

View File

@ -1,7 +1,9 @@
package docspell.ftssolr package docspell.ftssolr
import cats.effect._ import cats.effect._
import cats.implicits._
import docspell.common._
import docspell.ftsclient._ import docspell.ftsclient._
import docspell.ftssolr.JsonCodec._ import docspell.ftssolr.JsonCodec._
@ -11,6 +13,7 @@ import org.http4s._
import org.http4s.circe._ import org.http4s.circe._
import org.http4s.client.Client import org.http4s.client.Client
import org.http4s.client.dsl.Http4sClientDsl import org.http4s.client.dsl.Http4sClientDsl
import org.http4s.circe.CirceEntityDecoder._
trait SolrUpdate[F[_]] { trait SolrUpdate[F[_]] {
@ -18,6 +21,8 @@ trait SolrUpdate[F[_]] {
def update(tds: List[TextData]): F[Unit] def update(tds: List[TextData]): F[Unit]
def updateFolder(itemId: Ident, collective: Ident, folder: Option[Ident]): F[Unit]
def delete(q: String, commitWithin: Option[Int]): F[Unit] def delete(q: String, commitWithin: Option[Int]): F[Unit]
} }
@ -43,6 +48,29 @@ object SolrUpdate {
client.expect[Unit](req) client.expect[Unit](req)
} }
/** Sets the folder on every index document that belongs to the
  * given item.
  *
  * First queries the `query` endpoint for the ids of all documents
  * matching the item id and collective id, then posts one
  * [[SetFolder]] per returned id to `url`.
  *
  * @param itemId     the item whose documents are updated
  * @param collective the collective the item must belong to
  * @param folder     the folder value to send for each document
  */
def updateFolder(
    itemId: Ident,
    collective: Ident,
    folder: Option[Ident]
): F[Unit] = {
  val queryUrl = Uri.unsafeFromString(cfg.url.asString) / "query"

  // Quote both ids so characters with a meaning in the Solr query
  // syntax (e.g. a leading '-') are taken literally; this matches how
  // the collective id is quoted when building the search filter.
  val filter =
    s"""${Field.itemId.name}:"${itemId.id}" AND ${Field.collectiveId.name}:"${collective.id}""""

  val q = QueryData(
    "*:*",
    filter,
    // presumably the row limit: ask for all matching documents -- TODO confirm
    Int.MaxValue,
    0,
    List(Field.id),
    Map.empty
  )
  val searchReq = Method.POST(q.asJson, queryUrl)
  for {
    docIds <- client.expect[DocIdResult](searchReq)
    sets = docIds.toSetFolder(folder)
    req  = Method.POST(sets.asJson, url)
    _ <- client.expect[Unit](req)
  } yield ()
}
def delete(q: String, commitWithin: Option[Int]): F[Unit] = { def delete(q: String, commitWithin: Option[Int]): F[Unit] = {
val uri = commitWithin match { val uri = commitWithin match {
case Some(n) => case Some(n) =>

View File

@ -80,6 +80,7 @@ object FtsWork {
caa.item, caa.item,
caa.id, caa.id,
caa.collective, caa.collective,
caa.folder,
caa.lang, caa.lang,
caa.name, caa.name,
caa.content caa.content
@ -92,7 +93,9 @@ object FtsWork {
ctx.logger, ctx.logger,
ctx.store ctx.store
.transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5)) .transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5))
.map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes)) .map(nn =>
TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes)
)
) )
) )
) )

View File

@ -33,8 +33,13 @@ object TextExtraction {
) )
_ <- ctx.logger.debug("Storing extracted texts") _ <- ctx.logger.debug("Storing extracted texts")
_ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1))) _ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1)))
idxItem = idxItem = TextData.item(
TextData.item(item.item.id, ctx.args.meta.collective, item.item.name.some, None) item.item.id,
ctx.args.meta.collective,
None, //folder
item.item.name.some,
None
)
_ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*) _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*)
dur <- start dur <- start
_ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}") _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}")
@ -55,6 +60,7 @@ object TextExtraction {
item.item.id, item.item.id,
ra.id, ra.id,
collective, collective,
None, //folder
lang, lang,
ra.name, ra.name,
rm.content rm.content

View File

@ -145,6 +145,7 @@ object QAttachment {
id: Ident, id: Ident,
item: Ident, item: Ident,
collective: Ident, collective: Ident,
folder: Option[Ident],
lang: Language, lang: Language,
name: Option[String], name: Option[String],
content: Option[String] content: Option[String]
@ -160,10 +161,11 @@ object QAttachment {
val mContent = RAttachmentMeta.Columns.content.prefix("m") val mContent = RAttachmentMeta.Columns.content.prefix("m")
val iId = RItem.Columns.id.prefix("i") val iId = RItem.Columns.id.prefix("i")
val iColl = RItem.Columns.cid.prefix("i") val iColl = RItem.Columns.cid.prefix("i")
val iFolder = RItem.Columns.folder.prefix("i")
val cId = RCollective.Columns.id.prefix("c") val cId = RCollective.Columns.id.prefix("c")
val cLang = RCollective.Columns.language.prefix("c") val cLang = RCollective.Columns.language.prefix("c")
val cols = Seq(aId, aItem, iColl, cLang, aName, mContent) val cols = Seq(aId, aItem, iColl, iFolder, cLang, aName, mContent)
val from = RAttachment.table ++ fr"a INNER JOIN" ++ val from = RAttachment.table ++ fr"a INNER JOIN" ++
RAttachmentMeta.table ++ fr"m ON" ++ aId.is(mId) ++ RAttachmentMeta.table ++ fr"m ON" ++ aId.is(mId) ++
fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) ++ fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) ++

View File

@ -270,6 +270,9 @@ object QFolder {
) )
} }
/** Runs the `findMemberFolderIds` query for the given account and
  * collects the resulting ids into a set.
  */
def getMemberFolders(account: AccountId): ConnectionIO[Set[Ident]] = {
  val folderIds = findMemberFolderIds(account)
  folderIds.query[Ident].to[Set]
}
private def findUserId(account: AccountId): ConnectionIO[Option[Ident]] = private def findUserId(account: AccountId): ConnectionIO[Option[Ident]] =
RUser.findByAccount(account).map(_.map(_.uid)) RUser.findByAccount(account).map(_.map(_.uid))
} }

View File

@ -585,6 +585,7 @@ object QItem {
final case class NameAndNotes( final case class NameAndNotes(
id: Ident, id: Ident,
collective: Ident, collective: Ident,
folder: Option[Ident],
name: String, name: String,
notes: Option[String] notes: Option[String]
) )
@ -592,12 +593,13 @@ object QItem {
coll: Option[Ident], coll: Option[Ident],
chunkSize: Int chunkSize: Int
): Stream[ConnectionIO, NameAndNotes] = { ): Stream[ConnectionIO, NameAndNotes] = {
val iId = RItem.Columns.id val iId = RItem.Columns.id
val iColl = RItem.Columns.cid val iColl = RItem.Columns.cid
val iName = RItem.Columns.name val iName = RItem.Columns.name
val iNotes = RItem.Columns.notes val iFolder = RItem.Columns.folder
val iNotes = RItem.Columns.notes
val cols = Seq(iId, iColl, iName, iNotes) val cols = Seq(iId, iColl, iFolder, iName, iNotes)
val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty) val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty)
selectSimple(cols, RItem.table, where) selectSimple(cols, RItem.table, where)
.query[NameAndNotes] .query[NameAndNotes]

View File

@ -247,7 +247,11 @@ object RItem {
).update.run ).update.run
} yield n } yield n
def updateFolder(itemId: Ident, coll: Ident, folderId: Option[Ident]): ConnectionIO[Int] = def updateFolder(
itemId: Ident,
coll: Ident,
folderId: Option[Ident]
): ConnectionIO[Int] =
for { for {
t <- currentTime t <- currentTime
n <- updateRow( n <- updateRow(