From 22fa1dba13ddeed8f466489a3e4d48425d184f37 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sun, 12 Jul 2020 13:44:11 +0200 Subject: [PATCH] Apply folder restriction to fulltext only search And update index when folder changes. --- .../docspell/backend/ops/OFulltext.scala | 7 +++- .../scala/docspell/backend/ops/OItem.scala | 3 ++ .../scala/docspell/ftsclient/FtsClient.scala | 20 +++++++++- .../scala/docspell/ftsclient/FtsQuery.scala | 8 ++++ .../scala/docspell/ftsclient/TextData.scala | 10 ++++- .../scala/docspell/ftssolr/DocIdResult.scala | 9 +++++ .../scala/docspell/ftssolr/JsonCodec.scala | 39 ++++++++++++++++++- .../scala/docspell/ftssolr/QueryData.scala | 30 +++++++++----- .../scala/docspell/ftssolr/SetFolder.scala | 5 +++ .../docspell/ftssolr/SolrFtsClient.scala | 11 ++++++ .../scala/docspell/ftssolr/SolrUpdate.scala | 28 +++++++++++++ .../scala/docspell/joex/fts/FtsWork.scala | 5 ++- .../joex/process/TextExtraction.scala | 10 ++++- .../docspell/store/queries/QAttachment.scala | 4 +- .../docspell/store/queries/QFolder.scala | 3 ++ .../scala/docspell/store/queries/QItem.scala | 12 +++--- .../scala/docspell/store/records/RItem.scala | 6 ++- 17 files changed, 183 insertions(+), 27 deletions(-) create mode 100644 modules/fts-solr/src/main/scala/docspell/ftssolr/DocIdResult.scala create mode 100644 modules/fts-solr/src/main/scala/docspell/ftssolr/SetFolder.scala diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala b/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala index 5e32ad5d..1f88d740 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OFulltext.scala @@ -9,7 +9,7 @@ import docspell.backend.ops.OItemSearch._ import docspell.common._ import docspell.ftsclient._ import docspell.store.Store -import docspell.store.queries.QItem +import docspell.store.queries.{QFolder, QItem} import docspell.store.queue.JobQueue import docspell.store.records.RJob @@ -101,12 +101,14 @@ object OFulltext { ftsQ.query, account.collective, Set.empty, + Set.empty, batch.limit, batch.offset, FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost) ) for { - ftsR <- fts.search(fq) + folders <- store.transact(QFolder.getMemberFolders(account)) + ftsR <- fts.search(fq.withFolders(folders)) ftsItems = ftsR.results.groupBy(_.itemId) select = ftsR.results.map(r => QItem.SelectedItem(r.itemId, r.score)).toSet itemsWithTags <- @@ -184,6 +186,7 @@ object OFulltext { ftsQ.query, q.account.collective, Set.empty, + Set.empty, 0, 0, FtsQuery.HighlightSetting(ftsQ.highlightPre, ftsQ.highlightPost) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala index f51e7bd3..d17b453b 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala @@ -142,6 +142,9 @@ object OItem { .transact(RItem.updateFolder(item, collective, folder)) .attempt .map(AddResult.fromUpdate) + .flatTap( + onSuccessIgnoreError(fts.updateFolder(logger, item, collective, folder)) + ) def setCorrOrg(item: Ident, org: Option[Ident], collective: Ident): F[AddResult] = store diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala index b3bdcf9a..dcf2d88f 100644 --- a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsClient.scala @@ -58,7 +58,7 @@ trait FtsClient[F[_]] { collective: Ident, name: String ): F[Unit] = - updateIndex(logger, TextData.item(itemId, collective, Some(name), None)) + updateIndex(logger, TextData.item(itemId, collective, None, Some(name), None)) def updateItemNotes( logger: Logger[F], @@ -68,7 +68,7 @@ trait FtsClient[F[_]] { ): F[Unit] = updateIndex( logger, - TextData.item(itemId, collective, None, Some(notes.getOrElse(""))) + TextData.item(itemId, collective, None, None, Some(notes.getOrElse(""))) ) def updateAttachmentName( @@ -84,12 +84,20 @@ trait FtsClient[F[_]] { itemId, attachId, collective, + None, Language.English, Some(name.getOrElse("")), None ) ) + def updateFolder( + logger: Logger[F], + itemId: Ident, + collective: Ident, + folder: Option[Ident] + ): F[Unit] + def removeItem(logger: Logger[F], itemId: Ident): F[Unit] def removeAttachment(logger: Logger[F], attachId: Ident): F[Unit] @@ -117,6 +125,14 @@ object FtsClient { def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = logger.warn("Full-text search is disabled!") + def updateFolder( + logger: Logger[F], + itemId: Ident, + collective: Ident, + folder: Option[Ident] + ): F[Unit] = + logger.warn("Full-text search is disabled!") + def indexData(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = logger.warn("Full-text search is disabled!") diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala index 785d2e20..f5027867 100644 --- a/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/FtsQuery.scala @@ -10,11 +10,16 @@ import docspell.common._ * Searches must only look for given collective and in the given list * of item ids, if it is non-empty. If the item set is empty, then * don't restrict the result in this way. + * + * The set of folders must be used to restrict the results only to + * items that have one of the folders set or no folder set. If the + * set is empty, the restriction does not apply. */ final case class FtsQuery( q: String, collective: Ident, items: Set[Ident], + folders: Set[Ident], limit: Int, offset: Int, highlight: FtsQuery.HighlightSetting @@ -22,6 +27,9 @@ final case class FtsQuery( def nextPage: FtsQuery = copy(offset = limit + offset) + + def withFolders(fs: Set[Ident]): FtsQuery = + copy(folders = fs) } object FtsQuery { diff --git a/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala index 625411ad..3f043599 100644 --- a/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala +++ b/modules/fts-client/src/main/scala/docspell/ftsclient/TextData.scala @@ -10,6 +10,8 @@ sealed trait TextData { def collective: Ident + def folder: Option[Ident] + final def fold[A](f: TextData.Attachment => A, g: TextData.Item => A): A = this match { case a: TextData.Attachment => f(a) @@ -23,6 +25,7 @@ object TextData { item: Ident, attachId: Ident, collective: Ident, + folder: Option[Ident], lang: Language, name: Option[String], text: Option[String] @@ -36,15 +39,17 @@ object TextData { item: Ident, attachId: Ident, collective: Ident, + folder: Option[Ident], lang: Language, name: Option[String], text: Option[String] ): TextData = - Attachment(item, attachId, collective, lang, name, text) + Attachment(item, attachId, collective, folder, lang, name, text) final case class Item( item: Ident, collective: Ident, + folder: Option[Ident], name: Option[String], notes: Option[String] ) extends TextData { @@ -56,8 +61,9 @@ object TextData { def item( item: Ident, collective: Ident, + folder: Option[Ident], name: Option[String], notes: Option[String] ): TextData = - Item(item, collective, name, notes) + Item(item, collective, folder, name, notes) } diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/DocIdResult.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/DocIdResult.scala new file mode 100644 index 00000000..a6070443 --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/DocIdResult.scala @@ -0,0 +1,9 @@ +package docspell.ftssolr + +import docspell.common._ + +final case class DocIdResult(ids: List[Ident]) { + + def toSetFolder(folder: Option[Ident]): List[SetFolder] = + ids.map(id => SetFolder(id, folder)) +} diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala index e532bf6b..4c639668 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/JsonCodec.scala @@ -1,5 +1,7 @@ package docspell.ftssolr +import cats.implicits._ + import docspell.common._ import docspell.ftsclient._ @@ -21,6 +23,7 @@ trait JsonCodec { (Field.id.name, enc(td.id)), (Field.itemId.name, enc(td.item)), (Field.collectiveId.name, enc(td.collective)), + (Field.folderId.name, td.folder.getOrElse(Ident.unsafe("")).asJson), (Field.attachmentId.name, enc(td.attachId)), (Field.attachmentName.name, Json.fromString(td.name.getOrElse(""))), (Field.discriminator.name, Json.fromString("attachment")) @@ -37,6 +40,7 @@ trait JsonCodec { (Field.id.name, enc(td.id)), (Field.itemId.name, enc(td.item)), (Field.collectiveId.name, enc(td.collective)), + (Field.folderId.name, td.folder.getOrElse(Ident.unsafe("")).asJson), (Field.itemName.name, Json.fromString(td.name.getOrElse(""))), (Field.itemNotes.name, Json.fromString(td.notes.getOrElse(""))), (Field.discriminator.name, Json.fromString("item")) @@ -49,6 +53,18 @@ trait JsonCodec { ): Encoder[TextData] = Encoder(_.fold(ae.apply, ie.apply)) + implicit def docIdResultsDecoder: Decoder[DocIdResult] = + new Decoder[DocIdResult] { + final def apply(c: HCursor): Decoder.Result[DocIdResult] = + c.downField("response") + .downField("docs") + .values + .getOrElse(Nil) + .toList + .traverse(_.hcursor.get[Ident](Field.id.name)) + .map(DocIdResult.apply) + } + implicit def ftsResultDecoder: Decoder[FtsResult] = new Decoder[FtsResult] { final def apply(c: HCursor): Decoder.Result[FtsResult] = @@ -89,6 +105,12 @@ trait JsonCodec { } yield md } + implicit def decodeEverythingToUnit: Decoder[Unit] = + new Decoder[Unit] { + final def apply(c: HCursor): Decoder.Result[Unit] = + Right(()) + } + implicit def identKeyEncoder: KeyEncoder[Ident] = new KeyEncoder[Ident] { override def apply(ident: Ident): String = ident.id @@ -129,9 +151,24 @@ trait JsonCodec { } } - implicit def textDataEncoder: Encoder[SetFields] = + implicit def setTextDataFieldsEncoder: Encoder[SetFields] = Encoder(_.td.fold(setAttachmentEncoder.apply, setItemEncoder.apply)) + implicit def setFolderEncoder(implicit + enc: Encoder[Option[Ident]] + ): Encoder[SetFolder] = + new Encoder[SetFolder] { + final def apply(td: SetFolder): Json = + Json.fromFields( + List( + (Field.id.name, td.docId.asJson), + ( + Field.folderId.name, + Map("set" -> td.folder.asJson).asJson + ) + ) + ) + } } object JsonCodec extends JsonCodec diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala index 1ca3e483..0c332630 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/QueryData.scala @@ -40,16 +40,26 @@ object QueryData { fields: List[Field], fq: FtsQuery ): QueryData = { - val q = sanitize(fq.q) - val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ") - val items = fq.items.map(_.id).mkString(" ") - val collQ = s"""${Field.collectiveId.name}:"${fq.collective.id}"""" - val filterQ = fq.items match { - case s if s.isEmpty => - collQ - case _ => - (collQ :: List(s"""${Field.itemId.name}:($items)""")).mkString(" AND ") - } + val q = sanitize(fq.q) + val extQ = search.map(f => s"${f.name}:($q)").mkString(" OR ") + val items = fq.items.map(_.id).mkString(" ") + val folders = fq.folders.map(_.id).mkString(" ") + val filterQ = List( + s"""${Field.collectiveId.name}:"${fq.collective.id}"""", + fq.items match { + case s if s.isEmpty => + "" + case _ => + s"""${Field.itemId.name}:($items)""" + }, + fq.folders match { + case s if s.isEmpty => + "" + case _ => + s"""${Field.folderId.name}:($folders) OR (*:* NOT ${Field.folderId.name}:*)""" + } + ).filterNot(_.isEmpty).map(t => s"($t)").mkString(" AND ") + QueryData( extQ, filterQ, diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SetFolder.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SetFolder.scala new file mode 100644 index 00000000..5dedb968 --- /dev/null +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SetFolder.scala @@ -0,0 +1,5 @@ +package docspell.ftssolr + +import docspell.common._ + +final case class SetFolder(docId: Ident, folder: Option[Ident]) diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala index c0994328..f8f7fd3b 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrFtsClient.scala @@ -29,6 +29,17 @@ final class SolrFtsClient[F[_]: Effect]( def updateIndex(logger: Logger[F], data: Stream[F, TextData]): F[Unit] = modifyIndex(logger, data)(solrUpdate.update) + def updateFolder( + logger: Logger[F], + itemId: Ident, + collective: Ident, + folder: Option[Ident] + ): F[Unit] = + logger.debug( + s"Update folder in solr index for coll/item ${collective.id}/${itemId.id}" + ) *> + solrUpdate.updateFolder(itemId, collective, folder) + def modifyIndex(logger: Logger[F], data: Stream[F, TextData])( f: List[TextData] => F[Unit] ): F[Unit] = diff --git a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala index 88089d51..616f7b16 100644 --- a/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala +++ b/modules/fts-solr/src/main/scala/docspell/ftssolr/SolrUpdate.scala @@ -1,7 +1,9 @@ package docspell.ftssolr import cats.effect._ +import cats.implicits._ +import docspell.common._ import docspell.ftsclient._ import docspell.ftssolr.JsonCodec._ @@ -11,6 +13,7 @@ import org.http4s._ import org.http4s.circe._ import org.http4s.client.Client import org.http4s.client.dsl.Http4sClientDsl +import org.http4s.circe.CirceEntityDecoder._ trait SolrUpdate[F[_]] { @@ -18,6 +21,8 @@ trait SolrUpdate[F[_]] { def update(tds: List[TextData]): F[Unit] + def updateFolder(itemId: Ident, collective: Ident, folder: Option[Ident]): F[Unit] + def delete(q: String, commitWithin: Option[Int]): F[Unit] } @@ -43,6 +48,29 @@ object SolrUpdate { client.expect[Unit](req) } + def updateFolder( + itemId: Ident, + collective: Ident, + folder: Option[Ident] + ): F[Unit] = { + val queryUrl = Uri.unsafeFromString(cfg.url.asString) / "query" + val q = QueryData( + "*:*", + s"${Field.itemId.name}:${itemId.id} AND ${Field.collectiveId.name}:${collective.id}", + Int.MaxValue, + 0, + List(Field.id), + Map.empty + ) + val searchReq = Method.POST(q.asJson, queryUrl) + for { + docIds <- client.expect[DocIdResult](searchReq) + sets = docIds.toSetFolder(folder) + req = Method.POST(sets.asJson, url) + _ <- client.expect[Unit](req) + } yield () + } + def delete(q: String, commitWithin: Option[Int]): F[Unit] = { val uri = commitWithin match { case Some(n) => diff --git a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala index fc3c77b3..43d34ae4 100644 --- a/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala +++ b/modules/joex/src/main/scala/docspell/joex/fts/FtsWork.scala @@ -80,6 +80,7 @@ object FtsWork { caa.item, caa.id, caa.collective, + caa.folder, caa.lang, caa.name, caa.content @@ -92,7 +93,9 @@ object FtsWork { ctx.logger, ctx.store .transact(QItem.allNameAndNotes(coll, ctx.cfg.migration.indexAllChunk * 5)) - .map(nn => TextData.item(nn.id, nn.collective, Option(nn.name), nn.notes)) + .map(nn => + TextData.item(nn.id, nn.collective, nn.folder, Option(nn.name), nn.notes) + ) ) ) ) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index 23024d4e..912507a5 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -33,8 +33,13 @@ object TextExtraction { ) _ <- ctx.logger.debug("Storing extracted texts") _ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1))) - idxItem = - TextData.item(item.item.id, ctx.args.meta.collective, item.item.name.some, None) + idxItem = TextData.item( + item.item.id, + ctx.args.meta.collective, + None, //folder + item.item.name.some, + None + ) _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*) dur <- start _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}") @@ -55,6 +60,7 @@ object TextExtraction { item.item.id, ra.id, collective, + None, //folder lang, ra.name, rm.content diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index 81c734c3..b0a479ce 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -145,6 +145,7 @@ object QAttachment { id: Ident, item: Ident, collective: Ident, + folder: Option[Ident], lang: Language, name: Option[String], content: Option[String] @@ -160,10 +161,11 @@ object QAttachment { val mContent = RAttachmentMeta.Columns.content.prefix("m") val iId = RItem.Columns.id.prefix("i") val iColl = RItem.Columns.cid.prefix("i") + val iFolder = RItem.Columns.folder.prefix("i") val cId = RCollective.Columns.id.prefix("c") val cLang = RCollective.Columns.language.prefix("c") - val cols = Seq(aId, aItem, iColl, cLang, aName, mContent) + val cols = Seq(aId, aItem, iColl, iFolder, cLang, aName, mContent) val from = RAttachment.table ++ fr"a INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ aId.is(mId) ++ fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ iId.is(aItem) ++ diff --git a/modules/store/src/main/scala/docspell/store/queries/QFolder.scala b/modules/store/src/main/scala/docspell/store/queries/QFolder.scala index 8f8b50a8..e613f6e9 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QFolder.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QFolder.scala @@ -270,6 +270,9 @@ object QFolder { ) } + def getMemberFolders(account: AccountId): ConnectionIO[Set[Ident]] = + findMemberFolderIds(account).query[Ident].to[Set] + private def findUserId(account: AccountId): ConnectionIO[Option[Ident]] = RUser.findByAccount(account).map(_.map(_.uid)) } diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index 99415125..bc6dc7ce 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -585,6 +585,7 @@ object QItem { final case class NameAndNotes( id: Ident, collective: Ident, + folder: Option[Ident], name: String, notes: Option[String] ) @@ -592,12 +593,13 @@ object QItem { coll: Option[Ident], chunkSize: Int ): Stream[ConnectionIO, NameAndNotes] = { - val iId = RItem.Columns.id - val iColl = RItem.Columns.cid - val iName = RItem.Columns.name - val iNotes = RItem.Columns.notes + val iId = RItem.Columns.id + val iColl = RItem.Columns.cid + val iName = RItem.Columns.name + val iFolder = RItem.Columns.folder + val iNotes = RItem.Columns.notes - val cols = Seq(iId, iColl, iName, iNotes) + val cols = Seq(iId, iColl, iFolder, iName, iNotes) val where = coll.map(cid => iColl.is(cid)).getOrElse(Fragment.empty) selectSimple(cols, RItem.table, where) .query[NameAndNotes] diff --git a/modules/store/src/main/scala/docspell/store/records/RItem.scala b/modules/store/src/main/scala/docspell/store/records/RItem.scala index ea40ec30..97b87d84 100644 --- a/modules/store/src/main/scala/docspell/store/records/RItem.scala +++ b/modules/store/src/main/scala/docspell/store/records/RItem.scala @@ -247,7 +247,11 @@ object RItem { ).update.run } yield n - def updateFolder(itemId: Ident, coll: Ident, folderId: Option[Ident]): ConnectionIO[Int] = + def updateFolder( + itemId: Ident, + coll: Ident, + folderId: Option[Ident] + ): ConnectionIO[Int] = for { t <- currentTime n <- updateRow(