Use keywords in pdfs to search for existing tags

During processing, keywords stored in the PDF metadata are looked up
in the tag database, and any existing tags that match are associated
with the item.

See #175
This commit is contained in:
Eike Kettner
2020-07-19 00:28:04 +02:00
parent da68405f9b
commit 209c068436
14 changed files with 184 additions and 64 deletions

View File

@ -53,6 +53,9 @@ case class Column(name: String, ns: String = "", alias: String = "") {
/** Builds an `IN` condition for this column from a non-empty list of values.
  *
  * Every value is interpolated into its own fragment; the resulting list is
  * delegated to another `isIn` overload (not shown here) that accepts fragments.
  */
def isIn[A: Put](values: NonEmptyList[A]): Fragment = {
  val params = values.toList.map(v => sql"$v")
  isIn(params)
}
/** Builds a condition of the form `lower(<column>) IN (<values>)`.
  *
  * Only the column is wrapped in `lower(..)`; the values are interpolated
  * unchanged, so callers wanting a case-insensitive match must pass values
  * that are already lower-cased.
  */
def isLowerIn[A: Put](values: NonEmptyList[A]): Fragment = {
  val prefix    = fr"lower(" ++ f ++ fr") IN ("
  val valueList = commas(values.toList.map(v => sql"$v"))
  prefix ++ valueList ++ fr")"
}
/** Builds `<column> IN (<frag>)`, where `frag` is inserted verbatim between
  * the parentheses.
  */
def isIn(frag: Fragment): Fragment = {
  val opening = f ++ fr"IN ("
  opening ++ frag ++ fr")"
}

View File

@ -314,6 +314,9 @@ object RItem {
/** Loads the item with the given id, restricted to the given collective.
  *
  * Selects all columns and yields an `Option[RItem]` via the query's `option`.
  */
def findByIdAndCollective(itemId: Ident, coll: Ident): ConnectionIO[Option[RItem]] = {
  val where = and(id.is(itemId), cid.is(coll))
  selectSimple(all, table, where).query[RItem].option
}
/** Looks up an item by id and collective, selecting only the id column.
  *
  * Uses the same condition as a full lookup by id and collective, but decodes
  * just the `Ident` rather than a whole `RItem`.
  */
def checkByIdAndCollective(itemId: Ident, coll: Ident): ConnectionIO[Option[Ident]] = {
  val where = and(id.is(itemId), cid.is(coll))
  selectSimple(Seq(id), table, where).query[Ident].option
}
def removeFolder(folderId: Ident): ConnectionIO[Int] = {
val empty: Option[Ident] = None
updateRow(table, folder.is(folderId), folder.setTo(empty)).update.run

View File

@ -1,5 +1,8 @@
package docspell.store.records
import cats.data.NonEmptyList
import cats.implicits._
import docspell.common._
import docspell.store.impl.Implicits._
import docspell.store.impl._
@ -101,6 +104,21 @@ object RTag {
) ++ orderBy(name.prefix("t").asc)).query[RTag].to[Vector]
}
/** Finds all tags of a collective whose id or name matches one of the given
  * strings.
  *
  * Every input string is tried both ways:
  *  - as a tag id, if it parses via `Ident.fromString` (strings that fail to
  *    parse are dropped from the id condition), and
  *  - as a tag name, compared case-insensitively by lower-casing the input and
  *    matching it against `lower(name)` in SQL.
  *
  * @param nameOrIds candidate tag names or ids; may be empty
  * @param coll      the collective the tags must belong to
  * @return the matching tags, or an empty vector without querying the database
  *         when no condition could be built (i.e. `nameOrIds` is empty)
  */
def findAllByNameOrId(
    nameOrIds: List[String],
    coll: Ident
): ConnectionIO[Vector[RTag]] = {
  // Only strings that are valid identifiers can possibly be tag ids.
  val idCond =
    NonEmptyList
      .fromList(nameOrIds.flatMap(s => Ident.fromString(s).toOption))
      .map(ids => tid.isIn(ids))

  // Lower-case with Locale.ROOT: the no-arg `toLowerCase` uses the JVM default
  // locale, which (e.g. under a Turkish locale) maps "I" to a dotless "ı" and
  // would then never match the database's `lower(name)`.
  val nameCond =
    NonEmptyList
      .fromList(nameOrIds.map(_.toLowerCase(java.util.Locale.ROOT)))
      .map(ns => name.isLowerIn(ns))

  val cond = idCond.toSeq ++ nameCond.toSeq
  if (cond.isEmpty) Vector.empty[RTag].pure[ConnectionIO]
  else selectSimple(all, table, and(cid.is(coll), or(cond))).query[RTag].to[Vector]
}
/** Deletes the tag with the given id, restricted to the given collective.
  *
  * @return the result of running the delete statement as an update
  */
def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] = {
  val where = and(tid.is(tagId), cid.is(coll))
  deleteFrom(table, where).update.run
}
}

View File

@ -1,5 +1,6 @@
package docspell.store.records
import cats.data.NonEmptyList
import cats.implicits._
import docspell.common._
@ -43,4 +44,28 @@ object RTagItem {
/** Loads all tag-item associations that belong to the given item. */
def findByItem(item: Ident): ConnectionIO[Vector[RTagItem]] = {
  val query = selectSimple(all, table, itemId.is(item)).query[RTagItem]
  query.to[Vector]
}
/** Loads the associations between the given item and any of the given tags.
  *
  * When `tags` is empty no query is issued and an empty vector is returned.
  */
def findAllIn(item: Ident, tags: Seq[Ident]): ConnectionIO[Vector[RTagItem]] =
  NonEmptyList
    .fromList(tags.toList)
    .fold(Vector.empty[RTagItem].pure[ConnectionIO]) { tagIds =>
      val where = and(itemId.is(item), tagId.isIn(tagIds))
      selectSimple(all, table, where).query[RTagItem].to[Vector]
    }
/** Inserts one association row per given tag for the given item.
  *
  * A fresh random id is generated for each association. When `tags` is empty
  * nothing is inserted and `0` is returned.
  *
  * @return the result of running the insert statement as an update
  */
def setAllTags(item: Ident, tags: Seq[Ident]): ConnectionIO[Int] =
  tags.toList match {
    case Nil =>
      0.pure[ConnectionIO]
    case tagList =>
      tagList
        .traverse(t => Ident.randomId[ConnectionIO].map(rid => RTagItem(rid, item, t)))
        .flatMap { entities =>
          val rows = entities.map(v => fr"${v.tagItemId},${v.itemId},${v.tagId}")
          insertRows(table, all, rows).update.run
        }
  }
}