mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 18:39:33 +00:00
Exclude tags without a category from classification; remove obsolete models
This commit is contained in:
parent
3e28ce1254
commit
cce8878898
@ -2,8 +2,12 @@ package docspell.joex.learn
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common.Ident
|
||||
import docspell.store.qb.DSL._
|
||||
import docspell.store.qb._
|
||||
import docspell.store.records.{RClassifierModel, RTag}
|
||||
|
||||
import doobie._
|
||||
|
||||
final class ClassifierName(val name: String) extends AnyVal
|
||||
@ -12,9 +16,6 @@ object ClassifierName {
|
||||
def apply(name: String): ClassifierName =
|
||||
new ClassifierName(name)
|
||||
|
||||
val noCategory: ClassifierName =
|
||||
apply("__docspell_no_category__")
|
||||
|
||||
val categoryPrefix = "tagcategory-"
|
||||
|
||||
def tagCategory(cat: String): ClassifierName =
|
||||
@ -34,7 +35,7 @@ object ClassifierName {
|
||||
|
||||
def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
||||
for {
|
||||
categories <- RTag.listCategories(coll, noCategory.name)
|
||||
categories <- RTag.listCategories(coll)
|
||||
models <- NonEmptyList.fromList(categories) match {
|
||||
case Some(nel) =>
|
||||
RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
||||
@ -42,4 +43,23 @@ object ClassifierName {
|
||||
List.empty[RClassifierModel].pure[ConnectionIO]
|
||||
}
|
||||
} yield models
|
||||
|
||||
/** Looks up classifier models of a collective that have no matching tag category.
  *
  * A model counts as orphaned when its name is not contained in the set of
  * names built by prepending `categoryPrefix` to every distinct, non-null
  * category of the collective's tags.
  *
  * The type parameter `F` is not used by the body; it is kept so the
  * signature stays unchanged for existing callers.
  *
  * @param coll the collective whose models and tags are compared
  * @return a database action yielding the models without a matching category
  */
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = {
  val m = RClassifierModel.as("m")
  val t = RTag.as("t")

  // Model names that belong to the tag categories currently in use.
  val namesInUse =
    Select(
      select(concat(lit(categoryPrefix), t.category.s)),
      from(t),
      t.cid === coll && t.category.isNotNull
    ).distinct

  // Every model of the collective whose name is not one of the names above.
  val orphans =
    Select(
      select(m.all),
      from(m),
      m.cid === coll && m.name.notIn(namesInUse)
    )

  orphans.build.query[RClassifierModel].to[List]
}
|
||||
|
||||
}
|
||||
|
@ -4,12 +4,13 @@ import cats.data.Kleisli
|
||||
import cats.data.OptionT
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.analysis.TextAnalyser
|
||||
import docspell.backend.ops.OCollective
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.records.{RClassifierSetting, RTag}
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting, RTag}
|
||||
|
||||
object LearnClassifierTask {
|
||||
val pageSep = " --n-- "
|
||||
@ -31,6 +32,7 @@ object LearnClassifierTask {
|
||||
_ <- OptionT.liftF(
|
||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||
)
|
||||
_ <- OptionT.liftF(clearObsoleteModels(ctx))
|
||||
} yield ())
|
||||
.getOrElseF(logInactiveWarning(ctx.logger))
|
||||
}
|
||||
@ -62,14 +64,27 @@ object LearnClassifierTask {
|
||||
): Task[F, A, Unit] =
|
||||
Task { ctx =>
|
||||
for {
|
||||
cats <- ctx.store.transact(
|
||||
RTag.listCategories(collective, ClassifierName.noCategory.name)
|
||||
)
|
||||
cats <- ctx.store.transact(RTag.listCategories(collective))
|
||||
task = learnTagCategory[F, A](analyser, collective, maxItems) _
|
||||
_ <- cats.map(task).traverse(_.run(ctx))
|
||||
} yield ()
|
||||
}
|
||||
|
||||
/** Removes classifier models that `ClassifierName.findOrphanTagModels` reports
  * for the collective of the current task.
  *
  * Steps, in order: load the orphaned models, log how many were found, delete
  * their database records in one statement, delete the file referenced by
  * each model from the file store, and finally log the count returned by the
  * record deletion.
  *
  * @param ctx the task context providing store, logger and task arguments
  */
private def clearObsoleteModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = {
  // Deletes the stored file that belongs to a single model.
  def removeFile(model: RClassifierModel): F[Unit] =
    ctx.store.bitpeace.delete(model.fileId.id).compile.drain

  val findOrphans =
    ctx.store.transact(ClassifierName.findOrphanTagModels(ctx.args.collective))

  findOrphans.flatMap { list =>
    for {
      _ <- ctx.logger.info(
        s"Found ${list.size} obsolete model files that are deleted now."
      )
      // Records are removed first; the files follow one by one.
      n <- ctx.store.transact(RClassifierModel.deleteAll(list.map(_.id)))
      _ <- list.traverse(removeFile)
      _ <- ctx.logger.debug(s"Deleted $n model files.")
    } yield ()
  }
}
|
||||
|
||||
private def findActiveSettings[F[_]: Sync](
|
||||
ctx: Context[F, Args],
|
||||
cfg: Config.TextAnalysis
|
||||
|
@ -1,7 +1,7 @@
|
||||
package docspell.store.records
|
||||
|
||||
import cats.effect._
|
||||
import cats.data.NonEmptyList
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common._
|
||||
@ -63,6 +63,17 @@ object RClassifierModel {
|
||||
else 0.pure[ConnectionIO]
|
||||
} yield n + k
|
||||
|
||||
/** Deletes the classifier model record whose id equals the given one.
  *
  * @param id the id of the record to delete
  * @return the `Int` produced by `DML.delete` (presumably the number of
  *         affected rows -- confirm against `DML`)
  */
def deleteById(id: Ident): ConnectionIO[Int] = {
  val matchesId = T.id === id
  DML.delete(T, matchesId)
}
|
||||
|
||||
/** Deletes all classifier model records whose id is contained in `ids`.
  *
  * An empty list results in no statement being built; the action then simply
  * yields `0`.
  *
  * @param ids the ids of the records to delete; may be empty
  * @return the `Int` produced by `DML.delete`, or `0` for an empty list
  */
def deleteAll(ids: List[Ident]): ConnectionIO[Int] =
  NonEmptyList
    .fromList(ids)
    .fold(0.pure[ConnectionIO])(nel => DML.delete(T, T.id.in(nel)))
|
||||
|
||||
def findByName(cid: Ident, name: String): ConnectionIO[Option[RClassifierModel]] =
|
||||
Select(select(T.all), from(T), T.cid === cid && T.name === name).build
|
||||
.query[RClassifierModel]
|
||||
@ -75,4 +86,5 @@ object RClassifierModel {
|
||||
Select(select(T.all), from(T), T.cid === cid && T.name.in(names)).build
|
||||
.query[RClassifierModel]
|
||||
.to[List]
|
||||
|
||||
}
|
||||
|
@ -148,11 +148,11 @@ object RTag {
|
||||
).orderBy(T.name.asc).build.query[RTag].to[List]
|
||||
}
|
||||
|
||||
def listCategories(coll: Ident, fallback: String): ConnectionIO[List[String]] =
|
||||
def listCategories(coll: Ident): ConnectionIO[List[String]] =
|
||||
Select(
|
||||
coalesce(T.category.s, lit(fallback)).s,
|
||||
T.category.s,
|
||||
from(T),
|
||||
T.cid === coll
|
||||
T.cid === coll && T.category.isNotNull
|
||||
).distinct.build.query[String].to[List]
|
||||
|
||||
def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] =
|
||||
|
Loading…
x
Reference in New Issue
Block a user