mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 10:28:27 +00:00
Control what tag categories to use for auto-tagging
This commit is contained in:
@ -4,9 +4,7 @@ import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common.Ident
|
||||
import docspell.store.qb.DSL._
|
||||
import docspell.store.qb._
|
||||
import docspell.store.records.{RClassifierModel, RTag}
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting}
|
||||
|
||||
import doobie._
|
||||
|
||||
@ -16,7 +14,7 @@ object ClassifierName {
|
||||
def apply(name: String): ClassifierName =
|
||||
new ClassifierName(name)
|
||||
|
||||
val categoryPrefix = "tagcategory-"
|
||||
private val categoryPrefix = "tagcategory-"
|
||||
|
||||
def tagCategory(cat: String): ClassifierName =
|
||||
apply(s"${categoryPrefix}${cat}")
|
||||
@ -35,7 +33,7 @@ object ClassifierName {
|
||||
|
||||
def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
||||
for {
|
||||
categories <- RTag.listCategories(coll)
|
||||
categories <- RClassifierSetting.getActiveCategories(coll)
|
||||
models <- NonEmptyList.fromList(categories) match {
|
||||
case Some(nel) =>
|
||||
RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
||||
@ -44,22 +42,20 @@ object ClassifierName {
|
||||
}
|
||||
} yield models
|
||||
|
||||
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = {
|
||||
val model = RClassifierModel.as("m")
|
||||
val tag = RTag.as("t")
|
||||
val sql =
|
||||
Select(
|
||||
select(model.all),
|
||||
from(model),
|
||||
model.cid === coll && model.name.notIn(
|
||||
Select(
|
||||
select(concat(lit(categoryPrefix), tag.category.s)),
|
||||
from(tag),
|
||||
tag.cid === coll && tag.category.isNotNull
|
||||
).distinct
|
||||
)
|
||||
).build
|
||||
sql.query[RClassifierModel].to[List]
|
||||
}
|
||||
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
||||
for {
|
||||
cats <- RClassifierSetting.getActiveCategories(coll)
|
||||
allModels = RClassifierModel.findAllByQuery(coll, s"${categoryPrefix}%")
|
||||
result <- NonEmptyList.fromList(cats) match {
|
||||
case Some(nel) =>
|
||||
allModels.flatMap(all =>
|
||||
RClassifierModel
|
||||
.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
||||
.map(active => all.diff(active))
|
||||
)
|
||||
case None =>
|
||||
allModels
|
||||
}
|
||||
} yield result
|
||||
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ import docspell.backend.ops.OCollective
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting, RTag}
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting}
|
||||
|
||||
object LearnClassifierTask {
|
||||
val pageSep = " --n-- "
|
||||
@ -26,15 +26,23 @@ object LearnClassifierTask {
|
||||
analyser: TextAnalyser[F]
|
||||
): Task[F, Args, Unit] =
|
||||
Task { ctx =>
|
||||
(for {
|
||||
sett <- findActiveSettings[F](ctx, cfg)
|
||||
maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
|
||||
_ <- OptionT.liftF(
|
||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||
)
|
||||
_ <- OptionT.liftF(clearObsoleteModels(ctx))
|
||||
} yield ())
|
||||
.getOrElseF(logInactiveWarning(ctx.logger))
|
||||
val learnTags =
|
||||
for {
|
||||
sett <- findActiveSettings[F](ctx, cfg)
|
||||
maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
|
||||
_ <- OptionT.liftF(
|
||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||
)
|
||||
} yield ()
|
||||
|
||||
// learn classifier models from active tag categories
|
||||
learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *>
|
||||
// delete classifier model files for categories that have been removed
|
||||
clearObsoleteTagModels(ctx) *>
|
||||
// when tags are deleted, categories may get removed. fix the json array
|
||||
ctx.store
|
||||
.transact(RClassifierSetting.fixCategoryList(ctx.args.collective))
|
||||
.map(_ => ())
|
||||
}
|
||||
|
||||
def learnTagCategory[F[_]: Sync: ContextShift, A](
|
||||
@ -64,13 +72,13 @@ object LearnClassifierTask {
|
||||
): Task[F, A, Unit] =
|
||||
Task { ctx =>
|
||||
for {
|
||||
cats <- ctx.store.transact(RTag.listCategories(collective))
|
||||
cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective))
|
||||
task = learnTagCategory[F, A](analyser, collective, maxItems) _
|
||||
_ <- cats.map(task).traverse(_.run(ctx))
|
||||
} yield ()
|
||||
}
|
||||
|
||||
private def clearObsoleteModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
|
||||
private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
|
||||
for {
|
||||
list <- ctx.store.transact(
|
||||
ClassifierName.findOrphanTagModels(ctx.args.collective)
|
||||
@ -98,6 +106,6 @@ object LearnClassifierTask {
|
||||
|
||||
private def logInactiveWarning[F[_]: Sync](logger: Logger[F]): F[Unit] =
|
||||
logger.warn(
|
||||
"Classification is disabled. Check joex config and the collective settings."
|
||||
"Auto-tagging is disabled. Check joex config and the collective settings."
|
||||
)
|
||||
}
|
||||
|
Reference in New Issue
Block a user