mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-13 01:39:33 +00:00
Exclude tags w/o category from classifying; remove obsolete models
This commit is contained in:
parent
3e28ce1254
commit
cce8878898
@ -2,8 +2,12 @@ package docspell.joex.learn
|
|||||||
|
|
||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.common.Ident
|
import docspell.common.Ident
|
||||||
|
import docspell.store.qb.DSL._
|
||||||
|
import docspell.store.qb._
|
||||||
import docspell.store.records.{RClassifierModel, RTag}
|
import docspell.store.records.{RClassifierModel, RTag}
|
||||||
|
|
||||||
import doobie._
|
import doobie._
|
||||||
|
|
||||||
final class ClassifierName(val name: String) extends AnyVal
|
final class ClassifierName(val name: String) extends AnyVal
|
||||||
@ -12,9 +16,6 @@ object ClassifierName {
|
|||||||
def apply(name: String): ClassifierName =
|
def apply(name: String): ClassifierName =
|
||||||
new ClassifierName(name)
|
new ClassifierName(name)
|
||||||
|
|
||||||
val noCategory: ClassifierName =
|
|
||||||
apply("__docspell_no_category__")
|
|
||||||
|
|
||||||
val categoryPrefix = "tagcategory-"
|
val categoryPrefix = "tagcategory-"
|
||||||
|
|
||||||
def tagCategory(cat: String): ClassifierName =
|
def tagCategory(cat: String): ClassifierName =
|
||||||
@ -34,7 +35,7 @@ object ClassifierName {
|
|||||||
|
|
||||||
def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
||||||
for {
|
for {
|
||||||
categories <- RTag.listCategories(coll, noCategory.name)
|
categories <- RTag.listCategories(coll)
|
||||||
models <- NonEmptyList.fromList(categories) match {
|
models <- NonEmptyList.fromList(categories) match {
|
||||||
case Some(nel) =>
|
case Some(nel) =>
|
||||||
RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
||||||
@ -42,4 +43,23 @@ object ClassifierName {
|
|||||||
List.empty[RClassifierModel].pure[ConnectionIO]
|
List.empty[RClassifierModel].pure[ConnectionIO]
|
||||||
}
|
}
|
||||||
} yield models
|
} yield models
|
||||||
|
|
||||||
|
/** Finds the classifier models of a collective that do not belong to any tag category
  * currently used by that collective's tags.
  *
  * A model counts as belonging to a category when its name equals `categoryPrefix`
  * followed by the category name.
  *
  * @param coll the collective whose models are inspected
  * @return all models of `coll` whose name is not among the names derived from the
  *         collective's tag categories
  */
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = {
  val m = RClassifierModel.as("m")
  val t = RTag.as("t")

  // Distinct model names derived from the categories in use: prefix + category.
  // Tags without a category are left out of this list.
  val activeModelNames =
    Select(
      select(concat(lit(categoryPrefix), t.category.s)),
      from(t),
      t.cid === coll && t.category.isNotNull
    ).distinct

  // NOTE(review): the outer query does not filter on `categoryPrefix`, so any model of
  // the collective whose name is not in the list above is returned -- confirm that no
  // other kinds of classifier models are stored in this table.
  Select(
    select(m.all),
    from(m),
    m.cid === coll && m.name.notIn(activeModelNames)
  ).build.query[RClassifierModel].to[List]
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,12 +4,13 @@ import cats.data.Kleisli
|
|||||||
import cats.data.OptionT
|
import cats.data.OptionT
|
||||||
import cats.effect._
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.analysis.TextAnalyser
|
import docspell.analysis.TextAnalyser
|
||||||
import docspell.backend.ops.OCollective
|
import docspell.backend.ops.OCollective
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
import docspell.joex.Config
|
import docspell.joex.Config
|
||||||
import docspell.joex.scheduler._
|
import docspell.joex.scheduler._
|
||||||
import docspell.store.records.{RClassifierSetting, RTag}
|
import docspell.store.records.{RClassifierModel, RClassifierSetting, RTag}
|
||||||
|
|
||||||
object LearnClassifierTask {
|
object LearnClassifierTask {
|
||||||
val pageSep = " --n-- "
|
val pageSep = " --n-- "
|
||||||
@ -31,6 +32,7 @@ object LearnClassifierTask {
|
|||||||
_ <- OptionT.liftF(
|
_ <- OptionT.liftF(
|
||||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||||
)
|
)
|
||||||
|
_ <- OptionT.liftF(clearObsoleteModels(ctx))
|
||||||
} yield ())
|
} yield ())
|
||||||
.getOrElseF(logInactiveWarning(ctx.logger))
|
.getOrElseF(logInactiveWarning(ctx.logger))
|
||||||
}
|
}
|
||||||
@ -62,14 +64,27 @@ object LearnClassifierTask {
|
|||||||
): Task[F, A, Unit] =
|
): Task[F, A, Unit] =
|
||||||
Task { ctx =>
|
Task { ctx =>
|
||||||
for {
|
for {
|
||||||
cats <- ctx.store.transact(
|
cats <- ctx.store.transact(RTag.listCategories(collective))
|
||||||
RTag.listCategories(collective, ClassifierName.noCategory.name)
|
|
||||||
)
|
|
||||||
task = learnTagCategory[F, A](analyser, collective, maxItems) _
|
task = learnTagCategory[F, A](analyser, collective, maxItems) _
|
||||||
_ <- cats.map(task).traverse(_.run(ctx))
|
_ <- cats.map(task).traverse(_.run(ctx))
|
||||||
} yield ()
|
} yield ()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Removes classifier models that `ClassifierName.findOrphanTagModels` reports for the
  * collective of the current task.
  *
  * The model records are deleted first; afterwards the file referenced by each model
  * is deleted via `bitpeace`. The number of models found is logged at info level, the
  * result of the record deletion at debug level.
  *
  * @param ctx the task context providing store, logger and the task arguments
  */
private def clearObsoleteModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = {
  val collective = ctx.args.collective
  for {
    orphans <- ctx.store.transact(ClassifierName.findOrphanTagModels(collective))
    _ <- ctx.logger.info(
      s"Found ${orphans.size} obsolete model files that are deleted now."
    )
    // Records go first, then the files they referenced.
    deleted <- ctx.store.transact(RClassifierModel.deleteAll(orphans.map(_.id)))
    _ <- orphans.traverse_(model =>
      ctx.store.bitpeace.delete(model.fileId.id).compile.drain
    )
    _ <- ctx.logger.debug(s"Deleted $deleted model files.")
  } yield ()
}
|
||||||
|
|
||||||
private def findActiveSettings[F[_]: Sync](
|
private def findActiveSettings[F[_]: Sync](
|
||||||
ctx: Context[F, Args],
|
ctx: Context[F, Args],
|
||||||
cfg: Config.TextAnalysis
|
cfg: Config.TextAnalysis
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
package docspell.store.records
|
package docspell.store.records
|
||||||
|
|
||||||
import cats.effect._
|
|
||||||
import cats.data.NonEmptyList
|
import cats.data.NonEmptyList
|
||||||
|
import cats.effect._
|
||||||
import cats.implicits._
|
import cats.implicits._
|
||||||
|
|
||||||
import docspell.common._
|
import docspell.common._
|
||||||
@ -63,6 +63,17 @@ object RClassifierModel {
|
|||||||
else 0.pure[ConnectionIO]
|
else 0.pure[ConnectionIO]
|
||||||
} yield n + k
|
} yield n + k
|
||||||
|
|
||||||
|
/** Deletes the classifier model record with the given id.
  *
  * @param id the id of the record to delete
  * @return the result of `DML.delete` (presumably the number of affected rows)
  */
def deleteById(id: Ident): ConnectionIO[Int] = {
  val matchesId = T.id === id
  DML.delete(T, matchesId)
}
|
||||||
|
|
||||||
|
/** Deletes all classifier model records whose id is contained in `ids`.
  *
  * @param ids the ids of the records to delete; may be empty
  * @return `0` without issuing a statement when `ids` is empty, otherwise the result
  *         of `DML.delete` (presumably the number of affected rows)
  */
def deleteAll(ids: List[Ident]): ConnectionIO[Int] =
  NonEmptyList
    .fromList(ids)
    // An `in` condition needs at least one element, hence the empty case yields 0.
    .fold(0.pure[ConnectionIO])(nel => DML.delete(T, T.id.in(nel)))
|
||||||
|
|
||||||
def findByName(cid: Ident, name: String): ConnectionIO[Option[RClassifierModel]] =
|
def findByName(cid: Ident, name: String): ConnectionIO[Option[RClassifierModel]] =
|
||||||
Select(select(T.all), from(T), T.cid === cid && T.name === name).build
|
Select(select(T.all), from(T), T.cid === cid && T.name === name).build
|
||||||
.query[RClassifierModel]
|
.query[RClassifierModel]
|
||||||
@ -75,4 +86,5 @@ object RClassifierModel {
|
|||||||
Select(select(T.all), from(T), T.cid === cid && T.name.in(names)).build
|
Select(select(T.all), from(T), T.cid === cid && T.name.in(names)).build
|
||||||
.query[RClassifierModel]
|
.query[RClassifierModel]
|
||||||
.to[List]
|
.to[List]
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -148,11 +148,11 @@ object RTag {
|
|||||||
).orderBy(T.name.asc).build.query[RTag].to[List]
|
).orderBy(T.name.asc).build.query[RTag].to[List]
|
||||||
}
|
}
|
||||||
|
|
||||||
def listCategories(coll: Ident, fallback: String): ConnectionIO[List[String]] =
|
def listCategories(coll: Ident): ConnectionIO[List[String]] =
|
||||||
Select(
|
Select(
|
||||||
coalesce(T.category.s, lit(fallback)).s,
|
T.category.s,
|
||||||
from(T),
|
from(T),
|
||||||
T.cid === coll
|
T.cid === coll && T.category.isNotNull
|
||||||
).distinct.build.query[String].to[List]
|
).distinct.build.query[String].to[List]
|
||||||
|
|
||||||
def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] =
|
def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] =
|
||||||
|
Loading…
x
Reference in New Issue
Block a user