mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 10:29:34 +00:00
Control what tag categories to use for auto-tagging
This commit is contained in:
parent
cce8878898
commit
a6f29153c4
@ -131,7 +131,8 @@ val openapiScalaSettings = Seq(
|
||||
case "ident" =>
|
||||
field => field.copy(typeDef = TypeDef("Ident", Imports("docspell.common.Ident")))
|
||||
case "accountid" =>
|
||||
field => field.copy(typeDef = TypeDef("AccountId", Imports("docspell.common.AccountId")))
|
||||
field =>
|
||||
field.copy(typeDef = TypeDef("AccountId", Imports("docspell.common.AccountId")))
|
||||
case "collectivestate" =>
|
||||
field =>
|
||||
field.copy(typeDef =
|
||||
@ -190,6 +191,9 @@ val openapiScalaSettings = Seq(
|
||||
field.copy(typeDef =
|
||||
TypeDef("CustomFieldType", Imports("docspell.common.CustomFieldType"))
|
||||
)
|
||||
case "listtype" =>
|
||||
field =>
|
||||
field.copy(typeDef = TypeDef("ListType", Imports("docspell.common.ListType")))
|
||||
}))
|
||||
)
|
||||
|
||||
|
33
modules/common/src/main/scala/docspell/common/ListType.scala
Normal file
33
modules/common/src/main/scala/docspell/common/ListType.scala
Normal file
@ -0,0 +1,33 @@
|
||||
package docspell.common
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
|
||||
import io.circe.{Decoder, Encoder}
|
||||
|
||||
sealed trait ListType { self: Product =>
|
||||
def name: String =
|
||||
productPrefix.toLowerCase
|
||||
}
|
||||
|
||||
object ListType {
|
||||
|
||||
case object Whitelist extends ListType
|
||||
val whitelist: ListType = Whitelist
|
||||
|
||||
case object Blacklist extends ListType
|
||||
val blacklist: ListType = Blacklist
|
||||
|
||||
val all: NonEmptyList[ListType] = NonEmptyList.of(Whitelist, Blacklist)
|
||||
|
||||
def fromString(name: String): Either[String, ListType] =
|
||||
all.find(_.name.equalsIgnoreCase(name)).toRight(s"Unknown list type: $name")
|
||||
|
||||
def unsafeFromString(name: String): ListType =
|
||||
fromString(name).fold(sys.error, identity)
|
||||
|
||||
implicit val jsonEncoder: Encoder[ListType] =
|
||||
Encoder.encodeString.contramap(_.name)
|
||||
|
||||
implicit val jsonDecoder: Decoder[ListType] =
|
||||
Decoder.decodeString.emap(fromString)
|
||||
}
|
@ -4,9 +4,7 @@ import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common.Ident
|
||||
import docspell.store.qb.DSL._
|
||||
import docspell.store.qb._
|
||||
import docspell.store.records.{RClassifierModel, RTag}
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting}
|
||||
|
||||
import doobie._
|
||||
|
||||
@ -16,7 +14,7 @@ object ClassifierName {
|
||||
def apply(name: String): ClassifierName =
|
||||
new ClassifierName(name)
|
||||
|
||||
val categoryPrefix = "tagcategory-"
|
||||
private val categoryPrefix = "tagcategory-"
|
||||
|
||||
def tagCategory(cat: String): ClassifierName =
|
||||
apply(s"${categoryPrefix}${cat}")
|
||||
@ -35,7 +33,7 @@ object ClassifierName {
|
||||
|
||||
def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
||||
for {
|
||||
categories <- RTag.listCategories(coll)
|
||||
categories <- RClassifierSetting.getActiveCategories(coll)
|
||||
models <- NonEmptyList.fromList(categories) match {
|
||||
case Some(nel) =>
|
||||
RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
||||
@ -44,22 +42,20 @@ object ClassifierName {
|
||||
}
|
||||
} yield models
|
||||
|
||||
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = {
|
||||
val model = RClassifierModel.as("m")
|
||||
val tag = RTag.as("t")
|
||||
val sql =
|
||||
Select(
|
||||
select(model.all),
|
||||
from(model),
|
||||
model.cid === coll && model.name.notIn(
|
||||
Select(
|
||||
select(concat(lit(categoryPrefix), tag.category.s)),
|
||||
from(tag),
|
||||
tag.cid === coll && tag.category.isNotNull
|
||||
).distinct
|
||||
)
|
||||
).build
|
||||
sql.query[RClassifierModel].to[List]
|
||||
}
|
||||
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
|
||||
for {
|
||||
cats <- RClassifierSetting.getActiveCategories(coll)
|
||||
allModels = RClassifierModel.findAllByQuery(coll, s"${categoryPrefix}%")
|
||||
result <- NonEmptyList.fromList(cats) match {
|
||||
case Some(nel) =>
|
||||
allModels.flatMap(all =>
|
||||
RClassifierModel
|
||||
.findAllByName(coll, nel.map(tagCategory).map(_.name))
|
||||
.map(active => all.diff(active))
|
||||
)
|
||||
case None =>
|
||||
allModels
|
||||
}
|
||||
} yield result
|
||||
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ import docspell.backend.ops.OCollective
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting, RTag}
|
||||
import docspell.store.records.{RClassifierModel, RClassifierSetting}
|
||||
|
||||
object LearnClassifierTask {
|
||||
val pageSep = " --n-- "
|
||||
@ -26,15 +26,23 @@ object LearnClassifierTask {
|
||||
analyser: TextAnalyser[F]
|
||||
): Task[F, Args, Unit] =
|
||||
Task { ctx =>
|
||||
(for {
|
||||
sett <- findActiveSettings[F](ctx, cfg)
|
||||
maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
|
||||
_ <- OptionT.liftF(
|
||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||
)
|
||||
_ <- OptionT.liftF(clearObsoleteModels(ctx))
|
||||
} yield ())
|
||||
.getOrElseF(logInactiveWarning(ctx.logger))
|
||||
val learnTags =
|
||||
for {
|
||||
sett <- findActiveSettings[F](ctx, cfg)
|
||||
maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
|
||||
_ <- OptionT.liftF(
|
||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||
)
|
||||
} yield ()
|
||||
|
||||
// learn classifier models from active tag categories
|
||||
learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *>
|
||||
// delete classifier model files for categories that have been removed
|
||||
clearObsoleteTagModels(ctx) *>
|
||||
// when tags are deleted, categories may get removed. fix the json array
|
||||
ctx.store
|
||||
.transact(RClassifierSetting.fixCategoryList(ctx.args.collective))
|
||||
.map(_ => ())
|
||||
}
|
||||
|
||||
def learnTagCategory[F[_]: Sync: ContextShift, A](
|
||||
@ -64,13 +72,13 @@ object LearnClassifierTask {
|
||||
): Task[F, A, Unit] =
|
||||
Task { ctx =>
|
||||
for {
|
||||
cats <- ctx.store.transact(RTag.listCategories(collective))
|
||||
cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective))
|
||||
task = learnTagCategory[F, A](analyser, collective, maxItems) _
|
||||
_ <- cats.map(task).traverse(_.run(ctx))
|
||||
} yield ()
|
||||
}
|
||||
|
||||
private def clearObsoleteModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
|
||||
private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
|
||||
for {
|
||||
list <- ctx.store.transact(
|
||||
ClassifierName.findOrphanTagModels(ctx.args.collective)
|
||||
@ -98,6 +106,6 @@ object LearnClassifierTask {
|
||||
|
||||
private def logInactiveWarning[F[_]: Sync](logger: Logger[F]): F[Unit] =
|
||||
logger.warn(
|
||||
"Classification is disabled. Check joex config and the collective settings."
|
||||
"Auto-tagging is disabled. Check joex config and the collective settings."
|
||||
)
|
||||
}
|
||||
|
@ -4850,12 +4850,11 @@ components:
|
||||
description: |
|
||||
Settings for learning a document classifier.
|
||||
required:
|
||||
- enabled
|
||||
- schedule
|
||||
- itemCount
|
||||
- categoryList
|
||||
- listType
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
itemCount:
|
||||
type: integer
|
||||
format: int32
|
||||
@ -4865,6 +4864,16 @@ components:
|
||||
schedule:
|
||||
type: string
|
||||
format: calevent
|
||||
categoryList:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
listType:
|
||||
type: string
|
||||
format: listtype
|
||||
enum:
|
||||
- blacklist
|
||||
- whitelist
|
||||
|
||||
SourceList:
|
||||
description: |
|
||||
|
@ -6,7 +6,7 @@ import cats.implicits._
|
||||
import docspell.backend.BackendApp
|
||||
import docspell.backend.auth.AuthToken
|
||||
import docspell.backend.ops.OCollective
|
||||
import docspell.common.MakePreviewArgs
|
||||
import docspell.common.{ListType, MakePreviewArgs}
|
||||
import docspell.restapi.model._
|
||||
import docspell.restserver.conv.Conversions
|
||||
import docspell.restserver.http4s._
|
||||
@ -44,9 +44,10 @@ object CollectiveRoutes {
|
||||
settings.integrationEnabled,
|
||||
Some(
|
||||
OCollective.Classifier(
|
||||
settings.classifier.enabled,
|
||||
settings.classifier.schedule,
|
||||
settings.classifier.itemCount
|
||||
settings.classifier.itemCount,
|
||||
settings.classifier.categoryList,
|
||||
settings.classifier.listType
|
||||
)
|
||||
)
|
||||
)
|
||||
@ -64,11 +65,12 @@ object CollectiveRoutes {
|
||||
c.language,
|
||||
c.integrationEnabled,
|
||||
ClassifierSetting(
|
||||
c.classifier.exists(_.enabled),
|
||||
c.classifier.map(_.itemCount).getOrElse(0),
|
||||
c.classifier
|
||||
.map(_.schedule)
|
||||
.getOrElse(CalEvent.unsafe("*-1/3-01 01:00:00"))
|
||||
.getOrElse(CalEvent.unsafe("*-1/3-01 01:00:00")),
|
||||
c.classifier.map(_.categories).getOrElse(Nil),
|
||||
c.classifier.map(_.listType).getOrElse(ListType.whitelist)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
@ -14,8 +14,31 @@ select random_uuid() as "id", "cid", concat('tagcategory-', "category") as "name
|
||||
from "classifier_setting"
|
||||
where "file_id" is not null;
|
||||
|
||||
alter table "classifier_setting"
|
||||
add column "categories" text;
|
||||
|
||||
alter table "classifier_setting"
|
||||
add column "category_list_type" varchar(254);
|
||||
|
||||
update "classifier_setting"
|
||||
set "category_list_type" = 'whitelist';
|
||||
|
||||
update "classifier_setting"
|
||||
set "categories" = concat('["', category, '"]')
|
||||
where category is not null;
|
||||
|
||||
update "classifier_setting"
|
||||
set "categories" = '[]'
|
||||
where category is null;
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "category";
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "file_id";
|
||||
|
||||
ALTER TABLE "classifier_setting"
|
||||
ALTER COLUMN "categories" SET NOT NULL;
|
||||
|
||||
ALTER TABLE "classifier_setting"
|
||||
ALTER COLUMN "category_list_type" SET NOT NULL;
|
||||
|
@ -14,13 +14,35 @@ select md5(rand()) as id, `cid`,concat('tagcategory-', `category`) as `name`, `f
|
||||
from `classifier_setting`
|
||||
where `file_id` is not null;
|
||||
|
||||
alter table `classifier_setting`
|
||||
add column (`categories` mediumtext);
|
||||
|
||||
alter table `classifier_setting`
|
||||
add column (`category_list_type` varchar(254));
|
||||
|
||||
update `classifier_setting`
|
||||
set `category_list_type` = 'whitelist';
|
||||
|
||||
update `classifier_setting`
|
||||
set `categories` = concat('[`', category, '`]')
|
||||
where category is not null;
|
||||
|
||||
update `classifier_setting`
|
||||
set `categories` = '[]'
|
||||
where category is null;
|
||||
|
||||
alter table `classifier_setting`
|
||||
drop column `category`;
|
||||
|
||||
-- mariadb needs special treatment when dropping a column that is part
|
||||
-- of an index and foreign key
|
||||
-- mariadb requires to drop constraint manually when dropping a column
|
||||
alter table `classifier_setting`
|
||||
drop constraint `classifier_setting_ibfk_2`;
|
||||
|
||||
alter table `classifier_setting`
|
||||
drop column `file_id`;
|
||||
|
||||
ALTER TABLE `classifier_setting`
|
||||
MODIFY `categories` mediumtext NOT NULL;
|
||||
|
||||
ALTER TABLE `classifier_setting`
|
||||
MODIFY `category_list_type` varchar(254) NOT NULL;
|
||||
|
@ -14,8 +14,31 @@ select md5(random()::text) as id, "cid",'tagcategory-' || "category" as "name",
|
||||
from "classifier_setting"
|
||||
where "file_id" is not null;
|
||||
|
||||
alter table "classifier_setting"
|
||||
add column "categories" text;
|
||||
|
||||
alter table "classifier_setting"
|
||||
add column "category_list_type" varchar(254);
|
||||
|
||||
update "classifier_setting"
|
||||
set "category_list_type" = 'whitelist';
|
||||
|
||||
update "classifier_setting"
|
||||
set "categories" = concat('["', category, '"]')
|
||||
where category is not null;
|
||||
|
||||
update "classifier_setting"
|
||||
set "categories" = '[]'
|
||||
where category is null;
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "category";
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "file_id";
|
||||
|
||||
ALTER TABLE "classifier_setting"
|
||||
ALTER COLUMN "categories" SET NOT NULL;
|
||||
|
||||
ALTER TABLE "classifier_setting"
|
||||
ALTER COLUMN "category_list_type" SET NOT NULL;
|
||||
|
@ -97,6 +97,9 @@ trait DoobieMeta extends EmilDoobieMeta {
|
||||
|
||||
implicit val metaCustomFieldType: Meta[CustomFieldType] =
|
||||
Meta[String].timap(CustomFieldType.unsafe)(_.name)
|
||||
|
||||
implicit val metaListType: Meta[ListType] =
|
||||
Meta[String].timap(ListType.unsafeFromString)(_.name)
|
||||
}
|
||||
|
||||
object DoobieMeta extends DoobieMeta {
|
||||
|
@ -57,7 +57,12 @@ object RClassifierModel {
|
||||
|
||||
def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] =
|
||||
for {
|
||||
n <- DML.update(T, T.cid === coll && T.name === name, DML.set(T.fileId.setTo(fid)))
|
||||
now <- Timestamp.current[ConnectionIO]
|
||||
n <- DML.update(
|
||||
T,
|
||||
T.cid === coll && T.name === name,
|
||||
DML.set(T.fileId.setTo(fid), T.created.setTo(now))
|
||||
)
|
||||
k <-
|
||||
if (n == 0) createNew[ConnectionIO](coll, name, fid).flatMap(insert)
|
||||
else 0.pure[ConnectionIO]
|
||||
@ -87,4 +92,11 @@ object RClassifierModel {
|
||||
.query[RClassifierModel]
|
||||
.to[List]
|
||||
|
||||
def findAllByQuery(
|
||||
cid: Ident,
|
||||
nameQuery: String
|
||||
): ConnectionIO[List[RClassifierModel]] =
|
||||
Select(select(T.all), from(T), T.cid === cid && T.name.like(nameQuery)).build
|
||||
.query[RClassifierModel]
|
||||
.to[List]
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package docspell.store.records
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import cats.data.{NonEmptyList, OptionT}
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common._
|
||||
@ -13,23 +13,38 @@ import doobie.implicits._
|
||||
|
||||
case class RClassifierSetting(
|
||||
cid: Ident,
|
||||
enabled: Boolean,
|
||||
schedule: CalEvent,
|
||||
itemCount: Int,
|
||||
created: Timestamp
|
||||
) {}
|
||||
created: Timestamp,
|
||||
categoryList: List[String],
|
||||
listType: ListType
|
||||
) {
|
||||
|
||||
def enabled: Boolean =
|
||||
listType match {
|
||||
case ListType.Blacklist =>
|
||||
true
|
||||
case ListType.Whitelist =>
|
||||
categoryList.nonEmpty
|
||||
}
|
||||
}
|
||||
|
||||
object RClassifierSetting {
|
||||
// the categoryList is stored as a json array
|
||||
implicit val stringListMeta: Meta[List[String]] =
|
||||
jsonMeta[List[String]]
|
||||
|
||||
final case class Table(alias: Option[String]) extends TableDef {
|
||||
val tableName = "classifier_setting"
|
||||
|
||||
val cid = Column[Ident]("cid", this)
|
||||
val enabled = Column[Boolean]("enabled", this)
|
||||
val schedule = Column[CalEvent]("schedule", this)
|
||||
val itemCount = Column[Int]("item_count", this)
|
||||
val created = Column[Timestamp]("created", this)
|
||||
val cid = Column[Ident]("cid", this)
|
||||
val schedule = Column[CalEvent]("schedule", this)
|
||||
val itemCount = Column[Int]("item_count", this)
|
||||
val created = Column[Timestamp]("created", this)
|
||||
val categories = Column[List[String]]("categories", this)
|
||||
val listType = Column[ListType]("category_list_type", this)
|
||||
val all = NonEmptyList
|
||||
.of[Column[_]](cid, enabled, schedule, itemCount, created)
|
||||
.of[Column[_]](cid, schedule, itemCount, created, categories, listType)
|
||||
}
|
||||
|
||||
val T = Table(None)
|
||||
@ -40,29 +55,19 @@ object RClassifierSetting {
|
||||
DML.insert(
|
||||
T,
|
||||
T.all,
|
||||
fr"${v.cid},${v.enabled},${v.schedule},${v.itemCount},${v.created}"
|
||||
fr"${v.cid},${v.schedule},${v.itemCount},${v.created},${v.categoryList},${v.listType}"
|
||||
)
|
||||
|
||||
def updateAll(v: RClassifierSetting): ConnectionIO[Int] =
|
||||
DML.update(
|
||||
T,
|
||||
T.cid === v.cid,
|
||||
DML.set(
|
||||
T.enabled.setTo(v.enabled),
|
||||
T.schedule.setTo(v.schedule),
|
||||
T.itemCount.setTo(v.itemCount)
|
||||
)
|
||||
)
|
||||
|
||||
def updateSettings(v: RClassifierSetting): ConnectionIO[Int] =
|
||||
def update(v: RClassifierSetting): ConnectionIO[Int] =
|
||||
for {
|
||||
n1 <- DML.update(
|
||||
T,
|
||||
T.cid === v.cid,
|
||||
DML.set(
|
||||
T.enabled.setTo(v.enabled),
|
||||
T.schedule.setTo(v.schedule),
|
||||
T.itemCount.setTo(v.itemCount)
|
||||
T.itemCount.setTo(v.itemCount),
|
||||
T.categories.setTo(v.categoryList),
|
||||
T.listType.setTo(v.listType)
|
||||
)
|
||||
)
|
||||
n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO]
|
||||
@ -76,24 +81,62 @@ object RClassifierSetting {
|
||||
def delete(coll: Ident): ConnectionIO[Int] =
|
||||
DML.delete(T, T.cid === coll)
|
||||
|
||||
/** Finds tag categories that exist and match the classifier setting.
|
||||
* If the setting contains a black list, they are removed from the
|
||||
* existing categories. If it is a whitelist, the intersection is
|
||||
* returned.
|
||||
*/
|
||||
def getActiveCategories(coll: Ident): ConnectionIO[List[String]] =
|
||||
(for {
|
||||
sett <- OptionT(findById(coll))
|
||||
cats <- OptionT.liftF(RTag.listCategories(coll))
|
||||
res = sett.listType match {
|
||||
case ListType.Blacklist =>
|
||||
cats.diff(sett.categoryList)
|
||||
case ListType.Whitelist =>
|
||||
sett.categoryList.intersect(cats)
|
||||
}
|
||||
} yield res).getOrElse(Nil)
|
||||
|
||||
/** Checks the json array of tag categories and removes those that are not present anymore. */
|
||||
def fixCategoryList(coll: Ident): ConnectionIO[Int] =
|
||||
(for {
|
||||
sett <- OptionT(findById(coll))
|
||||
cats <- OptionT.liftF(RTag.listCategories(coll))
|
||||
fixed = sett.categoryList.intersect(cats)
|
||||
n <- OptionT.liftF(
|
||||
if (fixed == sett.categoryList) 0.pure[ConnectionIO]
|
||||
else DML.update(T, T.cid === coll, DML.set(T.categories.setTo(fixed)))
|
||||
)
|
||||
} yield n).getOrElse(0)
|
||||
|
||||
case class Classifier(
|
||||
enabled: Boolean,
|
||||
schedule: CalEvent,
|
||||
itemCount: Int
|
||||
itemCount: Int,
|
||||
categories: List[String],
|
||||
listType: ListType
|
||||
) {
|
||||
def enabled: Boolean =
|
||||
listType match {
|
||||
case ListType.Blacklist =>
|
||||
true
|
||||
case ListType.Whitelist =>
|
||||
categories.nonEmpty
|
||||
}
|
||||
|
||||
def toRecord(coll: Ident, created: Timestamp): RClassifierSetting =
|
||||
RClassifierSetting(
|
||||
coll,
|
||||
enabled,
|
||||
schedule,
|
||||
itemCount,
|
||||
created
|
||||
created,
|
||||
categories,
|
||||
listType
|
||||
)
|
||||
}
|
||||
object Classifier {
|
||||
def fromRecord(r: RClassifierSetting): Classifier =
|
||||
Classifier(r.enabled, r.schedule, r.itemCount)
|
||||
Classifier(r.schedule, r.itemCount, r.categoryList, r.listType)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package docspell.store.records
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import cats.data.{NonEmptyList, OptionT}
|
||||
import fs2.Stream
|
||||
|
||||
import docspell.common._
|
||||
@ -73,13 +73,24 @@ object RCollective {
|
||||
.map(now => settings.classifier.map(_.toRecord(cid, now)))
|
||||
n2 <- cls match {
|
||||
case Some(cr) =>
|
||||
RClassifierSetting.updateSettings(cr)
|
||||
RClassifierSetting.update(cr)
|
||||
case None =>
|
||||
RClassifierSetting.delete(cid)
|
||||
}
|
||||
} yield n1 + n2
|
||||
|
||||
def getSettings(coll: Ident): ConnectionIO[Option[Settings]] = {
|
||||
// this hides categories that have been deleted in the meantime
|
||||
// they are finally removed from the json array once the learn classifier task is run
|
||||
def getSettings(coll: Ident): ConnectionIO[Option[Settings]] =
|
||||
(for {
|
||||
sett <- OptionT(getRawSettings(coll))
|
||||
prev <- OptionT.fromOption[ConnectionIO](sett.classifier)
|
||||
cats <- OptionT.liftF(RTag.listCategories(coll))
|
||||
next = prev.copy(categories = prev.categories.intersect(cats))
|
||||
} yield sett.copy(classifier = Some(next))).value
|
||||
|
||||
private def getRawSettings(coll: Ident): ConnectionIO[Option[Settings]] = {
|
||||
import RClassifierSetting.stringListMeta
|
||||
val c = RCollective.as("c")
|
||||
val cs = RClassifierSetting.as("cs")
|
||||
|
||||
@ -87,9 +98,10 @@ object RCollective {
|
||||
select(
|
||||
c.language.s,
|
||||
c.integration.s,
|
||||
cs.enabled.s,
|
||||
cs.schedule.s,
|
||||
cs.itemCount.s
|
||||
cs.itemCount.s,
|
||||
cs.categories.s,
|
||||
cs.listType.s
|
||||
),
|
||||
from(c).leftJoin(cs, cs.cid === c.id),
|
||||
c.id === coll
|
||||
|
@ -11,31 +11,38 @@ import Api
|
||||
import Api.Model.ClassifierSetting exposing (ClassifierSetting)
|
||||
import Api.Model.TagList exposing (TagList)
|
||||
import Comp.CalEventInput
|
||||
import Comp.Dropdown
|
||||
import Comp.FixedDropdown
|
||||
import Comp.IntField
|
||||
import Data.CalEvent exposing (CalEvent)
|
||||
import Data.Flags exposing (Flags)
|
||||
import Data.ListType exposing (ListType)
|
||||
import Data.UiSettings exposing (UiSettings)
|
||||
import Data.Validated exposing (Validated(..))
|
||||
import Html exposing (..)
|
||||
import Html.Attributes exposing (..)
|
||||
import Html.Events exposing (onCheck)
|
||||
import Http
|
||||
import Markdown
|
||||
import Util.Tag
|
||||
|
||||
|
||||
type alias Model =
|
||||
{ enabled : Bool
|
||||
, scheduleModel : Comp.CalEventInput.Model
|
||||
{ scheduleModel : Comp.CalEventInput.Model
|
||||
, schedule : Validated CalEvent
|
||||
, itemCountModel : Comp.IntField.Model
|
||||
, itemCount : Maybe Int
|
||||
, categoryListModel : Comp.Dropdown.Model String
|
||||
, categoryListType : ListType
|
||||
, categoryListTypeModel : Comp.FixedDropdown.Model ListType
|
||||
}
|
||||
|
||||
|
||||
type Msg
|
||||
= ScheduleMsg Comp.CalEventInput.Msg
|
||||
| ToggleEnabled
|
||||
| ItemCountMsg Comp.IntField.Msg
|
||||
| GetTagsResp (Result Http.Error TagList)
|
||||
| CategoryListMsg (Comp.Dropdown.Msg String)
|
||||
| CategoryListTypeMsg (Comp.FixedDropdown.Msg ListType)
|
||||
|
||||
|
||||
init : Flags -> ClassifierSetting -> ( Model, Cmd Msg )
|
||||
@ -48,13 +55,41 @@ init flags sett =
|
||||
( cem, cec ) =
|
||||
Comp.CalEventInput.init flags newSchedule
|
||||
in
|
||||
( { enabled = sett.enabled
|
||||
, scheduleModel = cem
|
||||
( { scheduleModel = cem
|
||||
, schedule = Data.Validated.Unknown newSchedule
|
||||
, itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count"
|
||||
, itemCount = Just sett.itemCount
|
||||
, categoryListModel =
|
||||
let
|
||||
mkOption s =
|
||||
{ value = s, text = s, additional = "" }
|
||||
|
||||
minit =
|
||||
Comp.Dropdown.makeModel
|
||||
{ multiple = True
|
||||
, searchable = \n -> n > 0
|
||||
, makeOption = mkOption
|
||||
, labelColor = \_ -> \_ -> "grey "
|
||||
, placeholder = "Choose categories …"
|
||||
}
|
||||
|
||||
lm =
|
||||
Comp.Dropdown.SetSelection sett.categoryList
|
||||
|
||||
( m_, _ ) =
|
||||
Comp.Dropdown.update lm minit
|
||||
in
|
||||
m_
|
||||
, categoryListType =
|
||||
Data.ListType.fromString sett.listType
|
||||
|> Maybe.withDefault Data.ListType.Whitelist
|
||||
, categoryListTypeModel =
|
||||
Comp.FixedDropdown.initMap Data.ListType.label Data.ListType.all
|
||||
}
|
||||
, Cmd.map ScheduleMsg cec
|
||||
, Cmd.batch
|
||||
[ Api.getTags flags "" GetTagsResp
|
||||
, Cmd.map ScheduleMsg cec
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@ -62,10 +97,11 @@ getSettings : Model -> Validated ClassifierSetting
|
||||
getSettings model =
|
||||
Data.Validated.map
|
||||
(\sch ->
|
||||
{ enabled = model.enabled
|
||||
, schedule =
|
||||
{ schedule =
|
||||
Data.CalEvent.makeEvent sch
|
||||
, itemCount = Maybe.withDefault 0 model.itemCount
|
||||
, listType = Data.ListType.toString model.categoryListType
|
||||
, categoryList = Comp.Dropdown.getSelected model.categoryListModel
|
||||
}
|
||||
)
|
||||
model.schedule
|
||||
@ -74,6 +110,20 @@ getSettings model =
|
||||
update : Flags -> Msg -> Model -> ( Model, Cmd Msg )
|
||||
update flags msg model =
|
||||
case msg of
|
||||
GetTagsResp (Ok tl) ->
|
||||
let
|
||||
categories =
|
||||
Util.Tag.getCategories tl.items
|
||||
|> List.sort
|
||||
|
||||
lm =
|
||||
Comp.Dropdown.SetOptions categories
|
||||
in
|
||||
update flags (CategoryListMsg lm) model
|
||||
|
||||
GetTagsResp (Err _) ->
|
||||
( model, Cmd.none )
|
||||
|
||||
ScheduleMsg lmsg ->
|
||||
let
|
||||
( cm, cc, ce ) =
|
||||
@ -90,11 +140,6 @@ update flags msg model =
|
||||
, Cmd.map ScheduleMsg cc
|
||||
)
|
||||
|
||||
ToggleEnabled ->
|
||||
( { model | enabled = not model.enabled }
|
||||
, Cmd.none
|
||||
)
|
||||
|
||||
ItemCountMsg lmsg ->
|
||||
let
|
||||
( im, iv ) =
|
||||
@ -107,32 +152,61 @@ update flags msg model =
|
||||
, Cmd.none
|
||||
)
|
||||
|
||||
CategoryListMsg lm ->
|
||||
let
|
||||
( m_, cmd_ ) =
|
||||
Comp.Dropdown.update lm model.categoryListModel
|
||||
in
|
||||
( { model | categoryListModel = m_ }
|
||||
, Cmd.map CategoryListMsg cmd_
|
||||
)
|
||||
|
||||
view : Model -> Html Msg
|
||||
view model =
|
||||
CategoryListTypeMsg lm ->
|
||||
let
|
||||
( m_, sel ) =
|
||||
Comp.FixedDropdown.update lm model.categoryListTypeModel
|
||||
|
||||
newListType =
|
||||
Maybe.withDefault model.categoryListType sel
|
||||
in
|
||||
( { model
|
||||
| categoryListTypeModel = m_
|
||||
, categoryListType = newListType
|
||||
}
|
||||
, Cmd.none
|
||||
)
|
||||
|
||||
|
||||
view : UiSettings -> Model -> Html Msg
|
||||
view settings model =
|
||||
let
|
||||
catListTypeItem =
|
||||
Comp.FixedDropdown.Item
|
||||
model.categoryListType
|
||||
(Data.ListType.label model.categoryListType)
|
||||
in
|
||||
div []
|
||||
[ div
|
||||
[ class "field"
|
||||
[ Markdown.toHtml [ class "ui basic segment" ]
|
||||
"""
|
||||
|
||||
Auto-tagging works by learning from existing documents. The more
|
||||
documents you have correctly tagged, the better. Learning is done
|
||||
periodically based on a schedule. You can specify tag-groups that
|
||||
should either be used (whitelist) or not used (blacklist) for
|
||||
learning.
|
||||
|
||||
Use an empty whitelist to disable auto tagging.
|
||||
|
||||
"""
|
||||
, div [ class "field" ]
|
||||
[ label [] [ text "Is the following a blacklist or whitelist?" ]
|
||||
, Html.map CategoryListTypeMsg
|
||||
(Comp.FixedDropdown.view (Just catListTypeItem) model.categoryListTypeModel)
|
||||
]
|
||||
[ div [ class "ui checkbox" ]
|
||||
[ input
|
||||
[ type_ "checkbox"
|
||||
, onCheck (\_ -> ToggleEnabled)
|
||||
, checked model.enabled
|
||||
]
|
||||
[]
|
||||
, label [] [ text "Enable classification" ]
|
||||
, span [ class "small-info" ]
|
||||
[ text "Disable document classification if not needed."
|
||||
]
|
||||
]
|
||||
]
|
||||
, div [ class "ui basic segment" ]
|
||||
[ text "Document classification tries to predict a tag for new incoming documents. This "
|
||||
, text "works by learning from existing documents in order to find common patterns within "
|
||||
, text "the text. The more documents you have correctly tagged, the better. Learning is done "
|
||||
, text "periodically based on a schedule and you need to specify a tag-group that should "
|
||||
, text "be used for learning."
|
||||
, div [ class "field" ]
|
||||
[ label [] [ text "Choose tag categories for learning" ]
|
||||
, Html.map CategoryListMsg
|
||||
(Comp.Dropdown.view settings model.categoryListModel)
|
||||
]
|
||||
, Html.map ItemCountMsg
|
||||
(Comp.IntField.viewWithInfo
|
||||
|
@ -280,7 +280,7 @@ view flags settings model =
|
||||
, ( "invisible hidden", not flags.config.showClassificationSettings )
|
||||
]
|
||||
]
|
||||
[ text "Document Classifier"
|
||||
[ text "Auto-Tagging"
|
||||
]
|
||||
, div
|
||||
[ classList
|
||||
@ -289,13 +289,10 @@ view flags settings model =
|
||||
]
|
||||
]
|
||||
[ Html.map ClassifierSettingMsg
|
||||
(Comp.ClassifierSettingsForm.view model.classifierModel)
|
||||
(Comp.ClassifierSettingsForm.view settings model.classifierModel)
|
||||
, div [ class "ui vertical segment" ]
|
||||
[ button
|
||||
[ classList
|
||||
[ ( "ui small secondary basic button", True )
|
||||
, ( "disabled", not model.classifierModel.enabled )
|
||||
]
|
||||
[ class "ui small secondary basic button"
|
||||
, title "Starts a task to train a classifier"
|
||||
, onClick StartClassifierTask
|
||||
]
|
||||
|
50
modules/webapp/src/main/elm/Data/ListType.elm
Normal file
50
modules/webapp/src/main/elm/Data/ListType.elm
Normal file
@ -0,0 +1,50 @@
|
||||
module Data.ListType exposing
|
||||
( ListType(..)
|
||||
, all
|
||||
, fromString
|
||||
, label
|
||||
, toString
|
||||
)
|
||||
|
||||
|
||||
type ListType
|
||||
= Blacklist
|
||||
| Whitelist
|
||||
|
||||
|
||||
all : List ListType
|
||||
all =
|
||||
[ Blacklist, Whitelist ]
|
||||
|
||||
|
||||
toString : ListType -> String
|
||||
toString lt =
|
||||
case lt of
|
||||
Blacklist ->
|
||||
"blacklist"
|
||||
|
||||
Whitelist ->
|
||||
"whitelist"
|
||||
|
||||
|
||||
label : ListType -> String
|
||||
label lt =
|
||||
case lt of
|
||||
Blacklist ->
|
||||
"Blacklist"
|
||||
|
||||
Whitelist ->
|
||||
"Whitelist"
|
||||
|
||||
|
||||
fromString : String -> Maybe ListType
|
||||
fromString str =
|
||||
case String.toLower str of
|
||||
"blacklist" ->
|
||||
Just Blacklist
|
||||
|
||||
"whitelist" ->
|
||||
Just Whitelist
|
||||
|
||||
_ ->
|
||||
Nothing
|
Loading…
x
Reference in New Issue
Block a user