Control what tag categories to use for auto-tagging

This commit is contained in:
Eike Kettner 2021-01-19 01:20:13 +01:00
parent cce8878898
commit a6f29153c4
16 changed files with 436 additions and 125 deletions

View File

@ -131,7 +131,8 @@ val openapiScalaSettings = Seq(
case "ident" => case "ident" =>
field => field.copy(typeDef = TypeDef("Ident", Imports("docspell.common.Ident"))) field => field.copy(typeDef = TypeDef("Ident", Imports("docspell.common.Ident")))
case "accountid" => case "accountid" =>
field => field.copy(typeDef = TypeDef("AccountId", Imports("docspell.common.AccountId"))) field =>
field.copy(typeDef = TypeDef("AccountId", Imports("docspell.common.AccountId")))
case "collectivestate" => case "collectivestate" =>
field => field =>
field.copy(typeDef = field.copy(typeDef =
@ -190,6 +191,9 @@ val openapiScalaSettings = Seq(
field.copy(typeDef = field.copy(typeDef =
TypeDef("CustomFieldType", Imports("docspell.common.CustomFieldType")) TypeDef("CustomFieldType", Imports("docspell.common.CustomFieldType"))
) )
case "listtype" =>
field =>
field.copy(typeDef = TypeDef("ListType", Imports("docspell.common.ListType")))
})) }))
) )

View File

@ -0,0 +1,33 @@
package docspell.common
import cats.data.NonEmptyList
import io.circe.{Decoder, Encoder}
sealed trait ListType { self: Product =>
def name: String =
productPrefix.toLowerCase
}
object ListType {
case object Whitelist extends ListType
val whitelist: ListType = Whitelist
case object Blacklist extends ListType
val blacklist: ListType = Blacklist
val all: NonEmptyList[ListType] = NonEmptyList.of(Whitelist, Blacklist)
def fromString(name: String): Either[String, ListType] =
all.find(_.name.equalsIgnoreCase(name)).toRight(s"Unknown list type: $name")
def unsafeFromString(name: String): ListType =
fromString(name).fold(sys.error, identity)
implicit val jsonEncoder: Encoder[ListType] =
Encoder.encodeString.contramap(_.name)
implicit val jsonDecoder: Decoder[ListType] =
Decoder.decodeString.emap(fromString)
}

View File

@ -4,9 +4,7 @@ import cats.data.NonEmptyList
import cats.implicits._ import cats.implicits._
import docspell.common.Ident import docspell.common.Ident
import docspell.store.qb.DSL._ import docspell.store.records.{RClassifierModel, RClassifierSetting}
import docspell.store.qb._
import docspell.store.records.{RClassifierModel, RTag}
import doobie._ import doobie._
@ -16,7 +14,7 @@ object ClassifierName {
def apply(name: String): ClassifierName = def apply(name: String): ClassifierName =
new ClassifierName(name) new ClassifierName(name)
val categoryPrefix = "tagcategory-" private val categoryPrefix = "tagcategory-"
def tagCategory(cat: String): ClassifierName = def tagCategory(cat: String): ClassifierName =
apply(s"${categoryPrefix}${cat}") apply(s"${categoryPrefix}${cat}")
@ -35,7 +33,7 @@ object ClassifierName {
def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
for { for {
categories <- RTag.listCategories(coll) categories <- RClassifierSetting.getActiveCategories(coll)
models <- NonEmptyList.fromList(categories) match { models <- NonEmptyList.fromList(categories) match {
case Some(nel) => case Some(nel) =>
RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name)) RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name))
@ -44,22 +42,20 @@ object ClassifierName {
} }
} yield models } yield models
def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = { def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
val model = RClassifierModel.as("m") for {
val tag = RTag.as("t") cats <- RClassifierSetting.getActiveCategories(coll)
val sql = allModels = RClassifierModel.findAllByQuery(coll, s"${categoryPrefix}%")
Select( result <- NonEmptyList.fromList(cats) match {
select(model.all), case Some(nel) =>
from(model), allModels.flatMap(all =>
model.cid === coll && model.name.notIn( RClassifierModel
Select( .findAllByName(coll, nel.map(tagCategory).map(_.name))
select(concat(lit(categoryPrefix), tag.category.s)), .map(active => all.diff(active))
from(tag), )
tag.cid === coll && tag.category.isNotNull case None =>
).distinct allModels
) }
).build } yield result
sql.query[RClassifierModel].to[List]
}
} }

View File

@ -10,7 +10,7 @@ import docspell.backend.ops.OCollective
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler._ import docspell.joex.scheduler._
import docspell.store.records.{RClassifierModel, RClassifierSetting, RTag} import docspell.store.records.{RClassifierModel, RClassifierSetting}
object LearnClassifierTask { object LearnClassifierTask {
val pageSep = " --n-- " val pageSep = " --n-- "
@ -26,15 +26,23 @@ object LearnClassifierTask {
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
(for { val learnTags =
sett <- findActiveSettings[F](ctx, cfg) for {
maxItems = math.min(cfg.classification.itemCount, sett.itemCount) sett <- findActiveSettings[F](ctx, cfg)
_ <- OptionT.liftF( maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx) _ <- OptionT.liftF(
) learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
_ <- OptionT.liftF(clearObsoleteModels(ctx)) )
} yield ()) } yield ()
.getOrElseF(logInactiveWarning(ctx.logger))
// learn classifier models from active tag categories
learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *>
// delete classifier model files for categories that have been removed
clearObsoleteTagModels(ctx) *>
// when tags are deleted, categories may get removed. fix the json array
ctx.store
.transact(RClassifierSetting.fixCategoryList(ctx.args.collective))
.map(_ => ())
} }
def learnTagCategory[F[_]: Sync: ContextShift, A]( def learnTagCategory[F[_]: Sync: ContextShift, A](
@ -64,13 +72,13 @@ object LearnClassifierTask {
): Task[F, A, Unit] = ): Task[F, A, Unit] =
Task { ctx => Task { ctx =>
for { for {
cats <- ctx.store.transact(RTag.listCategories(collective)) cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective))
task = learnTagCategory[F, A](analyser, collective, maxItems) _ task = learnTagCategory[F, A](analyser, collective, maxItems) _
_ <- cats.map(task).traverse(_.run(ctx)) _ <- cats.map(task).traverse(_.run(ctx))
} yield () } yield ()
} }
private def clearObsoleteModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] =
for { for {
list <- ctx.store.transact( list <- ctx.store.transact(
ClassifierName.findOrphanTagModels(ctx.args.collective) ClassifierName.findOrphanTagModels(ctx.args.collective)
@ -98,6 +106,6 @@ object LearnClassifierTask {
private def logInactiveWarning[F[_]: Sync](logger: Logger[F]): F[Unit] = private def logInactiveWarning[F[_]: Sync](logger: Logger[F]): F[Unit] =
logger.warn( logger.warn(
"Classification is disabled. Check joex config and the collective settings." "Auto-tagging is disabled. Check joex config and the collective settings."
) )
} }

View File

@ -4850,12 +4850,11 @@ components:
description: | description: |
Settings for learning a document classifier. Settings for learning a document classifier.
required: required:
- enabled
- schedule - schedule
- itemCount - itemCount
- categoryList
- listType
properties: properties:
enabled:
type: boolean
itemCount: itemCount:
type: integer type: integer
format: int32 format: int32
@ -4865,6 +4864,16 @@ components:
schedule: schedule:
type: string type: string
format: calevent format: calevent
categoryList:
type: array
items:
type: string
listType:
type: string
format: listtype
enum:
- blacklist
- whitelist
SourceList: SourceList:
description: | description: |

View File

@ -6,7 +6,7 @@ import cats.implicits._
import docspell.backend.BackendApp import docspell.backend.BackendApp
import docspell.backend.auth.AuthToken import docspell.backend.auth.AuthToken
import docspell.backend.ops.OCollective import docspell.backend.ops.OCollective
import docspell.common.MakePreviewArgs import docspell.common.{ListType, MakePreviewArgs}
import docspell.restapi.model._ import docspell.restapi.model._
import docspell.restserver.conv.Conversions import docspell.restserver.conv.Conversions
import docspell.restserver.http4s._ import docspell.restserver.http4s._
@ -44,9 +44,10 @@ object CollectiveRoutes {
settings.integrationEnabled, settings.integrationEnabled,
Some( Some(
OCollective.Classifier( OCollective.Classifier(
settings.classifier.enabled,
settings.classifier.schedule, settings.classifier.schedule,
settings.classifier.itemCount settings.classifier.itemCount,
settings.classifier.categoryList,
settings.classifier.listType
) )
) )
) )
@ -64,11 +65,12 @@ object CollectiveRoutes {
c.language, c.language,
c.integrationEnabled, c.integrationEnabled,
ClassifierSetting( ClassifierSetting(
c.classifier.exists(_.enabled),
c.classifier.map(_.itemCount).getOrElse(0), c.classifier.map(_.itemCount).getOrElse(0),
c.classifier c.classifier
.map(_.schedule) .map(_.schedule)
.getOrElse(CalEvent.unsafe("*-1/3-01 01:00:00")) .getOrElse(CalEvent.unsafe("*-1/3-01 01:00:00")),
c.classifier.map(_.categories).getOrElse(Nil),
c.classifier.map(_.listType).getOrElse(ListType.whitelist)
) )
) )
) )

View File

@ -14,8 +14,31 @@ select random_uuid() as "id", "cid", concat('tagcategory-', "category") as "name
from "classifier_setting" from "classifier_setting"
where "file_id" is not null; where "file_id" is not null;
alter table "classifier_setting"
add column "categories" text;
alter table "classifier_setting"
add column "category_list_type" varchar(254);
update "classifier_setting"
set "category_list_type" = 'whitelist';
update "classifier_setting"
set "categories" = concat('["', category, '"]')
where category is not null;
update "classifier_setting"
set "categories" = '[]'
where category is null;
alter table "classifier_setting" alter table "classifier_setting"
drop column "category"; drop column "category";
alter table "classifier_setting" alter table "classifier_setting"
drop column "file_id"; drop column "file_id";
ALTER TABLE "classifier_setting"
ALTER COLUMN "categories" SET NOT NULL;
ALTER TABLE "classifier_setting"
ALTER COLUMN "category_list_type" SET NOT NULL;

View File

@ -14,13 +14,35 @@ select md5(rand()) as id, `cid`,concat('tagcategory-', `category`) as `name`, `f
from `classifier_setting` from `classifier_setting`
where `file_id` is not null; where `file_id` is not null;
alter table `classifier_setting`
add column (`categories` mediumtext);
alter table `classifier_setting`
add column (`category_list_type` varchar(254));
update `classifier_setting`
set `category_list_type` = 'whitelist';
update `classifier_setting`
set `categories` = concat('[`', category, '`]')
where category is not null;
update `classifier_setting`
set `categories` = '[]'
where category is null;
alter table `classifier_setting` alter table `classifier_setting`
drop column `category`; drop column `category`;
-- mariadb needs special treatment when dropping a column that is part -- mariadb requires to drop constraint manually when dropping a column
-- of an index and foreign key
alter table `classifier_setting` alter table `classifier_setting`
drop constraint `classifier_setting_ibfk_2`; drop constraint `classifier_setting_ibfk_2`;
alter table `classifier_setting` alter table `classifier_setting`
drop column `file_id`; drop column `file_id`;
ALTER TABLE `classifier_setting`
MODIFY `categories` mediumtext NOT NULL;
ALTER TABLE `classifier_setting`
MODIFY `category_list_type` varchar(254) NOT NULL;

View File

@ -14,8 +14,31 @@ select md5(random()::text) as id, "cid",'tagcategory-' || "category" as "name",
from "classifier_setting" from "classifier_setting"
where "file_id" is not null; where "file_id" is not null;
alter table "classifier_setting"
add column "categories" text;
alter table "classifier_setting"
add column "category_list_type" varchar(254);
update "classifier_setting"
set "category_list_type" = 'whitelist';
update "classifier_setting"
set "categories" = concat('["', category, '"]')
where category is not null;
update "classifier_setting"
set "categories" = '[]'
where category is null;
alter table "classifier_setting" alter table "classifier_setting"
drop column "category"; drop column "category";
alter table "classifier_setting" alter table "classifier_setting"
drop column "file_id"; drop column "file_id";
ALTER TABLE "classifier_setting"
ALTER COLUMN "categories" SET NOT NULL;
ALTER TABLE "classifier_setting"
ALTER COLUMN "category_list_type" SET NOT NULL;

View File

@ -97,6 +97,9 @@ trait DoobieMeta extends EmilDoobieMeta {
implicit val metaCustomFieldType: Meta[CustomFieldType] = implicit val metaCustomFieldType: Meta[CustomFieldType] =
Meta[String].timap(CustomFieldType.unsafe)(_.name) Meta[String].timap(CustomFieldType.unsafe)(_.name)
implicit val metaListType: Meta[ListType] =
Meta[String].timap(ListType.unsafeFromString)(_.name)
} }
object DoobieMeta extends DoobieMeta { object DoobieMeta extends DoobieMeta {

View File

@ -57,7 +57,12 @@ object RClassifierModel {
def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] = def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] =
for { for {
n <- DML.update(T, T.cid === coll && T.name === name, DML.set(T.fileId.setTo(fid))) now <- Timestamp.current[ConnectionIO]
n <- DML.update(
T,
T.cid === coll && T.name === name,
DML.set(T.fileId.setTo(fid), T.created.setTo(now))
)
k <- k <-
if (n == 0) createNew[ConnectionIO](coll, name, fid).flatMap(insert) if (n == 0) createNew[ConnectionIO](coll, name, fid).flatMap(insert)
else 0.pure[ConnectionIO] else 0.pure[ConnectionIO]
@ -87,4 +92,11 @@ object RClassifierModel {
.query[RClassifierModel] .query[RClassifierModel]
.to[List] .to[List]
def findAllByQuery(
cid: Ident,
nameQuery: String
): ConnectionIO[List[RClassifierModel]] =
Select(select(T.all), from(T), T.cid === cid && T.name.like(nameQuery)).build
.query[RClassifierModel]
.to[List]
} }

View File

@ -1,6 +1,6 @@
package docspell.store.records package docspell.store.records
import cats.data.NonEmptyList import cats.data.{NonEmptyList, OptionT}
import cats.implicits._ import cats.implicits._
import docspell.common._ import docspell.common._
@ -13,23 +13,38 @@ import doobie.implicits._
case class RClassifierSetting( case class RClassifierSetting(
cid: Ident, cid: Ident,
enabled: Boolean,
schedule: CalEvent, schedule: CalEvent,
itemCount: Int, itemCount: Int,
created: Timestamp created: Timestamp,
) {} categoryList: List[String],
listType: ListType
) {
def enabled: Boolean =
listType match {
case ListType.Blacklist =>
true
case ListType.Whitelist =>
categoryList.nonEmpty
}
}
object RClassifierSetting { object RClassifierSetting {
// the categoryList is stored as a json array
implicit val stringListMeta: Meta[List[String]] =
jsonMeta[List[String]]
final case class Table(alias: Option[String]) extends TableDef { final case class Table(alias: Option[String]) extends TableDef {
val tableName = "classifier_setting" val tableName = "classifier_setting"
val cid = Column[Ident]("cid", this) val cid = Column[Ident]("cid", this)
val enabled = Column[Boolean]("enabled", this) val schedule = Column[CalEvent]("schedule", this)
val schedule = Column[CalEvent]("schedule", this) val itemCount = Column[Int]("item_count", this)
val itemCount = Column[Int]("item_count", this) val created = Column[Timestamp]("created", this)
val created = Column[Timestamp]("created", this) val categories = Column[List[String]]("categories", this)
val listType = Column[ListType]("category_list_type", this)
val all = NonEmptyList val all = NonEmptyList
.of[Column[_]](cid, enabled, schedule, itemCount, created) .of[Column[_]](cid, schedule, itemCount, created, categories, listType)
} }
val T = Table(None) val T = Table(None)
@ -40,29 +55,19 @@ object RClassifierSetting {
DML.insert( DML.insert(
T, T,
T.all, T.all,
fr"${v.cid},${v.enabled},${v.schedule},${v.itemCount},${v.created}" fr"${v.cid},${v.schedule},${v.itemCount},${v.created},${v.categoryList},${v.listType}"
) )
def updateAll(v: RClassifierSetting): ConnectionIO[Int] = def update(v: RClassifierSetting): ConnectionIO[Int] =
DML.update(
T,
T.cid === v.cid,
DML.set(
T.enabled.setTo(v.enabled),
T.schedule.setTo(v.schedule),
T.itemCount.setTo(v.itemCount)
)
)
def updateSettings(v: RClassifierSetting): ConnectionIO[Int] =
for { for {
n1 <- DML.update( n1 <- DML.update(
T, T,
T.cid === v.cid, T.cid === v.cid,
DML.set( DML.set(
T.enabled.setTo(v.enabled),
T.schedule.setTo(v.schedule), T.schedule.setTo(v.schedule),
T.itemCount.setTo(v.itemCount) T.itemCount.setTo(v.itemCount),
T.categories.setTo(v.categoryList),
T.listType.setTo(v.listType)
) )
) )
n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO] n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO]
@ -76,24 +81,62 @@ object RClassifierSetting {
def delete(coll: Ident): ConnectionIO[Int] = def delete(coll: Ident): ConnectionIO[Int] =
DML.delete(T, T.cid === coll) DML.delete(T, T.cid === coll)
/** Finds tag categories that exist and match the classifier setting.
* If the setting contains a black list, they are removed from the
* existing categories. If it is a whitelist, the intersection is
* returned.
*/
def getActiveCategories(coll: Ident): ConnectionIO[List[String]] =
(for {
sett <- OptionT(findById(coll))
cats <- OptionT.liftF(RTag.listCategories(coll))
res = sett.listType match {
case ListType.Blacklist =>
cats.diff(sett.categoryList)
case ListType.Whitelist =>
sett.categoryList.intersect(cats)
}
} yield res).getOrElse(Nil)
/** Checks the json array of tag categories and removes those that are not present anymore. */
def fixCategoryList(coll: Ident): ConnectionIO[Int] =
(for {
sett <- OptionT(findById(coll))
cats <- OptionT.liftF(RTag.listCategories(coll))
fixed = sett.categoryList.intersect(cats)
n <- OptionT.liftF(
if (fixed == sett.categoryList) 0.pure[ConnectionIO]
else DML.update(T, T.cid === coll, DML.set(T.categories.setTo(fixed)))
)
} yield n).getOrElse(0)
case class Classifier( case class Classifier(
enabled: Boolean,
schedule: CalEvent, schedule: CalEvent,
itemCount: Int itemCount: Int,
categories: List[String],
listType: ListType
) { ) {
def enabled: Boolean =
listType match {
case ListType.Blacklist =>
true
case ListType.Whitelist =>
categories.nonEmpty
}
def toRecord(coll: Ident, created: Timestamp): RClassifierSetting = def toRecord(coll: Ident, created: Timestamp): RClassifierSetting =
RClassifierSetting( RClassifierSetting(
coll, coll,
enabled,
schedule, schedule,
itemCount, itemCount,
created created,
categories,
listType
) )
} }
object Classifier { object Classifier {
def fromRecord(r: RClassifierSetting): Classifier = def fromRecord(r: RClassifierSetting): Classifier =
Classifier(r.enabled, r.schedule, r.itemCount) Classifier(r.schedule, r.itemCount, r.categoryList, r.listType)
} }
} }

View File

@ -1,6 +1,6 @@
package docspell.store.records package docspell.store.records
import cats.data.NonEmptyList import cats.data.{NonEmptyList, OptionT}
import fs2.Stream import fs2.Stream
import docspell.common._ import docspell.common._
@ -73,13 +73,24 @@ object RCollective {
.map(now => settings.classifier.map(_.toRecord(cid, now))) .map(now => settings.classifier.map(_.toRecord(cid, now)))
n2 <- cls match { n2 <- cls match {
case Some(cr) => case Some(cr) =>
RClassifierSetting.updateSettings(cr) RClassifierSetting.update(cr)
case None => case None =>
RClassifierSetting.delete(cid) RClassifierSetting.delete(cid)
} }
} yield n1 + n2 } yield n1 + n2
def getSettings(coll: Ident): ConnectionIO[Option[Settings]] = { // this hides categories that have been deleted in the meantime
// they are finally removed from the json array once the learn classifier task is run
def getSettings(coll: Ident): ConnectionIO[Option[Settings]] =
(for {
sett <- OptionT(getRawSettings(coll))
prev <- OptionT.fromOption[ConnectionIO](sett.classifier)
cats <- OptionT.liftF(RTag.listCategories(coll))
next = prev.copy(categories = prev.categories.intersect(cats))
} yield sett.copy(classifier = Some(next))).value
private def getRawSettings(coll: Ident): ConnectionIO[Option[Settings]] = {
import RClassifierSetting.stringListMeta
val c = RCollective.as("c") val c = RCollective.as("c")
val cs = RClassifierSetting.as("cs") val cs = RClassifierSetting.as("cs")
@ -87,9 +98,10 @@ object RCollective {
select( select(
c.language.s, c.language.s,
c.integration.s, c.integration.s,
cs.enabled.s,
cs.schedule.s, cs.schedule.s,
cs.itemCount.s cs.itemCount.s,
cs.categories.s,
cs.listType.s
), ),
from(c).leftJoin(cs, cs.cid === c.id), from(c).leftJoin(cs, cs.cid === c.id),
c.id === coll c.id === coll

View File

@ -11,31 +11,38 @@ import Api
import Api.Model.ClassifierSetting exposing (ClassifierSetting) import Api.Model.ClassifierSetting exposing (ClassifierSetting)
import Api.Model.TagList exposing (TagList) import Api.Model.TagList exposing (TagList)
import Comp.CalEventInput import Comp.CalEventInput
import Comp.Dropdown
import Comp.FixedDropdown import Comp.FixedDropdown
import Comp.IntField import Comp.IntField
import Data.CalEvent exposing (CalEvent) import Data.CalEvent exposing (CalEvent)
import Data.Flags exposing (Flags) import Data.Flags exposing (Flags)
import Data.ListType exposing (ListType)
import Data.UiSettings exposing (UiSettings)
import Data.Validated exposing (Validated(..)) import Data.Validated exposing (Validated(..))
import Html exposing (..) import Html exposing (..)
import Html.Attributes exposing (..) import Html.Attributes exposing (..)
import Html.Events exposing (onCheck)
import Http import Http
import Markdown
import Util.Tag import Util.Tag
type alias Model = type alias Model =
{ enabled : Bool { scheduleModel : Comp.CalEventInput.Model
, scheduleModel : Comp.CalEventInput.Model
, schedule : Validated CalEvent , schedule : Validated CalEvent
, itemCountModel : Comp.IntField.Model , itemCountModel : Comp.IntField.Model
, itemCount : Maybe Int , itemCount : Maybe Int
, categoryListModel : Comp.Dropdown.Model String
, categoryListType : ListType
, categoryListTypeModel : Comp.FixedDropdown.Model ListType
} }
type Msg type Msg
= ScheduleMsg Comp.CalEventInput.Msg = ScheduleMsg Comp.CalEventInput.Msg
| ToggleEnabled
| ItemCountMsg Comp.IntField.Msg | ItemCountMsg Comp.IntField.Msg
| GetTagsResp (Result Http.Error TagList)
| CategoryListMsg (Comp.Dropdown.Msg String)
| CategoryListTypeMsg (Comp.FixedDropdown.Msg ListType)
init : Flags -> ClassifierSetting -> ( Model, Cmd Msg ) init : Flags -> ClassifierSetting -> ( Model, Cmd Msg )
@ -48,13 +55,41 @@ init flags sett =
( cem, cec ) = ( cem, cec ) =
Comp.CalEventInput.init flags newSchedule Comp.CalEventInput.init flags newSchedule
in in
( { enabled = sett.enabled ( { scheduleModel = cem
, scheduleModel = cem
, schedule = Data.Validated.Unknown newSchedule , schedule = Data.Validated.Unknown newSchedule
, itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count" , itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count"
, itemCount = Just sett.itemCount , itemCount = Just sett.itemCount
, categoryListModel =
let
mkOption s =
{ value = s, text = s, additional = "" }
minit =
Comp.Dropdown.makeModel
{ multiple = True
, searchable = \n -> n > 0
, makeOption = mkOption
, labelColor = \_ -> \_ -> "grey "
, placeholder = "Choose categories "
}
lm =
Comp.Dropdown.SetSelection sett.categoryList
( m_, _ ) =
Comp.Dropdown.update lm minit
in
m_
, categoryListType =
Data.ListType.fromString sett.listType
|> Maybe.withDefault Data.ListType.Whitelist
, categoryListTypeModel =
Comp.FixedDropdown.initMap Data.ListType.label Data.ListType.all
} }
, Cmd.map ScheduleMsg cec , Cmd.batch
[ Api.getTags flags "" GetTagsResp
, Cmd.map ScheduleMsg cec
]
) )
@ -62,10 +97,11 @@ getSettings : Model -> Validated ClassifierSetting
getSettings model = getSettings model =
Data.Validated.map Data.Validated.map
(\sch -> (\sch ->
{ enabled = model.enabled { schedule =
, schedule =
Data.CalEvent.makeEvent sch Data.CalEvent.makeEvent sch
, itemCount = Maybe.withDefault 0 model.itemCount , itemCount = Maybe.withDefault 0 model.itemCount
, listType = Data.ListType.toString model.categoryListType
, categoryList = Comp.Dropdown.getSelected model.categoryListModel
} }
) )
model.schedule model.schedule
@ -74,6 +110,20 @@ getSettings model =
update : Flags -> Msg -> Model -> ( Model, Cmd Msg ) update : Flags -> Msg -> Model -> ( Model, Cmd Msg )
update flags msg model = update flags msg model =
case msg of case msg of
GetTagsResp (Ok tl) ->
let
categories =
Util.Tag.getCategories tl.items
|> List.sort
lm =
Comp.Dropdown.SetOptions categories
in
update flags (CategoryListMsg lm) model
GetTagsResp (Err _) ->
( model, Cmd.none )
ScheduleMsg lmsg -> ScheduleMsg lmsg ->
let let
( cm, cc, ce ) = ( cm, cc, ce ) =
@ -90,11 +140,6 @@ update flags msg model =
, Cmd.map ScheduleMsg cc , Cmd.map ScheduleMsg cc
) )
ToggleEnabled ->
( { model | enabled = not model.enabled }
, Cmd.none
)
ItemCountMsg lmsg -> ItemCountMsg lmsg ->
let let
( im, iv ) = ( im, iv ) =
@ -107,32 +152,61 @@ update flags msg model =
, Cmd.none , Cmd.none
) )
CategoryListMsg lm ->
let
( m_, cmd_ ) =
Comp.Dropdown.update lm model.categoryListModel
in
( { model | categoryListModel = m_ }
, Cmd.map CategoryListMsg cmd_
)
view : Model -> Html Msg CategoryListTypeMsg lm ->
view model = let
( m_, sel ) =
Comp.FixedDropdown.update lm model.categoryListTypeModel
newListType =
Maybe.withDefault model.categoryListType sel
in
( { model
| categoryListTypeModel = m_
, categoryListType = newListType
}
, Cmd.none
)
view : UiSettings -> Model -> Html Msg
view settings model =
let
catListTypeItem =
Comp.FixedDropdown.Item
model.categoryListType
(Data.ListType.label model.categoryListType)
in
div [] div []
[ div [ Markdown.toHtml [ class "ui basic segment" ]
[ class "field" """
Auto-tagging works by learning from existing documents. The more
documents you have correctly tagged, the better. Learning is done
periodically based on a schedule. You can specify tag-groups that
should either be used (whitelist) or not used (blacklist) for
learning.
Use an empty whitelist to disable auto tagging.
"""
, div [ class "field" ]
[ label [] [ text "Is the following a blacklist or whitelist?" ]
, Html.map CategoryListTypeMsg
(Comp.FixedDropdown.view (Just catListTypeItem) model.categoryListTypeModel)
] ]
[ div [ class "ui checkbox" ] , div [ class "field" ]
[ input [ label [] [ text "Choose tag categories for learning" ]
[ type_ "checkbox" , Html.map CategoryListMsg
, onCheck (\_ -> ToggleEnabled) (Comp.Dropdown.view settings model.categoryListModel)
, checked model.enabled
]
[]
, label [] [ text "Enable classification" ]
, span [ class "small-info" ]
[ text "Disable document classification if not needed."
]
]
]
, div [ class "ui basic segment" ]
[ text "Document classification tries to predict a tag for new incoming documents. This "
, text "works by learning from existing documents in order to find common patterns within "
, text "the text. The more documents you have correctly tagged, the better. Learning is done "
, text "periodically based on a schedule and you need to specify a tag-group that should "
, text "be used for learning."
] ]
, Html.map ItemCountMsg , Html.map ItemCountMsg
(Comp.IntField.viewWithInfo (Comp.IntField.viewWithInfo

View File

@ -280,7 +280,7 @@ view flags settings model =
, ( "invisible hidden", not flags.config.showClassificationSettings ) , ( "invisible hidden", not flags.config.showClassificationSettings )
] ]
] ]
[ text "Document Classifier" [ text "Auto-Tagging"
] ]
, div , div
[ classList [ classList
@ -289,13 +289,10 @@ view flags settings model =
] ]
] ]
[ Html.map ClassifierSettingMsg [ Html.map ClassifierSettingMsg
(Comp.ClassifierSettingsForm.view model.classifierModel) (Comp.ClassifierSettingsForm.view settings model.classifierModel)
, div [ class "ui vertical segment" ] , div [ class "ui vertical segment" ]
[ button [ button
[ classList [ class "ui small secondary basic button"
[ ( "ui small secondary basic button", True )
, ( "disabled", not model.classifierModel.enabled )
]
, title "Starts a task to train a classifier" , title "Starts a task to train a classifier"
, onClick StartClassifierTask , onClick StartClassifierTask
] ]

View File

@ -0,0 +1,50 @@
module Data.ListType exposing
( ListType(..)
, all
, fromString
, label
, toString
)
type ListType
= Blacklist
| Whitelist
all : List ListType
all =
[ Blacklist, Whitelist ]
toString : ListType -> String
toString lt =
case lt of
Blacklist ->
"blacklist"
Whitelist ->
"whitelist"
label : ListType -> String
label lt =
case lt of
Blacklist ->
"Blacklist"
Whitelist ->
"Whitelist"
fromString : String -> Maybe ListType
fromString str =
case String.toLower str of
"blacklist" ->
Just Blacklist
"whitelist" ->
Just Whitelist
_ ->
Nothing