diff --git a/build.sbt b/build.sbt index 91016ca0..ccdc73ca 100644 --- a/build.sbt +++ b/build.sbt @@ -131,7 +131,8 @@ val openapiScalaSettings = Seq( case "ident" => field => field.copy(typeDef = TypeDef("Ident", Imports("docspell.common.Ident"))) case "accountid" => - field => field.copy(typeDef = TypeDef("AccountId", Imports("docspell.common.AccountId"))) + field => + field.copy(typeDef = TypeDef("AccountId", Imports("docspell.common.AccountId"))) case "collectivestate" => field => field.copy(typeDef = @@ -190,6 +191,9 @@ val openapiScalaSettings = Seq( field.copy(typeDef = TypeDef("CustomFieldType", Imports("docspell.common.CustomFieldType")) ) + case "listtype" => + field => + field.copy(typeDef = TypeDef("ListType", Imports("docspell.common.ListType"))) })) ) diff --git a/modules/common/src/main/scala/docspell/common/ListType.scala b/modules/common/src/main/scala/docspell/common/ListType.scala new file mode 100644 index 00000000..d2b29e91 --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/ListType.scala @@ -0,0 +1,33 @@ +package docspell.common + +import cats.data.NonEmptyList + +import io.circe.{Decoder, Encoder} + +sealed trait ListType { self: Product => + def name: String = + productPrefix.toLowerCase +} + +object ListType { + + case object Whitelist extends ListType + val whitelist: ListType = Whitelist + + case object Blacklist extends ListType + val blacklist: ListType = Blacklist + + val all: NonEmptyList[ListType] = NonEmptyList.of(Whitelist, Blacklist) + + def fromString(name: String): Either[String, ListType] = + all.find(_.name.equalsIgnoreCase(name)).toRight(s"Unknown list type: $name") + + def unsafeFromString(name: String): ListType = + fromString(name).fold(sys.error, identity) + + implicit val jsonEncoder: Encoder[ListType] = + Encoder.encodeString.contramap(_.name) + + implicit val jsonDecoder: Decoder[ListType] = + Decoder.decodeString.emap(fromString) +} diff --git a/modules/joex/src/main/scala/docspell/joex/learn/ClassifierName.scala b/modules/joex/src/main/scala/docspell/joex/learn/ClassifierName.scala index d667ff80..0ed2d97e 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/ClassifierName.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/ClassifierName.scala @@ -4,9 +4,7 @@ import cats.data.NonEmptyList import cats.implicits._ import docspell.common.Ident -import docspell.store.qb.DSL._ -import docspell.store.qb._ -import docspell.store.records.{RClassifierModel, RTag} +import docspell.store.records.{RClassifierModel, RClassifierSetting} import doobie._ @@ -16,7 +14,7 @@ object ClassifierName { def apply(name: String): ClassifierName = new ClassifierName(name) - val categoryPrefix = "tagcategory-" + private val categoryPrefix = "tagcategory-" def tagCategory(cat: String): ClassifierName = apply(s"${categoryPrefix}${cat}") @@ -35,7 +33,7 @@ object ClassifierName { def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = for { - categories <- RTag.listCategories(coll) + categories <- RClassifierSetting.getActiveCategories(coll) models <- NonEmptyList.fromList(categories) match { case Some(nel) => RClassifierModel.findAllByName(coll, nel.map(tagCategory).map(_.name)) @@ -44,22 +42,20 @@ object ClassifierName { } } yield models - def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = { - val model = RClassifierModel.as("m") - val tag = RTag.as("t") - val sql = - Select( - select(model.all), - from(model), - model.cid === coll && model.name.notIn( - Select( - select(concat(lit(categoryPrefix), tag.category.s)), - from(tag), - tag.cid === coll && tag.category.isNotNull - ).distinct - ) - ).build - sql.query[RClassifierModel].to[List] - } + def findOrphanTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] = + for { + cats <- RClassifierSetting.getActiveCategories(coll) + allModels = RClassifierModel.findAllByQuery(coll, s"${categoryPrefix}%") + result <- NonEmptyList.fromList(cats) match { + case Some(nel) => + allModels.flatMap(all => + RClassifierModel + .findAllByName(coll, nel.map(tagCategory).map(_.name)) + .map(active => all.diff(active)) + ) + case None => + allModels + } + } yield result } diff --git a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala index 52ee70ac..843ee951 100644 --- a/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala +++ b/modules/joex/src/main/scala/docspell/joex/learn/LearnClassifierTask.scala @@ -10,7 +10,7 @@ import docspell.backend.ops.OCollective import docspell.common._ import docspell.joex.Config import docspell.joex.scheduler._ -import docspell.store.records.{RClassifierModel, RClassifierSetting, RTag} +import docspell.store.records.{RClassifierModel, RClassifierSetting} object LearnClassifierTask { val pageSep = " --n-- " @@ -26,15 +26,23 @@ object LearnClassifierTask { analyser: TextAnalyser[F] ): Task[F, Args, Unit] = Task { ctx => - (for { - sett <- findActiveSettings[F](ctx, cfg) - maxItems = math.min(cfg.classification.itemCount, sett.itemCount) - _ <- OptionT.liftF( - learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx) - ) - _ <- OptionT.liftF(clearObsoleteModels(ctx)) - } yield ()) - .getOrElseF(logInactiveWarning(ctx.logger)) + val learnTags = + for { + sett <- findActiveSettings[F](ctx, cfg) + maxItems = math.min(cfg.classification.itemCount, sett.itemCount) + _ <- OptionT.liftF( + learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx) + ) + } yield () + + // learn classifier models from active tag categories + learnTags.getOrElseF(logInactiveWarning(ctx.logger)) *> + // delete classifier model files for categories that have been removed + clearObsoleteTagModels(ctx) *> + // when tags are deleted, categories may get removed. fix the json array + ctx.store + .transact(RClassifierSetting.fixCategoryList(ctx.args.collective)) + .map(_ => ()) } def learnTagCategory[F[_]: Sync: ContextShift, A]( @@ -64,13 +72,13 @@ object LearnClassifierTask { ): Task[F, A, Unit] = Task { ctx => for { - cats <- ctx.store.transact(RTag.listCategories(collective)) + cats <- ctx.store.transact(RClassifierSetting.getActiveCategories(collective)) task = learnTagCategory[F, A](analyser, collective, maxItems) _ _ <- cats.map(task).traverse(_.run(ctx)) } yield () } - private def clearObsoleteModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = + private def clearObsoleteTagModels[F[_]: Sync](ctx: Context[F, Args]): F[Unit] = for { list <- ctx.store.transact( ClassifierName.findOrphanTagModels(ctx.args.collective) @@ -98,6 +106,6 @@ object LearnClassifierTask { private def logInactiveWarning[F[_]: Sync](logger: Logger[F]): F[Unit] = logger.warn( - "Classification is disabled. Check joex config and the collective settings." + "Auto-tagging is disabled. Check joex config and the collective settings." ) } diff --git a/modules/restapi/src/main/resources/docspell-openapi.yml b/modules/restapi/src/main/resources/docspell-openapi.yml index d32d2352..90ce21ae 100644 --- a/modules/restapi/src/main/resources/docspell-openapi.yml +++ b/modules/restapi/src/main/resources/docspell-openapi.yml @@ -4850,12 +4850,11 @@ components: description: | Settings for learning a document classifier. required: - - enabled - schedule - itemCount + - categoryList + - listType properties: - enabled: - type: boolean itemCount: type: integer format: int32 @@ -4865,6 +4864,16 @@ components: schedule: type: string format: calevent + categoryList: + type: array + items: + type: string + listType: + type: string + format: listtype + enum: + - blacklist + - whitelist SourceList: description: | diff --git a/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala b/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala index ee868254..663ca46b 100644 --- a/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala +++ b/modules/restserver/src/main/scala/docspell/restserver/routes/CollectiveRoutes.scala @@ -6,7 +6,7 @@ import cats.implicits._ import docspell.backend.BackendApp import docspell.backend.auth.AuthToken import docspell.backend.ops.OCollective -import docspell.common.MakePreviewArgs +import docspell.common.{ListType, MakePreviewArgs} import docspell.restapi.model._ import docspell.restserver.conv.Conversions import docspell.restserver.http4s._ @@ -44,9 +44,10 @@ object CollectiveRoutes { settings.integrationEnabled, Some( OCollective.Classifier( - settings.classifier.enabled, settings.classifier.schedule, - settings.classifier.itemCount + settings.classifier.itemCount, + settings.classifier.categoryList, + settings.classifier.listType ) ) ) @@ -64,11 +65,12 @@ object CollectiveRoutes { c.language, c.integrationEnabled, ClassifierSetting( - c.classifier.exists(_.enabled), c.classifier.map(_.itemCount).getOrElse(0), c.classifier .map(_.schedule) - .getOrElse(CalEvent.unsafe("*-1/3-01 01:00:00")) + .getOrElse(CalEvent.unsafe("*-1/3-01 01:00:00")), + c.classifier.map(_.categories).getOrElse(Nil), + c.classifier.map(_.listType).getOrElse(ListType.whitelist) ) ) ) diff --git a/modules/store/src/main/resources/db/migration/h2/V1.17.1__classifier_model.sql b/modules/store/src/main/resources/db/migration/h2/V1.17.1__classifier_model.sql index 11be9909..d0aab38b 100644 --- a/modules/store/src/main/resources/db/migration/h2/V1.17.1__classifier_model.sql +++ b/modules/store/src/main/resources/db/migration/h2/V1.17.1__classifier_model.sql @@ -14,8 +14,31 @@ select random_uuid() as "id", "cid", concat('tagcategory-', "category") as "name from "classifier_setting" where "file_id" is not null; +alter table "classifier_setting" +add column "categories" text; + +alter table "classifier_setting" +add column "category_list_type" varchar(254); + +update "classifier_setting" +set "category_list_type" = 'whitelist'; + +update "classifier_setting" +set "categories" = concat('["', category, '"]') +where category is not null; + +update "classifier_setting" +set "categories" = '[]' +where category is null; + alter table "classifier_setting" drop column "category"; alter table "classifier_setting" drop column "file_id"; + +ALTER TABLE "classifier_setting" +ALTER COLUMN "categories" SET NOT NULL; + +ALTER TABLE "classifier_setting" +ALTER COLUMN "category_list_type" SET NOT NULL; diff --git a/modules/store/src/main/resources/db/migration/mariadb/V1.17.1__classifier_model.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.17.1__classifier_model.sql index d6f9da6e..59bec4b2 100644 --- a/modules/store/src/main/resources/db/migration/mariadb/V1.17.1__classifier_model.sql +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.17.1__classifier_model.sql @@ -14,13 +14,35 @@ select md5(rand()) as id, `cid`,concat('tagcategory-', `category`) as `name`, `f from `classifier_setting` where `file_id` is not null; +alter table `classifier_setting` +add column (`categories` mediumtext); + +alter table `classifier_setting` +add column (`category_list_type` varchar(254)); + +update `classifier_setting` +set `category_list_type` = 'whitelist'; + +update `classifier_setting` +set `categories` = concat('[`', category, '`]') +where category is not null; + +update `classifier_setting` +set `categories` = '[]' +where category is null; + alter table `classifier_setting` drop column `category`; --- mariadb needs special treatment when dropping a column that is part --- of an index and foreign key +-- mariadb requires to drop constraint manually when dropping a column alter table `classifier_setting` drop constraint `classifier_setting_ibfk_2`; alter table `classifier_setting` drop column `file_id`; + +ALTER TABLE `classifier_setting` +MODIFY `categories` mediumtext NOT NULL; + +ALTER TABLE `classifier_setting` +MODIFY `category_list_type` varchar(254) NOT NULL; diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.17.1__classifier_model.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.17.1__classifier_model.sql index 81e327ff..1e44679a 100644 --- a/modules/store/src/main/resources/db/migration/postgresql/V1.17.1__classifier_model.sql +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.17.1__classifier_model.sql @@ -14,8 +14,31 @@ select md5(random()::text) as id, "cid",'tagcategory-' || "category" as "name", from "classifier_setting" where "file_id" is not null; +alter table "classifier_setting" +add column "categories" text; + +alter table "classifier_setting" +add column "category_list_type" varchar(254); + +update "classifier_setting" +set "category_list_type" = 'whitelist'; + +update "classifier_setting" +set "categories" = concat('["', category, '"]') +where category is not null; + +update "classifier_setting" +set "categories" = '[]' +where category is null; + alter table "classifier_setting" drop column "category"; alter table "classifier_setting" drop column "file_id"; + +ALTER TABLE "classifier_setting" +ALTER COLUMN "categories" SET NOT NULL; + +ALTER TABLE "classifier_setting" +ALTER COLUMN "category_list_type" SET NOT NULL; diff --git a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala index cbe3ab0f..db60a19e 100644 --- a/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala +++ b/modules/store/src/main/scala/docspell/store/impl/DoobieMeta.scala @@ -97,6 +97,9 @@ trait DoobieMeta extends EmilDoobieMeta { implicit val metaCustomFieldType: Meta[CustomFieldType] = Meta[String].timap(CustomFieldType.unsafe)(_.name) + + implicit val metaListType: Meta[ListType] = + Meta[String].timap(ListType.unsafeFromString)(_.name) } object DoobieMeta extends DoobieMeta { diff --git a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala index cca0079c..2032e61e 100644 --- a/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala +++ b/modules/store/src/main/scala/docspell/store/records/RClassifierModel.scala @@ -57,7 +57,12 @@ object RClassifierModel { def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] = for { - n <- DML.update(T, T.cid === coll && T.name === name, DML.set(T.fileId.setTo(fid))) + now <- Timestamp.current[ConnectionIO] + n <- DML.update( + T, + T.cid === coll && T.name === name, + DML.set(T.fileId.setTo(fid), T.created.setTo(now)) + ) k <- if (n == 0) createNew[ConnectionIO](coll, name, fid).flatMap(insert) else 0.pure[ConnectionIO] @@ -87,4 +92,11 @@ object RClassifierModel { .query[RClassifierModel] .to[List] + def findAllByQuery( + cid: Ident, + nameQuery: String + ): ConnectionIO[List[RClassifierModel]] = + Select(select(T.all), from(T), T.cid === cid && T.name.like(nameQuery)).build + .query[RClassifierModel] + .to[List] } diff --git a/modules/store/src/main/scala/docspell/store/records/RClassifierSetting.scala b/modules/store/src/main/scala/docspell/store/records/RClassifierSetting.scala index fe634161..9c31a5c2 100644 --- a/modules/store/src/main/scala/docspell/store/records/RClassifierSetting.scala +++ b/modules/store/src/main/scala/docspell/store/records/RClassifierSetting.scala @@ -1,6 +1,6 @@ package docspell.store.records -import cats.data.NonEmptyList +import cats.data.{NonEmptyList, OptionT} import cats.implicits._ import docspell.common._ @@ -13,23 +13,38 @@ import doobie.implicits._ case class RClassifierSetting( cid: Ident, - enabled: Boolean, schedule: CalEvent, itemCount: Int, - created: Timestamp -) {} + created: Timestamp, + categoryList: List[String], + listType: ListType +) { + + def enabled: Boolean = + listType match { + case ListType.Blacklist => + true + case ListType.Whitelist => + categoryList.nonEmpty + } +} object RClassifierSetting { + // the categoryList is stored as a json array + implicit val stringListMeta: Meta[List[String]] = + jsonMeta[List[String]] + final case class Table(alias: Option[String]) extends TableDef { val tableName = "classifier_setting" - val cid = Column[Ident]("cid", this) - val enabled = Column[Boolean]("enabled", this) - val schedule = Column[CalEvent]("schedule", this) - val itemCount = Column[Int]("item_count", this) - val created = Column[Timestamp]("created", this) + val cid = Column[Ident]("cid", this) + val schedule = Column[CalEvent]("schedule", this) + val itemCount = Column[Int]("item_count", this) + val created = Column[Timestamp]("created", this) + val categories = Column[List[String]]("categories", this) + val listType = Column[ListType]("category_list_type", this) val all = NonEmptyList - .of[Column[_]](cid, enabled, schedule, itemCount, created) + .of[Column[_]](cid, schedule, itemCount, created, categories, listType) } val T = Table(None) @@ -40,29 +55,19 @@ object RClassifierSetting { DML.insert( T, T.all, - fr"${v.cid},${v.enabled},${v.schedule},${v.itemCount},${v.created}" + fr"${v.cid},${v.schedule},${v.itemCount},${v.created},${v.categoryList},${v.listType}" ) - def updateAll(v: RClassifierSetting): ConnectionIO[Int] = - DML.update( - T, - T.cid === v.cid, - DML.set( - T.enabled.setTo(v.enabled), - T.schedule.setTo(v.schedule), - T.itemCount.setTo(v.itemCount) - ) - ) - - def updateSettings(v: RClassifierSetting): ConnectionIO[Int] = + def update(v: RClassifierSetting): ConnectionIO[Int] = for { n1 <- DML.update( T, T.cid === v.cid, DML.set( - T.enabled.setTo(v.enabled), T.schedule.setTo(v.schedule), - T.itemCount.setTo(v.itemCount) + T.itemCount.setTo(v.itemCount), + T.categories.setTo(v.categoryList), + T.listType.setTo(v.listType) ) ) n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO] @@ -76,24 +81,62 @@ object RClassifierSetting { def delete(coll: Ident): ConnectionIO[Int] = DML.delete(T, T.cid === coll) + /** Finds tag categories that exist and match the classifier setting. + * If the setting contains a black list, they are removed from the + * existing categories. If it is a whitelist, the intersection is + * returned. + */ + def getActiveCategories(coll: Ident): ConnectionIO[List[String]] = + (for { + sett <- OptionT(findById(coll)) + cats <- OptionT.liftF(RTag.listCategories(coll)) + res = sett.listType match { + case ListType.Blacklist => + cats.diff(sett.categoryList) + case ListType.Whitelist => + sett.categoryList.intersect(cats) + } + } yield res).getOrElse(Nil) + + /** Checks the json array of tag categories and removes those that are not present anymore. */ + def fixCategoryList(coll: Ident): ConnectionIO[Int] = + (for { + sett <- OptionT(findById(coll)) + cats <- OptionT.liftF(RTag.listCategories(coll)) + fixed = sett.categoryList.intersect(cats) + n <- OptionT.liftF( + if (fixed == sett.categoryList) 0.pure[ConnectionIO] + else DML.update(T, T.cid === coll, DML.set(T.categories.setTo(fixed))) + ) + } yield n).getOrElse(0) + case class Classifier( - enabled: Boolean, schedule: CalEvent, - itemCount: Int + itemCount: Int, + categories: List[String], + listType: ListType ) { + def enabled: Boolean = + listType match { + case ListType.Blacklist => + true + case ListType.Whitelist => + categories.nonEmpty + } def toRecord(coll: Ident, created: Timestamp): RClassifierSetting = RClassifierSetting( coll, - enabled, schedule, itemCount, - created + created, + categories, + listType ) } object Classifier { def fromRecord(r: RClassifierSetting): Classifier = - Classifier(r.enabled, r.schedule, r.itemCount) + Classifier(r.schedule, r.itemCount, r.categoryList, r.listType) } } diff --git a/modules/store/src/main/scala/docspell/store/records/RCollective.scala b/modules/store/src/main/scala/docspell/store/records/RCollective.scala index f6114a38..d1a0cb09 100644 --- a/modules/store/src/main/scala/docspell/store/records/RCollective.scala +++ b/modules/store/src/main/scala/docspell/store/records/RCollective.scala @@ -1,6 +1,6 @@ package docspell.store.records -import cats.data.NonEmptyList +import cats.data.{NonEmptyList, OptionT} import fs2.Stream import docspell.common._ @@ -73,13 +73,24 @@ object RCollective { .map(now => settings.classifier.map(_.toRecord(cid, now))) n2 <- cls match { case Some(cr) => - RClassifierSetting.updateSettings(cr) + RClassifierSetting.update(cr) case None => RClassifierSetting.delete(cid) } } yield n1 + n2 - def getSettings(coll: Ident): ConnectionIO[Option[Settings]] = { + // this hides categories that have been deleted in the meantime + // they are finally removed from the json array once the learn classifier task is run + def getSettings(coll: Ident): ConnectionIO[Option[Settings]] = + (for { + sett <- OptionT(getRawSettings(coll)) + prev <- OptionT.fromOption[ConnectionIO](sett.classifier) + cats <- OptionT.liftF(RTag.listCategories(coll)) + next = prev.copy(categories = prev.categories.intersect(cats)) + } yield sett.copy(classifier = Some(next))).value + + private def getRawSettings(coll: Ident): ConnectionIO[Option[Settings]] = { + import RClassifierSetting.stringListMeta val c = RCollective.as("c") val cs = RClassifierSetting.as("cs") @@ -87,9 +98,10 @@ object RCollective { select( c.language.s, c.integration.s, - cs.enabled.s, cs.schedule.s, - cs.itemCount.s + cs.itemCount.s, + cs.categories.s, + cs.listType.s ), from(c).leftJoin(cs, cs.cid === c.id), c.id === coll diff --git a/modules/webapp/src/main/elm/Comp/ClassifierSettingsForm.elm b/modules/webapp/src/main/elm/Comp/ClassifierSettingsForm.elm index 1181e239..579506d6 100644 --- a/modules/webapp/src/main/elm/Comp/ClassifierSettingsForm.elm +++ b/modules/webapp/src/main/elm/Comp/ClassifierSettingsForm.elm @@ -11,31 +11,38 @@ import Api import Api.Model.ClassifierSetting exposing (ClassifierSetting) import Api.Model.TagList exposing (TagList) import Comp.CalEventInput +import Comp.Dropdown import Comp.FixedDropdown import Comp.IntField import Data.CalEvent exposing (CalEvent) import Data.Flags exposing (Flags) +import Data.ListType exposing (ListType) +import Data.UiSettings exposing (UiSettings) import Data.Validated exposing (Validated(..)) import Html exposing (..) import Html.Attributes exposing (..) -import Html.Events exposing (onCheck) import Http +import Markdown import Util.Tag type alias Model = - { enabled : Bool - , scheduleModel : Comp.CalEventInput.Model + { scheduleModel : Comp.CalEventInput.Model , schedule : Validated CalEvent , itemCountModel : Comp.IntField.Model , itemCount : Maybe Int + , categoryListModel : Comp.Dropdown.Model String + , categoryListType : ListType + , categoryListTypeModel : Comp.FixedDropdown.Model ListType } type Msg = ScheduleMsg Comp.CalEventInput.Msg - | ToggleEnabled | ItemCountMsg Comp.IntField.Msg + | GetTagsResp (Result Http.Error TagList) + | CategoryListMsg (Comp.Dropdown.Msg String) + | CategoryListTypeMsg (Comp.FixedDropdown.Msg ListType) init : Flags -> ClassifierSetting -> ( Model, Cmd Msg ) @@ -48,13 +55,41 @@ init flags sett = ( cem, cec ) = Comp.CalEventInput.init flags newSchedule in - ( { enabled = sett.enabled - , scheduleModel = cem + ( { scheduleModel = cem , schedule = Data.Validated.Unknown newSchedule , itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count" , itemCount = Just sett.itemCount + , categoryListModel = + let + mkOption s = + { value = s, text = s, additional = "" } + + minit = + Comp.Dropdown.makeModel + { multiple = True + , searchable = \n -> n > 0 + , makeOption = mkOption + , labelColor = \_ -> \_ -> "grey " + , placeholder = "Choose categories …" + } + + lm = + Comp.Dropdown.SetSelection sett.categoryList + + ( m_, _ ) = + Comp.Dropdown.update lm minit + in + m_ + , categoryListType = + Data.ListType.fromString sett.listType + |> Maybe.withDefault Data.ListType.Whitelist + , categoryListTypeModel = + Comp.FixedDropdown.initMap Data.ListType.label Data.ListType.all } - , Cmd.map ScheduleMsg cec + , Cmd.batch + [ Api.getTags flags "" GetTagsResp + , Cmd.map ScheduleMsg cec + ] ) @@ -62,10 +97,11 @@ getSettings : Model -> Validated ClassifierSetting getSettings model = Data.Validated.map (\sch -> - { enabled = model.enabled - , schedule = + { schedule = Data.CalEvent.makeEvent sch , itemCount = Maybe.withDefault 0 model.itemCount + , listType = Data.ListType.toString model.categoryListType + , categoryList = Comp.Dropdown.getSelected model.categoryListModel } ) model.schedule @@ -74,6 +110,20 @@ getSettings model = update : Flags -> Msg -> Model -> ( Model, Cmd Msg ) update flags msg model = case msg of + GetTagsResp (Ok tl) -> + let + categories = + Util.Tag.getCategories tl.items + |> List.sort + + lm = + Comp.Dropdown.SetOptions categories + in + update flags (CategoryListMsg lm) model + + GetTagsResp (Err _) -> + ( model, Cmd.none ) + ScheduleMsg lmsg -> let ( cm, cc, ce ) = @@ -90,11 +140,6 @@ update flags msg model = , Cmd.map ScheduleMsg cc ) - ToggleEnabled -> - ( { model | enabled = not model.enabled } - , Cmd.none - ) - ItemCountMsg lmsg -> let ( im, iv ) = @@ -107,32 +152,61 @@ update flags msg model = , Cmd.none ) + CategoryListMsg lm -> + let + ( m_, cmd_ ) = + Comp.Dropdown.update lm model.categoryListModel + in + ( { model | categoryListModel = m_ } + , Cmd.map CategoryListMsg cmd_ + ) -view : Model -> Html Msg -view model = + CategoryListTypeMsg lm -> + let + ( m_, sel ) = + Comp.FixedDropdown.update lm model.categoryListTypeModel + + newListType = + Maybe.withDefault model.categoryListType sel + in + ( { model + | categoryListTypeModel = m_ + , categoryListType = newListType + } + , Cmd.none + ) + + +view : UiSettings -> Model -> Html Msg +view settings model = + let + catListTypeItem = + Comp.FixedDropdown.Item + model.categoryListType + (Data.ListType.label model.categoryListType) + in div [] - [ div - [ class "field" + [ Markdown.toHtml [ class "ui basic segment" ] + """ + +Auto-tagging works by learning from existing documents. The more +documents you have correctly tagged, the better. Learning is done +periodically based on a schedule. You can specify tag-groups that +should either be used (whitelist) or not used (blacklist) for +learning. + +Use an empty whitelist to disable auto tagging. + + """ + , div [ class "field" ] + [ label [] [ text "Is the following a blacklist or whitelist?" ] + , Html.map CategoryListTypeMsg + (Comp.FixedDropdown.view (Just catListTypeItem) model.categoryListTypeModel) ] - [ div [ class "ui checkbox" ] - [ input - [ type_ "checkbox" - , onCheck (\_ -> ToggleEnabled) - , checked model.enabled - ] - [] - , label [] [ text "Enable classification" ] - , span [ class "small-info" ] - [ text "Disable document classification if not needed." - ] - ] - ] - , div [ class "ui basic segment" ] - [ text "Document classification tries to predict a tag for new incoming documents. This " - , text "works by learning from existing documents in order to find common patterns within " - , text "the text. The more documents you have correctly tagged, the better. Learning is done " - , text "periodically based on a schedule and you need to specify a tag-group that should " - , text "be used for learning." + , div [ class "field" ] + [ label [] [ text "Choose tag categories for learning" ] + , Html.map CategoryListMsg + (Comp.Dropdown.view settings model.categoryListModel) ] , Html.map ItemCountMsg (Comp.IntField.viewWithInfo diff --git a/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm b/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm index c73217e5..277e11bd 100644 --- a/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm +++ b/modules/webapp/src/main/elm/Comp/CollectiveSettingsForm.elm @@ -280,7 +280,7 @@ view flags settings model = , ( "invisible hidden", not flags.config.showClassificationSettings ) ] ] - [ text "Document Classifier" + [ text "Auto-Tagging" ] , div [ classList @@ -289,13 +289,10 @@ view flags settings model = ] ] [ Html.map ClassifierSettingMsg - (Comp.ClassifierSettingsForm.view model.classifierModel) + (Comp.ClassifierSettingsForm.view settings model.classifierModel) , div [ class "ui vertical segment" ] [ button - [ classList - [ ( "ui small secondary basic button", True ) - , ( "disabled", not model.classifierModel.enabled ) - ] + [ class "ui small secondary basic button" , title "Starts a task to train a classifier" , onClick StartClassifierTask ] diff --git a/modules/webapp/src/main/elm/Data/ListType.elm b/modules/webapp/src/main/elm/Data/ListType.elm new file mode 100644 index 00000000..8a9a75fb --- /dev/null +++ b/modules/webapp/src/main/elm/Data/ListType.elm @@ -0,0 +1,50 @@ +module Data.ListType exposing + ( ListType(..) + , all + , fromString + , label + , toString + ) + + +type ListType + = Blacklist + | Whitelist + + +all : List ListType +all = + [ Blacklist, Whitelist ] + + +toString : ListType -> String +toString lt = + case lt of + Blacklist -> + "blacklist" + + Whitelist -> + "whitelist" + + +label : ListType -> String +label lt = + case lt of + Blacklist -> + "Blacklist" + + Whitelist -> + "Whitelist" + + +fromString : String -> Maybe ListType +fromString str = + case String.toLower str of + "blacklist" -> + Just Blacklist + + "whitelist" -> + Just Whitelist + + _ -> + Nothing