Extend guessing tags to all tag categories

This commit is contained in:
Eike Kettner 2021-01-18 13:35:53 +01:00
parent c5778880d9
commit 249f9e6e2a
18 changed files with 384 additions and 168 deletions

View File

@ -11,6 +11,7 @@ import docspell.analysis.classifier
import docspell.analysis.classifier.TextClassifier._ import docspell.analysis.classifier.TextClassifier._
import docspell.analysis.nlp.Properties import docspell.analysis.nlp.Properties
import docspell.common._ import docspell.common._
import docspell.common.syntax.FileSyntax._
import edu.stanford.nlp.classify.ColumnDataClassifier import edu.stanford.nlp.classify.ColumnDataClassifier
@ -28,7 +29,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
.use { dir => .use { dir =>
for { for {
rawData <- writeDataFile(blocker, dir, data) rawData <- writeDataFile(blocker, dir, data)
_ <- logger.info(s"Learning from ${rawData.count} items.") _ <- logger.debug(s"Learning from ${rawData.count} items.")
trainData <- splitData(logger, rawData) trainData <- splitData(logger, rawData)
scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m)) scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
sorted = scores.sortBy(-_.score) sorted = scores.sortBy(-_.score)
@ -138,9 +139,9 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
props: Map[String, String] props: Map[String, String]
): Map[String, String] = ): Map[String, String] =
prepend("2.", props) ++ Map( prepend("2.", props) ++ Map(
"trainFile" -> trainData.train.normalize().toAbsolutePath().toString(), "trainFile" -> trainData.train.absolutePathAsString,
"testFile" -> trainData.test.normalize().toAbsolutePath().toString(), "testFile" -> trainData.test.absolutePathAsString,
"serializeTo" -> trainData.modelFile.normalize().toAbsolutePath().toString() "serializeTo" -> trainData.modelFile.absolutePathAsString
).toList ).toList
case class RawData(count: Long, file: Path) case class RawData(count: Long, file: Path)

View File

@ -169,7 +169,7 @@ object JoexAppImpl {
.withTask( .withTask(
JobTask.json( JobTask.json(
LearnClassifierArgs.taskName, LearnClassifierArgs.taskName,
LearnClassifierTask[F](cfg.textAnalysis, blocker, analyser), LearnClassifierTask[F](cfg.textAnalysis, analyser),
LearnClassifierTask.onCancel[F] LearnClassifierTask.onCancel[F]
) )
) )

View File

@ -0,0 +1,45 @@
package docspell.joex.learn
import cats.data.NonEmptyList
import cats.implicits._
import docspell.common.Ident
import docspell.store.records.{RClassifierModel, RTag}
import doobie._
/** The name under which a trained classifier model is stored.
  *
  * A value class around the plain string name.
  */
final class ClassifierName(val name: String) extends AnyVal

object ClassifierName {

  def apply(name: String): ClassifierName =
    new ClassifierName(name)

  /** Name passed as the fallback when listing the tag categories of a
    * collective (see `findTagModels`).
    */
  val noCategory: ClassifierName =
    ClassifierName("__docspell_no_category__")

  /** Prefix shared by all classifier names created via `tagCategory`. */
  val categoryPrefix = "tagcategory-"

  /** The classifier name for the given tag category. */
  def tagCategory(cat: String): ClassifierName =
    ClassifierName(categoryPrefix + cat)

  val concernedPerson: ClassifierName =
    ClassifierName("concernedperson")

  val concernedEquip: ClassifierName =
    ClassifierName("concernedequip")

  val correspondentOrg: ClassifierName =
    ClassifierName("correspondentorg")

  val correspondentPerson: ClassifierName =
    ClassifierName("correspondentperson")

  /** Loads the stored models for all tag categories of the collective.
    *
    * The categories are listed first; when there are none, no model
    * query is issued and an empty list is returned.
    */
  def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
    RTag.listCategories(coll, noCategory.name).flatMap { categories =>
      NonEmptyList
        .fromList(categories)
        .fold(List.empty[RClassifierModel].pure[ConnectionIO]) { nel =>
          RClassifierModel.findAllByName(coll, nel.map(cat => tagCategory(cat).name))
        }
    }
}

View File

@ -4,23 +4,16 @@ import cats.data.Kleisli
import cats.data.OptionT import cats.data.OptionT
import cats.effect._ import cats.effect._
import cats.implicits._ import cats.implicits._
import fs2.{Pipe, Stream}
import docspell.analysis.TextAnalyser import docspell.analysis.TextAnalyser
import docspell.analysis.classifier.ClassifierModel
import docspell.analysis.classifier.TextClassifier.Data
import docspell.backend.ops.OCollective import docspell.backend.ops.OCollective
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.scheduler._ import docspell.joex.scheduler._
import docspell.store.queries.QItem import docspell.store.records.{RClassifierSetting, RTag}
import docspell.store.records.RClassifierSetting
import bitpeace.MimetypeHint
object LearnClassifierTask { object LearnClassifierTask {
val noClass = "__NONE__"
val pageSep = " --n-- " val pageSep = " --n-- "
val noClass = "__NONE__"
type Args = LearnClassifierArgs type Args = LearnClassifierArgs
@ -29,67 +22,53 @@ object LearnClassifierTask {
def apply[F[_]: Sync: ContextShift]( def apply[F[_]: Sync: ContextShift](
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
blocker: Blocker,
analyser: TextAnalyser[F] analyser: TextAnalyser[F]
): Task[F, Args, Unit] = ): Task[F, Args, Unit] =
Task { ctx => Task { ctx =>
(for { (for {
sett <- findActiveSettings[F](ctx, cfg) sett <- findActiveSettings[F](ctx, cfg)
data = selectItems( maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
ctx,
math.min(cfg.classification.itemCount, sett.itemCount).toLong,
sett.category.getOrElse("")
)
_ <- OptionT.liftF( _ <- OptionT.liftF(
analyser.classifier learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
.trainClassifier[Unit](ctx.logger, data)(Kleisli(handleModel(ctx, blocker)))
) )
} yield ()) } yield ())
.getOrElseF(logInactiveWarning(ctx.logger)) .getOrElseF(logInactiveWarning(ctx.logger))
} }
private def handleModel[F[_]: Sync: ContextShift]( def learnTagCategory[F[_]: Sync: ContextShift, A](
ctx: Context[F, Args], analyser: TextAnalyser[F],
blocker: Blocker collective: Ident,
)(trainedModel: ClassifierModel): F[Unit] = maxItems: Int
for { )(
oldFile <- ctx.store.transact(
RClassifierSetting.findById(ctx.args.collective).map(_.flatMap(_.fileId))
)
_ <- ctx.logger.info("Storing new trained model")
fileData = fs2.io.file.readAll(trainedModel.model, blocker, 4096)
newFile <-
ctx.store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
_ <- ctx.store.transact(
RClassifierSetting.updateFile(ctx.args.collective, Ident.unsafe(newFile.id))
)
_ <- ctx.logger.debug(s"New model stored at file ${newFile.id}")
_ <- oldFile match {
case Some(fid) =>
ctx.logger.debug(s"Deleting old model file ${fid.id}") *>
ctx.store.bitpeace.delete(fid.id).compile.drain
case None => ().pure[F]
}
} yield ()
private def selectItems[F[_]](
ctx: Context[F, Args],
max: Long,
category: String category: String
): Stream[F, Data] = { ): Task[F, A, Unit] =
val connStream = Task { ctx =>
for { val data = SelectItems.forCategory(ctx, collective)(maxItems, category)
item <- QItem.findAllNewesFirst(ctx.args.collective, 10).through(restrictTo(max)) ctx.logger.info(s"Learn classifier for tag category: $category") *>
tt <- Stream.eval( analyser.classifier.trainClassifier(ctx.logger, data)(
QItem.resolveTextAndTag(ctx.args.collective, item, category, pageSep) Kleisli(
StoreClassifierModel.handleModel(
ctx,
collective,
ClassifierName.tagCategory(category)
)
)
) )
} yield Data(tt.tag.map(_.name).getOrElse(noClass), item.id, tt.text.trim) }
ctx.store.transact(connStream.filter(_.text.nonEmpty))
}
private def restrictTo[F[_], A](max: Long): Pipe[F, A, A] = def learnAllTagCategories[F[_]: Sync: ContextShift, A](analyser: TextAnalyser[F])(
if (max <= 0) identity collective: Ident,
else _.take(max) maxItems: Int
): Task[F, A, Unit] =
Task { ctx =>
for {
cats <- ctx.store.transact(
RTag.listCategories(collective, ClassifierName.noCategory.name)
)
task = learnTagCategory[F, A](analyser, collective, maxItems) _
_ <- cats.map(task).traverse(_.run(ctx))
} yield ()
}
private def findActiveSettings[F[_]: Sync]( private def findActiveSettings[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
@ -98,7 +77,6 @@ object LearnClassifierTask {
if (cfg.classification.enabled) if (cfg.classification.enabled)
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective))) OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective)))
.filter(_.enabled) .filter(_.enabled)
.filter(_.category.nonEmpty)
.map(OCollective.Classifier.fromRecord) .map(OCollective.Classifier.fromRecord)
else else
OptionT.none OptionT.none

View File

@ -0,0 +1,39 @@
package docspell.joex.learn
import fs2.Stream
import docspell.analysis.classifier.TextClassifier.Data
import docspell.common._
import docspell.joex.scheduler.Context
import docspell.store.Store
import docspell.store.qb.Batch
import docspell.store.queries.QItem
/** Selects the training data (item text plus tag) used for learning a
  * classifier for one tag category.
  */
object SelectItems {
  val pageSep = LearnClassifierTask.pageSep
  val noClass = LearnClassifierTask.noClass

  /** Same as the `Store` based variant, using the store of the given context. */
  def forCategory[F[_]](ctx: Context[F, _], collective: Ident)(
      max: Int,
      category: String
  ): Stream[F, Data] =
    forCategory(ctx.store, collective, max, category)

  /** Streams one `Data` element per item of the collective, dropping
    * those whose trimmed text is empty.
    *
    * @param max maximum number of items to select; a value of zero or
    *            less selects without a limit
    * @param category the tag category whose tag is used as the class;
    *                 items without such a tag get `noClass`
    */
  def forCategory[F[_]](
      store: Store[F],
      collective: Ident,
      max: Int,
      category: String
  ): Stream[F, Data] = {
    val limit =
      if (max > 0) Batch.limit(max)
      else Batch.all

    val labelled =
      QItem.findAllNewesFirst(collective, 10, limit).flatMap { item =>
        Stream
          .eval(QItem.resolveTextAndTag(collective, item, category, pageSep))
          .map(tt => Data(tt.tag.map(_.name).getOrElse(noClass), item.id, tt.text.trim))
      }

    store.transact(labelled.filter(_.text.nonEmpty))
  }
}

View File

@ -0,0 +1,53 @@
package docspell.joex.learn
import cats.effect._
import cats.implicits._
import docspell.analysis.classifier.ClassifierModel
import docspell.common._
import docspell.joex.scheduler._
import docspell.store.Store
import docspell.store.records.RClassifierModel
import bitpeace.MimetypeHint
/** Persists a freshly trained classifier model and removes the file of
  * the model it replaces.
  */
object StoreClassifierModel {

  /** Variant taking store, blocker and logger from the job context. */
  def handleModel[F[_]: Sync: ContextShift](
      ctx: Context[F, _],
      collective: Ident,
      modelName: ClassifierName
  )(
      trainedModel: ClassifierModel
  ): F[Unit] =
    handleModel(ctx.store, ctx.blocker, ctx.logger)(collective, modelName, trainedModel)

  /** Saves the model file, points the model record of `modelName` to it
    * and afterwards deletes the previously referenced file, if any.
    */
  def handleModel[F[_]: Sync: ContextShift](
      store: Store[F],
      blocker: Blocker,
      logger: Logger[F]
  )(
      collective: Ident,
      modelName: ClassifierName,
      trainedModel: ClassifierModel
  ): F[Unit] = {
    // File id of the model stored so far under this name, if there is one.
    def previousFile: F[Option[Ident]] =
      store.transact(
        RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))
      )

    // Deletes the replaced model file; nothing to do when there was none.
    def removePrevious(previous: Option[Ident]): F[Unit] =
      previous.fold(().pure[F]) { fid =>
        logger.debug(s"Deleting old model file ${fid.id}") *>
          store.bitpeace.delete(fid.id).compile.drain
      }

    previousFile.flatMap { previous =>
      for {
        _ <- logger.debug(s"Storing new trained model for: ${modelName.name}")
        modelBytes = fs2.io.file.readAll(trainedModel.model, blocker, 4096)
        saved <-
          store.bitpeace.saveNew(modelBytes, 4096, MimetypeHint.none).compile.lastOrError
        _ <- store.transact(
          RClassifierModel.updateFile(collective, modelName.name, Ident.unsafe(saved.id))
        )
        _ <- logger.debug(s"New model stored at file ${saved.id}")
        _ <- removePrevious(previous)
      } yield ()
    }
  }
}

View File

@ -9,12 +9,11 @@ import docspell.analysis.{NlpSettings, TextAnalyser}
import docspell.common._ import docspell.common._
import docspell.joex.Config import docspell.joex.Config
import docspell.joex.analysis.RegexNerFile import docspell.joex.analysis.RegexNerFile
import docspell.joex.learn.LearnClassifierTask import docspell.joex.learn.{ClassifierName, LearnClassifierTask}
import docspell.joex.process.ItemData.AttachmentDates import docspell.joex.process.ItemData.AttachmentDates
import docspell.joex.scheduler.Context import docspell.joex.scheduler.Context
import docspell.joex.scheduler.Task import docspell.joex.scheduler.Task
import docspell.store.records.RAttachmentMeta import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
import docspell.store.records.RClassifierSetting
import bitpeace.RangeDef import bitpeace.RangeDef
@ -42,10 +41,13 @@ object TextAnalysis {
e <- s e <- s
_ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}") _ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}")
v = t.toVector v = t.toVector
tag <- predictTag(ctx, cfg, item.metas, analyser.classifier).value classifierEnabled <- getActive(ctx, cfg)
tag <-
if (classifierEnabled) predictTags(ctx, cfg, item.metas, analyser.classifier)
else List.empty[String].pure[F]
} yield item } yield item
.copy(metas = v.map(_._1), dateLabels = v.map(_._2)) .copy(metas = v.map(_._1), dateLabels = v.map(_._2))
.appendTags(tag.toSeq) .appendTags(tag)
} }
def annotateAttachment[F[_]: Sync]( def annotateAttachment[F[_]: Sync](
@ -66,15 +68,29 @@ object TextAnalysis {
} yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates)) } yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates))
} }
/** Guesses one tag per stored tag-category model of the collective.
  *
  * Each model found via `ClassifierName.findTagModels` is passed to
  * `predictTag`; models that yield no tag are left out of the result.
  */
def predictTags[F[_]: Sync: ContextShift](
    ctx: Context[F, Args],
    cfg: Config.TextAnalysis,
    metas: Vector[RAttachmentMeta],
    classifier: TextClassifier[F]
): F[List[String]] =
  ctx.store
    .transact(ClassifierName.findTagModels(ctx.args.meta.collective))
    .flatMap { models =>
      ctx.logger
        .debug(s"Guessing tags for ${models.size} categories")
        .flatMap { _ =>
          models
            .traverse(model => predictTag(ctx, cfg, metas, classifier)(model.fileId.some))
            .map(_.flatten)
        }
    }
def predictTag[F[_]: Sync: ContextShift]( def predictTag[F[_]: Sync: ContextShift](
ctx: Context[F, Args], ctx: Context[F, Args],
cfg: Config.TextAnalysis, cfg: Config.TextAnalysis,
metas: Vector[RAttachmentMeta], metas: Vector[RAttachmentMeta],
classifier: TextClassifier[F] classifier: TextClassifier[F]
): OptionT[F, String] = )(modelFileId: Option[Ident]): F[Option[String]] =
for { (for {
model <- findActiveModel(ctx, cfg) _ <- OptionT.liftF(ctx.logger.info(s"Guessing tag for ${modelFileId}"))
_ <- OptionT.liftF(ctx.logger.info(s"Guessing tag …")) model <- OptionT.fromOption[F](modelFileId)
text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep) text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep)
modelData = modelData =
ctx.store.bitpeace ctx.store.bitpeace
@ -90,20 +106,21 @@ object TextAnalysis {
.flatMap(_ => classifier.classify(ctx.logger, ClassifierModel(modelFile), text)) .flatMap(_ => classifier.classify(ctx.logger, ClassifierModel(modelFile), text))
}).filter(_ != LearnClassifierTask.noClass) }).filter(_ != LearnClassifierTask.noClass)
_ <- OptionT.liftF(ctx.logger.debug(s"Guessed tag: ${cls}")) _ <- OptionT.liftF(ctx.logger.debug(s"Guessed tag: ${cls}"))
} yield cls } yield cls).value
private def findActiveModel[F[_]: Sync]( private def getActive[F[_]: Sync](
ctx: Context[F, Args], ctx: Context[F, Args],
cfg: Config.TextAnalysis cfg: Config.TextAnalysis
): OptionT[F, Ident] = ): F[Boolean] =
(if (cfg.classification.enabled) if (cfg.classification.enabled)
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.meta.collective))) ctx.store
.filter(_.enabled) .transact(RClassifierSetting.findById(ctx.args.meta.collective))
.mapFilter(_.fileId) .map(_.exists(_.enabled))
else .flatTap(enabled =>
OptionT.none[F, Ident]).orElse( if (enabled) ().pure[F]
OptionT.liftF(ctx.logger.info("Classification is disabled.")) *> OptionT else ctx.logger.info("Classification is disabled. Check config or settings.")
.none[F, Ident] )
) else
ctx.logger.info("Classification is disabled.") *> false.pure[F]
} }

View File

@ -4856,8 +4856,6 @@ components:
properties: properties:
enabled: enabled:
type: boolean type: boolean
category:
type: string
itemCount: itemCount:
type: integer type: integer
format: int32 format: int32

View File

@ -46,8 +46,7 @@ object CollectiveRoutes {
OCollective.Classifier( OCollective.Classifier(
settings.classifier.enabled, settings.classifier.enabled,
settings.classifier.schedule, settings.classifier.schedule,
settings.classifier.itemCount, settings.classifier.itemCount
settings.classifier.category
) )
) )
) )
@ -65,8 +64,7 @@ object CollectiveRoutes {
c.language, c.language,
c.integrationEnabled, c.integrationEnabled,
ClassifierSetting( ClassifierSetting(
c.classifier.map(_.enabled).getOrElse(false), c.classifier.exists(_.enabled),
c.classifier.flatMap(_.category),
c.classifier.map(_.itemCount).getOrElse(0), c.classifier.map(_.itemCount).getOrElse(0),
c.classifier c.classifier
.map(_.schedule) .map(_.schedule)

View File

@ -0,0 +1,21 @@
-- New table holding one classifier model file per collective ("cid")
-- and model name; the pair ("cid", "name") is unique.
CREATE TABLE "classifier_model"(
"id" varchar(254) not null primary key,
"cid" varchar(254) not null,
"name" varchar(254) not null,
"file_id" varchar(254) not null,
"created" timestamp not null,
foreign key ("cid") references "collective"("cid"),
foreign key ("file_id") references "filemeta"("id"),
unique ("cid", "name")
);
-- Carry over every model file referenced from "classifier_setting".
-- The model name is 'tagcategory-' followed by the category that was
-- configured there; the id is a freshly generated random uuid.
insert into "classifier_model"
select random_uuid() as "id", "cid", concat('tagcategory-', "category") as "name", "file_id", "created"
from "classifier_setting"
where "file_id" is not null;
-- Category and model file are no longer part of "classifier_setting".
alter table "classifier_setting"
drop column "category";
alter table "classifier_setting"
drop column "file_id";

View File

@ -0,0 +1,26 @@
-- New table holding one classifier model file per collective (`cid`)
-- and model name; the pair (`cid`, `name`) is unique.
CREATE TABLE `classifier_model`(
`id` varchar(254) not null primary key,
`cid` varchar(254) not null,
`name` varchar(254) not null,
`file_id` varchar(254) not null,
`created` timestamp not null,
foreign key (`cid`) references `collective`(`cid`),
foreign key (`file_id`) references `filemeta`(`id`),
unique (`cid`, `name`)
);
-- Carry over every model file referenced from `classifier_setting`.
-- The model name is 'tagcategory-' followed by the category that was
-- configured there; the id is the md5 hash of a random number.
insert into `classifier_model`
select md5(rand()) as id, `cid`,concat('tagcategory-', `category`) as `name`, `file_id`, `created`
from `classifier_setting`
where `file_id` is not null;
-- Category and model file are no longer part of `classifier_setting`.
alter table `classifier_setting`
drop column `category`;
-- mariadb needs special treatment when dropping a column that is part
-- of an index and foreign key
alter table `classifier_setting`
drop constraint `classifier_setting_ibfk_2`;
alter table `classifier_setting`
drop column `file_id`;

View File

@ -0,0 +1,21 @@
-- New table holding one classifier model file per collective ("cid")
-- and model name; the pair ("cid", "name") is unique.
CREATE TABLE "classifier_model"(
"id" varchar(254) not null primary key,
"cid" varchar(254) not null,
"name" varchar(254) not null,
"file_id" varchar(254) not null,
"created" timestamp not null,
foreign key ("cid") references "collective"("cid"),
foreign key ("file_id") references "filemeta"("id"),
unique ("cid", "name")
);
-- Carry over every model file referenced from "classifier_setting".
-- The model name is 'tagcategory-' followed by the category that was
-- configured there; the id is the md5 hash of a random number.
insert into "classifier_model"
select md5(random()::text) as id, "cid",'tagcategory-' || "category" as "name", "file_id", "created"
from "classifier_setting"
where "file_id" is not null;
-- Category and model file are no longer part of "classifier_setting".
alter table "classifier_setting"
drop column "category";
alter table "classifier_setting"
drop column "file_id";

View File

@ -543,11 +543,14 @@ object QItem {
def findAllNewesFirst( def findAllNewesFirst(
collective: Ident, collective: Ident,
chunkSize: Int chunkSize: Int,
limit: Batch
): Stream[ConnectionIO, Ident] = { ): Stream[ConnectionIO, Ident] = {
val i = RItem.as("i") val i = RItem.as("i")
Select(i.id.s, from(i), i.cid === collective && i.state === ItemState.confirmed) Select(i.id.s, from(i), i.cid === collective && i.state === ItemState.confirmed)
.orderBy(i.created.desc) .orderBy(i.created.desc)
.limit(limit)
.build .build
.query[Ident] .query[Ident]
.streamWithChunkSize(chunkSize) .streamWithChunkSize(chunkSize)

View File

@ -0,0 +1,78 @@
package docspell.store.records
import cats.effect._
import cats.data.NonEmptyList
import cats.implicits._
import docspell.common._
import docspell.store.qb.DSL._
import docspell.store.qb._
import doobie._
import doobie.implicits._
/** A row of the `classifier_model` table: the file holding a trained
  * model, identified by collective and model name.
  */
final case class RClassifierModel(
    id: Ident,
    cid: Ident,
    name: String,
    fileId: Ident,
    created: Timestamp
)

object RClassifierModel {

  /** Creates a record with a random id and the current time as `created`. */
  def createNew[F[_]: Sync](
      cid: Ident,
      name: String,
      fileId: Ident
  ): F[RClassifierModel] =
    Ident.randomId[F].flatMap { newId =>
      Timestamp.current[F].map(now => RClassifierModel(newId, cid, name, fileId, now))
    }

  final case class Table(alias: Option[String]) extends TableDef {
    val tableName = "classifier_model"

    val id      = Column[Ident]("id", this)
    val cid     = Column[Ident]("cid", this)
    val name    = Column[String]("name", this)
    val fileId  = Column[Ident]("file_id", this)
    val created = Column[Timestamp]("created", this)

    val all = NonEmptyList.of[Column[_]](id, cid, name, fileId, created)
  }

  def as(alias: String): Table =
    Table(Some(alias))

  val T = Table(None)

  def insert(v: RClassifierModel): ConnectionIO[Int] = {
    val values = fr"${v.id},${v.cid},${v.name},${v.fileId},${v.created}"
    DML.insert(T, T.all, values)
  }

  /** Sets the file of the model with the given name, inserting a new
    * record when the update did not touch any row.
    *
    * @return the sum of updated and inserted rows
    */
  def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] =
    DML
      .update(T, T.cid === coll && T.name === name, DML.set(T.fileId.setTo(fid)))
      .flatMap { updated =>
        val inserted =
          if (updated == 0) createNew[ConnectionIO](coll, name, fid).flatMap(insert)
          else 0.pure[ConnectionIO]
        inserted.map(updated + _)
      }

  def findByName(cid: Ident, name: String): ConnectionIO[Option[RClassifierModel]] = {
    val byCollectiveAndName = T.cid === cid && T.name === name
    Select(select(T.all), from(T), byCollectiveAndName).build
      .query[RClassifierModel]
      .option
  }

  def findAllByName(
      cid: Ident,
      names: NonEmptyList[String]
  ): ConnectionIO[List[RClassifierModel]] = {
    val byCollectiveAndNames = T.cid === cid && T.name.in(names)
    Select(select(T.all), from(T), byCollectiveAndNames).build
      .query[RClassifierModel]
      .to[List]
  }
}

View File

@ -15,9 +15,7 @@ case class RClassifierSetting(
cid: Ident, cid: Ident,
enabled: Boolean, enabled: Boolean,
schedule: CalEvent, schedule: CalEvent,
category: String,
itemCount: Int, itemCount: Int,
fileId: Option[Ident],
created: Timestamp created: Timestamp
) {} ) {}
@ -28,12 +26,10 @@ object RClassifierSetting {
val cid = Column[Ident]("cid", this) val cid = Column[Ident]("cid", this)
val enabled = Column[Boolean]("enabled", this) val enabled = Column[Boolean]("enabled", this)
val schedule = Column[CalEvent]("schedule", this) val schedule = Column[CalEvent]("schedule", this)
val category = Column[String]("category", this)
val itemCount = Column[Int]("item_count", this) val itemCount = Column[Int]("item_count", this)
val fileId = Column[Ident]("file_id", this)
val created = Column[Timestamp]("created", this) val created = Column[Timestamp]("created", this)
val all = NonEmptyList val all = NonEmptyList
.of[Column[_]](cid, enabled, schedule, category, itemCount, fileId, created) .of[Column[_]](cid, enabled, schedule, itemCount, created)
} }
val T = Table(None) val T = Table(None)
@ -44,7 +40,7 @@ object RClassifierSetting {
DML.insert( DML.insert(
T, T,
T.all, T.all,
fr"${v.cid},${v.enabled},${v.schedule},${v.category},${v.itemCount},${v.fileId},${v.created}" fr"${v.cid},${v.enabled},${v.schedule},${v.itemCount},${v.created}"
) )
def updateAll(v: RClassifierSetting): ConnectionIO[Int] = def updateAll(v: RClassifierSetting): ConnectionIO[Int] =
@ -54,15 +50,10 @@ object RClassifierSetting {
DML.set( DML.set(
T.enabled.setTo(v.enabled), T.enabled.setTo(v.enabled),
T.schedule.setTo(v.schedule), T.schedule.setTo(v.schedule),
T.category.setTo(v.category), T.itemCount.setTo(v.itemCount)
T.itemCount.setTo(v.itemCount),
T.fileId.setTo(v.fileId)
) )
) )
def updateFile(coll: Ident, fid: Ident): ConnectionIO[Int] =
DML.update(T, T.cid === coll, DML.set(T.fileId.setTo(fid)))
def updateSettings(v: RClassifierSetting): ConnectionIO[Int] = def updateSettings(v: RClassifierSetting): ConnectionIO[Int] =
for { for {
n1 <- DML.update( n1 <- DML.update(
@ -71,8 +62,7 @@ object RClassifierSetting {
DML.set( DML.set(
T.enabled.setTo(v.enabled), T.enabled.setTo(v.enabled),
T.schedule.setTo(v.schedule), T.schedule.setTo(v.schedule),
T.itemCount.setTo(v.itemCount), T.itemCount.setTo(v.itemCount)
T.category.setTo(v.category)
) )
) )
n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO] n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO]
@ -89,8 +79,7 @@ object RClassifierSetting {
case class Classifier( case class Classifier(
enabled: Boolean, enabled: Boolean,
schedule: CalEvent, schedule: CalEvent,
itemCount: Int, itemCount: Int
category: Option[String]
) { ) {
def toRecord(coll: Ident, created: Timestamp): RClassifierSetting = def toRecord(coll: Ident, created: Timestamp): RClassifierSetting =
@ -98,15 +87,13 @@ object RClassifierSetting {
coll, coll,
enabled, enabled,
schedule, schedule,
category.getOrElse(""),
itemCount, itemCount,
None,
created created
) )
} }
object Classifier { object Classifier {
def fromRecord(r: RClassifierSetting): Classifier = def fromRecord(r: RClassifierSetting): Classifier =
Classifier(r.enabled, r.schedule, r.itemCount, r.category.some) Classifier(r.enabled, r.schedule, r.itemCount)
} }
} }

View File

@ -89,8 +89,7 @@ object RCollective {
c.integration.s, c.integration.s,
cs.enabled.s, cs.enabled.s,
cs.schedule.s, cs.schedule.s,
cs.itemCount.s, cs.itemCount.s
cs.category.s
), ),
from(c).leftJoin(cs, cs.cid === c.id), from(c).leftJoin(cs, cs.cid === c.id),
c.id === coll c.id === coll

View File

@ -148,6 +148,13 @@ object RTag {
).orderBy(T.name.asc).build.query[RTag].to[List] ).orderBy(T.name.asc).build.query[RTag].to[List]
} }
/** Lists the distinct tag categories of a collective; `fallback` is
  * selected in place of a missing category.
  */
def listCategories(coll: Ident, fallback: String): ConnectionIO[List[String]] = {
  val categoryOrFallback = coalesce(T.category.s, lit(fallback)).s
  Select(categoryOrFallback, from(T), T.cid === coll).distinct.build
    .query[String]
    .to[List]
}
def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] = def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] =
DML.delete(T, T.tid === tagId && T.cid === coll) DML.delete(T, T.tid === tagId && T.cid === coll)
} }

View File

@ -25,8 +25,6 @@ import Util.Tag
type alias Model = type alias Model =
{ enabled : Bool { enabled : Bool
, categoryModel : Comp.FixedDropdown.Model String
, category : Maybe String
, scheduleModel : Comp.CalEventInput.Model , scheduleModel : Comp.CalEventInput.Model
, schedule : Validated CalEvent , schedule : Validated CalEvent
, itemCountModel : Comp.IntField.Model , itemCountModel : Comp.IntField.Model
@ -35,10 +33,8 @@ type alias Model =
type Msg type Msg
= GetTagsResp (Result Http.Error TagList) = ScheduleMsg Comp.CalEventInput.Msg
| ScheduleMsg Comp.CalEventInput.Msg
| ToggleEnabled | ToggleEnabled
| CategoryMsg (Comp.FixedDropdown.Msg String)
| ItemCountMsg Comp.IntField.Msg | ItemCountMsg Comp.IntField.Msg
@ -53,17 +49,12 @@ init flags sett =
Comp.CalEventInput.init flags newSchedule Comp.CalEventInput.init flags newSchedule
in in
( { enabled = sett.enabled ( { enabled = sett.enabled
, categoryModel = Comp.FixedDropdown.initString []
, category = sett.category
, scheduleModel = cem , scheduleModel = cem
, schedule = Data.Validated.Unknown newSchedule , schedule = Data.Validated.Unknown newSchedule
, itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count" , itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count"
, itemCount = Just sett.itemCount , itemCount = Just sett.itemCount
} }
, Cmd.batch , Cmd.map ScheduleMsg cec
[ Api.getTags flags "" GetTagsResp
, Cmd.map ScheduleMsg cec
]
) )
@ -72,7 +63,6 @@ getSettings model =
Data.Validated.map Data.Validated.map
(\sch -> (\sch ->
{ enabled = model.enabled { enabled = model.enabled
, category = model.category
, schedule = , schedule =
Data.CalEvent.makeEvent sch Data.CalEvent.makeEvent sch
, itemCount = Maybe.withDefault 0 model.itemCount , itemCount = Maybe.withDefault 0 model.itemCount
@ -84,27 +74,6 @@ getSettings model =
update : Flags -> Msg -> Model -> ( Model, Cmd Msg ) update : Flags -> Msg -> Model -> ( Model, Cmd Msg )
update flags msg model = update flags msg model =
case msg of case msg of
GetTagsResp (Ok tl) ->
let
categories =
Util.Tag.getCategories tl.items
|> List.sort
in
( { model
| categoryModel = Comp.FixedDropdown.initString categories
, category =
if model.category == Nothing then
List.head categories
else
model.category
}
, Cmd.none
)
GetTagsResp (Err _) ->
( model, Cmd.none )
ScheduleMsg lmsg -> ScheduleMsg lmsg ->
let let
( cm, cc, ce ) = ( cm, cc, ce ) =
@ -126,23 +95,6 @@ update flags msg model =
, Cmd.none , Cmd.none
) )
CategoryMsg lmsg ->
let
( mm, ma ) =
Comp.FixedDropdown.update lmsg model.categoryModel
in
( { model
| categoryModel = mm
, category =
if ma == Nothing then
model.category
else
ma
}
, Cmd.none
)
ItemCountMsg lmsg -> ItemCountMsg lmsg ->
let let
( im, iv ) = ( im, iv ) =
@ -182,13 +134,6 @@ view model =
, text "periodically based on a schedule and you need to specify a tag-group that should " , text "periodically based on a schedule and you need to specify a tag-group that should "
, text "be used for learning." , text "be used for learning."
] ]
, div [ class "field" ]
[ label [] [ text "Category" ]
, Html.map CategoryMsg
(Comp.FixedDropdown.viewString model.category
model.categoryModel
)
]
, Html.map ItemCountMsg , Html.map ItemCountMsg
(Comp.IntField.viewWithInfo (Comp.IntField.viewWithInfo
"The maximum number of items to learn from, order by date newest first. Use 0 to mean all." "The maximum number of items to learn from, order by date newest first. Use 0 to mean all."