mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 18:39:33 +00:00
Extend guessing tags to all tag categories
This commit is contained in:
parent
c5778880d9
commit
249f9e6e2a
@ -11,6 +11,7 @@ import docspell.analysis.classifier
|
||||
import docspell.analysis.classifier.TextClassifier._
|
||||
import docspell.analysis.nlp.Properties
|
||||
import docspell.common._
|
||||
import docspell.common.syntax.FileSyntax._
|
||||
|
||||
import edu.stanford.nlp.classify.ColumnDataClassifier
|
||||
|
||||
@ -28,7 +29,7 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
|
||||
.use { dir =>
|
||||
for {
|
||||
rawData <- writeDataFile(blocker, dir, data)
|
||||
_ <- logger.info(s"Learning from ${rawData.count} items.")
|
||||
_ <- logger.debug(s"Learning from ${rawData.count} items.")
|
||||
trainData <- splitData(logger, rawData)
|
||||
scores <- cfg.classifierConfigs.traverse(m => train(logger, trainData, m))
|
||||
sorted = scores.sortBy(-_.score)
|
||||
@ -138,9 +139,9 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
|
||||
props: Map[String, String]
|
||||
): Map[String, String] =
|
||||
prepend("2.", props) ++ Map(
|
||||
"trainFile" -> trainData.train.normalize().toAbsolutePath().toString(),
|
||||
"testFile" -> trainData.test.normalize().toAbsolutePath().toString(),
|
||||
"serializeTo" -> trainData.modelFile.normalize().toAbsolutePath().toString()
|
||||
"trainFile" -> trainData.train.absolutePathAsString,
|
||||
"testFile" -> trainData.test.absolutePathAsString,
|
||||
"serializeTo" -> trainData.modelFile.absolutePathAsString
|
||||
).toList
|
||||
|
||||
case class RawData(count: Long, file: Path)
|
||||
|
@ -169,7 +169,7 @@ object JoexAppImpl {
|
||||
.withTask(
|
||||
JobTask.json(
|
||||
LearnClassifierArgs.taskName,
|
||||
LearnClassifierTask[F](cfg.textAnalysis, blocker, analyser),
|
||||
LearnClassifierTask[F](cfg.textAnalysis, analyser),
|
||||
LearnClassifierTask.onCancel[F]
|
||||
)
|
||||
)
|
||||
|
@ -0,0 +1,45 @@
|
||||
package docspell.joex.learn
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
import docspell.common.Ident
|
||||
import docspell.store.records.{RClassifierModel, RTag}
|
||||
import doobie._
|
||||
|
||||
/** Name under which a trained classifier model is stored.
  *
  * A value class around the plain string, so the name is not confused
  * with other strings at call sites.
  */
final class ClassifierName(val name: String) extends AnyVal

object ClassifierName {

  /** Wraps `name` as is; no validation is performed. */
  def apply(name: String): ClassifierName =
    new ClassifierName(name)

  /** Placeholder name; `findTagModels` passes it as the fallback value
    * when listing the tag categories of a collective.
    */
  val noCategory: ClassifierName =
    ClassifierName("__docspell_no_category__")

  /** Prefix of every model name that is derived from a tag category. */
  val categoryPrefix = "tagcategory-"

  /** Model name for the given tag category: the prefix followed by `cat`. */
  def tagCategory(cat: String): ClassifierName =
    ClassifierName(categoryPrefix + cat)

  val concernedPerson: ClassifierName =
    ClassifierName("concernedperson")

  val concernedEquip: ClassifierName =
    ClassifierName("concernedequip")

  val correspondentOrg: ClassifierName =
    ClassifierName("correspondentorg")

  val correspondentPerson: ClassifierName =
    ClassifierName("correspondentperson")

  /** Finds the stored models belonging to the tag categories of a collective.
    *
    * The categories are listed first; each one is mapped to its model
    * name via `tagCategory` and the models with these names are looked
    * up. When there is no category at all, no model query is issued and
    * an empty list is returned.
    */
  def findTagModels[F[_]](coll: Ident): ConnectionIO[List[RClassifierModel]] =
    RTag.listCategories(coll, noCategory.name).flatMap { categories =>
      NonEmptyList.fromList(categories) match {
        case None =>
          List.empty[RClassifierModel].pure[ConnectionIO]
        case Some(names) =>
          RClassifierModel.findAllByName(coll, names.map(c => tagCategory(c).name))
      }
    }
}
|
@ -4,23 +4,16 @@ import cats.data.Kleisli
|
||||
import cats.data.OptionT
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import fs2.{Pipe, Stream}
|
||||
|
||||
import docspell.analysis.TextAnalyser
|
||||
import docspell.analysis.classifier.ClassifierModel
|
||||
import docspell.analysis.classifier.TextClassifier.Data
|
||||
import docspell.backend.ops.OCollective
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.queries.QItem
|
||||
import docspell.store.records.RClassifierSetting
|
||||
|
||||
import bitpeace.MimetypeHint
|
||||
import docspell.store.records.{RClassifierSetting, RTag}
|
||||
|
||||
object LearnClassifierTask {
|
||||
val noClass = "__NONE__"
|
||||
val pageSep = " --n-- "
|
||||
val noClass = "__NONE__"
|
||||
|
||||
type Args = LearnClassifierArgs
|
||||
|
||||
@ -29,67 +22,53 @@ object LearnClassifierTask {
|
||||
|
||||
def apply[F[_]: Sync: ContextShift](
|
||||
cfg: Config.TextAnalysis,
|
||||
blocker: Blocker,
|
||||
analyser: TextAnalyser[F]
|
||||
): Task[F, Args, Unit] =
|
||||
Task { ctx =>
|
||||
(for {
|
||||
sett <- findActiveSettings[F](ctx, cfg)
|
||||
data = selectItems(
|
||||
ctx,
|
||||
math.min(cfg.classification.itemCount, sett.itemCount).toLong,
|
||||
sett.category.getOrElse("")
|
||||
)
|
||||
maxItems = math.min(cfg.classification.itemCount, sett.itemCount)
|
||||
_ <- OptionT.liftF(
|
||||
analyser.classifier
|
||||
.trainClassifier[Unit](ctx.logger, data)(Kleisli(handleModel(ctx, blocker)))
|
||||
learnAllTagCategories(analyser)(ctx.args.collective, maxItems).run(ctx)
|
||||
)
|
||||
} yield ())
|
||||
.getOrElseF(logInactiveWarning(ctx.logger))
|
||||
}
|
||||
|
||||
private def handleModel[F[_]: Sync: ContextShift](
|
||||
ctx: Context[F, Args],
|
||||
blocker: Blocker
|
||||
)(trainedModel: ClassifierModel): F[Unit] =
|
||||
for {
|
||||
oldFile <- ctx.store.transact(
|
||||
RClassifierSetting.findById(ctx.args.collective).map(_.flatMap(_.fileId))
|
||||
)
|
||||
_ <- ctx.logger.info("Storing new trained model")
|
||||
fileData = fs2.io.file.readAll(trainedModel.model, blocker, 4096)
|
||||
newFile <-
|
||||
ctx.store.bitpeace.saveNew(fileData, 4096, MimetypeHint.none).compile.lastOrError
|
||||
_ <- ctx.store.transact(
|
||||
RClassifierSetting.updateFile(ctx.args.collective, Ident.unsafe(newFile.id))
|
||||
)
|
||||
_ <- ctx.logger.debug(s"New model stored at file ${newFile.id}")
|
||||
_ <- oldFile match {
|
||||
case Some(fid) =>
|
||||
ctx.logger.debug(s"Deleting old model file ${fid.id}") *>
|
||||
ctx.store.bitpeace.delete(fid.id).compile.drain
|
||||
case None => ().pure[F]
|
||||
}
|
||||
} yield ()
|
||||
|
||||
private def selectItems[F[_]](
|
||||
ctx: Context[F, Args],
|
||||
max: Long,
|
||||
def learnTagCategory[F[_]: Sync: ContextShift, A](
|
||||
analyser: TextAnalyser[F],
|
||||
collective: Ident,
|
||||
maxItems: Int
|
||||
)(
|
||||
category: String
|
||||
): Stream[F, Data] = {
|
||||
val connStream =
|
||||
for {
|
||||
item <- QItem.findAllNewesFirst(ctx.args.collective, 10).through(restrictTo(max))
|
||||
tt <- Stream.eval(
|
||||
QItem.resolveTextAndTag(ctx.args.collective, item, category, pageSep)
|
||||
): Task[F, A, Unit] =
|
||||
Task { ctx =>
|
||||
val data = SelectItems.forCategory(ctx, collective)(maxItems, category)
|
||||
ctx.logger.info(s"Learn classifier for tag category: $category") *>
|
||||
analyser.classifier.trainClassifier(ctx.logger, data)(
|
||||
Kleisli(
|
||||
StoreClassifierModel.handleModel(
|
||||
ctx,
|
||||
collective,
|
||||
ClassifierName.tagCategory(category)
|
||||
)
|
||||
)
|
||||
)
|
||||
} yield Data(tt.tag.map(_.name).getOrElse(noClass), item.id, tt.text.trim)
|
||||
ctx.store.transact(connStream.filter(_.text.nonEmpty))
|
||||
}
|
||||
}
|
||||
|
||||
private def restrictTo[F[_], A](max: Long): Pipe[F, A, A] =
|
||||
if (max <= 0) identity
|
||||
else _.take(max)
|
||||
/** Trains one classifier per tag category of the collective.
  *
  * The categories are loaded from the store (using
  * `ClassifierName.noCategory` as the fallback value) and
  * `learnTagCategory` is run for each of them, one after another, in
  * the same task context.
  */
def learnAllTagCategories[F[_]: Sync: ContextShift, A](analyser: TextAnalyser[F])(
    collective: Ident,
    maxItems: Int
): Task[F, A, Unit] =
  Task { ctx =>
    // Partially applied: only the category is still missing.
    val learnCategory = learnTagCategory[F, A](analyser, collective, maxItems) _

    ctx.store
      .transact(RTag.listCategories(collective, ClassifierName.noCategory.name))
      .flatMap(categories => categories.map(learnCategory).traverse(_.run(ctx)))
      .map(_ => ())
  }
|
||||
|
||||
private def findActiveSettings[F[_]: Sync](
|
||||
ctx: Context[F, Args],
|
||||
@ -98,7 +77,6 @@ object LearnClassifierTask {
|
||||
if (cfg.classification.enabled)
|
||||
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.collective)))
|
||||
.filter(_.enabled)
|
||||
.filter(_.category.nonEmpty)
|
||||
.map(OCollective.Classifier.fromRecord)
|
||||
else
|
||||
OptionT.none
|
||||
|
@ -0,0 +1,39 @@
|
||||
package docspell.joex.learn
|
||||
|
||||
import fs2.Stream
|
||||
|
||||
import docspell.analysis.classifier.TextClassifier.Data
|
||||
import docspell.common._
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.store.Store
|
||||
import docspell.store.qb.Batch
|
||||
import docspell.store.queries.QItem
|
||||
|
||||
/** Selects the training data for learning a classifier from existing items. */
object SelectItems {
  val pageSep = LearnClassifierTask.pageSep
  val noClass = LearnClassifierTask.noClass

  /** Same as the `Store` based variant, taking the store from the task context. */
  def forCategory[F[_]](ctx: Context[F, _], collective: Ident)(
      max: Int,
      category: String
  ): Stream[F, Data] =
    forCategory(ctx.store, collective, max, category)

  /** Streams one `Data` element per item of the collective.
    *
    * For every item id its text and tag of the given category are
    * resolved. The class is the tag name, or `noClass` if the item has
    * no such tag. Items whose trimmed text is empty are dropped.
    *
    * @param max maximum number of items to query; a value `<= 0` means no limit
    */
  def forCategory[F[_]](
      store: Store[F],
      collective: Ident,
      max: Int,
      category: String
  ): Stream[F, Data] = {
    val limit = if (max > 0) Batch.limit(max) else Batch.all

    val samples =
      QItem
        .findAllNewesFirst(collective, 10, limit)
        .flatMap { itemId =>
          Stream
            .eval(QItem.resolveTextAndTag(collective, itemId, category, pageSep))
            .map(tt => Data(tt.tag.map(_.name).getOrElse(noClass), itemId.id, tt.text.trim))
        }
        .filter(_.text.nonEmpty)

    store.transact(samples)
  }

}
|
@ -0,0 +1,53 @@
|
||||
package docspell.joex.learn
|
||||
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.analysis.classifier.ClassifierModel
|
||||
import docspell.common._
|
||||
import docspell.joex.scheduler._
|
||||
import docspell.store.Store
|
||||
import docspell.store.records.RClassifierModel
|
||||
|
||||
import bitpeace.MimetypeHint
|
||||
|
||||
/** Persists a freshly trained classifier model and removes its predecessor. */
object StoreClassifierModel {

  /** Variant taking store, blocker and logger from the task context. */
  def handleModel[F[_]: Sync: ContextShift](
      ctx: Context[F, _],
      collective: Ident,
      modelName: ClassifierName
  )(
      trainedModel: ClassifierModel
  ): F[Unit] =
    handleModel(ctx.store, ctx.blocker, ctx.logger)(collective, modelName, trainedModel)

  /** Stores the trained model file under `modelName` for the collective.
    *
    * Steps, in this order: remember the file id currently registered
    * for the model name (if any), save the new model file, point the
    * model record to the new file and finally delete the old file.
    */
  def handleModel[F[_]: Sync: ContextShift](
      store: Store[F],
      blocker: Blocker,
      logger: Logger[F]
  )(
      collective: Ident,
      modelName: ClassifierName,
      trainedModel: ClassifierModel
  ): F[Unit] = {
    val findPrevious =
      RClassifierModel.findByName(collective, modelName.name).map(_.map(_.fileId))

    val modelBytes = fs2.io.file.readAll(trainedModel.model, blocker, 4096)

    // Deletes the file that was registered before the new one, if there was one.
    def removePrevious(previous: Option[Ident]): F[Unit] =
      previous match {
        case None =>
          ().pure[F]
        case Some(fid) =>
          logger.debug(s"Deleting old model file ${fid.id}") *>
            store.bitpeace.delete(fid.id).compile.drain
      }

    for {
      previous <- store.transact(findPrevious)
      _        <- logger.debug(s"Storing new trained model for: ${modelName.name}")
      saved <-
        store.bitpeace.saveNew(modelBytes, 4096, MimetypeHint.none).compile.lastOrError
      _ <- store.transact(
        RClassifierModel.updateFile(collective, modelName.name, Ident.unsafe(saved.id))
      )
      _ <- logger.debug(s"New model stored at file ${saved.id}")
      _ <- removePrevious(previous)
    } yield ()
  }
}
|
@ -9,12 +9,11 @@ import docspell.analysis.{NlpSettings, TextAnalyser}
|
||||
import docspell.common._
|
||||
import docspell.joex.Config
|
||||
import docspell.joex.analysis.RegexNerFile
|
||||
import docspell.joex.learn.LearnClassifierTask
|
||||
import docspell.joex.learn.{ClassifierName, LearnClassifierTask}
|
||||
import docspell.joex.process.ItemData.AttachmentDates
|
||||
import docspell.joex.scheduler.Context
|
||||
import docspell.joex.scheduler.Task
|
||||
import docspell.store.records.RAttachmentMeta
|
||||
import docspell.store.records.RClassifierSetting
|
||||
import docspell.store.records.{RAttachmentMeta, RClassifierSetting}
|
||||
|
||||
import bitpeace.RangeDef
|
||||
|
||||
@ -42,10 +41,13 @@ object TextAnalysis {
|
||||
e <- s
|
||||
_ <- ctx.logger.info(s"Text-Analysis finished in ${e.formatExact}")
|
||||
v = t.toVector
|
||||
tag <- predictTag(ctx, cfg, item.metas, analyser.classifier).value
|
||||
classifierEnabled <- getActive(ctx, cfg)
|
||||
tag <-
|
||||
if (classifierEnabled) predictTags(ctx, cfg, item.metas, analyser.classifier)
|
||||
else List.empty[String].pure[F]
|
||||
} yield item
|
||||
.copy(metas = v.map(_._1), dateLabels = v.map(_._2))
|
||||
.appendTags(tag.toSeq)
|
||||
.appendTags(tag)
|
||||
}
|
||||
|
||||
def annotateAttachment[F[_]: Sync](
|
||||
@ -66,15 +68,29 @@ object TextAnalysis {
|
||||
} yield (rm.copy(nerlabels = labels.all.toList), AttachmentDates(rm, labels.dates))
|
||||
}
|
||||
|
||||
/** Guesses tags for the given attachment texts using all tag models
  * of the collective.
  *
  * The models are looked up via `ClassifierName.findTagModels`; for
  * each model `predictTag` is run with the model's file id. Models
  * that yield no tag contribute nothing to the result.
  */
def predictTags[F[_]: Sync: ContextShift](
    ctx: Context[F, Args],
    cfg: Config.TextAnalysis,
    metas: Vector[RAttachmentMeta],
    classifier: TextClassifier[F]
): F[List[String]] = {
  // Only the model file id varies between the predictions.
  val guessWith = predictTag(ctx, cfg, metas, classifier) _

  ctx.store
    .transact(ClassifierName.findTagModels(ctx.args.meta.collective))
    .flatMap { models =>
      ctx.logger
        .debug(s"Guessing tags for ${models.size} categories")
        .flatMap(_ => models.map(_.fileId.some).traverse(guessWith))
    }
    .map(_.flatten)
}
|
||||
|
||||
def predictTag[F[_]: Sync: ContextShift](
|
||||
ctx: Context[F, Args],
|
||||
cfg: Config.TextAnalysis,
|
||||
metas: Vector[RAttachmentMeta],
|
||||
classifier: TextClassifier[F]
|
||||
): OptionT[F, String] =
|
||||
for {
|
||||
model <- findActiveModel(ctx, cfg)
|
||||
_ <- OptionT.liftF(ctx.logger.info(s"Guessing tag …"))
|
||||
)(modelFileId: Option[Ident]): F[Option[String]] =
|
||||
(for {
|
||||
_ <- OptionT.liftF(ctx.logger.info(s"Guessing tag for ${modelFileId} …"))
|
||||
model <- OptionT.fromOption[F](modelFileId)
|
||||
text = metas.flatMap(_.content).mkString(LearnClassifierTask.pageSep)
|
||||
modelData =
|
||||
ctx.store.bitpeace
|
||||
@ -90,20 +106,21 @@ object TextAnalysis {
|
||||
.flatMap(_ => classifier.classify(ctx.logger, ClassifierModel(modelFile), text))
|
||||
}).filter(_ != LearnClassifierTask.noClass)
|
||||
_ <- OptionT.liftF(ctx.logger.debug(s"Guessed tag: ${cls}"))
|
||||
} yield cls
|
||||
} yield cls).value
|
||||
|
||||
private def findActiveModel[F[_]: Sync](
|
||||
private def getActive[F[_]: Sync](
|
||||
ctx: Context[F, Args],
|
||||
cfg: Config.TextAnalysis
|
||||
): OptionT[F, Ident] =
|
||||
(if (cfg.classification.enabled)
|
||||
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.meta.collective)))
|
||||
.filter(_.enabled)
|
||||
.mapFilter(_.fileId)
|
||||
else
|
||||
OptionT.none[F, Ident]).orElse(
|
||||
OptionT.liftF(ctx.logger.info("Classification is disabled.")) *> OptionT
|
||||
.none[F, Ident]
|
||||
)
|
||||
): F[Boolean] =
|
||||
if (cfg.classification.enabled)
|
||||
ctx.store
|
||||
.transact(RClassifierSetting.findById(ctx.args.meta.collective))
|
||||
.map(_.exists(_.enabled))
|
||||
.flatTap(enabled =>
|
||||
if (enabled) ().pure[F]
|
||||
else ctx.logger.info("Classification is disabled. Check config or settings.")
|
||||
)
|
||||
else
|
||||
ctx.logger.info("Classification is disabled.") *> false.pure[F]
|
||||
|
||||
}
|
||||
|
@ -4856,8 +4856,6 @@ components:
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
category:
|
||||
type: string
|
||||
itemCount:
|
||||
type: integer
|
||||
format: int32
|
||||
|
@ -46,8 +46,7 @@ object CollectiveRoutes {
|
||||
OCollective.Classifier(
|
||||
settings.classifier.enabled,
|
||||
settings.classifier.schedule,
|
||||
settings.classifier.itemCount,
|
||||
settings.classifier.category
|
||||
settings.classifier.itemCount
|
||||
)
|
||||
)
|
||||
)
|
||||
@ -65,8 +64,7 @@ object CollectiveRoutes {
|
||||
c.language,
|
||||
c.integrationEnabled,
|
||||
ClassifierSetting(
|
||||
c.classifier.map(_.enabled).getOrElse(false),
|
||||
c.classifier.flatMap(_.category),
|
||||
c.classifier.exists(_.enabled),
|
||||
c.classifier.map(_.itemCount).getOrElse(0),
|
||||
c.classifier
|
||||
.map(_.schedule)
|
||||
|
@ -0,0 +1,21 @@
|
||||
CREATE TABLE "classifier_model"(
|
||||
"id" varchar(254) not null primary key,
|
||||
"cid" varchar(254) not null,
|
||||
"name" varchar(254) not null,
|
||||
"file_id" varchar(254) not null,
|
||||
"created" timestamp not null,
|
||||
foreign key ("cid") references "collective"("cid"),
|
||||
foreign key ("file_id") references "filemeta"("id"),
|
||||
unique ("cid", "name")
|
||||
);
|
||||
|
||||
insert into "classifier_model"
|
||||
select random_uuid() as "id", "cid", concat('tagcategory-', "category") as "name", "file_id", "created"
|
||||
from "classifier_setting"
|
||||
where "file_id" is not null;
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "category";
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "file_id";
|
@ -0,0 +1,26 @@
|
||||
CREATE TABLE `classifier_model`(
|
||||
`id` varchar(254) not null primary key,
|
||||
`cid` varchar(254) not null,
|
||||
`name` varchar(254) not null,
|
||||
`file_id` varchar(254) not null,
|
||||
`created` timestamp not null,
|
||||
foreign key (`cid`) references `collective`(`cid`),
|
||||
foreign key (`file_id`) references `filemeta`(`id`),
|
||||
unique (`cid`, `name`)
|
||||
);
|
||||
|
||||
insert into `classifier_model`
|
||||
select md5(rand()) as id, `cid`,concat('tagcategory-', `category`) as `name`, `file_id`, `created`
|
||||
from `classifier_setting`
|
||||
where `file_id` is not null;
|
||||
|
||||
alter table `classifier_setting`
|
||||
drop column `category`;
|
||||
|
||||
-- mariadb needs special treatment when dropping a column that is part
|
||||
-- of an index and foreign key
|
||||
alter table `classifier_setting`
|
||||
drop constraint `classifier_setting_ibfk_2`;
|
||||
|
||||
alter table `classifier_setting`
|
||||
drop column `file_id`;
|
@ -0,0 +1,21 @@
|
||||
CREATE TABLE "classifier_model"(
|
||||
"id" varchar(254) not null primary key,
|
||||
"cid" varchar(254) not null,
|
||||
"name" varchar(254) not null,
|
||||
"file_id" varchar(254) not null,
|
||||
"created" timestamp not null,
|
||||
foreign key ("cid") references "collective"("cid"),
|
||||
foreign key ("file_id") references "filemeta"("id"),
|
||||
unique ("cid", "name")
|
||||
);
|
||||
|
||||
insert into "classifier_model"
|
||||
select md5(random()::text) as id, "cid",'tagcategory-' || "category" as "name", "file_id", "created"
|
||||
from "classifier_setting"
|
||||
where "file_id" is not null;
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "category";
|
||||
|
||||
alter table "classifier_setting"
|
||||
drop column "file_id";
|
@ -543,11 +543,14 @@ object QItem {
|
||||
|
||||
def findAllNewesFirst(
|
||||
collective: Ident,
|
||||
chunkSize: Int
|
||||
chunkSize: Int,
|
||||
limit: Batch
|
||||
): Stream[ConnectionIO, Ident] = {
|
||||
|
||||
val i = RItem.as("i")
|
||||
Select(i.id.s, from(i), i.cid === collective && i.state === ItemState.confirmed)
|
||||
.orderBy(i.created.desc)
|
||||
.limit(limit)
|
||||
.build
|
||||
.query[Ident]
|
||||
.streamWithChunkSize(chunkSize)
|
||||
|
@ -0,0 +1,78 @@
|
||||
package docspell.store.records
|
||||
|
||||
import cats.effect._
|
||||
import cats.data.NonEmptyList
|
||||
import cats.implicits._
|
||||
|
||||
import docspell.common._
|
||||
import docspell.store.qb.DSL._
|
||||
import docspell.store.qb._
|
||||
|
||||
import doobie._
|
||||
import doobie.implicits._
|
||||
|
||||
/** A row of the `classifier_model` table: a named model file of a collective. */
final case class RClassifierModel(
    id: Ident,
    cid: Ident,
    name: String,
    fileId: Ident,
    created: Timestamp
)

object RClassifierModel {

  /** Creates a new record value with a random id and the current time.
    * Nothing is written to the database.
    */
  def createNew[F[_]: Sync](
      cid: Ident,
      name: String,
      fileId: Ident
  ): F[RClassifierModel] =
    Ident.randomId[F].flatMap { newId =>
      Timestamp.current[F].map(now => RClassifierModel(newId, cid, name, fileId, now))
    }

  final case class Table(alias: Option[String]) extends TableDef {
    val tableName = "classifier_model"

    val id      = Column[Ident]("id", this)
    val cid     = Column[Ident]("cid", this)
    val name    = Column[String]("name", this)
    val fileId  = Column[Ident]("file_id", this)
    val created = Column[Timestamp]("created", this)

    val all = NonEmptyList.of[Column[_]](id, cid, name, fileId, created)
  }

  def as(alias: String): Table =
    Table(Some(alias))

  val T = Table(None)

  def insert(v: RClassifierModel): ConnectionIO[Int] =
    DML.insert(
      T,
      T.all,
      fr"${v.id},${v.cid},${v.name},${v.fileId},${v.created}"
    )

  /** Sets the file id of the model `name` of the given collective.
    *
    * If the update touches no row, a new record is inserted instead.
    * Returns the number of updated rows, or the insert count when a
    * new record had to be created.
    */
  def updateFile(coll: Ident, name: String, fid: Ident): ConnectionIO[Int] =
    DML
      .update(T, T.cid === coll && T.name === name, DML.set(T.fileId.setTo(fid)))
      .flatMap {
        case 0       => createNew[ConnectionIO](coll, name, fid).flatMap(insert)
        case updated => updated.pure[ConnectionIO]
      }

  /** Finds the model with exactly this name in the given collective. */
  def findByName(cid: Ident, name: String): ConnectionIO[Option[RClassifierModel]] = {
    val byName = Select(select(T.all), from(T), T.cid === cid && T.name === name)
    byName.build.query[RClassifierModel].option
  }

  /** Finds all models of the collective whose name is one of `names`. */
  def findAllByName(
      cid: Ident,
      names: NonEmptyList[String]
  ): ConnectionIO[List[RClassifierModel]] = {
    val byNames = Select(select(T.all), from(T), T.cid === cid && T.name.in(names))
    byNames.build.query[RClassifierModel].to[List]
  }
}
|
@ -15,9 +15,7 @@ case class RClassifierSetting(
|
||||
cid: Ident,
|
||||
enabled: Boolean,
|
||||
schedule: CalEvent,
|
||||
category: String,
|
||||
itemCount: Int,
|
||||
fileId: Option[Ident],
|
||||
created: Timestamp
|
||||
) {}
|
||||
|
||||
@ -28,12 +26,10 @@ object RClassifierSetting {
|
||||
val cid = Column[Ident]("cid", this)
|
||||
val enabled = Column[Boolean]("enabled", this)
|
||||
val schedule = Column[CalEvent]("schedule", this)
|
||||
val category = Column[String]("category", this)
|
||||
val itemCount = Column[Int]("item_count", this)
|
||||
val fileId = Column[Ident]("file_id", this)
|
||||
val created = Column[Timestamp]("created", this)
|
||||
val all = NonEmptyList
|
||||
.of[Column[_]](cid, enabled, schedule, category, itemCount, fileId, created)
|
||||
.of[Column[_]](cid, enabled, schedule, itemCount, created)
|
||||
}
|
||||
|
||||
val T = Table(None)
|
||||
@ -44,7 +40,7 @@ object RClassifierSetting {
|
||||
DML.insert(
|
||||
T,
|
||||
T.all,
|
||||
fr"${v.cid},${v.enabled},${v.schedule},${v.category},${v.itemCount},${v.fileId},${v.created}"
|
||||
fr"${v.cid},${v.enabled},${v.schedule},${v.itemCount},${v.created}"
|
||||
)
|
||||
|
||||
def updateAll(v: RClassifierSetting): ConnectionIO[Int] =
|
||||
@ -54,15 +50,10 @@ object RClassifierSetting {
|
||||
DML.set(
|
||||
T.enabled.setTo(v.enabled),
|
||||
T.schedule.setTo(v.schedule),
|
||||
T.category.setTo(v.category),
|
||||
T.itemCount.setTo(v.itemCount),
|
||||
T.fileId.setTo(v.fileId)
|
||||
T.itemCount.setTo(v.itemCount)
|
||||
)
|
||||
)
|
||||
|
||||
def updateFile(coll: Ident, fid: Ident): ConnectionIO[Int] =
|
||||
DML.update(T, T.cid === coll, DML.set(T.fileId.setTo(fid)))
|
||||
|
||||
def updateSettings(v: RClassifierSetting): ConnectionIO[Int] =
|
||||
for {
|
||||
n1 <- DML.update(
|
||||
@ -71,8 +62,7 @@ object RClassifierSetting {
|
||||
DML.set(
|
||||
T.enabled.setTo(v.enabled),
|
||||
T.schedule.setTo(v.schedule),
|
||||
T.itemCount.setTo(v.itemCount),
|
||||
T.category.setTo(v.category)
|
||||
T.itemCount.setTo(v.itemCount)
|
||||
)
|
||||
)
|
||||
n2 <- if (n1 <= 0) insert(v) else 0.pure[ConnectionIO]
|
||||
@ -89,8 +79,7 @@ object RClassifierSetting {
|
||||
case class Classifier(
|
||||
enabled: Boolean,
|
||||
schedule: CalEvent,
|
||||
itemCount: Int,
|
||||
category: Option[String]
|
||||
itemCount: Int
|
||||
) {
|
||||
|
||||
def toRecord(coll: Ident, created: Timestamp): RClassifierSetting =
|
||||
@ -98,15 +87,13 @@ object RClassifierSetting {
|
||||
coll,
|
||||
enabled,
|
||||
schedule,
|
||||
category.getOrElse(""),
|
||||
itemCount,
|
||||
None,
|
||||
created
|
||||
)
|
||||
}
|
||||
object Classifier {
|
||||
def fromRecord(r: RClassifierSetting): Classifier =
|
||||
Classifier(r.enabled, r.schedule, r.itemCount, r.category.some)
|
||||
Classifier(r.enabled, r.schedule, r.itemCount)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -89,8 +89,7 @@ object RCollective {
|
||||
c.integration.s,
|
||||
cs.enabled.s,
|
||||
cs.schedule.s,
|
||||
cs.itemCount.s,
|
||||
cs.category.s
|
||||
cs.itemCount.s
|
||||
),
|
||||
from(c).leftJoin(cs, cs.cid === c.id),
|
||||
c.id === coll
|
||||
|
@ -148,6 +148,13 @@ object RTag {
|
||||
).orderBy(T.name.asc).build.query[RTag].to[List]
|
||||
}
|
||||
|
||||
/** Lists the distinct tag categories of a collective.
  *
  * The category column is wrapped in `coalesce` with `fallback`, so
  * tags without a category show up as the `fallback` value.
  */
def listCategories(coll: Ident, fallback: String): ConnectionIO[List[String]] = {
  val categoryOrFallback = coalesce(T.category.s, lit(fallback)).s
  val distinctCategories = Select(categoryOrFallback, from(T), T.cid === coll).distinct

  distinctCategories.build.query[String].to[List]
}
|
||||
|
||||
def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] =
|
||||
DML.delete(T, T.tid === tagId && T.cid === coll)
|
||||
}
|
||||
|
@ -25,8 +25,6 @@ import Util.Tag
|
||||
|
||||
type alias Model =
|
||||
{ enabled : Bool
|
||||
, categoryModel : Comp.FixedDropdown.Model String
|
||||
, category : Maybe String
|
||||
, scheduleModel : Comp.CalEventInput.Model
|
||||
, schedule : Validated CalEvent
|
||||
, itemCountModel : Comp.IntField.Model
|
||||
@ -35,10 +33,8 @@ type alias Model =
|
||||
|
||||
|
||||
type Msg
|
||||
= GetTagsResp (Result Http.Error TagList)
|
||||
| ScheduleMsg Comp.CalEventInput.Msg
|
||||
= ScheduleMsg Comp.CalEventInput.Msg
|
||||
| ToggleEnabled
|
||||
| CategoryMsg (Comp.FixedDropdown.Msg String)
|
||||
| ItemCountMsg Comp.IntField.Msg
|
||||
|
||||
|
||||
@ -53,17 +49,12 @@ init flags sett =
|
||||
Comp.CalEventInput.init flags newSchedule
|
||||
in
|
||||
( { enabled = sett.enabled
|
||||
, categoryModel = Comp.FixedDropdown.initString []
|
||||
, category = sett.category
|
||||
, scheduleModel = cem
|
||||
, schedule = Data.Validated.Unknown newSchedule
|
||||
, itemCountModel = Comp.IntField.init (Just 0) Nothing True "Item Count"
|
||||
, itemCount = Just sett.itemCount
|
||||
}
|
||||
, Cmd.batch
|
||||
[ Api.getTags flags "" GetTagsResp
|
||||
, Cmd.map ScheduleMsg cec
|
||||
]
|
||||
, Cmd.map ScheduleMsg cec
|
||||
)
|
||||
|
||||
|
||||
@ -72,7 +63,6 @@ getSettings model =
|
||||
Data.Validated.map
|
||||
(\sch ->
|
||||
{ enabled = model.enabled
|
||||
, category = model.category
|
||||
, schedule =
|
||||
Data.CalEvent.makeEvent sch
|
||||
, itemCount = Maybe.withDefault 0 model.itemCount
|
||||
@ -84,27 +74,6 @@ getSettings model =
|
||||
update : Flags -> Msg -> Model -> ( Model, Cmd Msg )
|
||||
update flags msg model =
|
||||
case msg of
|
||||
GetTagsResp (Ok tl) ->
|
||||
let
|
||||
categories =
|
||||
Util.Tag.getCategories tl.items
|
||||
|> List.sort
|
||||
in
|
||||
( { model
|
||||
| categoryModel = Comp.FixedDropdown.initString categories
|
||||
, category =
|
||||
if model.category == Nothing then
|
||||
List.head categories
|
||||
|
||||
else
|
||||
model.category
|
||||
}
|
||||
, Cmd.none
|
||||
)
|
||||
|
||||
GetTagsResp (Err _) ->
|
||||
( model, Cmd.none )
|
||||
|
||||
ScheduleMsg lmsg ->
|
||||
let
|
||||
( cm, cc, ce ) =
|
||||
@ -126,23 +95,6 @@ update flags msg model =
|
||||
, Cmd.none
|
||||
)
|
||||
|
||||
CategoryMsg lmsg ->
|
||||
let
|
||||
( mm, ma ) =
|
||||
Comp.FixedDropdown.update lmsg model.categoryModel
|
||||
in
|
||||
( { model
|
||||
| categoryModel = mm
|
||||
, category =
|
||||
if ma == Nothing then
|
||||
model.category
|
||||
|
||||
else
|
||||
ma
|
||||
}
|
||||
, Cmd.none
|
||||
)
|
||||
|
||||
ItemCountMsg lmsg ->
|
||||
let
|
||||
( im, iv ) =
|
||||
@ -182,13 +134,6 @@ view model =
|
||||
, text "periodically based on a schedule and you need to specify a tag-group that should "
|
||||
, text "be used for learning."
|
||||
]
|
||||
, div [ class "field" ]
|
||||
[ label [] [ text "Category" ]
|
||||
, Html.map CategoryMsg
|
||||
(Comp.FixedDropdown.viewString model.category
|
||||
model.categoryModel
|
||||
)
|
||||
]
|
||||
, Html.map ItemCountMsg
|
||||
(Comp.IntField.viewWithInfo
|
||||
"The maximum number of items to learn from, order by date newest first. Use 0 to mean all."
|
||||
|
Loading…
x
Reference in New Issue
Block a user