Only learn from confirmed items

Text classification should only learn from confirmed items. Log if
classification is disabled when processing an item.
This commit is contained in:
Eike Kettner 2020-09-07 13:04:40 +02:00
parent cb1a9e0699
commit 76ccfb8a81
4 changed files with 24 additions and 9 deletions

View File

@ -23,6 +23,11 @@ object ItemState {
case object Created extends ItemState
case object Confirmed extends ItemState
// Accessors that expose each state with the static type `ItemState`
// instead of the singleton type of the underlying case object.
// NOTE(review): presumably added so call sites that need an `ItemState`
// (rather than e.g. `Confirmed.type`) infer the right type — confirm against callers.
def premature: ItemState = Premature
def processing: ItemState = Processing
def created: ItemState = Created
def confirmed: ItemState = Confirmed
def fromString(str: String): Either[String, ItemState] =
str.toLowerCase match {
case "premature" => Right(Premature)

View File

@ -157,8 +157,9 @@ object RegexNerFile {
.reduce(_ ++ fr"UNION ALL" ++ _)
selectSimple(fr"MAX(t)", fr"(" ++ sql ++ fr") as x", Fragment.empty)
.query[Timestamp]
.query[Option[Timestamp]]
.option
.map(_.flatten)
}
}
}

View File

@ -98,11 +98,14 @@ object TextAnalysis {
ctx: Context[F, Args],
cfg: Config.TextAnalysis
): OptionT[F, Ident] =
if (cfg.classification.enabled)
(if (cfg.classification.enabled)
OptionT(ctx.store.transact(RClassifierSetting.findById(ctx.args.meta.collective)))
.filter(_.enabled)
.mapFilter(_.fileId)
else
OptionT.none
OptionT.none[F, Ident]).orElse(
OptionT.liftF(ctx.logger.info("Classification is disabled.")) *> OptionT
.none[F, Ident]
)
}

View File

@ -622,7 +622,13 @@ object QItem {
chunkSize: Int
): Stream[ConnectionIO, Ident] = {
val cols = Seq(RItem.Columns.id)
(selectSimple(cols, RItem.table, RItem.Columns.cid.is(collective)) ++
val iColl = RItem.Columns.cid
val iState = RItem.Columns.state
(selectSimple(
cols,
RItem.table,
and(iColl.is(collective), iState.is(ItemState.confirmed))
) ++
orderBy(RItem.Columns.created.desc))
.query[Ident]
.streamWithChunkSize(chunkSize)