Fix potential index-out-of-bounds error in classifier

The Stanford library expects non-empty text, so null, empty, or whitespace-only input now yields `None` without invoking the classifier.
This commit is contained in:
Eike Kettner 2020-11-09 00:04:13 +01:00
parent 709848244c
commit cf6e63785d

View File

@ -37,14 +37,19 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
def classify( def classify(
logger: Logger[F], logger: Logger[F],
model: ClassifierModel, model: ClassifierModel,
text: String txt: String
): F[Option[String]] = ): F[Option[String]] =
Sync[F].delay { Option(txt).map(_.trim).filter(_.nonEmpty) match {
val cls = ColumnDataClassifier.getClassifier( case Some(text) =>
model.model.normalize().toAbsolutePath().toString() Sync[F].delay {
) val cls = ColumnDataClassifier.getClassifier(
val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text))) model.model.normalize().toAbsolutePath().toString()
Option(cat) )
val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text)))
Option(cat)
}
case None =>
(None: Option[String]).pure[F]
} }
// --- helpers // --- helpers