Fix potential index-out-of-bounds error in classifier

The Stanford library expects non-empty text, so null or blank input is now short-circuited to `None` instead of being passed to the classifier.
2025-07-04 16:48:26 +00:00 · 2020-11-09 00:04:13 +01:00
parent 709848244c
commit cf6e63785d
1 changed files with 12 additions and 7 deletions
--- a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala
+++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala
@@ -37,14 +37,19 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
  def classify(
      logger: Logger[F],
      model: ClassifierModel,
-      text: String
+      txt: String
  ): F[Option[String]] =
-    Sync[F].delay {
+    Option(txt).map(_.trim).filter(_.nonEmpty) match {
-      val cls = ColumnDataClassifier.getClassifier(
+      case Some(text) =>
-        model.model.normalize().toAbsolutePath().toString()
+        Sync[F].delay {
-      )
+          val cls = ColumnDataClassifier.getClassifier(
-      val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text)))
+            model.model.normalize().toAbsolutePath().toString()
-      Option(cat)
+          )
          val cat = cls.classOf(cls.makeDatumFromLine("\t\t" + normalisedText(text)))
          Option(cat)
        }
      case None =>
        (None: Option[String]).pure[F]
    }
  // --- helpers