From c9bd57592bc4278cd2e0a7f456582d3647a02731 Mon Sep 17 00:00:00 2001
From: Eike Kettner
Date: Mon, 7 Sep 2020 20:01:30 +0200
Subject: [PATCH] Don't use test data if there is just one config

If classifier models cannot be compared, there is no reason to test.
---
 .../scala/docspell/analysis/nlp/StanfordTextClassifier.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala
index d8846fc4..4e93844c 100644
--- a/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala
+++ b/modules/analysis/src/main/scala/docspell/analysis/nlp/StanfordTextClassifier.scala
@@ -66,7 +66,8 @@ final class StanfordTextClassifier[F[_]: Sync: ContextShift](
     } yield res
 
   def splitData(logger: Logger[F], in: RawData): F[TrainData] = {
-    val nTest = (in.count * 0.15).toLong
+    val f = if (cfg.classifierConfigs.size > 1) 0.15 else 0.0
+    val nTest = (in.count * f).toLong
 
     val td =
       TrainData(in.file.resolveSibling("train.txt"), in.file.resolveSibling("test.txt"))