diff --git a/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala b/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala index 1c8f9349..81333a7f 100644 --- a/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala +++ b/modules/analysis/src/main/scala/docspell/analysis/date/DateFind.scala @@ -4,6 +4,7 @@ import java.time.LocalDate import scala.util.Try +import cats.implicits._ import fs2.{Pure, Stream} import docspell.analysis.split._ @@ -56,7 +57,12 @@ object DateFind { case Language.German => p1.or(p0).or(p2) case Language.French => p1.or(p0).or(p2) } - p.read(parts).toOption + p.read(parts) match { + case Result.Success(sd, _) => + Either.catchNonFatal(sd.toLocalDate).map(_ => sd).toOption + case Result.Failure => + None + } } def readYear: Reader[Int] = diff --git a/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala b/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala index 5ffc853f..d954f7a7 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala @@ -2,13 +2,45 @@ package docspell.analysis.date import docspell.files.TestFiles import minitest.SimpleTestSuite -import docspell.common.Language +import docspell.common._ +import java.time._ object DateFindSpec extends SimpleTestSuite { test("find simple dates") { + val expect = Vector( + NerDateLabel( + LocalDate.parse("2016-11-07"), + NerLabel("November 7, 2016", NerTag.Date, 50, 60) + ), + NerDateLabel( + LocalDate.parse("2016-11-07"), + NerLabel("November 7, 2016", NerTag.Date, 119, 129) + ), + NerDateLabel( + LocalDate.parse("2019-09-03"), + NerLabel("September 3, 2019", NerTag.Date, 249, 260) + ), + NerDateLabel( + LocalDate.parse("2016-12-12"), + NerLabel("December 12, 2016", NerTag.Date, 1076, 1087) + ) + ) - //println(DateFind.findDates(TestFiles.letterDEText, Language.German).toVector) - println(DateFind.findDates(TestFiles.letterENText, Language.English).toVector) + assertEquals( + DateFind.findDates(TestFiles.letterENText, Language.English).toVector, + expect + ) + } + + test("skip invalid dates") { + assertEquals( + DateFind.findDates("Feb 29, 2005", Language.English).toVector, + Vector.empty + ) + assertEquals( + DateFind.findDates("30. Februar 1990", Language.German).toVector, + Vector.empty + ) } }