Merge pull request #301 from eikek/fix-date-validation

Skip invalid dates found in texts
This commit is contained in:
eikek 2020-10-02 22:49:50 +02:00 committed by GitHub
commit c5676f9f56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 4 deletions

View File

@ -4,6 +4,7 @@ import java.time.LocalDate
import scala.util.Try
import cats.implicits._
import fs2.{Pure, Stream}
import docspell.analysis.split._
@ -56,7 +57,12 @@ object DateFind {
case Language.German => p1.or(p0).or(p2)
case Language.French => p1.or(p0).or(p2)
}
p.read(parts).toOption
p.read(parts) match {
case Result.Success(sd, _) =>
Either.catchNonFatal(sd.toLocalDate).map(_ => sd).toOption
case Result.Failure =>
None
}
}
def readYear: Reader[Int] =

View File

@ -2,13 +2,45 @@ package docspell.analysis.date
import docspell.files.TestFiles
import minitest.SimpleTestSuite
import docspell.common.Language
import docspell.common._
import java.time._
/** Tests for [[DateFind]]: dates are extracted from text, and matches that
  * do not denote a real calendar date yield no result.
  */
object DateFindSpec extends SimpleTestSuite {

  test("find simple dates") {
    // Expected matches for the English sample letter. The comparison below
    // is on Vectors, so the order of the elements matters.
    // NOTE(review): the two trailing numbers of each NerLabel look like
    // start/end positions in the text -- confirm against NerLabel.
    val expect = Vector(
      NerDateLabel(
        LocalDate.parse("2016-11-07"),
        NerLabel("November 7, 2016", NerTag.Date, 50, 60)
      ),
      NerDateLabel(
        LocalDate.parse("2016-11-07"),
        NerLabel("November 7, 2016", NerTag.Date, 119, 129)
      ),
      NerDateLabel(
        LocalDate.parse("2019-09-03"),
        NerLabel("September 3, 2019", NerTag.Date, 249, 260)
      ),
      NerDateLabel(
        LocalDate.parse("2016-12-12"),
        NerLabel("December 12, 2016", NerTag.Date, 1076, 1087)
      )
    )

    assertEquals(
      DateFind.findDates(TestFiles.letterENText, Language.English).toVector,
      expect
    )
  }

  test("skip invalid dates") {
    // 2005 is not a leap year, so there is no February 29.
    assertEquals(
      DateFind.findDates("Feb 29, 2005", Language.English).toVector,
      Vector.empty
    )
    // February never has 30 days.
    assertEquals(
      DateFind.findDates("30. Februar 1990", Language.German).toVector,
      Vector.empty
    )
  }
}