Allow more suggestions for date variants in English

This commit is contained in:
Eike Kettner 2021-02-26 00:34:57 +01:00
parent 7fa183b2ca
commit c7d4c77e6d
2 changed files with 58 additions and 13 deletions

View File

@ -4,7 +4,9 @@ import java.time.LocalDate
import scala.util.Try
import cats.data.{NonEmptyList => Nel}
import cats.implicits._
import cats.kernel.Semigroup
import fs2.{Pure, Stream}
import docspell.analysis.split._
@ -61,8 +63,7 @@ object DateFind {
val mdy = pattern2(lang)
// most is from wikipedia
val p = lang match {
case Language.English =>
mdy.alt(dmy).map(t => t._1 ++ t._2).or(mdy).or(ymd).or(dmy)
case Language.English => Reader.all(dmy, mdy, ymd)
case Language.German => dmy.or(ymd).or(mdy)
case Language.French => dmy.or(ymd).or(mdy)
case Language.Italian => dmy.or(ymd).or(mdy)
@ -117,9 +118,6 @@ object DateFind {
case Result.Failure => Result.Failure
})
def alt(other: Reader[A]): Reader[(A, A)] =
Reader(words => Result.combine(read(words), other.read(words)))
def or(other: Reader[A]): Reader[A] =
Reader(words =>
read(words) match {
@ -133,6 +131,9 @@ object DateFind {
def fail[A]: Reader[A] =
Reader(_ => Result.Failure)
def all[A: Semigroup](reader: Reader[A], more: Reader[A]*): Reader[A] =
Reader(words => Nel.of(reader, more: _*).map(_.read(words)).reduce)
def readFirst[A](f: Word => Option[A]): Reader[A] =
Reader({
case Nil => Result.Failure
@ -162,12 +163,22 @@ object DateFind {
def map[B](f: Nothing => B): Result[B] = this
def next[B](r: Reader[B]): Result[(Nothing, B)] = this
}
def combine[A](r0: Result[A], r1: Result[A]): Result[(A, A)] =
(r0, r1) match {
case (Success(a0, _), Success(a1, r1)) =>
Success((a0, a1), r1)
case _ =>
Failure
implicit def resultSemigroup[A: Semigroup]: Semigroup[Result[A]] =
Semigroup.instance { (r0, r1) =>
(r0, r1) match {
case (Success(a0, r0), Success(a1, r1)) =>
Success(Semigroup[A].combine(a0, a1), if (r0.size < r1.size) r0 else r1)
case (s @ Success(_, _), Failure) =>
s
case (Failure, s @ Success(_, _)) =>
s
case (Failure, Failure) =>
Failure
}
}
}
}

View File

@ -49,11 +49,11 @@ object DateFindSpec extends SimpleTestSuite {
DateFind.findDates("on 11/05/2020", Language.English).toVector,
Vector(
NerDateLabel(
LocalDate.of(2020, 11, 5),
LocalDate.of(2020, 5, 11),
NerLabel("11/05/2020", NerTag.Date, 3, 13)
),
NerDateLabel(
LocalDate.of(2020, 5, 11),
LocalDate.of(2020, 11, 5),
NerLabel("11/05/2020", NerTag.Date, 3, 13)
)
)
@ -69,4 +69,38 @@ object DateFindSpec extends SimpleTestSuite {
)
}
test("more english variants") {
assertEquals(
DateFind.findDates("on 26/01/15", Language.English).toVector,
Vector(
NerDateLabel(
LocalDate.of(2015, 1, 26),
NerLabel("26/01/15", NerTag.Date, 3, 11)
),
NerDateLabel(
LocalDate.of(2026, 1, 15),
NerLabel("26/01/15", NerTag.Date, 3, 11)
)
)
)
assertEquals(
DateFind.findDates("on 10/09/11", Language.English).toVector,
Vector(
NerDateLabel(
LocalDate.of(2011, 9, 10),
NerLabel("10/09/11", NerTag.Date, 3, 11)
),
NerDateLabel(
LocalDate.of(2011, 10, 9),
NerLabel("10/09/11", NerTag.Date, 3, 11)
),
NerDateLabel(
LocalDate.of(2010, 9, 11),
NerLabel("10/09/11", NerTag.Date, 3, 11)
)
)
)
}
}