Merge pull request #666 from eikek/datefind-fix

Allow more suggestions for date variants in English
This commit is contained in:
mergify[bot] 2021-02-25 23:50:44 +00:00 committed by GitHub
commit 5a73863c59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 13 deletions

View File

@@ -4,7 +4,9 @@ import java.time.LocalDate
import scala.util.Try import scala.util.Try
import cats.data.{NonEmptyList => Nel}
import cats.implicits._ import cats.implicits._
import cats.kernel.Semigroup
import fs2.{Pure, Stream} import fs2.{Pure, Stream}
import docspell.analysis.split._ import docspell.analysis.split._
@@ -61,8 +63,7 @@ object DateFind {
val mdy = pattern2(lang) val mdy = pattern2(lang)
// most is from wikipedia // most is from wikipedia
val p = lang match { val p = lang match {
case Language.English => case Language.English => Reader.all(dmy, mdy, ymd)
mdy.alt(dmy).map(t => t._1 ++ t._2).or(mdy).or(ymd).or(dmy)
case Language.German => dmy.or(ymd).or(mdy) case Language.German => dmy.or(ymd).or(mdy)
case Language.French => dmy.or(ymd).or(mdy) case Language.French => dmy.or(ymd).or(mdy)
case Language.Italian => dmy.or(ymd).or(mdy) case Language.Italian => dmy.or(ymd).or(mdy)
@@ -117,9 +118,6 @@ object DateFind {
case Result.Failure => Result.Failure case Result.Failure => Result.Failure
}) })
/** Runs this reader and `other` against the same words and hands both
  * results to `Result.combine`, producing a reader of the paired values.
  *
  * @param other the second reader, applied to the same input as this one
  * @return a reader yielding whatever `Result.combine` makes of both results
  */
def alt(other: Reader[A]): Reader[(A, A)] =
  Reader { words =>
    // Both readers start from the identical word list; neither consumes
    // input on behalf of the other.
    val mine   = read(words)
    val theirs = other.read(words)
    Result.combine(mine, theirs)
  }
def or(other: Reader[A]): Reader[A] = def or(other: Reader[A]): Reader[A] =
Reader(words => Reader(words =>
read(words) match { read(words) match {
@@ -133,6 +131,9 @@ object DateFind {
def fail[A]: Reader[A] = def fail[A]: Reader[A] =
Reader(_ => Result.Failure) Reader(_ => Result.Failure)
/** Builds a reader that applies every given reader to the same words and
  * merges their results with the implicit `Semigroup` for `Result[A]`.
  *
  * @param reader the first reader; requiring it guarantees a non-empty list
  * @param more   any further readers, applied in the order given
  * @return a reader whose result is the reduction of all individual results
  */
def all[A: Semigroup](reader: Reader[A], more: Reader[A]*): Reader[A] =
  Reader { words =>
    // Every reader sees the full, unconsumed input.
    val first = reader.read(words)
    val rest  = more.toList.map(_.read(words))
    Nel(first, rest).reduce
  }
def readFirst[A](f: Word => Option[A]): Reader[A] = def readFirst[A](f: Word => Option[A]): Reader[A] =
Reader({ Reader({
case Nil => Result.Failure case Nil => Result.Failure
@@ -162,12 +163,22 @@ object DateFind {
def map[B](f: Nothing => B): Result[B] = this def map[B](f: Nothing => B): Result[B] = this
def next[B](r: Reader[B]): Result[(Nothing, B)] = this def next[B](r: Reader[B]): Result[(Nothing, B)] = this
} }
def combine[A](r0: Result[A], r1: Result[A]): Result[(A, A)] =
(r0, r1) match { implicit def resultSemigroup[A: Semigroup]: Semigroup[Result[A]] =
case (Success(a0, _), Success(a1, r1)) => Semigroup.instance { (r0, r1) =>
Success((a0, a1), r1) (r0, r1) match {
case _ => case (Success(a0, r0), Success(a1, r1)) =>
Failure Success(Semigroup[A].combine(a0, a1), if (r0.size < r1.size) r0 else r1)
case (s @ Success(_, _), Failure) =>
s
case (Failure, s @ Success(_, _)) =>
s
case (Failure, Failure) =>
Failure
}
} }
} }
} }

View File

@@ -49,11 +49,11 @@ object DateFindSpec extends SimpleTestSuite {
DateFind.findDates("on 11/05/2020", Language.English).toVector, DateFind.findDates("on 11/05/2020", Language.English).toVector,
Vector( Vector(
NerDateLabel( NerDateLabel(
LocalDate.of(2020, 11, 5), LocalDate.of(2020, 5, 11),
NerLabel("11/05/2020", NerTag.Date, 3, 13) NerLabel("11/05/2020", NerTag.Date, 3, 13)
), ),
NerDateLabel( NerDateLabel(
LocalDate.of(2020, 5, 11), LocalDate.of(2020, 11, 5),
NerLabel("11/05/2020", NerTag.Date, 3, 13) NerLabel("11/05/2020", NerTag.Date, 3, 13)
) )
) )
@@ -69,4 +69,38 @@ object DateFindSpec extends SimpleTestSuite {
) )
} }
test("more english variants") {
  // Expected labels for one matched text: one label per candidate date, in
  // the given order. Both inputs below put the date text at offsets 3 to 11.
  def labels(text: String, dates: LocalDate*) =
    dates.toVector.map(date => NerDateLabel(date, NerLabel(text, NerTag.Date, 3, 11)))

  // Two interpretations are expected for this input.
  val twoReadings = DateFind.findDates("on 26/01/15", Language.English).toVector
  assertEquals(
    twoReadings,
    labels(
      "26/01/15",
      LocalDate.of(2015, 1, 26),
      LocalDate.of(2026, 1, 15)
    )
  )

  // All three components can serve as day, month or two-digit year here,
  // so three interpretations are expected.
  val threeReadings = DateFind.findDates("on 10/09/11", Language.English).toVector
  assertEquals(
    threeReadings,
    labels(
      "10/09/11",
      LocalDate.of(2011, 9, 10),
      LocalDate.of(2011, 10, 9),
      LocalDate.of(2010, 9, 11)
    )
  )
}
} }