mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Add Latvian language
This commit is contained in:
@ -17,6 +17,7 @@ object DateFind {
|
||||
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
|
||||
TextSplitter
|
||||
.splitToken(text, " \t.,\n\r/".toSet)
|
||||
.filter(w => lang != Language.Latvian || w.value != "gada")
|
||||
.sliding(3)
|
||||
.filter(_.length == 3)
|
||||
.flatMap(q =>
|
||||
@ -55,6 +56,10 @@ object DateFind {
|
||||
case ((m, d), y) =>
|
||||
List(SimpleDate(y, m, d))
|
||||
}
|
||||
// Latvian long date form: the year comes first, then the day, then the
// month name, e.g. "2020. gada 30. jūlijs". The word "gada" is filtered out
// of the token stream in `findDates`, so only three parts are read here.
def lavLong =
  readYear
    .>>(readDay)
    .>>(readMonth(Language.Latvian))
    .map { parsed =>
      val ((year, day), month) = parsed
      List(SimpleDate(year, month, day))
    }
|
||||
|
||||
// ymd ✔, ydm ✔ (Latvian long form only), dmy ✔, dym, myd, mdy ✔
|
||||
def fromParts(parts: List[Word], lang: Language): List[SimpleDate] = {
|
||||
@ -77,6 +82,7 @@ object DateFind {
|
||||
case Language.Russian => dmy.or(ymd).or(mdy)
|
||||
case Language.Swedish => ymd.or(dmy).or(mdy)
|
||||
case Language.Dutch => dmy.or(ymd).or(mdy)
|
||||
case Language.Latvian => dmy.or(lavLong).or(ymd)
|
||||
}
|
||||
p.read(parts) match {
|
||||
case Result.Success(sds, _) =>
|
||||
|
@ -42,6 +42,8 @@ object MonthName {
|
||||
finnish
|
||||
case Language.Russian =>
|
||||
russian
|
||||
case Language.Latvian =>
|
||||
latvian
|
||||
}
|
||||
|
||||
private val numbers = List(
|
||||
@ -267,4 +269,19 @@ object MonthName {
|
||||
List("nov", "november"),
|
||||
List("dec", "december")
|
||||
)
|
||||
|
||||
// Latvian month names, one inner list per month in calendar order
// (January first, December last).
//
// Each month lists the nominative form, the locative form that is common in
// written dates (e.g. "30. jūlijā") and, where one exists, its abbreviation.
// Abbreviations are given both with and without the trailing period: '.' is
// one of the separators passed to the tokenizer in `DateFind.findDates`, so
// a token taken from the text is expected to arrive without the period.
// NOTE(review): this assumes month lookup compares whole tokens — confirm
// against `readMonth`. The dotted spellings are kept for compatibility.
private val latvian = List(
  List("janvāris", "janvārī", "janv.", "janv"),
  List("februāris", "februārī", "febr.", "febr"),
  List("marts", "martā"),
  List("aprīlis", "aprīlī", "apr.", "apr"),
  List("maijs", "maijā"),
  List("jūnijs", "jūnijā", "jūn.", "jūn"),
  List("jūlijs", "jūlijā", "jūl.", "jūl"),
  List("augusts", "augustā", "aug.", "aug"),
  List("septembris", "septembrī", "sept.", "sept"),
  List("oktobris", "oktobrī", "okt.", "okt"),
  List("novembris", "novembrī", "nov.", "nov"),
  List("decembris", "decembrī", "dec.", "dec")
)
|
||||
}
|
||||
|
@ -103,4 +103,36 @@ object DateFindSpec extends SimpleTestSuite {
|
||||
)
|
||||
}
|
||||
|
||||
// Covers three Latvian notations: the long form with spaces after the
// periods, the numeric day.month.year form, and the long form written
// without spaces after the periods. Each text must yield exactly one label,
// carrying the matched substring and its start/end offsets in the input.
test("find latvian dates") {
  val spacedLongForm =
    DateFind.findDates("on 2020. gada 30. jūlijs there", Language.Latvian).toVector
  val spacedLongLabel = NerLabel("2020. gada 30. jūlijs", NerTag.Date, 3, 24)
  assertEquals(
    spacedLongForm,
    Vector(NerDateLabel(LocalDate.of(2020, 7, 30), spacedLongLabel))
  )

  val numericForm =
    DateFind.findDates("Lai gan 30.07.2020", Language.Latvian).toVector
  val numericLabel = NerLabel("30.07.2020", NerTag.Date, 8, 18)
  assertEquals(
    numericForm,
    Vector(NerDateLabel(LocalDate.of(2020, 7, 30), numericLabel))
  )

  val compactLongForm =
    DateFind
      .findDates("Es nevaru šodien 2020.gada 30.oktobris iet uz", Language.Latvian)
      .toVector
  val compactLongLabel = NerLabel("2020.gada 30.oktobris", NerTag.Date, 17, 38)
  assertEquals(
    compactLongForm,
    Vector(NerDateLabel(LocalDate.of(2020, 10, 30), compactLongLabel))
  )
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user