Add Lithuanian to processing languages

Solr has no built-in support for Lithuanian; it might be possible to add
it via plugins, but that would require a manual Solr setup. Lithuanian
has therefore been added with basic support only.

Closes: #1540
This commit is contained in:
eikek
2022-05-21 14:11:38 +02:00
parent 0f1c3abd6e
commit 9d69401fea
9 changed files with 96 additions and 4 deletions

View File

@ -52,7 +52,7 @@ object DateFind {
} else text
TextSplitter
.splitToken(stext, " -\t.,\n\r/年月日".toSet)
.splitToken(stext, " -\t.,\n\r/年月日md".toSet)
.filter(w => lang != Language.Latvian || w.value != "gada")
.filter(w => lang != Language.Spanish || w.value != "de")
}
@ -105,6 +105,7 @@ object DateFind {
case Language.Latvian => dmy.or(lavLong).or(ymd)
case Language.Japanese => ymd
case Language.Hebrew => dmy
case Language.Lithuanian => ymd
}
p.read(parts) match {
case Result.Success(sds, _) =>

View File

@ -56,6 +56,8 @@ object MonthName {
japanese
case Language.Hebrew =>
hebrew
case Language.Lithuanian =>
lithuanian
}
private val numbers = List(
@ -341,4 +343,19 @@ object MonthName {
List("XI", "nov", "november"),
List("XII", "dec", "december")
)
/** Lithuanian month names, one inner list per month in calendar order
  * (January first, December last).
  *
  * Each inner list holds the nominative (standalone) form, the genitive
  * form and a short abbreviation written without the trailing dot.
  */
private val lithuanian = List(
  List("sausis", "sausio", "saus"),
  List("vasaris", "vasario", "vas"),
  List("kovas", "kovo", "kov"),
  List("balandis", "balandžio", "bal"),
  // The nominative for May is "gegužė"; "gegužis" is not a month name.
  List("gegužė", "gegužės", "geg"),
  List("birželis", "birželio", "birž"),
  List("liepa", "liepos", "liep"),
  List("rugpjūtis", "rugpjūčio", "rugp"),
  List("rugsėjis", "rugsėjo", "rugs"),
  List("spalis", "spalio", "spal"),
  List("lapkritis", "lapkričio", "lapkr"),
  List("gruodis", "gruodžio", "gruod")
)
}

View File

@ -191,7 +191,6 @@ class DateFindTest extends FunSuite {
)
)
)
println(DateFind.splitWords("2021-11-19", Language.Spanish).toList)
assertEquals(
DateFind
.findDates("2021-11-19", Language.Spanish)
@ -204,4 +203,32 @@ class DateFindTest extends FunSuite {
)
)
}
test("find lithuanian dates") {
  // Long written form embedded in surrounding text: year, "m.", month
  // name in the genitive, day, "d.". The expected label spans offsets
  // 24 to 42 of the input and excludes the trailing " d.".
  val writtenInput = "Some text in lithuanian 2022 m. gegužės 21 d. and stuff"
  val writtenExpected = Vector(
    NerDateLabel(
      LocalDate.of(2022, 5, 21),
      NerLabel("2022 m. gegužės 21", NerTag.Date, 24, 42)
    )
  )
  val writtenFound =
    DateFind.findDates(writtenInput, Language.Lithuanian).toVector
  assertEquals(writtenFound, writtenExpected)

  // Purely numeric year-month-day form covering the whole input.
  val numericInput = "2021-11-19"
  val numericExpected = Vector(
    NerDateLabel(
      LocalDate.of(2021, 11, 19),
      NerLabel("2021-11-19", NerTag.Date, 0, 10)
    )
  )
  val numericFound =
    DateFind.findDates(numericInput, Language.Lithuanian).toVector
  assertEquals(numericFound, numericExpected)
}
}