mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Add Lithuanian to processing languages
Solr doesn't support Lithuanian out of the box; support could perhaps be added via plugins, but that would require a manual Solr setup. For now, Lithuanian has been added with basic support only. Closes: #1540
This commit is contained in:
@ -52,7 +52,7 @@ object DateFind {
|
||||
} else text
|
||||
|
||||
TextSplitter
|
||||
.splitToken(stext, " -\t.,\n\r/年月日".toSet)
|
||||
.splitToken(stext, " -\t.,\n\r/年月日md".toSet)
|
||||
.filter(w => lang != Language.Latvian || w.value != "gada")
|
||||
.filter(w => lang != Language.Spanish || w.value != "de")
|
||||
}
|
||||
@ -105,6 +105,7 @@ object DateFind {
|
||||
case Language.Latvian => dmy.or(lavLong).or(ymd)
|
||||
case Language.Japanese => ymd
|
||||
case Language.Hebrew => dmy
|
||||
case Language.Lithuanian => ymd
|
||||
}
|
||||
p.read(parts) match {
|
||||
case Result.Success(sds, _) =>
|
||||
|
@ -56,6 +56,8 @@ object MonthName {
|
||||
japanese
|
||||
case Language.Hebrew =>
|
||||
hebrew
|
||||
case Language.Lithuanian =>
|
||||
lithuanian
|
||||
}
|
||||
|
||||
private val numbers = List(
|
||||
@ -341,4 +343,19 @@ object MonthName {
|
||||
List("XI", "nov", "november"),
|
||||
List("XII", "dec", "december")
|
||||
)
|
||||
|
||||
/** Lithuanian month names, one inner list per month in calendar order
  * (index 0 = January … index 11 = December).
  *
  * Each inner list holds the spellings accepted for that month: the
  * nominative form, the genitive form (the one used in written dates such
  * as "2022 m. gegužės 21 d.") and a short abbreviation.
  */
private val lithuanian = List(
  List("sausis", "sausio", "saus"),
  List("vasaris", "vasario", "vas"),
  List("kovas", "kovo", "kov"),
  List("balandis", "balandžio", "bal"),
  // The standard nominative for May is "gegužė"; "gegužis" is kept only so
  // that input matched before this fix still matches.
  List("gegužė", "gegužis", "gegužės", "geg"),
  List("birželis", "birželio", "birž"),
  List("liepa", "liepos", "liep"),
  List("rugpjūtis", "rugpjūčio", "rugp"),
  List("rugsėjis", "rugsėjo", "rugs"),
  List("spalis", "spalio", "spal"),
  List("lapkritis", "lapkričio", "lapkr"),
  List("gruodis", "gruodžio", "gruod")
)
|
||||
}
|
||||
|
@ -191,7 +191,6 @@ class DateFindTest extends FunSuite {
|
||||
)
|
||||
)
|
||||
)
|
||||
println(DateFind.splitWords("2021-11-19", Language.Spanish).toList)
|
||||
assertEquals(
|
||||
DateFind
|
||||
.findDates("2021-11-19", Language.Spanish)
|
||||
@ -204,4 +203,32 @@ class DateFindTest extends FunSuite {
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
test("find lithuanian dates") {
  // Long written form embedded in surrounding text: "<year> m. <month> <day> d."
  val sentence = "Some text in lithuanian 2022 m. gegužės 21 d. and stuff"
  val expectedLong = Vector(
    NerDateLabel(
      LocalDate.of(2022, 5, 21),
      NerLabel("2022 m. gegužės 21", NerTag.Date, 24, 42)
    )
  )
  val foundLong = DateFind.findDates(sentence, Language.Lithuanian).toVector
  assertEquals(foundLong, expectedLong)

  // Plain numeric year-month-day form.
  val isoText = "2021-11-19"
  val expectedIso = Vector(
    NerDateLabel(
      LocalDate.of(2021, 11, 19),
      NerLabel("2021-11-19", NerTag.Date, 0, 10)
    )
  )
  val foundIso = DateFind.findDates(isoText, Language.Lithuanian).toVector
  assertEquals(foundIso, expectedIso)
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user