mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-22 02:18:26 +00:00
Add japanese document language
This commit is contained in:
@ -22,7 +22,7 @@ object DateFind {
|
||||
|
||||
def findDates(text: String, lang: Language): Stream[Pure, NerDateLabel] =
|
||||
TextSplitter
|
||||
.splitToken(text, " \t.,\n\r/".toSet)
|
||||
.splitToken(text, " \t.,\n\r/年月日".toSet)
|
||||
.filter(w => lang != Language.Latvian || w.value != "gada")
|
||||
.sliding(3)
|
||||
.filter(_.size == 3)
|
||||
@ -89,6 +89,7 @@ object DateFind {
|
||||
case Language.Swedish => ymd.or(dmy).or(mdy)
|
||||
case Language.Dutch => dmy.or(ymd).or(mdy)
|
||||
case Language.Latvian => dmy.or(lavLong).or(ymd)
|
||||
case Language.Japanese => ymd
|
||||
}
|
||||
p.read(parts) match {
|
||||
case Result.Success(sds, _) =>
|
||||
|
@ -50,6 +50,8 @@ object MonthName {
|
||||
russian
|
||||
case Language.Latvian =>
|
||||
latvian
|
||||
case Language.Japanese =>
|
||||
japanese
|
||||
}
|
||||
|
||||
private val numbers = List(
|
||||
@ -290,4 +292,19 @@ object MonthName {
|
||||
List("novembris", "nov."),
|
||||
List("decembris", "dec.")
|
||||
)
|
||||
|
||||
/** Month spellings for Japanese, one entry per month from January to December.
  *
  * Each entry lists the accepted spellings of that month: the Arabic numeral
  * followed by the kanji numeral.
  */
private val japanese: List[List[String]] =
  List(
    "1"  -> "一",
    "2"  -> "二",
    "3"  -> "三",
    "4"  -> "四",
    "5"  -> "五",
    "6"  -> "六",
    "7"  -> "七",
    "8"  -> "八",
    "9"  -> "九",
    "10" -> "十",
    "11" -> "十一",
    "12" -> "十二"
  ).map { case (arabic, kanji) => List(arabic, kanji) }
|
||||
}
|
||||
|
@ -143,4 +143,29 @@ class DateFindSpec extends FunSuite {
|
||||
)
|
||||
}
|
||||
|
||||
test("find japanese dates") {
  // Both inputs below spell the same calendar day.
  val expectedDay = LocalDate.of(2021, 7, 21)

  // Dot-separated numeric form.
  val dotted =
    DateFind
      .findDates("some text in japanese 2021.7.21 and more", Language.Japanese)
      .toVector
  assertEquals(
    dotted,
    Vector(
      NerDateLabel(expectedDay, NerLabel("2021.7.21", NerTag.Date, 22, 31))
    )
  )

  // Form using the 年 / 月 / 日 markers; the expected label text stops before
  // the trailing 日.
  val withMarkers =
    DateFind
      .findDates("some text in japanese 2021年7月21日 and more", Language.Japanese)
      .toVector
  assertEquals(
    withMarkers,
    Vector(
      NerDateLabel(expectedDay, NerLabel("2021年7月21", NerTag.Date, 22, 31))
    )
  )
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user