mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-21 18:08:25 +00:00
Merge pull request #2208 from mprasil/add-slovak-language-support
Add support for Slovak language
This commit is contained in:
@ -131,6 +131,7 @@ object DateFind {
|
||||
case Language.Estonian => dmy
|
||||
case Language.Khmer => dmy
|
||||
case Language.Ukrainian => dmy.or(ymd)
|
||||
case Language.Slovak => dmy.or(ymd)
|
||||
}
|
||||
p.read(parts) match {
|
||||
case Result.Success(sds, _) =>
|
||||
|
@ -66,6 +66,8 @@ object MonthName {
|
||||
ukrainian
|
||||
case Language.Khmer =>
|
||||
khmer
|
||||
case Language.Slovak =>
|
||||
slovak
|
||||
}
|
||||
|
||||
private val numbers = List(
|
||||
@ -426,4 +428,19 @@ object MonthName {
|
||||
List("листопада", "лист", "лис"),
|
||||
List("грудня", "груд", "гру")
|
||||
)
|
||||
|
||||
/** Slovak month names, one inner list per calendar month, January through December.
  *
  * Every inner list holds three spellings of the same month, in this order:
  * a three-letter abbreviation without diacritics, the nominative form and the
  * genitive form (the form written in dates such as "15. júna 2019").
  *
  * NOTE(review): all spellings are lower-case; presumably the caller lower-cases
  * the input before looking a word up here -- confirm against the matcher.
  */
private val slovak = List(
  List("jan", "január", "januára"),
  List("feb", "február", "februára"),
  List("mar", "marec", "marca"),
  List("apr", "apríl", "apríla"),
  List("maj", "máj", "mája"),
  List("jun", "jún", "júna"),
  List("jul", "júl", "júla"),
  List("aug", "august", "augusta"),
  List("sep", "september", "septembra"),
  List("okt", "október", "októbra"),
  List("nov", "november", "novembra"),
  List("dec", "december", "decembra")
)
|
||||
}
|
||||
|
@ -365,4 +365,57 @@ class DateFindTest extends FunSuite {
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
// Checks date detection for Slovak text: day-month-year with a month name
// (genitive form and abbreviation), numeric day.month.year and numeric
// year.month.day.
test("find slovak dates") {
  // Runs the finder over `text` for Slovak and materialises the result.
  def datesIn(text: String) =
    DateFind.findDates(text, Language.Slovak).toVector

  // Expected result holding exactly one date label. `start` and `end` are the
  // character offsets of `matched` inside the input (start inclusive, end
  // exclusive), e.g. "15. júna 2019" occupies 29 until 42 in the first input.
  def single(date: LocalDate, matched: String, start: Int, end: Int) =
    Vector(NerDateLabel(date, NerLabel(matched, NerTag.Date, start, end)))

  // Genitive month name inside a full sentence.
  assertEquals(
    datesIn("Do funkcie bola inaugurovaná 15. júna 2019 pred Národnou radou SR"),
    single(LocalDate.of(2019, 6, 15), "15. júna 2019", 29, 42)
  )

  // Abbreviated month name.
  assertEquals(
    datesIn("Dátum narodenia: 14. feb 2015"),
    single(LocalDate.of(2015, 2, 14), "14. feb 2015", 17, 29)
  )

  // Purely numeric day.month.year.
  assertEquals(
    datesIn("19.11.2021"),
    single(LocalDate.of(2021, 11, 19), "19.11.2021", 0, 10)
  )

  // Purely numeric year.month.day.
  assertEquals(
    datesIn("Dátum: 2022.11.05"),
    single(LocalDate.of(2022, 11, 5), "2022.11.05", 7, 17)
  )
}
|
||||
}
|
||||
|
@ -148,6 +148,11 @@ object Language {
|
||||
val iso3 = "ukr"
|
||||
}
|
||||
|
||||
/** The Slovak language.
  *
  * NOTE(review): "svk" is the ISO 3166-1 alpha-3 *country* code for Slovakia;
  * the ISO 639-2/T and ISO 639-3 *language* code for Slovak is "slk". The value
  * is left unchanged here because the web client in this change set parses and
  * emits "svk" as well -- confirm what every consumer of `iso3` expects and
  * change all of them together if "slk" is required.
  */
case object Slovak extends Language {
  val iso2 = "sk"
  val iso3 = "svk"
}
|
||||
|
||||
val all: List[Language] =
|
||||
List(
|
||||
German,
|
||||
@ -172,6 +177,7 @@ object Language {
|
||||
Polish,
|
||||
Estonian,
|
||||
Ukrainian,
|
||||
Slovak,
|
||||
Khmer
|
||||
)
|
||||
|
||||
|
@ -207,5 +207,6 @@ object FtsRepository extends DoobieMeta {
|
||||
case Language.Estonian => "simple"
|
||||
case Language.Ukrainian => "simple"
|
||||
case Language.Khmer => "simple"
|
||||
case Language.Slovak => "simple"
|
||||
}
|
||||
}
|
||||
|
@ -183,7 +183,18 @@ object SolrSetup {
|
||||
"Add Khmer",
|
||||
addContentField(Language.Khmer)
|
||||
),
|
||||
SolrMigration.reIndexAll(34, "Re-Index after adding Khmer")
|
||||
SolrMigration.reIndexAll(34, "Re-Index after adding Khmer"),
|
||||
SolrMigration[F](
|
||||
35,
|
||||
"Add new field type for slovak content",
|
||||
addFieldType(AddFieldType.textSvk)
|
||||
),
|
||||
SolrMigration[F](
|
||||
36,
|
||||
"Add Slovak",
|
||||
addContentField(Language.Slovak)
|
||||
),
|
||||
SolrMigration.reIndexAll(37, "Re-Index after adding Slovak")
|
||||
)
|
||||
|
||||
def addFolderField: F[Unit] =
|
||||
@ -368,6 +379,17 @@ object SolrSetup {
|
||||
)
|
||||
)
|
||||
|
||||
/** Solr field type "text_sk" used for Slovak content.
  *
  * It is a `solr.TextField` analysed with the standard tokenizer followed by a
  * single lower-case filter; no other filter is configured in this definition.
  */
val textSvk = AddFieldType(
  "text_sk",
  "solr.TextField",
  Analyzer(
    Tokenizer("solr.StandardTokenizerFactory", Map.empty),
    List(
      Filter("solr.LowerCaseFilterFactory", Map.empty)
    )
  )
)
|
||||
|
||||
/** One analyzer filter: `class` is the filter factory name (for example
  * "solr.LowerCaseFilterFactory"); `attr` presumably carries further factory
  * attributes -- the use visible here passes an empty map, confirm.
  */
final case class Filter(`class`: String, attr: Map[String, String])
|
||||
/** The analyzer's tokenizer: `class` is the tokenizer factory name (for example
  * "solr.StandardTokenizerFactory"); `attr` presumably carries further factory
  * attributes -- the use visible here passes an empty map, confirm.
  */
final case class Tokenizer(`class`: String, attr: Map[String, String])
|
||||
/** An analyzer definition made of one tokenizer and a list of filters. */
final case class Analyzer(tokenizer: Tokenizer, filter: List[Filter])
|
||||
|
@ -37,6 +37,7 @@ type Language
|
||||
| Estonian
|
||||
| Ukrainian
|
||||
| Khmer
|
||||
| Slovak
|
||||
|
||||
|
||||
fromString : String -> Maybe Language
|
||||
@ -110,6 +111,9 @@ fromString str =
|
||||
else if str == "khm" || str == "kh" || str == "khmer" then
|
||||
Just Khmer
|
||||
|
||||
else if str == "svk" || str == "sk" || str == "slovak" then
|
||||
Just Slovak
|
||||
|
||||
else
|
||||
Nothing
|
||||
|
||||
@ -186,6 +190,9 @@ toIso3 lang =
|
||||
Khmer ->
|
||||
"khm"
|
||||
|
||||
Slovak ->
|
||||
"svk"
|
||||
|
||||
|
||||
all : List Language
|
||||
all =
|
||||
@ -212,4 +219,5 @@ all =
|
||||
, Estonian
|
||||
, Ukrainian
|
||||
, Khmer
|
||||
, Slovak
|
||||
]
|
||||
|
@ -86,6 +86,9 @@ gb lang =
|
||||
Khmer ->
|
||||
"Khmer"
|
||||
|
||||
Slovak ->
|
||||
"Slovak"
|
||||
|
||||
|
||||
de : Language -> String
|
||||
de lang =
|
||||
@ -159,6 +162,9 @@ de lang =
|
||||
Khmer ->
|
||||
"Khmer"
|
||||
|
||||
Slovak ->
|
||||
"Slowakisch"
|
||||
|
||||
|
||||
fr : Language -> String
|
||||
fr lang =
|
||||
@ -231,3 +237,6 @@ fr lang =
|
||||
|
||||
Khmer ->
|
||||
"Khmer"
|
||||
|
||||
-- French label for the language itself: "Slovaque" (the previous value,
-- "Slovaquie", is the name of the country).
Slovak ->
    "Slovaque"
|
||||
|
Reference in New Issue
Block a user