Merge pull request #1818 from eikek/estonian-language

Add Estonian language
This commit is contained in:
mergify[bot] 2022-11-01 00:15:41 +00:00 committed by GitHub
commit a779d4342c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 78 additions and 2 deletions

View File

@ -33,6 +33,7 @@ RUN JDKPKG="openjdk11-jre"; \
tesseract-ocr-data-heb \ tesseract-ocr-data-heb \
tesseract-ocr-data-lit \ tesseract-ocr-data-lit \
tesseract-ocr-data-pol \ tesseract-ocr-data-pol \
tesseract-ocr-data-est \
unpaper \ unpaper \
wkhtmltopdf \ wkhtmltopdf \
libreoffice \ libreoffice \

View File

@ -110,6 +110,7 @@ object DateFind {
case Language.Hebrew => dmy case Language.Hebrew => dmy
case Language.Lithuanian => ymd case Language.Lithuanian => ymd
case Language.Polish => dmy case Language.Polish => dmy
case Language.Estonian => dmy
} }
p.read(parts) match { p.read(parts) match {
case Result.Success(sds, _) => case Result.Success(sds, _) =>

View File

@ -60,6 +60,8 @@ object MonthName {
lithuanian lithuanian
case Language.Polish => case Language.Polish =>
polish polish
case Language.Estonian =>
estonian
} }
private val numbers = List( private val numbers = List(
@ -375,4 +377,19 @@ object MonthName {
List("listopada", "lis"), List("listopada", "lis"),
List("grudnia", "gru") List("grudnia", "gru")
) )
// Estonian month names in calendar order, January through December.
// Each inner list holds the spellings given for one month: the full name
// first, followed by an abbreviation where one is listed (märts, mai, juuni
// and juuli have only the full name).
// NOTE(review): presumably the position in the outer list identifies the
// month number — confirm against the code that consumes this table.
private val estonian = List(
List("jaanuar", "jaan"),
List("veebruar", "veebr"),
List("märts"),
List("aprill", "apr"),
List("mai"),
List("juuni"),
List("juuli"),
List("august", "aug"),
List("september", "sept"),
List("oktoober", "okt"),
List("november", "nov"),
List("detsember", "dets")
)
} }

View File

@ -259,4 +259,32 @@ class DateFindTest extends FunSuite {
) )
) )
} }
test("find estonian dates") {
  // Case 1: the month is written out as a word inside surrounding text.
  // The expected label covers offsets 22 to 35 of the input, i.e. exactly
  // the substring "21 juuli 2022".
  val spelledOutInput = "Some text in estonian 21 juuli 2022 and stuff"
  val spelledOutFound =
    DateFind.findDates(spelledOutInput, Language.Estonian).toVector
  val spelledOutWanted = Vector(
    NerDateLabel(
      LocalDate.of(2022, 7, 21),
      NerLabel("21 juuli 2022", NerTag.Date, 22, 35)
    )
  )
  assertEquals(spelledOutFound, spelledOutWanted)

  // Case 2: a purely numeric date with a two-digit year. The expected value
  // reads it as day.month.year, giving 19 November 2021.
  val numericInput = "19.11.21"
  val numericFound =
    DateFind.findDates(numericInput, Language.Estonian).toVector
  val numericWanted = Vector(
    NerDateLabel(
      LocalDate.of(2021, 11, 19),
      NerLabel("19.11.21", NerTag.Date, 0, 8)
    )
  )
  assertEquals(numericFound, numericWanted)
}
} }

View File

@ -133,6 +133,11 @@ object Language {
val iso3 = "pol" val iso3 = "pol"
} }
// Estonian language; `iso2` and `iso3` hold its two-letter ("et") and
// three-letter ("est") language codes.
case object Estonian extends Language {
val iso2 = "et"
val iso3 = "est"
}
val all: List[Language] = val all: List[Language] =
List( List(
German, German,
@ -154,7 +159,8 @@ object Language {
Japanese, Japanese,
Hebrew, Hebrew,
Lithuanian, Lithuanian,
Polish Polish,
Estonian
) )
def fromString(str: String): Either[String, Language] = { def fromString(str: String): Either[String, Language] = {

View File

@ -204,5 +204,6 @@ object FtsRepository extends DoobieMeta {
case Language.Hebrew => "simple" case Language.Hebrew => "simple"
case Language.Lithuanian => "simple" case Language.Lithuanian => "simple"
case Language.Polish => "simple" case Language.Polish => "simple"
case Language.Estonian => "simple"
} }
} }

View File

@ -156,7 +156,12 @@ object SolrSetup {
addContentField(Language.Polish) addContentField(Language.Polish)
), ),
SolrMigration.reIndexAll(26, "Re-Index after adding polish content field"), SolrMigration.reIndexAll(26, "Re-Index after adding polish content field"),
SolrMigration.reIndexAll(27, "Re-Index after collective-id change") SolrMigration.reIndexAll(27, "Re-Index after collective-id change"),
SolrMigration[F](
28,
"Add Estonian",
addContentField(Language.Estonian)
)
) )
def addFolderField: F[Unit] = def addFolderField: F[Unit] =

View File

@ -34,6 +34,7 @@ type Language
| Hungarian | Hungarian
| Lithuanian | Lithuanian
| Polish | Polish
| Estonian
fromString : String -> Maybe Language fromString : String -> Maybe Language
@ -98,6 +99,9 @@ fromString str =
else if str == "pol" || str == "pl" || str == "polish" then else if str == "pol" || str == "pl" || str == "polish" then
Just Polish Just Polish
else if str == "est" || str == "et" || str == "estonian" then
Just Estonian
else else
Nothing Nothing
@ -165,6 +169,9 @@ toIso3 lang =
Polish -> Polish ->
"pol" "pol"
Estonian ->
"est"
all : List Language all : List Language
all = all =
@ -188,4 +195,5 @@ all =
, Hungarian , Hungarian
, Lithuanian , Lithuanian
, Polish , Polish
, Estonian
] ]

View File

@ -77,6 +77,9 @@ gb lang =
Polish -> Polish ->
"Polish" "Polish"
Estonian ->
"Estonian"
de : Language -> String de : Language -> String
de lang = de lang =
@ -141,6 +144,9 @@ de lang =
Polish -> Polish ->
"Polnisch" "Polnisch"
Estonian ->
"Estnisch"
fr : Language -> String fr : Language -> String
fr lang = fr lang =
@ -204,3 +210,6 @@ fr lang =
Polish -> Polish ->
"Polonais" "Polonais"
Estonian ->
"Estonien"