mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-04-04 10:29:34 +00:00
Add 9 more languages to the list of document languages
This commit is contained in:
parent
94bb18c152
commit
3f75af0807
@ -17,6 +17,15 @@ RUN apk add --no-cache openjdk11-jre \
|
||||
tesseract-ocr-data-fra \
|
||||
tesseract-ocr-data-ita \
|
||||
tesseract-ocr-data-spa \
|
||||
tesseract-ocr-data-por \
|
||||
tesseract-ocr-data-ces \
|
||||
tesseract-ocr-data-nld \
|
||||
tesseract-ocr-data-dan \
|
||||
tesseract-ocr-data-fin \
|
||||
tesseract-ocr-data-nor \
|
||||
tesseract-ocr-data-swe \
|
||||
tesseract-ocr-data-rus \
|
||||
tesseract-ocr-data-ron \
|
||||
unpaper \
|
||||
wkhtmltopdf \
|
||||
libreoffice \
|
||||
|
@ -56,16 +56,26 @@ object DateFind {
|
||||
|
||||
// ymd ✔, ydm, dmy ✔, dym, myd, mdy ✔
|
||||
def fromParts(parts: List[Word], lang: Language): List[SimpleDate] = {
|
||||
val p0 = pattern0(lang)
|
||||
val p1 = pattern1(lang)
|
||||
val p2 = pattern2(lang)
|
||||
val ymd = pattern0(lang)
|
||||
val dmy = pattern1(lang)
|
||||
val mdy = pattern2(lang)
|
||||
// most is from wikipedia…
|
||||
val p = lang match {
|
||||
case Language.English =>
|
||||
p2.alt(p1).map(t => t._1 ++ t._2).or(p2).or(p0).or(p1)
|
||||
case Language.German => p1.or(p0).or(p2)
|
||||
case Language.French => p1.or(p0).or(p2)
|
||||
case Language.Italian => p1.or(p0).or(p2)
|
||||
case Language.Spanish => p1.or(p0).or(p2)
|
||||
mdy.alt(dmy).map(t => t._1 ++ t._2).or(mdy).or(ymd).or(dmy)
|
||||
case Language.German => dmy.or(ymd).or(mdy)
|
||||
case Language.French => dmy.or(ymd).or(mdy)
|
||||
case Language.Italian => dmy.or(ymd).or(mdy)
|
||||
case Language.Spanish => dmy.or(ymd).or(mdy)
|
||||
case Language.Czech => dmy.or(ymd).or(mdy)
|
||||
case Language.Danish => dmy.or(ymd).or(mdy)
|
||||
case Language.Finnish => dmy.or(ymd).or(mdy)
|
||||
case Language.Norwegian => dmy.or(ymd).or(mdy)
|
||||
case Language.Portuguese => dmy.or(ymd).or(mdy)
|
||||
case Language.Romanian => dmy.or(ymd).or(mdy)
|
||||
case Language.Russian => dmy.or(ymd).or(mdy)
|
||||
case Language.Swedish => ymd.or(dmy).or(mdy)
|
||||
case Language.Dutch => dmy.or(ymd).or(mdy)
|
||||
}
|
||||
p.read(parts) match {
|
||||
case Result.Success(sds, _) =>
|
||||
|
@ -24,6 +24,24 @@ object MonthName {
|
||||
italian
|
||||
case Language.Spanish =>
|
||||
spanish
|
||||
case Language.Swedish =>
|
||||
swedish
|
||||
case Language.Norwegian =>
|
||||
norwegian
|
||||
case Language.Dutch =>
|
||||
dutch
|
||||
case Language.Czech =>
|
||||
czech
|
||||
case Language.Danish =>
|
||||
danish
|
||||
case Language.Portuguese =>
|
||||
portuguese
|
||||
case Language.Romanian =>
|
||||
romanian
|
||||
case Language.Finnish =>
|
||||
finnish
|
||||
case Language.Russian =>
|
||||
russian
|
||||
}
|
||||
|
||||
private val numbers = List(
|
||||
@ -115,4 +133,138 @@ object MonthName {
|
||||
List("nov", "noviembre"),
|
||||
List("dic", "diciembre")
|
||||
)
|
||||
|
||||
private val swedish = List(
|
||||
List("jan", "januari"),
|
||||
List("febr", "februari"),
|
||||
List("mars"),
|
||||
List("april"),
|
||||
List("maj"),
|
||||
List("juni"),
|
||||
List("juli"),
|
||||
List("aug", "augusti"),
|
||||
List("sept", "september"),
|
||||
List("okt", "oktober"),
|
||||
List("nov", "november"),
|
||||
List("dec", "december")
|
||||
)
|
||||
private val norwegian = List(
|
||||
List("jan", "januar"),
|
||||
List("febr", "februar"),
|
||||
List("mars"),
|
||||
List("april"),
|
||||
List("mai"),
|
||||
List("juni"),
|
||||
List("juli"),
|
||||
List("aug", "august"),
|
||||
List("sept", "september"),
|
||||
List("okt", "oktober"),
|
||||
List("nov", "november"),
|
||||
List("des", "desember")
|
||||
)
|
||||
|
||||
// Czech month names: twelve inner lists in calendar order (leden … prosinec).
// Each inner list holds the spellings given for that month; several entries
// carry both a diacritic and a diacritic-free variant.
private val czech = List(
|
||||
List("led", "leden"),
|
||||
List("un", "ún", "únor", "unor"),
|
||||
List("brez", "březen", "brezen"),
|
||||
List("dub", "duben"),
|
||||
List("kvet", "květen"),
|
||||
List("cerv", "červen"),
|
||||
// NOTE(review): "cerven" is also the diacritic-free spelling of "červen" from
// the previous entry, but it is listed here under "červenec" — confirm that
// this is the intended month for that spelling.
List("cerven", "červenec"),
|
||||
List("srp", "srpen"),
|
||||
List("zari", "září"),
|
||||
// NOTE(review): "ríj" is written with a plain "r" while the full name in the
// same entry is "říjen" — possibly "říj" was meant; confirm.
List("ríj", "rij", "říjen"),
|
||||
List("list", "listopad"),
|
||||
List("pros", "prosinec")
|
||||
)
|
||||
|
||||
private val romanian = List(
|
||||
List("ian", "ianuarie"),
|
||||
List("feb", "februarie"),
|
||||
List("mar", "martie"),
|
||||
List("apr", "aprilie"),
|
||||
List("mai"),
|
||||
List("iunie"),
|
||||
List("iulie"),
|
||||
List("aug", "august"),
|
||||
List("sept", "septembrie"),
|
||||
List("oct", "octombrie"),
|
||||
List("noem", "nov", "noiembrie"),
|
||||
List("dec", "decembrie")
|
||||
)
|
||||
|
||||
private val danish = List(
|
||||
List("jan", "januar"),
|
||||
List("febr", "februar"),
|
||||
List("marts"),
|
||||
List("april"),
|
||||
List("maj"),
|
||||
List("juni"),
|
||||
List("juli"),
|
||||
List("aug", "august"),
|
||||
List("sept", "september"),
|
||||
List("okt", "oktober"),
|
||||
List("nov", "november"),
|
||||
List("dec", "december")
|
||||
)
|
||||
|
||||
private val portuguese = List(
|
||||
List("jan", "janeiro"),
|
||||
List("fev", "fevereiro"),
|
||||
List("março", "marco"),
|
||||
List("abril"),
|
||||
List("maio"),
|
||||
List("junho"),
|
||||
List("julho"),
|
||||
List("agosto"),
|
||||
List("set", "setembro"),
|
||||
List("out", "outubro"),
|
||||
List("nov", "novembro"),
|
||||
List("dez", "dezembro")
|
||||
)
|
||||
|
||||
private val finnish = List(
|
||||
List("tammikuu"),
|
||||
List("helmikuu"),
|
||||
List("maaliskuu"),
|
||||
List("huhtikuu"),
|
||||
List("toukokuu"),
|
||||
List("kesäkuu"),
|
||||
List("heinäkuu"),
|
||||
List("elokuu"),
|
||||
List("syyskuu"),
|
||||
List("lokakuu"),
|
||||
List("marraskuu"),
|
||||
List("joulukuu")
|
||||
)
|
||||
|
||||
// Russian month names: twelve inner lists in calendar order, each holding a
// single spelling (the dictionary form); no abbreviations are listed.
// NOTE(review): written dates commonly use the genitive form (e.g. "января");
// presumably those spellings are not covered by the entries below — confirm
// against the code that consumes this list.
private val russian = List(
|
||||
List("январь"),
|
||||
List("февраль"),
|
||||
List("март"),
|
||||
List("апрель"),
|
||||
List("май"),
|
||||
List("июнь"),
|
||||
List("июль"),
|
||||
List("август"),
|
||||
List("сентябрь"),
|
||||
List("октябрь"),
|
||||
List("ноябрь"),
|
||||
List("декабрь")
|
||||
)
|
||||
|
||||
private val dutch = List(
|
||||
List("jan", "januari"),
|
||||
List("feb", "februari"),
|
||||
List("maart"),
|
||||
List("apr", "april"),
|
||||
List("mei"),
|
||||
List("juni"),
|
||||
List("juli"),
|
||||
List("aug", "augustus"),
|
||||
List("sept", "september"),
|
||||
List("okt", "oct", "oktober"),
|
||||
List("nov", "november"),
|
||||
List("dec", "december")
|
||||
)
|
||||
}
|
||||
|
@ -52,7 +52,68 @@ object Language {
|
||||
val iso3 = "spa"
|
||||
}
|
||||
|
||||
val all: List[Language] = List(German, English, French, Italian, Spanish)
|
||||
case object Portuguese extends Language {
|
||||
val iso2 = "pt"
|
||||
val iso3 = "por"
|
||||
}
|
||||
|
||||
case object Czech extends Language {
|
||||
val iso2 = "cs"
|
||||
val iso3 = "ces"
|
||||
}
|
||||
|
||||
case object Danish extends Language {
|
||||
val iso2 = "da"
|
||||
val iso3 = "dan"
|
||||
}
|
||||
|
||||
case object Finnish extends Language {
|
||||
val iso2 = "fi"
|
||||
val iso3 = "fin"
|
||||
}
|
||||
|
||||
case object Norwegian extends Language {
|
||||
val iso2 = "no"
|
||||
val iso3 = "nor"
|
||||
}
|
||||
|
||||
case object Swedish extends Language {
|
||||
val iso2 = "sv"
|
||||
val iso3 = "swe"
|
||||
}
|
||||
|
||||
case object Russian extends Language {
|
||||
val iso2 = "ru"
|
||||
val iso3 = "rus"
|
||||
}
|
||||
|
||||
case object Romanian extends Language {
|
||||
val iso2 = "ro"
|
||||
val iso3 = "ron"
|
||||
}
|
||||
|
||||
case object Dutch extends Language {
|
||||
val iso2 = "nl"
|
||||
val iso3 = "nld"
|
||||
}
|
||||
|
||||
val all: List[Language] =
|
||||
List(
|
||||
German,
|
||||
English,
|
||||
French,
|
||||
Italian,
|
||||
Spanish,
|
||||
Dutch,
|
||||
Portuguese,
|
||||
Czech,
|
||||
Danish,
|
||||
Finnish,
|
||||
Norwegian,
|
||||
Swedish,
|
||||
Russian,
|
||||
Romanian
|
||||
)
|
||||
|
||||
def fromString(str: String): Either[String, Language] = {
|
||||
val lang = str.toLowerCase
|
||||
|
@ -32,7 +32,8 @@ object Field {
|
||||
.map(contentField)
|
||||
|
||||
def contentField(lang: Language): Field =
|
||||
Field(s"content_${lang.iso2}")
|
||||
if (lang == Language.Czech) Field(s"content_cz")
|
||||
else Field(s"content_${lang.iso2}")
|
||||
|
||||
implicit val jsonEncoder: Encoder[Field] =
|
||||
Encoder.encodeString.contramap(_.name)
|
||||
|
@ -75,12 +75,33 @@ object SolrSetup {
|
||||
solrEngine,
|
||||
"Add content_es field",
|
||||
addContentField(Language.Spanish).map(_ => FtsMigration.Result.reIndexAll)
|
||||
),
|
||||
FtsMigration[F](
|
||||
9,
|
||||
solrEngine,
|
||||
"Add more content fields",
|
||||
addMoreContentFields.map(_ => FtsMigration.Result.reIndexAll)
|
||||
)
|
||||
)
|
||||
|
||||
def addFolderField: F[Unit] =
|
||||
addStringField(Field.folderId)
|
||||
|
||||
def addMoreContentFields: F[Unit] = {
|
||||
val remain = List[Language](
|
||||
Language.Norwegian,
|
||||
Language.Romanian,
|
||||
Language.Swedish,
|
||||
Language.Finnish,
|
||||
Language.Danish,
|
||||
Language.Czech,
|
||||
Language.Dutch,
|
||||
Language.Portuguese,
|
||||
Language.Russian
|
||||
)
|
||||
remain.traverse(addContentField).map(_ => ())
|
||||
}
|
||||
|
||||
def setupCoreSchema: F[Unit] = {
|
||||
val cmds0 =
|
||||
List(
|
||||
@ -162,7 +183,8 @@ object SolrSetup {
|
||||
AddField(field, "text_general", true, true, false)
|
||||
|
||||
def textLang(field: Field, lang: Language): AddField =
|
||||
AddField(field, s"text_${lang.iso2}", true, true, false)
|
||||
if (lang == Language.Czech) AddField(field, s"text_cz", true, true, false)
|
||||
else AddField(field, s"text_${lang.iso2}", true, true, false)
|
||||
}
|
||||
|
||||
case class DeleteField(name: Field)
|
||||
|
@ -13,6 +13,15 @@ type Language
|
||||
| French
|
||||
| Italian
|
||||
| Spanish
|
||||
| Portuguese
|
||||
| Czech
|
||||
| Danish
|
||||
| Finnish
|
||||
| Norwegian
|
||||
| Swedish
|
||||
| Russian
|
||||
| Romanian
|
||||
| Dutch
|
||||
|
||||
|
||||
fromString : String -> Maybe Language
|
||||
@ -32,6 +41,33 @@ fromString str =
|
||||
else if str == "spa" || str == "es" || str == "spanish" then
|
||||
Just Spanish
|
||||
|
||||
else if str == "por" || str == "pt" || str == "portuguese" then
|
||||
Just Portuguese
|
||||
|
||||
else if str == "ces" || str == "cs" || str == "czech" then
|
||||
Just Czech
|
||||
|
||||
else if str == "dan" || str == "da" || str == "danish" then
|
||||
Just Danish
|
||||
|
||||
-- Dutch: ISO 639-2 code "nld", ISO 639-1 code "nl" (was mistyped as "nd").
else if str == "nld" || str == "nl" || str == "dutch" then
|
||||
Just Dutch
|
||||
|
||||
else if str == "fin" || str == "fi" || str == "finnish" then
|
||||
Just Finnish
|
||||
|
||||
else if str == "nor" || str == "no" || str == "norwegian" then
|
||||
Just Norwegian
|
||||
|
||||
else if str == "swe" || str == "sv" || str == "swedish" then
|
||||
Just Swedish
|
||||
|
||||
else if str == "rus" || str == "ru" || str == "russian" then
|
||||
Just Russian
|
||||
|
||||
else if str == "ron" || str == "ro" || str == "romanian" then
|
||||
Just Romanian
|
||||
|
||||
else
|
||||
Nothing
|
||||
|
||||
@ -54,6 +90,33 @@ toIso3 lang =
|
||||
Spanish ->
|
||||
"spa"
|
||||
|
||||
Portuguese ->
|
||||
"por"
|
||||
|
||||
Czech ->
|
||||
"ces"
|
||||
|
||||
Danish ->
|
||||
"dan"
|
||||
|
||||
Finnish ->
|
||||
"fin"
|
||||
|
||||
Norwegian ->
|
||||
"nor"
|
||||
|
||||
Swedish ->
|
||||
"swe"
|
||||
|
||||
Russian ->
|
||||
"rus"
|
||||
|
||||
Romanian ->
|
||||
"ron"
|
||||
|
||||
Dutch ->
|
||||
"nld"
|
||||
|
||||
|
||||
toName : Language -> String
|
||||
toName lang =
|
||||
@ -73,7 +136,48 @@ toName lang =
|
||||
Spanish ->
|
||||
"Spanish"
|
||||
|
||||
Portuguese ->
|
||||
"Portuguese"
|
||||
|
||||
Czech ->
|
||||
"Czech"
|
||||
|
||||
Danish ->
|
||||
"Danish"
|
||||
|
||||
Finnish ->
|
||||
"Finnish"
|
||||
|
||||
Norwegian ->
|
||||
"Norwegian"
|
||||
|
||||
Swedish ->
|
||||
"Swedish"
|
||||
|
||||
Russian ->
|
||||
"Russian"
|
||||
|
||||
Romanian ->
|
||||
"Romanian"
|
||||
|
||||
Dutch ->
|
||||
"Dutch"
|
||||
|
||||
|
||||
all : List Language
|
||||
all =
|
||||
[ German, English, French, Italian, Spanish ]
|
||||
[ German
|
||||
, English
|
||||
, French
|
||||
, Italian
|
||||
, Spanish
|
||||
, Portuguese
|
||||
, Czech
|
||||
, Dutch
|
||||
, Danish
|
||||
, Finnish
|
||||
, Norwegian
|
||||
, Swedish
|
||||
, Russian
|
||||
, Romanian
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user