Add Spanish as an example

Adding a new language without NLP now only requires filling in these
pieces:

- define a list of month names to support date recognition
- add it to joex's Dockerfile so it is available to tesseract
- update the solr migration/field definitions
- update the elm file so it shows up on the client
This commit is contained in:
Eike Kettner 2021-01-17 12:18:23 +01:00
parent 360cad3304
commit 26dff18ae0
8 changed files with 49 additions and 2 deletions

View File

@ -16,6 +16,7 @@ RUN apk add --no-cache openjdk11-jre \
tesseract-ocr-data-deu \
tesseract-ocr-data-fra \
tesseract-ocr-data-ita \
tesseract-ocr-data-spa \
unpaper \
wkhtmltopdf \
libreoffice \

View File

@ -65,6 +65,7 @@ object DateFind {
case Language.German => p1.or(p0).or(p2)
case Language.French => p1.or(p0).or(p2)
case Language.Italian => p1.or(p0).or(p2)
case Language.Spanish => p1.or(p0).or(p2)
}
p.read(parts) match {
case Result.Success(sds, _) =>

View File

@ -22,6 +22,8 @@ object MonthName {
french
case Language.Italian =>
italian
case Language.Spanish =>
spanish
}
private val numbers = List(
@ -98,4 +100,19 @@ object MonthName {
List("nov", "novembre"),
List("dic", "dicembre")
)
/** Spanish month names used for date recognition.
  *
  * Holds one inner list per month, ordered January through December. Each
  * inner list contains the accepted spellings of that month: the
  * three-letter abbreviation and the full name.
  */
private val spanish = List(
  List("ene", "enero"),
  List("feb", "febrero"),
  List("mar", "marzo"),
  List("abr", "abril"),
  List("may", "mayo"),
  // June and July carry their full names like every other month.
  List("jun", "junio"),
  List("jul", "julio"),
  List("ago", "agosto"),
  List("sep", "septiembre"),
  List("oct", "octubre"),
  List("nov", "noviembre"),
  List("dic", "diciembre")
)
}

View File

@ -47,7 +47,12 @@ object Language {
val iso3 = "ita"
}
val all: List[Language] = List(German, English, French, Italian)
/** Spanish, identified by the two-letter code `es` and the three-letter code `spa`. */
case object Spanish extends Language {
val iso2 = "es"
val iso3 = "spa"
}
val all: List[Language] = List(German, English, French, Italian, Spanish)
def fromString(str: String): Either[String, Language] = {
val lang = str.toLowerCase

View File

@ -25,6 +25,7 @@ object Field {
val content_en = Field("content_en")
val content_fr = Field("content_fr")
val content_it = Field("content_it")
val content_es = Field("content_es")
val itemName = Field("itemName")
val itemNotes = Field("itemNotes")
val folderId = Field("folder")
@ -39,6 +40,8 @@ object Field {
Field.content_fr
case Language.Italian =>
Field.content_it
case Language.Spanish =>
Field.content_es
}
implicit val jsonEncoder: Encoder[Field] =

View File

@ -41,6 +41,7 @@ object SolrQuery {
Field.content_en,
Field.content_fr,
Field.content_it,
Field.content_es,
Field.itemName,
Field.itemNotes,
Field.attachmentName

View File

@ -69,6 +69,14 @@ object SolrSetup {
solrEngine,
"Add content_it field",
addContentItField.map(_ => FtsMigration.Result.reIndexAll)
),
FtsMigration[F](
8,
solrEngine,
"Add content_es field",
addTextField(Some(Language.Spanish))(Field.content_es).map(_ =>
FtsMigration.Result.reIndexAll
)
)
)

View File

@ -12,6 +12,7 @@ type Language
| English
| French
| Italian
| Spanish
fromString : String -> Maybe Language
@ -27,6 +28,10 @@ fromString str =
else if str == "ita" || str == "it" || str == "italian" then
Just Italian
else if str == "spa" || str == "es" || str == "spanish" then
Just Spanish
else
Nothing
@ -46,6 +51,9 @@ toIso3 lang =
Italian ->
"ita"
Spanish ->
"spa"
toName : Language -> String
toName lang =
@ -62,7 +70,10 @@ toName lang =
Italian ->
"Italian"
Spanish ->
"Spanish"
all : List Language
all =
[ German, English, French, Italian ]
[ German, English, French, Italian, Spanish ]