mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-02 21:42:52 +00:00
Add spanish as an example
Adding a new language without NLP support now only requires filling in the following pieces: - define a list of month names to support date recognition - add it to joex's Dockerfile so it is available to tesseract - update the Solr migration/field definitions - update the Elm file so it shows up on the client
This commit is contained in:
parent
360cad3304
commit
26dff18ae0
@ -16,6 +16,7 @@ RUN apk add --no-cache openjdk11-jre \
|
||||
tesseract-ocr-data-deu \
|
||||
tesseract-ocr-data-fra \
|
||||
tesseract-ocr-data-ita \
|
||||
tesseract-ocr-data-spa \
|
||||
unpaper \
|
||||
wkhtmltopdf \
|
||||
libreoffice \
|
||||
|
@ -65,6 +65,7 @@ object DateFind {
|
||||
case Language.German => p1.or(p0).or(p2)
|
||||
case Language.French => p1.or(p0).or(p2)
|
||||
case Language.Italian => p1.or(p0).or(p2)
|
||||
case Language.Spanish => p1.or(p0).or(p2)
|
||||
}
|
||||
p.read(parts) match {
|
||||
case Result.Success(sds, _) =>
|
||||
|
@ -22,6 +22,8 @@ object MonthName {
|
||||
french
|
||||
case Language.Italian =>
|
||||
italian
|
||||
case Language.Spanish =>
|
||||
spanish
|
||||
}
|
||||
|
||||
private val numbers = List(
|
||||
@ -98,4 +100,19 @@ object MonthName {
|
||||
List("nov", "novembre"),
|
||||
List("dic", "dicembre")
|
||||
)
|
||||
|
||||
/** Spanish month names, one inner list per month in calendar order
  * (January first, December last).
  *
  * Each entry holds the three-letter abbreviation followed by the full
  * name, all in lower case. June and July carry their full names
  * ("junio", "julio") like every other month, so that spelled-out dates
  * are covered as well as abbreviated ones.
  */
private val spanish = List(
  List("ene", "enero"),
  List("feb", "febrero"),
  List("mar", "marzo"),
  List("abr", "abril"),
  List("may", "mayo"),
  List("jun", "junio"),
  List("jul", "julio"),
  List("ago", "agosto"),
  List("sep", "septiembre"),
  List("oct", "octubre"),
  List("nov", "noviembre"),
  List("dic", "diciembre")
)
|
||||
}
|
||||
|
@ -47,7 +47,12 @@ object Language {
|
||||
val iso3 = "ita"
|
||||
}
|
||||
|
||||
val all: List[Language] = List(German, English, French, Italian)
|
||||
/** The Spanish language, identified by the two-letter code "es" and the
  * three-letter code "spa".
  */
case object Spanish extends Language {
|
||||
val iso2 = "es"
|
||||
val iso3 = "spa"
|
||||
}
|
||||
|
||||
val all: List[Language] = List(German, English, French, Italian, Spanish)
|
||||
|
||||
def fromString(str: String): Either[String, Language] = {
|
||||
val lang = str.toLowerCase
|
||||
|
@ -25,6 +25,7 @@ object Field {
|
||||
val content_en = Field("content_en")
|
||||
val content_fr = Field("content_fr")
|
||||
val content_it = Field("content_it")
|
||||
val content_es = Field("content_es")
|
||||
val itemName = Field("itemName")
|
||||
val itemNotes = Field("itemNotes")
|
||||
val folderId = Field("folder")
|
||||
@ -39,6 +40,8 @@ object Field {
|
||||
Field.content_fr
|
||||
case Language.Italian =>
|
||||
Field.content_it
|
||||
case Language.Spanish =>
|
||||
Field.content_es
|
||||
}
|
||||
|
||||
implicit val jsonEncoder: Encoder[Field] =
|
||||
|
@ -41,6 +41,7 @@ object SolrQuery {
|
||||
Field.content_en,
|
||||
Field.content_fr,
|
||||
Field.content_it,
|
||||
Field.content_es,
|
||||
Field.itemName,
|
||||
Field.itemNotes,
|
||||
Field.attachmentName
|
||||
|
@ -69,6 +69,14 @@ object SolrSetup {
|
||||
solrEngine,
|
||||
"Add content_it field",
|
||||
addContentItField.map(_ => FtsMigration.Result.reIndexAll)
|
||||
),
|
||||
FtsMigration[F](
|
||||
8,
|
||||
solrEngine,
|
||||
"Add content_es field",
|
||||
addTextField(Some(Language.Spanish))(Field.content_es).map(_ =>
|
||||
FtsMigration.Result.reIndexAll
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -12,6 +12,7 @@ type Language
|
||||
| English
|
||||
| French
|
||||
| Italian
|
||||
| Spanish
|
||||
|
||||
|
||||
fromString : String -> Maybe Language
|
||||
@ -27,6 +28,10 @@ fromString str =
|
||||
|
||||
else if str == "ita" || str == "it" || str == "italian" then
|
||||
Just Italian
|
||||
|
||||
else if str == "spa" || str == "es" || str == "spanish" then
|
||||
Just Spanish
|
||||
|
||||
else
|
||||
Nothing
|
||||
|
||||
@ -46,6 +51,9 @@ toIso3 lang =
|
||||
Italian ->
|
||||
"ita"
|
||||
|
||||
Spanish ->
|
||||
"spa"
|
||||
|
||||
|
||||
toName : Language -> String
|
||||
toName lang =
|
||||
@ -62,7 +70,10 @@ toName lang =
|
||||
Italian ->
|
||||
"Italian"
|
||||
|
||||
Spanish ->
|
||||
"Spanish"
|
||||
|
||||
|
||||
all : List Language
|
||||
all =
|
||||
[ German, English, French, Italian ]
|
||||
[ German, English, French, Italian, Spanish ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user