mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-05 22:55:58 +00:00
Add Spanish as an example
Adding a new language without NLP now only requires filling in the following pieces: - define a list of month names to support date recognition - add it to joex's Dockerfile so it is available to Tesseract - update the Solr migration/field definitions - update the Elm file so it shows up on the client
This commit is contained in:
parent
360cad3304
commit
26dff18ae0
@ -16,6 +16,7 @@ RUN apk add --no-cache openjdk11-jre \
|
|||||||
tesseract-ocr-data-deu \
|
tesseract-ocr-data-deu \
|
||||||
tesseract-ocr-data-fra \
|
tesseract-ocr-data-fra \
|
||||||
tesseract-ocr-data-ita \
|
tesseract-ocr-data-ita \
|
||||||
|
tesseract-ocr-data-spa \
|
||||||
unpaper \
|
unpaper \
|
||||||
wkhtmltopdf \
|
wkhtmltopdf \
|
||||||
libreoffice \
|
libreoffice \
|
||||||
|
@ -65,6 +65,7 @@ object DateFind {
|
|||||||
case Language.German => p1.or(p0).or(p2)
|
case Language.German => p1.or(p0).or(p2)
|
||||||
case Language.French => p1.or(p0).or(p2)
|
case Language.French => p1.or(p0).or(p2)
|
||||||
case Language.Italian => p1.or(p0).or(p2)
|
case Language.Italian => p1.or(p0).or(p2)
|
||||||
|
case Language.Spanish => p1.or(p0).or(p2)
|
||||||
}
|
}
|
||||||
p.read(parts) match {
|
p.read(parts) match {
|
||||||
case Result.Success(sds, _) =>
|
case Result.Success(sds, _) =>
|
||||||
|
@ -22,6 +22,8 @@ object MonthName {
|
|||||||
french
|
french
|
||||||
case Language.Italian =>
|
case Language.Italian =>
|
||||||
italian
|
italian
|
||||||
|
case Language.Spanish =>
|
||||||
|
spanish
|
||||||
}
|
}
|
||||||
|
|
||||||
private val numbers = List(
|
private val numbers = List(
|
||||||
@ -98,4 +100,19 @@ object MonthName {
|
|||||||
List("nov", "novembre"),
|
List("nov", "novembre"),
|
||||||
List("dic", "dicembre")
|
List("dic", "dicembre")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/** Spanish month names, ordered January through December.
  *
  * Each inner list holds the accepted spellings for one month: the
  * three-letter abbreviation followed by the full name. June and July carry
  * their full names ("junio", "julio") like every other month, so that dates
  * written with the long form are covered as well.
  */
private val spanish = List(
  List("ene", "enero"),
  List("feb", "febrero"),
  List("mar", "marzo"),
  List("abr", "abril"),
  List("may", "mayo"),
  List("jun", "junio"),
  List("jul", "julio"),
  List("ago", "agosto"),
  List("sep", "septiembre"),
  List("oct", "octubre"),
  List("nov", "noviembre"),
  List("dic", "diciembre")
)
|
||||||
}
|
}
|
||||||
|
@ -47,7 +47,12 @@ object Language {
|
|||||||
val iso3 = "ita"
|
val iso3 = "ita"
|
||||||
}
|
}
|
||||||
|
|
||||||
val all: List[Language] = List(German, English, French, Italian)
|
/** The Spanish language: two-letter code "es", three-letter code "spa". */
case object Spanish extends Language {
  val iso2: String = "es"
  val iso3: String = "spa"
}
|
||||||
|
|
||||||
|
val all: List[Language] = List(German, English, French, Italian, Spanish)
|
||||||
|
|
||||||
def fromString(str: String): Either[String, Language] = {
|
def fromString(str: String): Either[String, Language] = {
|
||||||
val lang = str.toLowerCase
|
val lang = str.toLowerCase
|
||||||
|
@ -25,6 +25,7 @@ object Field {
|
|||||||
val content_en = Field("content_en")
|
val content_en = Field("content_en")
|
||||||
val content_fr = Field("content_fr")
|
val content_fr = Field("content_fr")
|
||||||
val content_it = Field("content_it")
|
val content_it = Field("content_it")
|
||||||
|
val content_es = Field("content_es")
|
||||||
val itemName = Field("itemName")
|
val itemName = Field("itemName")
|
||||||
val itemNotes = Field("itemNotes")
|
val itemNotes = Field("itemNotes")
|
||||||
val folderId = Field("folder")
|
val folderId = Field("folder")
|
||||||
@ -39,6 +40,8 @@ object Field {
|
|||||||
Field.content_fr
|
Field.content_fr
|
||||||
case Language.Italian =>
|
case Language.Italian =>
|
||||||
Field.content_it
|
Field.content_it
|
||||||
|
case Language.Spanish =>
|
||||||
|
Field.content_es
|
||||||
}
|
}
|
||||||
|
|
||||||
implicit val jsonEncoder: Encoder[Field] =
|
implicit val jsonEncoder: Encoder[Field] =
|
||||||
|
@ -41,6 +41,7 @@ object SolrQuery {
|
|||||||
Field.content_en,
|
Field.content_en,
|
||||||
Field.content_fr,
|
Field.content_fr,
|
||||||
Field.content_it,
|
Field.content_it,
|
||||||
|
Field.content_es,
|
||||||
Field.itemName,
|
Field.itemName,
|
||||||
Field.itemNotes,
|
Field.itemNotes,
|
||||||
Field.attachmentName
|
Field.attachmentName
|
||||||
|
@ -69,6 +69,14 @@ object SolrSetup {
|
|||||||
solrEngine,
|
solrEngine,
|
||||||
"Add content_it field",
|
"Add content_it field",
|
||||||
addContentItField.map(_ => FtsMigration.Result.reIndexAll)
|
addContentItField.map(_ => FtsMigration.Result.reIndexAll)
|
||||||
|
),
|
||||||
|
FtsMigration[F](
|
||||||
|
8,
|
||||||
|
solrEngine,
|
||||||
|
"Add content_es field",
|
||||||
|
addTextField(Some(Language.Spanish))(Field.content_es).map(_ =>
|
||||||
|
FtsMigration.Result.reIndexAll
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ type Language
|
|||||||
| English
|
| English
|
||||||
| French
|
| French
|
||||||
| Italian
|
| Italian
|
||||||
|
| Spanish
|
||||||
|
|
||||||
|
|
||||||
fromString : String -> Maybe Language
|
fromString : String -> Maybe Language
|
||||||
@ -27,6 +28,10 @@ fromString str =
|
|||||||
|
|
||||||
else if str == "ita" || str == "it" || str == "italian" then
|
else if str == "ita" || str == "it" || str == "italian" then
|
||||||
Just Italian
|
Just Italian
|
||||||
|
|
||||||
|
else if str == "spa" || str == "es" || str == "spanish" then
|
||||||
|
Just Spanish
|
||||||
|
|
||||||
else
|
else
|
||||||
Nothing
|
Nothing
|
||||||
|
|
||||||
@ -46,6 +51,9 @@ toIso3 lang =
|
|||||||
Italian ->
|
Italian ->
|
||||||
"ita"
|
"ita"
|
||||||
|
|
||||||
|
Spanish ->
|
||||||
|
"spa"
|
||||||
|
|
||||||
|
|
||||||
toName : Language -> String
|
toName : Language -> String
|
||||||
toName lang =
|
toName lang =
|
||||||
@ -62,7 +70,10 @@ toName lang =
|
|||||||
Italian ->
|
Italian ->
|
||||||
"Italian"
|
"Italian"
|
||||||
|
|
||||||
|
Spanish ->
|
||||||
|
"Spanish"
|
||||||
|
|
||||||
|
|
||||||
all : List Language
|
all : List Language
|
||||||
all =
|
all =
|
||||||
[ German, English, French, Italian ]
|
[ German, English, French, Italian, Spanish ]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user