mirror of
				https://github.com/TheAnachronism/docspell.git
				synced 2025-11-03 18:00:11 +00:00 
			
		
		
		
	Add Spanish as an example
Adding a new language without NLP now only requires filling in the following pieces: - define a list of month names to support date recognition - add it to joex's Dockerfile so it is available to tesseract - update the Solr migration/field definitions - update the Elm file so it shows up in the client
This commit is contained in:
		@@ -16,6 +16,7 @@ RUN apk add --no-cache openjdk11-jre \
 | 
			
		||||
    tesseract-ocr-data-deu \
 | 
			
		||||
    tesseract-ocr-data-fra \
 | 
			
		||||
    tesseract-ocr-data-ita \
 | 
			
		||||
    tesseract-ocr-data-spa \
 | 
			
		||||
    unpaper \
 | 
			
		||||
    wkhtmltopdf \
 | 
			
		||||
    libreoffice \
 | 
			
		||||
 
 | 
			
		||||
@@ -65,6 +65,7 @@ object DateFind {
 | 
			
		||||
        case Language.German  => p1.or(p0).or(p2)
 | 
			
		||||
        case Language.French  => p1.or(p0).or(p2)
 | 
			
		||||
        case Language.Italian => p1.or(p0).or(p2)
 | 
			
		||||
        case Language.Spanish => p1.or(p0).or(p2)
 | 
			
		||||
      }
 | 
			
		||||
      p.read(parts) match {
 | 
			
		||||
        case Result.Success(sds, _) =>
 | 
			
		||||
 
 | 
			
		||||
@@ -22,6 +22,8 @@ object MonthName {
 | 
			
		||||
        french
 | 
			
		||||
      case Language.Italian =>
 | 
			
		||||
        italian
 | 
			
		||||
      case Language.Spanish =>
 | 
			
		||||
        spanish
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  private val numbers = List(
 | 
			
		||||
@@ -98,4 +100,19 @@ object MonthName {
 | 
			
		||||
    List("nov", "novembre"),
 | 
			
		||||
    List("dic", "dicembre")
 | 
			
		||||
  )
 | 
			
		||||
 | 
			
		||||
  /** Spanish month names in calendar order (January first, December last).
    *
    * Each inner list holds the accepted spellings of one month: the
    * three-letter abbreviation followed by the full name. Every month
    * carries both forms, so that "junio" and "julio" are listed just like
    * the full names of the other months.
    */
  private val spanish = List(
    List("ene", "enero"),
    List("feb", "febrero"),
    List("mar", "marzo"),
    List("abr", "abril"),
    List("may", "mayo"),
    List("jun", "junio"),
    List("jul", "julio"),
    List("ago", "agosto"),
    List("sep", "septiembre"),
    List("oct", "octubre"),
    List("nov", "noviembre"),
    List("dic", "diciembre")
  )
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -47,7 +47,12 @@ object Language {
 | 
			
		||||
    val iso3 = "ita"
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  val all: List[Language] = List(German, English, French, Italian)
 | 
			
		||||
  /** The Spanish language, identified by its two-letter code "es" and its
    * three-letter code "spa".
    */
  case object Spanish extends Language {
    val iso2: String = "es"
    val iso3: String = "spa"
  }
 | 
			
		||||
 | 
			
		||||
  val all: List[Language] = List(German, English, French, Italian, Spanish)
 | 
			
		||||
 | 
			
		||||
  def fromString(str: String): Either[String, Language] = {
 | 
			
		||||
    val lang = str.toLowerCase
 | 
			
		||||
 
 | 
			
		||||
@@ -25,6 +25,7 @@ object Field {
 | 
			
		||||
  val content_en     = Field("content_en")
 | 
			
		||||
  val content_fr     = Field("content_fr")
 | 
			
		||||
  val content_it     = Field("content_it")
 | 
			
		||||
  val content_es     = Field("content_es")
 | 
			
		||||
  val itemName       = Field("itemName")
 | 
			
		||||
  val itemNotes      = Field("itemNotes")
 | 
			
		||||
  val folderId       = Field("folder")
 | 
			
		||||
@@ -39,6 +40,8 @@ object Field {
 | 
			
		||||
        Field.content_fr
 | 
			
		||||
      case Language.Italian =>
 | 
			
		||||
        Field.content_it
 | 
			
		||||
      case Language.Spanish =>
 | 
			
		||||
        Field.content_es
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  implicit val jsonEncoder: Encoder[Field] =
 | 
			
		||||
 
 | 
			
		||||
@@ -41,6 +41,7 @@ object SolrQuery {
 | 
			
		||||
            Field.content_en,
 | 
			
		||||
            Field.content_fr,
 | 
			
		||||
            Field.content_it,
 | 
			
		||||
            Field.content_es,
 | 
			
		||||
            Field.itemName,
 | 
			
		||||
            Field.itemNotes,
 | 
			
		||||
            Field.attachmentName
 | 
			
		||||
 
 | 
			
		||||
@@ -69,6 +69,14 @@ object SolrSetup {
 | 
			
		||||
            solrEngine,
 | 
			
		||||
            "Add content_it field",
 | 
			
		||||
            addContentItField.map(_ => FtsMigration.Result.reIndexAll)
 | 
			
		||||
          ),
 | 
			
		||||
          FtsMigration[F](
 | 
			
		||||
            8,
 | 
			
		||||
            solrEngine,
 | 
			
		||||
            "Add content_es field",
 | 
			
		||||
            addTextField(Some(Language.Spanish))(Field.content_es).map(_ =>
 | 
			
		||||
              FtsMigration.Result.reIndexAll
 | 
			
		||||
            )
 | 
			
		||||
          )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -12,6 +12,7 @@ type Language
 | 
			
		||||
    | English
 | 
			
		||||
    | French
 | 
			
		||||
    | Italian
 | 
			
		||||
    | Spanish
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
fromString : String -> Maybe Language
 | 
			
		||||
@@ -27,6 +28,10 @@ fromString str =
 | 
			
		||||
 | 
			
		||||
    else if str == "ita" || str == "it" || str == "italian" then
 | 
			
		||||
        Just Italian
 | 
			
		||||
 | 
			
		||||
    else if str == "spa" || str == "es" || str == "spanish" then
 | 
			
		||||
        Just Spanish
 | 
			
		||||
 | 
			
		||||
    else
 | 
			
		||||
        Nothing
 | 
			
		||||
 | 
			
		||||
@@ -46,6 +51,9 @@ toIso3 lang =
 | 
			
		||||
        Italian ->
 | 
			
		||||
            "ita"
 | 
			
		||||
 | 
			
		||||
        Spanish ->
 | 
			
		||||
            "spa"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
toName : Language -> String
 | 
			
		||||
toName lang =
 | 
			
		||||
@@ -62,7 +70,10 @@ toName lang =
 | 
			
		||||
        Italian ->
 | 
			
		||||
            "Italian"
 | 
			
		||||
 | 
			
		||||
        Spanish ->
 | 
			
		||||
            "Spanish"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
all : List Language
 | 
			
		||||
all =
 | 
			
		||||
    [ German, English, French, Italian ]
 | 
			
		||||
    [ German, English, French, Italian, Spanish ]
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user