mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-06-23 02:48:26 +00:00
Reorganize nlp pipeline and add nlp-unsupported language Italian
Improves and reorganizes how nlp pipelines are set up. Now users can choose from many options, depending on their hardware and usage scenario. This is the basis for using more languages without depending on what stanford-nlp supports. Support for those languages then involves text extraction and simple regex-ner processing.
This commit is contained in:
@ -24,6 +24,7 @@ object Field {
|
||||
val content_de = Field("content_de")
|
||||
val content_en = Field("content_en")
|
||||
val content_fr = Field("content_fr")
|
||||
val content_it = Field("content_it")
|
||||
val itemName = Field("itemName")
|
||||
val itemNotes = Field("itemNotes")
|
||||
val folderId = Field("folder")
|
||||
@ -36,6 +37,8 @@ object Field {
|
||||
Field.content_en
|
||||
case Language.French =>
|
||||
Field.content_fr
|
||||
case Language.Italian =>
|
||||
Field.content_it
|
||||
}
|
||||
|
||||
implicit val jsonEncoder: Encoder[Field] =
|
||||
|
@ -40,6 +40,7 @@ object SolrQuery {
|
||||
Field.content_de,
|
||||
Field.content_en,
|
||||
Field.content_fr,
|
||||
Field.content_it,
|
||||
Field.itemName,
|
||||
Field.itemNotes,
|
||||
Field.attachmentName
|
||||
|
@ -63,6 +63,12 @@ object SolrSetup {
|
||||
solrEngine,
|
||||
"Index all from database",
|
||||
FtsMigration.Result.indexAll.pure[F]
|
||||
),
|
||||
FtsMigration[F](
|
||||
7,
|
||||
solrEngine,
|
||||
"Add content_it field",
|
||||
addContentItField.map(_ => FtsMigration.Result.reIndexAll)
|
||||
)
|
||||
)
|
||||
|
||||
@ -72,6 +78,9 @@ object SolrSetup {
|
||||
def addContentFrField: F[Unit] =
|
||||
addTextField(Some(Language.French))(Field.content_fr)
|
||||
|
||||
def addContentItField: F[Unit] =
|
||||
addTextField(Some(Language.Italian))(Field.content_it)
|
||||
|
||||
def setupCoreSchema: F[Unit] = {
|
||||
val cmds0 =
|
||||
List(
|
||||
@ -90,13 +99,15 @@ object SolrSetup {
|
||||
)
|
||||
.traverse(addTextField(None))
|
||||
|
||||
val cntLang = Language.all.traverse {
|
||||
val cntLang = List(Language.German, Language.English, Language.French).traverse {
|
||||
case l @ Language.German =>
|
||||
addTextField(l.some)(Field.content_de)
|
||||
case l @ Language.English =>
|
||||
addTextField(l.some)(Field.content_en)
|
||||
case l @ Language.French =>
|
||||
addTextField(l.some)(Field.content_fr)
|
||||
case _ =>
|
||||
().pure[F]
|
||||
}
|
||||
|
||||
cmds0 *> cmds1 *> cntLang *> ().pure[F]
|
||||
@ -125,6 +136,9 @@ object SolrSetup {
|
||||
case Some(Language.French) =>
|
||||
run(DeleteField.command(DeleteField(field))).attempt *>
|
||||
run(AddField.command(AddField.textFR(field)))
|
||||
case Some(Language.Italian) =>
|
||||
run(DeleteField.command(DeleteField(field))).attempt *>
|
||||
run(AddField.command(AddField.textIT(field)))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -161,6 +175,9 @@ object SolrSetup {
|
||||
|
||||
def textFR(field: Field): AddField =
|
||||
AddField(field, "text_fr", true, true, false)
|
||||
|
||||
def textIT(field: Field): AddField =
|
||||
AddField(field, "text_it", true, true, false)
|
||||
}
|
||||
|
||||
case class DeleteField(name: Field)
|
||||
|
Reference in New Issue
Block a user