Merge pull request #1190 from eikek/update-stanford-core-nlp

Update Stanford CoreNLP
This commit is contained in:
mergify[bot]
2021-11-20 14:09:04 +00:00
committed by GitHub
19 changed files with 178 additions and 43 deletions

View File

@ -40,7 +40,7 @@ object Dependencies {
val ScalaJavaTimeVersion = "2.3.0"
val ScodecBitsVersion = "1.1.29"
val Slf4jVersion = "1.7.32"
val StanfordNlpVersion = "4.2.2"
val StanfordNlpVersion = "4.3.2"
val TikaVersion = "2.1.0"
val YamuscaVersion = "0.8.1"
val SwaggerUIVersion = "4.1.0"
@ -185,18 +185,16 @@ object Dependencies {
)
)
// The stanford-corenlp module at StanfordNlpVersion, listed once per classifier.
val stanfordNlpModels =
  Seq("models", "models-german", "models-french", "models-english")
    .map { classifierName =>
      ("edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion)
        .classifier(classifierName)
    }
// One dependency entry per classifier of stanford-corenlp at StanfordNlpVersion.
val stanfordNlpModels = {
  val coreNlp = "edu.stanford.nlp" % "stanford-corenlp" % StanfordNlpVersion
  // Classifier names, in the order the resulting entries are listed.
  val classifierNames = Seq(
    "models",
    "models-german",
    "models-french",
    "models-english",
    "models-spanish"
  )
  classifierNames.map(name => coreNlp.classifier(name))
}
val tika = Seq(
"org.apache.tika" % "tika-core" % TikaVersion

View File

@ -67,18 +67,29 @@ object NerModelsPlugin extends AutoPlugin {
}
// List of model file names, with comment markers separating the English,
// German, French and Spanish groups.
// Presumably these are resource names inside the Stanford CoreNLP model
// artifacts — TODO confirm against the code that consumes this list.
// NOTE(review): this listing appears to interleave the removed and the added
// side of a diff hunk: several entries occur twice (for example
// "german.distsim.crf.ser.gz" and "german-ud.tagger"), and
// "english-left3words-distsim.tagger.props" has no trailing comma although
// more entries follow. Verify against the actual file before relying on the
// order or contents shown here.
private val nerModels = List(
"german.distsim.crf.ser.gz",
// English
"english.conll.4class.distsim.crf.ser.gz",
"regexner_caseless.tab",
"regexner_cased.tab",
"english-left3words-distsim.tagger",
"english-left3words-distsim.tagger.props",
// German
"german.distsim.crf.ser.gz",
"german-mwt.tsv",
"german-ud.tagger",
"german-ud.tagger.props",
// French
"french-wikiner-4class.crf.ser.gz",
"french-mwt-statistical.tsv",
"french-mwt.tagger",
"french-mwt.tsv",
"german-mwt.tsv",
"german-ud.tagger",
"german-ud.tagger.props",
"french-ud.tagger",
"french-ud.tagger.props",
"english-left3words-distsim.tagger",
"english-left3words-distsim.tagger.props"
// Spanish
"spanish.ancora.distsim.s512.crf.ser.gz",
"spanish-mwt.tsv",
"spanish-ud.tagger",
"kbp_regexner_number_sp.tag",
"kbp_regexner_mapping_sp.tag"
)
}