mirror of
https://github.com/TheAnachronism/docspell.git
synced 2025-08-05 02:24:52 +00:00
Apply scalafmt to all files
This commit is contained in:
@ -7,20 +7,21 @@ import fs2.Stream
|
||||
import scala.concurrent.ExecutionContext
|
||||
|
||||
object TestFiles {
|
||||
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
|
||||
val blocker = Blocker.liftExecutionContext(ExecutionContext.global)
|
||||
implicit val CS = IO.contextShift(ExecutionContext.global)
|
||||
|
||||
|
||||
val letterSourceDE: Stream[IO, Byte] =
|
||||
LenientUri.fromJava(getClass.getResource("/letter-de-source.pdf")).
|
||||
readURL[IO](16 * 1024, blocker)
|
||||
LenientUri
|
||||
.fromJava(getClass.getResource("/letter-de-source.pdf"))
|
||||
.readURL[IO](16 * 1024, blocker)
|
||||
|
||||
val letterSourceEN: Stream[IO, Byte] =
|
||||
LenientUri.fromJava(getClass.getResource("/letter-en-source.pdf")).
|
||||
readURL[IO](16 * 1024, blocker)
|
||||
LenientUri
|
||||
.fromJava(getClass.getResource("/letter-en-source.pdf"))
|
||||
.readURL[IO](16 * 1024, blocker)
|
||||
|
||||
|
||||
val letterDEText = """Max Mustermann
|
||||
val letterDEText =
|
||||
"""Max Mustermann
|
||||
|
|
||||
|Lilienweg 21
|
||||
|
|
||||
@ -52,7 +53,8 @@ object TestFiles {
|
||||
|Max Mustermann
|
||||
|""".stripMargin.trim
|
||||
|
||||
val letterENText = """Derek Jeter
|
||||
val letterENText =
|
||||
"""Derek Jeter
|
||||
|
|
||||
|123 Elm Ave.
|
||||
|
|
||||
|
@ -22,11 +22,9 @@ object ContactAnnotateSpec extends SimpleTestSuite {
|
||||
|
||||
val labels = Contact.annotate(text)
|
||||
assertEquals(labels.size, 2)
|
||||
assertEquals(labels(0),
|
||||
NerLabel("john.smith@example.com", NerTag.Email, 25, 47))
|
||||
assertEquals(labels(0), NerLabel("john.smith@example.com", NerTag.Email, 25, 47))
|
||||
assertEquals(text.substring(25, 47).toLowerCase, "john.smith@example.com")
|
||||
assertEquals(labels(1),
|
||||
NerLabel("example.com", NerTag.Website, 308, 319))
|
||||
assertEquals(labels(1), NerLabel("example.com", NerTag.Website, 308, 319))
|
||||
assertEquals(text.substring(308, 319).toLowerCase, "example.com")
|
||||
}
|
||||
}
|
||||
|
@ -8,45 +8,49 @@ object TextAnalyserSuite extends SimpleTestSuite {
|
||||
|
||||
test("find english ner labels") {
|
||||
val labels = StanfordNerClassifier.nerAnnotate(Language.English)(TestFiles.letterENText)
|
||||
val expect = Vector(NerLabel("Derek",NerTag.Person,0,5)
|
||||
, NerLabel("Jeter",NerTag.Person,6,11)
|
||||
, NerLabel("Treesville",NerTag.Person,27,37)
|
||||
, NerLabel("Derek",NerTag.Person,69,74)
|
||||
, NerLabel("Jeter",NerTag.Person,75,80)
|
||||
, NerLabel("Treesville",NerTag.Location,96,106)
|
||||
, NerLabel("M.",NerTag.Person,142,144)
|
||||
, NerLabel("Leat",NerTag.Person,145,149)
|
||||
, NerLabel("Syrup",NerTag.Organization,160,165)
|
||||
, NerLabel("Production",NerTag.Organization,166,176)
|
||||
, NerLabel("Old",NerTag.Organization,177,180)
|
||||
, NerLabel("Sticky",NerTag.Organization,181,187)
|
||||
, NerLabel("Pancake",NerTag.Organization,188,195)
|
||||
, NerLabel("Company",NerTag.Organization,196,203)
|
||||
, NerLabel("Maple",NerTag.Location,208,213)
|
||||
, NerLabel("Lane",NerTag.Location,214,218)
|
||||
, NerLabel("Forest",NerTag.Location,220,226)
|
||||
, NerLabel("Hemptown",NerTag.Location,241,249)
|
||||
, NerLabel("Little",NerTag.Organization,349,355)
|
||||
, NerLabel("League",NerTag.Organization,356,362)
|
||||
, NerLabel("Derek",NerTag.Person,1119,1124)
|
||||
, NerLabel("Jeter",NerTag.Person,1125,1130))
|
||||
val expect = Vector(
|
||||
NerLabel("Derek", NerTag.Person, 0, 5),
|
||||
NerLabel("Jeter", NerTag.Person, 6, 11),
|
||||
NerLabel("Treesville", NerTag.Person, 27, 37),
|
||||
NerLabel("Derek", NerTag.Person, 69, 74),
|
||||
NerLabel("Jeter", NerTag.Person, 75, 80),
|
||||
NerLabel("Treesville", NerTag.Location, 96, 106),
|
||||
NerLabel("M.", NerTag.Person, 142, 144),
|
||||
NerLabel("Leat", NerTag.Person, 145, 149),
|
||||
NerLabel("Syrup", NerTag.Organization, 160, 165),
|
||||
NerLabel("Production", NerTag.Organization, 166, 176),
|
||||
NerLabel("Old", NerTag.Organization, 177, 180),
|
||||
NerLabel("Sticky", NerTag.Organization, 181, 187),
|
||||
NerLabel("Pancake", NerTag.Organization, 188, 195),
|
||||
NerLabel("Company", NerTag.Organization, 196, 203),
|
||||
NerLabel("Maple", NerTag.Location, 208, 213),
|
||||
NerLabel("Lane", NerTag.Location, 214, 218),
|
||||
NerLabel("Forest", NerTag.Location, 220, 226),
|
||||
NerLabel("Hemptown", NerTag.Location, 241, 249),
|
||||
NerLabel("Little", NerTag.Organization, 349, 355),
|
||||
NerLabel("League", NerTag.Organization, 356, 362),
|
||||
NerLabel("Derek", NerTag.Person, 1119, 1124),
|
||||
NerLabel("Jeter", NerTag.Person, 1125, 1130)
|
||||
)
|
||||
assertEquals(labels, expect)
|
||||
}
|
||||
|
||||
test("find german ner labels") {
|
||||
val labels = StanfordNerClassifier.nerAnnotate(Language.German)(TestFiles.letterDEText)
|
||||
val expect = Vector(NerLabel("Max", NerTag.Person, 0, 3)
|
||||
, NerLabel("Mustermann", NerTag.Person, 4, 14)
|
||||
, NerLabel("Lilienweg", NerTag.Location, 16, 25)
|
||||
, NerLabel("Max", NerTag.Person, 77, 80)
|
||||
, NerLabel("Mustermann", NerTag.Person, 81, 91)
|
||||
, NerLabel("Lilienweg", NerTag.Location, 93, 102)
|
||||
, NerLabel("EasyCare", NerTag.Organization, 124, 132)
|
||||
, NerLabel("AG", NerTag.Organization, 133, 135)
|
||||
, NerLabel("Ackerweg", NerTag.Location, 158, 166)
|
||||
, NerLabel("Nebendorf", NerTag.Location, 184, 193)
|
||||
, NerLabel("Max", NerTag.Person, 505, 508)
|
||||
, NerLabel("Mustermann", NerTag.Person, 509, 519))
|
||||
val expect = Vector(
|
||||
NerLabel("Max", NerTag.Person, 0, 3),
|
||||
NerLabel("Mustermann", NerTag.Person, 4, 14),
|
||||
NerLabel("Lilienweg", NerTag.Location, 16, 25),
|
||||
NerLabel("Max", NerTag.Person, 77, 80),
|
||||
NerLabel("Mustermann", NerTag.Person, 81, 91),
|
||||
NerLabel("Lilienweg", NerTag.Location, 93, 102),
|
||||
NerLabel("EasyCare", NerTag.Organization, 124, 132),
|
||||
NerLabel("AG", NerTag.Organization, 133, 135),
|
||||
NerLabel("Ackerweg", NerTag.Location, 158, 166),
|
||||
NerLabel("Nebendorf", NerTag.Location, 184, 193),
|
||||
NerLabel("Max", NerTag.Person, 505, 508),
|
||||
NerLabel("Mustermann", NerTag.Person, 509, 519)
|
||||
)
|
||||
assertEquals(labels, expect)
|
||||
}
|
||||
}
|
||||
|
@ -9,16 +9,22 @@ object TextExtractionSuite extends SimpleTestSuite {
|
||||
|
||||
test("extract english pdf") {
|
||||
ignore()
|
||||
val text = TextExtract.extract[IO](letterSourceEN, blocker, "eng", Config.default).
|
||||
compile.lastOrError.unsafeRunSync()
|
||||
val text = TextExtract
|
||||
.extract[IO](letterSourceEN, blocker, "eng", Config.default)
|
||||
.compile
|
||||
.lastOrError
|
||||
.unsafeRunSync()
|
||||
println(text)
|
||||
}
|
||||
|
||||
test("extract german pdf") {
|
||||
ignore()
|
||||
val expect = TestFiles.letterDEText
|
||||
val extract = TextExtract.extract[IO](letterSourceDE, blocker, "deu", Config.default).
|
||||
compile.lastOrError.unsafeRunSync()
|
||||
val extract = TextExtract
|
||||
.extract[IO](letterSourceDE, blocker, "deu", Config.default)
|
||||
.compile
|
||||
.lastOrError
|
||||
.unsafeRunSync()
|
||||
|
||||
assertEquals(extract.trim, expect.trim)
|
||||
}
|
||||
|
@ -15,7 +15,6 @@ object TestSplitterSpec extends SimpleTestSuite {
|
||||
|
||||
val words = TextSplitter.splitToken(text, " \t\r\n".toSet).toVector
|
||||
|
||||
|
||||
assertEquals(words.size, 31)
|
||||
assertEquals(words(13), Word("bitte", 109, 114))
|
||||
assertEquals(text.substring(109, 114).toLowerCase, "bitte")
|
||||
|
Reference in New Issue
Block a user