Merge pull request #38 from eikek/feature/file-types
Feature/file types
@ -1,6 +1,6 @@
|
||||
version = "2.4.2"
|
||||
|
||||
align = most
|
||||
align = more
|
||||
#align.arrowEnumeratorGenerator = true
|
||||
|
||||
maxColumn = 100
|
||||
|
80
build.sbt
@ -143,6 +143,42 @@ val common = project.in(file("modules/common")).
|
||||
Dependencies.pureconfig.map(_ % "optional")
|
||||
)
|
||||
|
||||
// Some example files for testing
|
||||
// https://file-examples.com/index.php/sample-documents-download/sample-doc-download/
|
||||
val files = project.in(file("modules/files")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
settings(sharedSettings).
|
||||
settings(testSettings).
|
||||
settings(
|
||||
name := "docspell-files",
|
||||
libraryDependencies ++=
|
||||
Dependencies.tika,
|
||||
Test / sourceGenerators += Def.task {
|
||||
val base = (Test/resourceDirectory).value
|
||||
val files = (base ** (_.isFile)) pair sbt.io.Path.relativeTo(base)
|
||||
val lines = files.toList.map(_._2).map(s => {
|
||||
val ident = s.replaceAll("[^a-zA-Z0-9_]+", "_")
|
||||
ident -> s"""val $ident = createUrl("${s}")"""
|
||||
})
|
||||
val content = s"""package docspell.files
|
||||
|
||||
object ExampleFiles extends ExampleFilesSupport {
|
||||
|
||||
${lines.map(_._2).mkString("\n")}
|
||||
|
||||
val all = List(
|
||||
${lines.map(_._1).mkString(",\n")}
|
||||
)
|
||||
|
||||
}
|
||||
"""
|
||||
val target = (Test/sourceManaged).value/"scala"/"ExampleFiles.scala"
|
||||
IO.createDirectory(target.getParentFile)
|
||||
IO.write(target, content)
|
||||
Seq(target)
|
||||
}.taskValue
|
||||
).dependsOn(common)
|
||||
|
||||
val store = project.in(file("modules/store")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
settings(sharedSettings).
|
||||
@ -160,19 +196,44 @@ val store = project.in(file("modules/store")).
|
||||
Dependencies.emil
|
||||
).dependsOn(common)
|
||||
|
||||
val text = project.in(file("modules/text")).
|
||||
val extract = project.in(file("modules/extract")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
settings(sharedSettings).
|
||||
settings(testSettings).
|
||||
settings(
|
||||
name := "docspell-extract",
|
||||
libraryDependencies ++=
|
||||
Dependencies.fs2 ++
|
||||
Dependencies.twelvemonkeys ++
|
||||
Dependencies.pdfbox ++
|
||||
Dependencies.poi ++
|
||||
Dependencies.commonsIO ++
|
||||
Dependencies.julOverSlf4j
|
||||
).dependsOn(common, files % "compile->compile;test->test")
|
||||
|
||||
val convert = project.in(file("modules/convert")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
settings(sharedSettings).
|
||||
settings(testSettings).
|
||||
settings(
|
||||
name := "docspell-convert",
|
||||
libraryDependencies ++=
|
||||
Dependencies.flexmark ++
|
||||
Dependencies.twelvemonkeys
|
||||
).dependsOn(common, files % "compile->compile;test->test")
|
||||
|
||||
val analysis = project.in(file("modules/analysis")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
enablePlugins(NerModelsPlugin).
|
||||
settings(sharedSettings).
|
||||
settings(testSettings).
|
||||
settings(NerModelsPlugin.nerClassifierSettings).
|
||||
settings(
|
||||
name := "docspell-text",
|
||||
name := "docspell-analysis",
|
||||
libraryDependencies ++=
|
||||
Dependencies.fs2 ++
|
||||
Dependencies.tika ++
|
||||
Dependencies.stanfordNlpCore
|
||||
).dependsOn(common)
|
||||
).dependsOn(common, files % "test->test")
|
||||
|
||||
val restapi = project.in(file("modules/restapi")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
@ -226,7 +287,7 @@ val joex = project.in(file("modules/joex")).
|
||||
addCompilerPlugin(Dependencies.betterMonadicFor),
|
||||
buildInfoPackage := "docspell.joex",
|
||||
reStart/javaOptions ++= Seq(s"-Dconfig.file=${(LocalRootProject/baseDirectory).value/"local"/"dev.conf"}")
|
||||
).dependsOn(store, text, joexapi, restapi)
|
||||
).dependsOn(store, extract, convert, analysis, joexapi, restapi)
|
||||
|
||||
val backend = project.in(file("modules/backend")).
|
||||
disablePlugins(RevolverPlugin).
|
||||
@ -303,11 +364,11 @@ val microsite = project.in(file("modules/microsite")).
|
||||
skip in publish := true,
|
||||
micrositeFooterText := Some(
|
||||
"""
|
||||
|<p>© 2019 <a href="https://github.com/eikek/docspell">Docspell, v{{site.version}}</a></p>
|
||||
|<p>© 2020 <a href="https://github.com/eikek/docspell">Docspell, v{{site.version}}</a></p>
|
||||
|""".stripMargin
|
||||
),
|
||||
micrositeName := "Docspell",
|
||||
micrositeDescription := "A (PDF) Document Organizer",
|
||||
micrositeDescription := "Auto-tagging Document Organizer",
|
||||
micrositeDocumentationUrl := "/docspell/getit.html",
|
||||
micrositeDocumentationLabelDescription := "Quickstart",
|
||||
micrositeFavicons := Seq(microsites.MicrositeFavicon("favicon.png", "96x96")),
|
||||
@ -356,7 +417,10 @@ val root = project.in(file(".")).
|
||||
name := "docspell-root"
|
||||
).
|
||||
aggregate(common
|
||||
, text
|
||||
, extract
|
||||
, convert
|
||||
, analysis
|
||||
, files
|
||||
, store
|
||||
, joexapi
|
||||
, joex
|
||||
|
@ -1,9 +1,10 @@
|
||||
package docspell.text.contact
|
||||
package docspell.analysis.contact
|
||||
|
||||
import fs2.Stream
|
||||
import cats.implicits._
|
||||
import docspell.common.{Ident, LenientUri, NerLabel, NerTag}
|
||||
import docspell.text.split.TextSplitter
|
||||
|
||||
import docspell.common._
|
||||
import docspell.analysis.split._
|
||||
|
||||
object Contact {
|
||||
private[this] val protocols = Set("ftp", "http", "https")
|
@ -1,4 +1,4 @@
|
||||
package docspell.text.contact
|
||||
package docspell.analysis.contact
|
||||
|
||||
import cats.data.NonEmptyList
|
||||
import docspell.common.LenientUri
|
@ -1,6 +1,6 @@
|
||||
package docspell.text.contact
|
||||
package docspell.analysis.contact
|
||||
|
||||
private[text] object Tld {
|
||||
private[analysis] object Tld {
|
||||
|
||||
def findTld(str: String): Option[String] =
|
||||
known.find(str.endsWith)
|
@ -1,10 +1,10 @@
|
||||
package docspell.text.date
|
||||
package docspell.analysis.date
|
||||
|
||||
import fs2._
|
||||
import java.time.LocalDate
|
||||
|
||||
import docspell.common.{Language, NerDateLabel, NerLabel, NerTag}
|
||||
import docspell.text.split.{TextSplitter, Word}
|
||||
import fs2.{Pure, Stream}
|
||||
import docspell.common._
|
||||
import docspell.analysis.split._
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
@ -21,7 +21,7 @@ object DateFind {
|
||||
.map(sd =>
|
||||
NerDateLabel(
|
||||
sd.toLocalDate,
|
||||
NerLabel(text.substring(q(0).begin, q(2).end), NerTag.Date, q(0).begin, q(1).end)
|
||||
NerLabel(text.substring(q.head.begin, q(2).end), NerTag.Date, q.head.begin, q(1).end)
|
||||
)
|
||||
)
|
||||
)
|
@ -1,17 +1,17 @@
|
||||
package docspell.text.nlp
|
||||
package docspell.analysis.nlp
|
||||
|
||||
import java.net.URL
|
||||
import java.util.zip.GZIPInputStream
|
||||
|
||||
import docspell.common.{Language, NerLabel, NerTag}
|
||||
import edu.stanford.nlp.ie.AbstractSequenceClassifier
|
||||
import edu.stanford.nlp.ie.crf.CRFClassifier
|
||||
import edu.stanford.nlp.ling.{CoreAnnotations, CoreLabel}
|
||||
import org.log4s.getLogger
|
||||
|
||||
import scala.jdk.CollectionConverters._
|
||||
import org.log4s._
|
||||
import docspell.common._
|
||||
|
||||
import java.net.URL
|
||||
import scala.util.Using
|
||||
import scala.jdk.CollectionConverters._
|
||||
|
||||
object StanfordNerClassifier {
|
||||
private[this] val logger = getLogger
|
@ -1,4 +1,4 @@
|
||||
package docspell.text.split
|
||||
package docspell.analysis.split
|
||||
|
||||
import fs2.Stream
|
||||
|
@ -1,4 +1,4 @@
|
||||
package docspell.text.split
|
||||
package docspell.analysis.split
|
||||
|
||||
case class Word(value: String, begin: Int, end: Int) {
|
||||
def isEmpty: Boolean = value.isEmpty
|
@ -1,4 +1,4 @@
|
||||
package docspell.text.contact
|
||||
package docspell.analysis.contact
|
||||
|
||||
import docspell.common.{NerLabel, NerTag}
|
||||
import minitest.SimpleTestSuite
|
@ -1,8 +1,8 @@
|
||||
package docspell.text.date
|
||||
package docspell.analysis.date
|
||||
|
||||
import docspell.files.TestFiles
|
||||
import minitest.SimpleTestSuite
|
||||
import docspell.common.Language
|
||||
import docspell.text.TestFiles
|
||||
import minitest._
|
||||
|
||||
object DateFindSpec extends SimpleTestSuite {
|
||||
|
@ -1,8 +1,8 @@
|
||||
package docspell.text.nlp
|
||||
package docspell.analysis.nlp
|
||||
|
||||
import docspell.common.{Language, NerLabel, NerTag}
|
||||
import docspell.text.TestFiles
|
||||
import minitest.SimpleTestSuite
|
||||
import docspell.files.TestFiles
|
||||
import docspell.common._
|
||||
|
||||
object TextAnalyserSuite extends SimpleTestSuite {
|
||||
|
||||
@ -12,25 +12,23 @@ object TextAnalyserSuite extends SimpleTestSuite {
|
||||
NerLabel("Derek", NerTag.Person, 0, 5),
|
||||
NerLabel("Jeter", NerTag.Person, 6, 11),
|
||||
NerLabel("Treesville", NerTag.Person, 27, 37),
|
||||
NerLabel("Derek", NerTag.Person, 69, 74),
|
||||
NerLabel("Jeter", NerTag.Person, 75, 80),
|
||||
NerLabel("Treesville", NerTag.Location, 96, 106),
|
||||
NerLabel("M.", NerTag.Person, 142, 144),
|
||||
NerLabel("Leat", NerTag.Person, 145, 149),
|
||||
NerLabel("Syrup", NerTag.Organization, 160, 165),
|
||||
NerLabel("Production", NerTag.Organization, 166, 176),
|
||||
NerLabel("Old", NerTag.Organization, 177, 180),
|
||||
NerLabel("Sticky", NerTag.Organization, 181, 187),
|
||||
NerLabel("Pancake", NerTag.Organization, 188, 195),
|
||||
NerLabel("Company", NerTag.Organization, 196, 203),
|
||||
NerLabel("Maple", NerTag.Location, 208, 213),
|
||||
NerLabel("Lane", NerTag.Location, 214, 218),
|
||||
NerLabel("Forest", NerTag.Location, 220, 226),
|
||||
NerLabel("Hemptown", NerTag.Location, 241, 249),
|
||||
NerLabel("Little", NerTag.Organization, 349, 355),
|
||||
NerLabel("League", NerTag.Organization, 356, 362),
|
||||
NerLabel("Derek", NerTag.Person, 1119, 1124),
|
||||
NerLabel("Jeter", NerTag.Person, 1125, 1130)
|
||||
NerLabel("Derek", NerTag.Person, 68, 73),
|
||||
NerLabel("Jeter", NerTag.Person, 74, 79),
|
||||
NerLabel("Treesville", NerTag.Location, 95, 105),
|
||||
NerLabel("Syrup", NerTag.Organization, 159, 164),
|
||||
NerLabel("Production", NerTag.Organization, 165, 175),
|
||||
NerLabel("Old", NerTag.Organization, 176, 179),
|
||||
NerLabel("Sticky", NerTag.Organization, 180, 186),
|
||||
NerLabel("Pancake", NerTag.Organization, 187, 194),
|
||||
NerLabel("Company", NerTag.Organization, 195, 202),
|
||||
NerLabel("Maple", NerTag.Location, 207, 212),
|
||||
NerLabel("Lane", NerTag.Location, 213, 217),
|
||||
NerLabel("Forest", NerTag.Location, 219, 225),
|
||||
NerLabel("Hemptown", NerTag.Location, 239, 247),
|
||||
NerLabel("Little", NerTag.Organization, 347, 353),
|
||||
NerLabel("League", NerTag.Organization, 354, 360),
|
||||
NerLabel("Derek", NerTag.Person, 1117, 1122),
|
||||
NerLabel("Jeter", NerTag.Person, 1123, 1128)
|
||||
)
|
||||
assertEquals(labels, expect)
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
package docspell.text.split
|
||||
package docspell.analysis.split
|
||||
|
||||
import minitest._
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
object TestSplitterSpec extends SimpleTestSuite {
|
||||
|
@ -8,11 +8,10 @@ import doobie._
|
||||
import doobie.implicits._
|
||||
import docspell.store.{AddResult, Store}
|
||||
import docspell.store.queries.{QAttachment, QItem}
|
||||
import OItem.{AttachmentData, ItemData, ListItem, Query}
|
||||
import OItem.{AttachmentData, AttachmentSourceData, ItemData, ListItem, Query}
|
||||
import bitpeace.{FileMeta, RangeDef}
|
||||
import docspell.common.{Direction, Ident, ItemState, MetaProposalList, Timestamp}
|
||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RItem, RTagItem}
|
||||
import docspell.store.records.RSource
|
||||
import docspell.store.records.{RAttachment, RAttachmentMeta, RAttachmentSource, RItem, RSource, RTagItem}
|
||||
|
||||
trait OItem[F[_]] {
|
||||
|
||||
@ -22,6 +21,8 @@ trait OItem[F[_]] {
|
||||
|
||||
def findAttachment(id: Ident, collective: Ident): F[Option[AttachmentData[F]]]
|
||||
|
||||
def findAttachmentSource(id: Ident, collective: Ident): F[Option[AttachmentSourceData[F]]]
|
||||
|
||||
def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[AddResult]
|
||||
|
||||
def setDirection(item: Ident, direction: Direction, collective: Ident): F[AddResult]
|
||||
@ -67,7 +68,23 @@ object OItem {
|
||||
type ItemData = QItem.ItemData
|
||||
val ItemData = QItem.ItemData
|
||||
|
||||
trait BinaryData[F[_]] {
|
||||
def data: Stream[F, Byte]
|
||||
def name: Option[String]
|
||||
def meta: FileMeta
|
||||
def fileId: Ident
|
||||
}
|
||||
case class AttachmentData[F[_]](ra: RAttachment, meta: FileMeta, data: Stream[F, Byte])
|
||||
extends BinaryData[F] {
|
||||
val name = ra.name
|
||||
val fileId = ra.fileId
|
||||
}
|
||||
|
||||
case class AttachmentSourceData[F[_]](rs: RAttachmentSource, meta: FileMeta, data: Stream[F, Byte])
|
||||
extends BinaryData[F] {
|
||||
val name = rs.name
|
||||
val fileId = rs.fileId
|
||||
}
|
||||
|
||||
def apply[F[_]: Effect](store: Store[F]): Resource[F, OItem[F]] =
|
||||
Resource.pure[F, OItem[F]](new OItem[F] {
|
||||
@ -83,24 +100,41 @@ object OItem {
|
||||
.transact(RAttachment.findByIdAndCollective(id, collective))
|
||||
.flatMap({
|
||||
case Some(ra) =>
|
||||
store.bitpeace
|
||||
.get(ra.fileId.id)
|
||||
.unNoneTerminate
|
||||
.compile
|
||||
.last
|
||||
.map(
|
||||
_.map(m =>
|
||||
AttachmentData[F](
|
||||
ra,
|
||||
m,
|
||||
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
|
||||
)
|
||||
)
|
||||
makeBinaryData(ra.fileId) { m =>
|
||||
AttachmentData[F](
|
||||
ra,
|
||||
m,
|
||||
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
|
||||
)
|
||||
}
|
||||
|
||||
case None =>
|
||||
(None: Option[AttachmentData[F]]).pure[F]
|
||||
})
|
||||
|
||||
def findAttachmentSource(id: Ident, collective: Ident): F[Option[AttachmentSourceData[F]]] =
|
||||
store
|
||||
.transact(RAttachmentSource.findByIdAndCollective(id, collective))
|
||||
.flatMap({
|
||||
case Some(ra) =>
|
||||
makeBinaryData(ra.fileId) { m =>
|
||||
AttachmentSourceData[F](
|
||||
ra,
|
||||
m,
|
||||
store.bitpeace.fetchData2(RangeDef.all)(Stream.emit(m))
|
||||
)
|
||||
}
|
||||
|
||||
case None =>
|
||||
(None: Option[AttachmentSourceData[F]]).pure[F]
|
||||
})
|
||||
|
||||
private def makeBinaryData[A](fileId: Ident)(f: FileMeta => A): F[Option[A]] =
|
||||
store.bitpeace
|
||||
.get(fileId.id).unNoneTerminate.compile.last.map(
|
||||
_.map(m => f(m))
|
||||
)
|
||||
|
||||
def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[AddResult] = {
|
||||
val db = for {
|
||||
cid <- RItem.getCollective(item)
|
||||
|
19
modules/common/src/main/scala/docspell/common/DataType.scala
Normal file
@ -0,0 +1,19 @@
|
||||
package docspell.common
|
||||
|
||||
/** Describes what is known about the content type of some data: either the
  * mime type itself ([[DataType.Exact]]) or only a hint towards it
  * ([[DataType.Hint]]).
  */
sealed trait DataType

object DataType {

  /** The mime type is given directly. */
  case class Exact(mime: MimeType) extends DataType

  /** Only a [[MimeTypeHint]] is available. */
  case class Hint(hint: MimeTypeHint) extends DataType

  /** Wraps the given mime type into an [[Exact]] value. */
  def apply(mt: MimeType): DataType = {
    Exact(mt)
  }

  /** Creates a [[Hint]] whose hint is built from the given file name only. */
  def filename(name: String): DataType = {
    val byName = MimeTypeHint.filename(name)
    Hint(byName)
  }
}
|
@ -1,14 +1,14 @@
|
||||
package docspell.text.ocr
|
||||
package docspell.common
|
||||
|
||||
import cats.implicits._
|
||||
import scala.jdk.CollectionConverters._
|
||||
import java.io.IOException
|
||||
import java.nio.file.attribute.BasicFileAttributes
|
||||
import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor}
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
|
||||
import cats.effect.Sync
|
||||
import scala.jdk.CollectionConverters._
|
||||
import fs2.Stream
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
|
||||
object File {
|
||||
|
||||
@ -18,6 +18,9 @@ object File {
|
||||
def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] =
|
||||
mkDir(parent).map(p => Files.createTempDirectory(p, prefix))
|
||||
|
||||
def mkTempFile[F[_]: Sync](parent: Path, prefix: String, suffix: Option[String] = None): F[Path] =
|
||||
mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull))
|
||||
|
||||
def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay {
|
||||
val count = new AtomicInteger(0)
|
||||
Files.walkFileTree(
|
||||
@ -40,6 +43,12 @@ object File {
|
||||
count.get
|
||||
}
|
||||
|
||||
def exists[F[_]: Sync](file: Path): F[Boolean] =
|
||||
Sync[F].delay(Files.exists(file))
|
||||
|
||||
def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] =
|
||||
Sync[F].delay(Files.exists(file) && Files.size(file) > minSize)
|
||||
|
||||
def deleteFile[F[_]: Sync](file: Path): F[Unit] =
|
||||
Sync[F].delay(Files.deleteIfExists(file)).map(_ => ())
|
||||
|
||||
@ -47,10 +56,8 @@ object File {
|
||||
if (Files.isDirectory(path)) deleteDirectory(path)
|
||||
else deleteFile(path).map(_ => 1)
|
||||
|
||||
def withTempDir[F[_]: Sync, A](parent: Path, prefix: String)(
|
||||
f: Path => Stream[F, A]
|
||||
): Stream[F, A] =
|
||||
Stream.bracket(mkTempDir(parent, prefix))(p => delete(p).map(_ => ())).flatMap(f)
|
||||
def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] =
|
||||
Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ()))
|
||||
|
||||
def listFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] = Sync[F].delay {
|
||||
val javaList =
|
||||
@ -58,4 +65,11 @@ object File {
|
||||
javaList.asScala.toList.sortBy(_.getFileName.toString)
|
||||
}
|
||||
|
||||
def readAll[F[_]: Sync: ContextShift](file: Path, blocker: Blocker, chunkSize: Int): Stream[F, Byte] =
|
||||
fs2.io.file.readAll(file, blocker, chunkSize)
|
||||
|
||||
def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] =
|
||||
readAll[F](file, blocker, 8192).
|
||||
through(fs2.text.utf8Decode).
|
||||
compile.foldMonoid
|
||||
}
|
@ -65,6 +65,11 @@ case class LenientUri(
|
||||
fs2.io.readInputStream(Sync[F].delay(url.openStream()), chunkSize, blocker, true)
|
||||
)
|
||||
|
||||
def readText[F[_]: Sync: ContextShift](chunkSize: Int, blocker: Blocker): F[String] =
|
||||
readURL[F](chunkSize, blocker).
|
||||
through(fs2.text.utf8Decode).
|
||||
compile.foldMonoid
|
||||
|
||||
def host: Option[String] =
|
||||
authority.map(a =>
|
||||
a.indexOf(':') match {
|
||||
|
41
modules/common/src/main/scala/docspell/common/Logger.scala
Normal file
@ -0,0 +1,41 @@
|
||||
package docspell.common
|
||||
|
||||
import cats.effect.Sync
|
||||
import docspell.common.syntax.all._
|
||||
import org.log4s.{Logger => Log4sLogger}
|
||||
|
||||
/** A logger whose operations return their result inside `F`.
  *
  * All messages are by-name parameters.
  */
trait Logger[F[_]] {

  /** Logs `msg` at trace level. */
  def trace(msg: => String): F[Unit]

  /** Logs `msg` at debug level. */
  def debug(msg: => String): F[Unit]

  /** Logs `msg` at info level. */
  def info(msg: => String): F[Unit]

  /** Logs `msg` at warn level. */
  def warn(msg: => String): F[Unit]

  /** Logs `msg` together with the exception `ex` at error level. */
  def error(ex: Throwable)(msg: => String): F[Unit]

  /** Logs `msg` at error level. */
  def error(msg: => String): F[Unit]

}

object Logger {

  /** Creates a [[Logger]] that forwards every call to the given log4s logger.
    *
    * Each method delegates to the `f`-prefixed counterpart (`ftrace`,
    * `fdebug`, ...) on `log`; these are presumably the extension methods
    * brought in by `docspell.common.syntax.all._`.
    */
  def log4s[F[_]: Sync](log: Log4sLogger): Logger[F] =
    new Logger[F] {
      override def trace(msg: => String): F[Unit] = log.ftrace(msg)

      override def debug(msg: => String): F[Unit] = log.fdebug(msg)

      override def info(msg: => String): F[Unit] = log.finfo(msg)

      override def warn(msg: => String): F[Unit] = log.fwarn(msg)

      override def error(ex: Throwable)(msg: => String): F[Unit] = log.ferror(ex)(msg)

      override def error(msg: => String): F[Unit] = log.ferror(msg)
    }

}
|
@ -27,7 +27,7 @@ object MimeType {
|
||||
MimeType("image", partFromString(sub).throwLeft)
|
||||
|
||||
private[this] val validChars: Set[Char] =
|
||||
(('A' to 'Z') ++ ('a' to 'z') ++ ('0' to '9') ++ "*-").toSet
|
||||
(('A' to 'Z') ++ ('a' to 'z') ++ ('0' to '9') ++ "*-.+").toSet
|
||||
|
||||
def parse(str: String): Either[String, MimeType] =
|
||||
str.indexOf('/') match {
|
||||
@ -44,10 +44,11 @@ object MimeType {
|
||||
|
||||
private def partFromString(s: String): Either[String, String] =
|
||||
if (s.forall(validChars.contains)) Right(s)
|
||||
else Left(s"Invalid identifier: $s. Allowed chars: ${validChars.mkString}")
|
||||
else Left(s"Invalid identifier: $s. Allowed chars: ${validChars.toList.sorted.mkString}")
|
||||
|
||||
val octetStream = application("octet-stream")
|
||||
val pdf = application("pdf")
|
||||
val zip = application("zip")
|
||||
val png = image("png")
|
||||
val jpeg = image("jpeg")
|
||||
val tiff = image("tiff")
|
||||
|
@ -0,0 +1,20 @@
|
||||
package docspell.common
|
||||
|
||||
/** Optional pieces of information that may hint at the mime type of some data.
  *
  * @param filename   a file name, if one is known
  * @param advertised a mime type in string form, if one was given
  */
case class MimeTypeHint(filename: Option[String], advertised: Option[String]) {

  /** Returns a copy with `filename` set to the given name; `advertised` is kept. */
  def withName(name: String): MimeTypeHint =
    MimeTypeHint(Some(name), advertised)
}

object MimeTypeHint {

  /** A hint carrying no information at all. */
  val none: MimeTypeHint = MimeTypeHint(None, None)

  /** A hint consisting of a file name only. */
  def filename(name: String): MimeTypeHint =
    none.withName(name)

  /** A hint consisting only of the string form of the given mime type. */
  def advertised(mimeType: MimeType): MimeTypeHint = {
    val asText = mimeType.asString
    advertised(asText)
  }

  /** A hint consisting only of the given mime type string. */
  def advertised(mimeType: String): MimeTypeHint =
    none.copy(advertised = Some(mimeType))
}
|
@ -1,35 +1,54 @@
|
||||
package docspell.text.ocr
|
||||
package docspell.common
|
||||
|
||||
import java.io.InputStream
|
||||
import java.lang.ProcessBuilder.Redirect
|
||||
import java.nio.file.Path
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.{Blocker, ContextShift, Sync}
|
||||
import fs2.{Stream, io, text}
|
||||
import org.log4s.getLogger
|
||||
|
||||
import scala.jdk.CollectionConverters._
|
||||
import docspell.common.syntax.all._
|
||||
|
||||
object SystemCommand {
|
||||
|
||||
private[this] val logger = getLogger
|
||||
final case class Config(program: String, args: Seq[String], timeout: Duration) {
|
||||
|
||||
def mapArgs(f: String => String): Config =
|
||||
Config(program, args.map(f), timeout)
|
||||
|
||||
def replace(repl: Map[String, String]): Config =
|
||||
mapArgs(s =>
|
||||
repl.foldLeft(s) {
|
||||
case (res, (k, v)) =>
|
||||
res.replace(k, v)
|
||||
})
|
||||
|
||||
def toCmd: List[String] =
|
||||
program :: args.toList
|
||||
|
||||
lazy val cmdString: String =
|
||||
toCmd.mkString(" ")
|
||||
}
|
||||
|
||||
final case class Result(rc: Int, stdout: String, stderr: String)
|
||||
|
||||
def exec[F[_]: Sync: ContextShift](
|
||||
cmd: Config.Command,
|
||||
cmd: Config,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
wd: Option[Path] = None,
|
||||
stdin: Stream[F, Byte] = Stream.empty
|
||||
): Stream[F, Result] =
|
||||
startProcess(cmd, wd) { proc =>
|
||||
startProcess(cmd, wd, logger, stdin) { proc =>
|
||||
Stream.eval {
|
||||
for {
|
||||
_ <- writeToProcess(stdin, proc, blocker)
|
||||
term <- Sync[F].delay(proc.waitFor(cmd.timeout.seconds, TimeUnit.SECONDS))
|
||||
_ <- if (term) logger.fdebug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
|
||||
_ <- if (term) logger.debug(s"Command `${cmd.cmdString}` finished: ${proc.exitValue}")
|
||||
else
|
||||
logger.fwarn(
|
||||
logger.warn(
|
||||
s"Command `${cmd.cmdString}` did not finish in ${cmd.timeout.formatExact}!"
|
||||
)
|
||||
_ <- if (!term) timeoutError(proc, cmd) else Sync[F].pure(())
|
||||
@ -40,12 +59,13 @@ object SystemCommand {
|
||||
}
|
||||
|
||||
def execSuccess[F[_]: Sync: ContextShift](
|
||||
cmd: Config.Command,
|
||||
cmd: Config,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
wd: Option[Path] = None,
|
||||
stdin: Stream[F, Byte] = Stream.empty
|
||||
): Stream[F, Result] =
|
||||
exec(cmd, blocker, wd, stdin).flatMap { r =>
|
||||
exec(cmd, blocker, logger, wd, stdin).flatMap { r =>
|
||||
if (r.rc != 0)
|
||||
Stream.raiseError[F](
|
||||
new Exception(
|
||||
@ -55,18 +75,23 @@ object SystemCommand {
|
||||
else Stream.emit(r)
|
||||
}
|
||||
|
||||
private def startProcess[F[_]: Sync, A](cmd: Config.Command, wd: Option[Path])(
|
||||
private def startProcess[F[_]: Sync, A](cmd: Config, wd: Option[Path], logger: Logger[F], stdin: Stream[F, Byte])(
|
||||
f: Process => Stream[F, A]
|
||||
): Stream[F, A] = {
|
||||
val log = logger.fdebug(s"Running external command: ${cmd.cmdString}")
|
||||
val proc = log *> Sync[F].delay {
|
||||
val log = logger.debug(s"Running external command: ${cmd.cmdString}")
|
||||
val hasStdin = stdin.take(1).compile.last.map(_.isDefined)
|
||||
val proc = log *> hasStdin.flatMap(flag => Sync[F].delay {
|
||||
val pb = new ProcessBuilder(cmd.toCmd.asJava)
|
||||
.redirectInput(if (flag) Redirect.PIPE else Redirect.INHERIT)
|
||||
.redirectError(Redirect.PIPE)
|
||||
.redirectOutput(Redirect.PIPE)
|
||||
|
||||
wd.map(_.toFile).foreach(pb.directory)
|
||||
pb.start()
|
||||
}
|
||||
})
|
||||
Stream
|
||||
.bracket(proc)(p =>
|
||||
logger.fdebug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
||||
logger.debug(s"Closing process: `${cmd.cmdString}`").map { _ =>
|
||||
p.destroy()
|
||||
}
|
||||
)
|
||||
@ -93,7 +118,7 @@ object SystemCommand {
|
||||
): F[Unit] =
|
||||
data.through(io.writeOutputStream(Sync[F].delay(proc.getOutputStream), blocker)).compile.drain
|
||||
|
||||
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config.Command): F[Unit] =
|
||||
private def timeoutError[F[_]: Sync](proc: Process, cmd: Config): F[Unit] =
|
||||
Sync[F].delay(proc.destroyForcibly()).attempt *> {
|
||||
Sync[F].raiseError(
|
||||
new Exception(s"Command `${cmd.cmdString}` timed out (${cmd.timeout.formatExact})")
|
121
modules/convert/src/main/scala/docspell/convert/Conversion.scala
Normal file
@ -0,0 +1,121 @@
|
||||
package docspell.convert
|
||||
|
||||
import java.nio.charset.StandardCharsets
|
||||
|
||||
import fs2._
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.common._
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
import docspell.convert.extern.{Tesseract, Unoconv, WkHtmlPdf}
|
||||
import docspell.convert.flexmark.Markdown
|
||||
import docspell.files.{ImageSize, TikaMimetype}
|
||||
|
||||
/** Converts binary input into PDF. */
trait Conversion[F[_]] {

  /** Converts `in` to PDF and hands the outcome to `handler`.
    *
    * @param dataType what is known about the type of `in`
    * @param lang     the language, passed on to the image converter
    * @param handler  receives the [[ConversionResult]] and produces the final value
    * @param in       the bytes to convert
    */
  def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
      in: Stream[F, Byte]
  ): F[A]

}

object Conversion {

  /** Creates a [[Conversion]] that resolves the mime type of the input via
    * `TikaMimetype.resolve` and then picks a converter based on it.
    */
  def create[F[_]: Sync: ContextShift](
      cfg: ConvertConfig,
      blocker: Blocker,
      logger: Logger[F]
  ): Resource[F, Conversion[F]] =
    Resource.pure(new Conversion[F] {

      def toPDF[A](dataType: DataType, lang: Language, handler: Handler[F, A])(
          in: Stream[F, Byte]
      ): F[A] = {

        // html input goes through WkHtmlPdf
        def htmlToPdf(html: Stream[F, Byte]): F[A] =
          WkHtmlPdf.toPDF(cfg.wkhtmlpdf, cfg.chunkSize, blocker, logger)(html, handler)

        // image input goes through Tesseract
        def imageToPdf: F[A] =
          Tesseract.toPDF(cfg.tesseract, lang, cfg.chunkSize, blocker, logger)(in, handler)

        TikaMimetype.resolve(dataType, in).flatMap {
          case MimeType.pdf =>
            // already a pdf, pass it on unchanged
            handler.run(ConversionResult.successPdf(in))

          // must be tested before `Texts`, which matches every `text/...` type
          case MimeType.html =>
            htmlToPdf(in)

          case Texts(_) =>
            // other text is rendered to html by the markdown converter first
            Markdown.toHtml(in, cfg.markdown).flatMap { html =>
              val htmlBytes = Chunk.bytes(html.getBytes(StandardCharsets.UTF_8))
              htmlToPdf(Stream.chunk(htmlBytes).covary[F])
            }

          case Images(mt) =>
            ImageSize.get(in).flatMap {
              case Some(dim) if dim.product > cfg.maxImageSize =>
                logger
                  .info(s"Image size (${dim.product}) is too large (max ${cfg.maxImageSize}).") *>
                  handler.run(
                    ConversionResult.inputMalformed(
                      mt,
                      s"Image size (${dim.width}x${dim.height}) is too large (max ${cfg.maxImageSize})."
                    )
                  )

              case Some(_) =>
                imageToPdf

              case None =>
                // the size could not be determined; the conversion is tried regardless
                logger.info(
                  s"Cannot read image when determining size for ${mt.asString}. Converting anyways."
                ) *> imageToPdf
            }

          case Office(_) =>
            Unoconv.toPDF(cfg.unoconv, cfg.chunkSize, blocker, logger)(in, handler)

          case mt =>
            handler.run(ConversionResult.unsupportedFormat(mt))
        }
      }
    })

  /** Matches the image mime types contained in `all`. */
  object Images {

    val all = Set(MimeType.jpeg, MimeType.png, MimeType.tiff)

    def unapply(m: MimeType): Option[MimeType] =
      if (all.contains(m)) Some(m) else None
  }

  /** Matches every mime type whose primary part is `text`. */
  object Texts {
    def unapply(m: MimeType): Option[MimeType] =
      if (m.primary == "text") Some(m) else None
  }

  /** Matches the office document mime types contained in `all`. */
  object Office {
    val odt      = MimeType.application("vnd.oasis.opendocument.text")
    val ods      = MimeType.application("vnd.oasis.opendocument.spreadsheet")
    val odtAlias = MimeType.application("x-vnd.oasis.opendocument.text")
    val odsAlias = MimeType.application("x-vnd.oasis.opendocument.spreadsheet")
    val msoffice = MimeType.application("x-tika-msoffice")
    val ooxml    = MimeType.application("x-tika-ooxml")
    val docx     = MimeType.application("vnd.openxmlformats-officedocument.wordprocessingml.document")
    val xlsx     = MimeType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet")
    val xls      = MimeType.application("vnd.ms-excel")
    val doc      = MimeType.application("msword")
    val rtf      = MimeType.application("rtf")

    // without a filename, tika returns application/zip for odt/ods files, since
    // they are just zip files
    val odfContainer = MimeType.zip

    val all =
      Set(odt, ods, odtAlias, odsAlias, msoffice, ooxml, docx, xlsx, xls, doc, rtf, odfContainer)

    def unapply(m: MimeType): Option[MimeType] =
      if (all.contains(m)) Some(m) else None
  }

  /** Matches office, text, image and html mime types; everything else
    * (including pdf) yields `None`.
    */
  def unapply(mt: MimeType): Option[MimeType] =
    mt match {
      case Office(_) | Texts(_) | Images(_) | MimeType.html => Some(mt)
      case _                                                => None
    }
}
|
@ -0,0 +1,53 @@
|
||||
package docspell.convert
|
||||
|
||||
import cats.data.Kleisli
|
||||
import fs2.Stream
|
||||
import docspell.common.MimeType
|
||||
|
||||
/** The outcome of trying to convert some input into PDF. */
sealed trait ConversionResult[F[_]] {

  /** The PDF bytes; the non-success cases return an empty stream. */
  def pdfData: Stream[F, Byte]

}

object ConversionResult {

  /** A function consuming a [[ConversionResult]].
    *
    * External tools do the conversion and write their output to temporary
    * files, which are deleted once the process finishes. The handler is the
    * place to do something relevant with the resulting files.
    */
  type Handler[F[_], A] = Kleisli[F, ConversionResult[F], A]

  /** The mime type of the input is not supported. */
  case class UnsupportedFormat[F[_]](mime: MimeType) extends ConversionResult[F] {
    val pdfData = Stream.empty
  }

  /** The conversion failed with an exception. */
  case class Failure[F[_]](ex: Throwable) extends ConversionResult[F] {
    val pdfData = Stream.empty
  }

  /** The conversion produced a PDF. */
  case class SuccessPdf[F[_]](pdf: Stream[F, Byte]) extends ConversionResult[F] {
    val pdfData = pdf
  }

  /** The conversion produced a PDF together with a text value. */
  case class SuccessPdfTxt[F[_]](pdf: Stream[F, Byte], txt: F[String]) extends ConversionResult[F] {
    val pdfData = pdf
  }

  /** The input was rejected for the given reason. */
  case class InputMalformed[F[_]](mimeType: MimeType, reason: String) extends ConversionResult[F] {
    val pdfData = Stream.empty
  }

  /** Creates an [[UnsupportedFormat]] typed as [[ConversionResult]]. */
  def unsupportedFormat[F[_]](mime: MimeType): ConversionResult[F] = {
    UnsupportedFormat[F](mime)
  }

  /** Creates a [[Failure]] typed as [[ConversionResult]]. */
  def failure[F[_]](ex: Throwable): ConversionResult[F] = {
    Failure[F](ex)
  }

  /** Creates a [[SuccessPdf]] typed as [[ConversionResult]]. */
  def successPdf[F[_]](pdf: Stream[F, Byte]): ConversionResult[F] = {
    SuccessPdf[F](pdf)
  }

  /** Creates a [[SuccessPdfTxt]] typed as [[ConversionResult]]. */
  def successPdfTxt[F[_]](pdf: Stream[F, Byte], txt: F[String]): ConversionResult[F] = {
    SuccessPdfTxt[F](pdf, txt)
  }

  /** Creates an [[InputMalformed]] typed as [[ConversionResult]]. */
  def inputMalformed[F[_]](mimeType: MimeType, reason: String): ConversionResult[F] = {
    InputMalformed[F](mimeType, reason)
  }
}
|
@ -0,0 +1,11 @@
|
||||
package docspell.convert
|
||||
|
||||
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
||||
import docspell.convert.flexmark.MarkdownConfig
|
||||
|
||||
/** Bundles the settings of the convert module: two numeric limits and
  * one configuration per conversion backend.
  *
  * NOTE(review): `chunkSize` is presumably the chunk size used when
  * reading result files and `maxImageSize` presumably a pixel-count
  * limit for images; neither is used in this file, confirm at the use
  * sites.
  */
case class ConvertConfig(
    chunkSize: Int,
    maxImageSize: Int,
    markdown: MarkdownConfig,
    wkhtmlpdf: WkHtmlPdfConfig,
    tesseract: TesseractConfig,
    unoconv: UnoconvConfig
)
|
120
modules/convert/src/main/scala/docspell/convert/extern/ExternConv.scala
vendored
Normal file
@ -0,0 +1,120 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
import fs2.{Pipe, Stream}
|
||||
import docspell.common._
|
||||
import docspell.convert.ConversionResult
|
||||
import docspell.convert.ConversionResult.{Handler, successPdf, successPdfTxt}
|
||||
|
||||
/** Shared plumbing for converting files to PDF by running an external
  * program.
  */
private[extern] object ExternConv {

  /** Runs the command `cmdCfg` in a temporary directory created below
    * `wd` and hands the outcome to `handler`.
    *
    * The placeholder `{{outfile}}` is replaced with the absolute path of
    * `out.pdf` inside the temporary directory. If `useStdin` is false,
    * the input is first written to the file `infile` in that directory
    * and `{{infile}}` is replaced with its path. If `useStdin` is true,
    * no file is written and `in` is passed on to
    * `SystemCommand.execSuccess` (presumably as the process' stdin).
    *
    * After the command has run, its stdout/stderr are logged at debug
    * level, `reader` turns the expected output path and the command
    * result into a [[ConversionResult]], and `handler` is run on it
    * while the temporary directory resource is still held.
    *
    * @param name used in the temporary directory prefix and in log messages
    * @param reader builds the conversion result from the output path and the command result
    */
  def toPDF[F[_]: Sync: ContextShift, A](
      name: String,
      cmdCfg: SystemCommand.Config,
      wd: Path,
      useStdin: Boolean,
      blocker: Blocker,
      logger: Logger[F],
      reader: (Path, SystemCommand.Result) => F[ConversionResult[F]]
  )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] =
    Stream
      .resource(File.withTempDir[F](wd, s"docspell-$name"))
      .flatMap { dir =>
        val inFile = dir.resolve("infile").toAbsolutePath.normalize
        val out    = dir.resolve("out.pdf").toAbsolutePath.normalize
        val sysCfg =
          cmdCfg.replace(
            Map(
              "{{outfile}}" -> out.toString
            ) ++
              (if (!useStdin) Map("{{infile}}" -> inFile.toString)
               else Map.empty)
          )

        // With stdin there is nothing to store; emit one unit so that the
        // command below is still run exactly once.
        val createInput: Pipe[F, Byte, Unit] =
          if (useStdin) _ => Stream.emit(())
          else storeDataToFile(name, blocker, logger, inFile)

        in.through(createInput).flatMap { _ =>
          SystemCommand
            .execSuccess[F](sysCfg, blocker, logger, Some(dir), if (useStdin) in else Stream.empty)
            .evalMap(result =>
              logResult(name, result, logger)
                .flatMap(_ => reader(out, result))
                .flatMap(handler.run)
            )
        }
      }
      .compile
      .lastOrError

  /** Builds the result from the output file `out`.
    *
    * If `out` exists and is non-empty the result is a `SuccessPdf`
    * reading that file; a non-zero return code is only logged as a
    * warning in that case. Otherwise the result is a `Failure`.
    */
  def readResult[F[_]: Sync: ContextShift](
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] =
    File.existsNonEmpty[F](out).flatMap {
      case true =>
        if (result.rc == 0) successPdf(File.readAll(out, blocker, chunkSize)).pure[F]
        else
          logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
            successPdf(File.readAll(out, blocker, chunkSize)).pure[F]

      case false =>
        ConversionResult
          .failure[F](
            new Exception(s"Command result=${result.rc}. No output file found.")
          )
          .pure[F]
    }

  /** Like [[readResult]], but looks for the files `$outPrefix.pdf` and
    * `$outPrefix.txt` next to `out`.
    *
    * If the pdf exists and is non-empty and the return code is 0, the
    * result is a `SuccessPdfTxt` when the txt file exists and a
    * `SuccessPdf` otherwise. With a non-zero return code a warning is
    * logged and only the pdf is returned. Without a pdf the result is a
    * `Failure`.
    */
  def readResultTesseract[F[_]: Sync: ContextShift](
      outPrefix: String,
      blocker: Blocker,
      chunkSize: Int,
      logger: Logger[F]
  )(out: Path, result: SystemCommand.Result): F[ConversionResult[F]] = {
    val outPdf = out.resolveSibling(s"$outPrefix.pdf")
    File.existsNonEmpty[F](outPdf).flatMap {
      case true =>
        val outTxt = out.resolveSibling(s"$outPrefix.txt")
        File.exists(outTxt).flatMap { txtExists =>
          // Read the file whose existence was checked above. Reading `out`
          // here only worked as long as the prefix happened to be "out".
          val pdfData = File.readAll(outPdf, blocker, chunkSize)
          if (result.rc == 0) {
            if (txtExists) successPdfTxt(pdfData, File.readText(outTxt, blocker)).pure[F]
            else successPdf(pdfData).pure[F]
          } else {
            logger.warn(s"Command not successful (rc=${result.rc}), but file exists.") *>
              successPdf(pdfData).pure[F]
          }
        }

      case false =>
        ConversionResult
          .failure[F](
            new Exception(s"Command result=${result.rc}. No output file found.")
          )
          .pure[F]
    }
  }

  /** A pipe that logs a debug message, writes all input bytes to `inFile`
    * and then emits a single unit.
    */
  private def storeDataToFile[F[_]: Sync: ContextShift](
      name: String,
      blocker: Blocker,
      logger: Logger[F],
      inFile: Path
  ): Pipe[F, Byte, Unit] =
    in =>
      Stream.eval(logger.debug(s"Storing input to file ${inFile} for running $name")).drain ++
        Stream.eval(storeFile(in, inFile, blocker))

  /** Logs stdout and stderr of the command result at debug level. */
  private def logResult[F[_]: Sync](
      name: String,
      result: SystemCommand.Result,
      logger: Logger[F]
  ): F[Unit] =
    logger.debug(s"$name stdout: ${result.stdout}") *>
      logger.debug(s"$name stderr: ${result.stderr}")

  /** Writes all bytes of `in` to the file `target`. */
  private def storeFile[F[_]: Sync: ContextShift](
      in: Stream[F, Byte],
      target: Path,
      blocker: Blocker
  ): F[Unit] =
    in.through(fs2.io.file.writeAll(target, blocker)).compile.drain
}
|
27
modules/convert/src/main/scala/docspell/convert/extern/Tesseract.scala
vendored
Normal file
@ -0,0 +1,27 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.convert.ConversionResult
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
|
||||
/** Converts input to PDF by running the configured tesseract command. */
object Tesseract {

  /** Runs the command from `cfg` on `in` and passes the result to
    * `handler`.
    *
    * The placeholder `{{lang}}` in the command is replaced with
    * `lang.iso3`. The input is written to a file (stdin is not used).
    * The second command argument is taken as the base name of the
    * output files that are read back; if there is no second argument,
    * "out" is used.
    */
  def toPDF[F[_]: Sync: ContextShift, A](
      cfg: TesseractConfig,
      lang: Language,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
    // `drop(1)` instead of `tail`: `tail` throws on an empty argument list,
    // which would bypass the "out" fallback.
    val outBase = cfg.command.args.drop(1).headOption.getOrElse("out")
    val reader: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
      ExternConv.readResultTesseract[F](outBase, blocker, chunkSize, logger)

    ExternConv.toPDF[F, A](
      "tesseract",
      cfg.command.replace(Map("{{lang}}" -> lang.iso3)),
      cfg.workingDir,
      false, // useStdin
      blocker,
      logger,
      reader
    )(in, handler)
  }

}
|
7
modules/convert/src/main/scala/docspell/convert/extern/TesseractConfig.scala
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
/** The tesseract command to run and the directory below which its
  * temporary working directory is created.
  */
case class TesseractConfig(command: SystemCommand.Config, workingDir: Path)
|
25
modules/convert/src/main/scala/docspell/convert/extern/Unoconv.scala
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.convert.ConversionResult
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
|
||||
/** Converts input to PDF by running the configured unoconv command. */
object Unoconv {

  /** Runs the command from `cfg` on `in` and passes the result to
    * `handler`. The input is written to a file (stdin is not used) and
    * the result is read from the single expected output file.
    */
  def toPDF[F[_]: Sync: ContextShift, A](
      cfg: UnoconvConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
    val readOutput: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
      ExternConv.readResult[F](blocker, chunkSize, logger)

    ExternConv.toPDF[F, A](
      "unoconv",
      cfg.command,
      cfg.workingDir,
      false, // useStdin
      blocker,
      logger,
      readOutput
    )(in, handler)
  }

}
|
7
modules/convert/src/main/scala/docspell/convert/extern/UnoconvConfig.scala
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
/** The unoconv command to run and the directory below which its
  * temporary working directory is created.
  */
case class UnoconvConfig(command: SystemCommand.Config, workingDir: Path)
|
25
modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdf.scala
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect._
|
||||
import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.convert.ConversionResult
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
|
||||
/** Converts input to PDF by running the configured wkhtmltopdf command. */
object WkHtmlPdf {

  /** Runs the command from `cfg` on `in` and passes the result to
    * `handler`. The input is not written to a file (`useStdin` is true)
    * and the result is read from the single expected output file.
    */
  def toPDF[F[_]: Sync: ContextShift, A](
      cfg: WkHtmlPdfConfig,
      chunkSize: Int,
      blocker: Blocker,
      logger: Logger[F]
  )(in: Stream[F, Byte], handler: Handler[F, A]): F[A] = {
    val readOutput: (Path, SystemCommand.Result) => F[ConversionResult[F]] =
      ExternConv.readResult[F](blocker, chunkSize, logger)

    ExternConv.toPDF[F, A](
      "wkhtmltopdf",
      cfg.command,
      cfg.workingDir,
      true, // useStdin
      blocker,
      logger,
      readOutput
    )(in, handler)
  }

}
|
7
modules/convert/src/main/scala/docspell/convert/extern/WkHtmlPdfConfig.scala
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import docspell.common.SystemCommand
|
||||
|
||||
/** The wkhtmltopdf command to run and the directory below which its
  * temporary working directory is created.
  */
case class WkHtmlPdfConfig(command: SystemCommand.Config, workingDir: Path)
|
@ -0,0 +1,71 @@
|
||||
package docspell.convert.flexmark
|
||||
|
||||
import java.io.{InputStream, InputStreamReader}
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.util
|
||||
|
||||
import cats.effect.Sync
|
||||
import cats.implicits._
|
||||
import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension
|
||||
import com.vladsch.flexmark.ext.tables.TablesExtension
|
||||
import com.vladsch.flexmark.html.HtmlRenderer
|
||||
import com.vladsch.flexmark.parser.Parser
|
||||
import com.vladsch.flexmark.util.data.{DataKey, MutableDataSet}
|
||||
import fs2.Stream
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
/** Renders markdown to a complete HTML document using flexmark. */
object Markdown {

  /** Reads `is` as UTF-8 markdown and renders it to an HTML document.
    *
    * Every failure, including one while setting up the parser or
    * renderer, is returned as a `Left`. The stream is not closed here.
    */
  def toHtml(is: InputStream, cfg: MarkdownConfig): Either[Throwable, String] =
    Try {
      // Built inside the Try so that this method never throws.
      val p      = createParser()
      val r      = createRenderer()
      val reader = new InputStreamReader(is, StandardCharsets.UTF_8)
      val doc    = p.parseReader(reader)
      wrapHtml(r.render(doc), cfg)
    }.toEither

  /** Renders the markdown text `md` to an HTML document. */
  def toHtml(md: String, cfg: MarkdownConfig): String = {
    val p   = createParser()
    val r   = createRenderer()
    val doc = p.parse(md)
    wrapHtml(r.render(doc), cfg)
  }

  /** Decodes `data` as UTF-8, concatenates it into a single string and
    * renders that to an HTML document.
    */
  def toHtml[F[_]: Sync](data: Stream[F, Byte], cfg: MarkdownConfig): F[String] =
    data
      .through(fs2.text.utf8Decode)
      .compile
      .foldMonoid
      .map(str => toHtml(str, cfg))

  /** Wraps the rendered `body` into an HTML document whose head contains
    * `cfg.internalCss` inside a style element.
    *
    * The document is assembled from separate lines instead of an
    * interpolated string with `stripMargin`: `stripMargin` runs after
    * interpolation and would remove a leading `|` from lines of the body
    * or the css (for example lines of a rendered code block).
    */
  private def wrapHtml(body: String, cfg: MarkdownConfig): String =
    List(
      "<!DOCTYPE html>",
      "<html>",
      "<head>",
      """<meta charset="utf-8"/>""",
      "<style>",
      cfg.internalCss,
      "</style>",
      "</head>",
      "<body>",
      body,
      "</body>",
      "</html>",
      "" // keeps the trailing newline of the document
    ).mkString("\n")

  /** Creates a parser with the tables and strikethrough extensions. */
  private def createParser(): Parser = {
    val opts = new MutableDataSet()
    opts.set(
      Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
      util.Arrays.asList(TablesExtension.create(), StrikethroughExtension.create())
    )

    Parser.builder(opts).build()
  }

  /** Creates an HTML renderer with default options. */
  private def createRenderer(): HtmlRenderer = {
    val opts = new MutableDataSet()
    HtmlRenderer.builder(opts).build()
  }
}
|
@ -0,0 +1,3 @@
|
||||
package docspell.convert.flexmark
|
||||
|
||||
/** Settings for rendering markdown.
  *
  * @param internalCss css text for the style element of the generated html document
  */
case class MarkdownConfig(internalCss: String)
|
@ -0,0 +1,160 @@
|
||||
package docspell.convert
|
||||
|
||||
import java.nio.file.Paths
|
||||
|
||||
import cats.data.Kleisli
|
||||
import cats.implicits._
|
||||
import cats.effect.IO
|
||||
import fs2.Stream
|
||||
import docspell.common._
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
import docspell.convert.extern.{TesseractConfig, UnoconvConfig, WkHtmlPdfConfig}
|
||||
import docspell.convert.flexmark.MarkdownConfig
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Runs the conversion against the example files. Every test is ignored
  * unless all three external programs are found.
  */
object ConversionTest extends SimpleTestSuite with FileChecks {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  val logger = Logger.log4s[IO](org.log4s.getLogger)
  val target = Paths.get("target")

  val convertConfig = ConvertConfig(
    8192,
    3000 * 3000,
    MarkdownConfig("body { padding: 2em 5em; }"),
    WkHtmlPdfConfig(
      SystemCommand.Config(
        "wkhtmltopdf",
        Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
        Duration.seconds(20)
      ),
      target
    ),
    TesseractConfig(
      SystemCommand.Config(
        "tesseract",
        Seq("{{infile}}", "out", "-l", "deu", "pdf", "txt"),
        Duration.seconds(20)
      ),
      target
    ),
    UnoconvConfig(
      SystemCommand.Config(
        "unoconv",
        Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
        Duration.seconds(20)
      ),
      target
    )
  )

  val conversion = Conversion.create[IO](convertConfig, blocker, logger)

  // Images for which the conversion must answer with `InputMalformed`.
  val bombs = List(
    ExampleFiles.bombs_20K_gray_jpeg,
    ExampleFiles.bombs_20K_gray_png,
    ExampleFiles.bombs_20K_rgb_jpeg,
    ExampleFiles.bombs_20K_rgb_png
  )

  // Files for which only a pdf is checked. Each file is listed once; the
  // ods sample used to appear twice and was converted twice.
  val pdfOnly = List(
    ExampleFiles.examples_sample_ods,
    ExampleFiles.examples_sample_doc,
    ExampleFiles.examples_sample_docx,
    ExampleFiles.examples_sample_odt,
    ExampleFiles.examples_sample_rtf,
    ExampleFiles.examples_sample_xls,
    ExampleFiles.examples_sample_xlsx,
    ExampleFiles.letter_de_md,
    ExampleFiles.letter_de_txt,
    ExampleFiles.letter_en_txt,
    ExampleFiles.letter_de_html
  )

  // Images for which a pdf and a text file are checked.
  val pdfAndTxt = List(
    ExampleFiles.camera_letter_en_jpg,
    ExampleFiles.camera_letter_en_png,
    ExampleFiles.camera_letter_en_tiff,
    ExampleFiles.scanner_jfif_jpg
  )

  test("convert to pdf") {
    if (!commandsExist) ignore("At least one of the conversion programs not found")
    else
      File
        .withTempDir[IO](target, "convpdf")
        .use { dir =>
          conversion.use { conv =>
            def check(n: Long): Handler[IO, Unit] =
              storePdfHandler(dir.resolve(s"test-$n.pdf")).map { p =>
                assert(p.isNonEmpty && p.isPDF)
              }

            runConversion(pdfOnly, check, conv).compile.drain
          }
        }
        .unsafeRunSync()
  }

  test("convert image to pdf and txt") {
    if (!commandsExist) ignore("At least one of the conversion programs not found")
    else
      File
        .withTempDir[IO](target, "convimgpdf")
        .use { dir =>
          conversion.use { conv =>
            def check(n: Long): Handler[IO, Unit] =
              storePdfTxtHandler(dir.resolve(s"test-$n.pdf"), dir.resolve(s"test-$n.txt"))
                .map {
                  case (p, t) =>
                    assert(p.isNonEmpty && p.isPDF)
                    assert(t.isNonEmpty && t.isPlainText)
                }

            runConversion(pdfAndTxt, check, conv).compile.drain
          }
        }
        .unsafeRunSync()
  }

  test("do not convert image bombs") {
    if (!commandsExist) ignore("At least one of the conversion programs not found")
    else
      conversion
        .use { conv =>
          def check: Handler[IO, Unit] =
            Kleisli({
              case ConversionResult.InputMalformed(_, _) =>
                ().pure[IO]
              case cr =>
                IO.raiseError(new Exception(s"Unexpected result: $cr"))
            })

          runConversion(bombs, _ => check, conv).compile.drain
        }
        .unsafeRunSync()
  }

  /** Converts every uri in order; `handler` receives the zero-based
    * position of the uri in the list.
    */
  def runConversion[A](
      uris: List[LenientUri],
      handler: Long => Handler[IO, A],
      conv: Conversion[IO]
  ) =
    Stream
      .emits(uris)
      .covary[IO]
      .zipWithIndex
      .evalMap({
        case (uri, index) =>
          val load     = uri.readURL[IO](8192, blocker)
          val dataType = DataType.filename(uri.path.segments.last)
          logger.info(s"Processing file ${uri.path.asString}") *>
            conv.toPDF(dataType, Language.German, handler(index))(load)
      })

  /** True if all three configured programs are found. */
  def commandsExist: Boolean =
    commandExists(convertConfig.unoconv.command.program) &&
      commandExists(convertConfig.wkhtmlpdf.command.program) &&
      commandExists(convertConfig.tesseract.command.program)
}
|
@ -0,0 +1,59 @@
|
||||
package docspell.convert
|
||||
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.nio.file.{Files, Path}
|
||||
|
||||
import cats.data.Kleisli
|
||||
import cats.effect.IO
|
||||
import fs2.{Pipe, Stream}
|
||||
import docspell.common.MimeType
|
||||
import docspell.convert.ConversionResult.Handler
|
||||
import docspell.files.TikaMimetype
|
||||
|
||||
/** Helpers for tests that check files produced by a conversion. */
trait FileChecks {

  /** Adds file checks to a path. */
  implicit class FileCheckOps(p: Path) {

    /** True if the file exists and has a size greater than 0. */
    def isNonEmpty: Boolean =
      Files.exists(p) && Files.size(p) > 0

    /** True if the mime type detected for the file equals `mime`. */
    def isType(mime: MimeType): Boolean =
      TikaMimetype.detect[IO](p).map(_ == mime).unsafeRunSync()

    def isPDF: Boolean =
      isType(MimeType.pdf)

    def isPlainText: Boolean =
      isType(MimeType.text("plain"))
  }

  /** Collects all input bytes in memory, writes them to `file` and emits
    * the path returned by `Files.write`.
    */
  def storeFile(file: Path): Pipe[IO, Byte, Path] =
    in => Stream.eval(in.compile.to(Array).flatMap(bytes => IO(Files.write(file, bytes))))

  /** Stores the pdf of a successful result to `file` and returns that
    * path. Text, if present, goes to `unexpected.txt` next to `file`.
    */
  def storePdfHandler(file: Path): Handler[IO, Path] =
    storePdfTxtHandler(file, file.resolveSibling("unexpected.txt")).map(_._1)

  /** Stores the pdf (and the text, if the result has one) and returns
    * both paths. For a `SuccessPdf` the text path is returned without
    * writing a file. Every other result fails the effect; the error is
    * raised inside `IO` instead of being thrown while the handler
    * function is applied.
    */
  def storePdfTxtHandler(filePdf: Path, fileTxt: Path): Handler[IO, (Path, Path)] =
    Kleisli({
      case ConversionResult.SuccessPdfTxt(pdf, txt) =>
        for {
          pout <- pdf.through(storeFile(filePdf)).compile.lastOrError
          str  <- txt
          tout <- IO(Files.write(fileTxt, str.getBytes(StandardCharsets.UTF_8)))
        } yield (pout, tout)

      case ConversionResult.SuccessPdf(pdf) =>
        pdf.through(storeFile(filePdf)).compile.lastOrError.map(p => (p, fileTxt))

      case ConversionResult.Failure(ex) =>
        IO.raiseError[(Path, Path)](
          new Exception(s"Unexpected result (failure: ${ex.getMessage})", ex)
        )

      case cr =>
        IO.raiseError[(Path, Path)](new Exception(s"Unexpected result: $cr"))
    })

  /** True if `which cmd` exits with 0. If `which` itself cannot be
    * started (`exec` throws), the command is reported as missing so that
    * callers can ignore their test instead of failing it.
    */
  def commandExists(cmd: String): Boolean =
    scala.util
      .Try(Runtime.getRuntime.exec(Array("which", cmd)).waitFor() == 0)
      .getOrElse(false)

}
|
107
modules/convert/src/test/scala/docspell/convert/extern/ExternConvTest.scala
vendored
Normal file
@ -0,0 +1,107 @@
|
||||
package docspell.convert.extern
|
||||
|
||||
import java.nio.file.{Path, Paths}
|
||||
|
||||
import cats.effect._
|
||||
import docspell.common._
|
||||
import docspell.convert.FileChecks
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Runs each external converter on one example file. A test is ignored
  * when its program is not found.
  */
object ExternConvTest extends SimpleTestSuite with FileChecks {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  val logger = Logger.log4s[IO](org.log4s.getLogger)
  val target = Paths.get("target")

  test("convert html to pdf") {
    val cfg = SystemCommand.Config(
      "wkhtmltopdf",
      Seq("-s", "A4", "--encoding", "UTF-8", "-", "{{outfile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      File
        .withTempDir[IO](target, "wkhtmltopdf")
        .use { dir =>
          val wkCfg = WkHtmlPdfConfig(cfg, target)
          WkHtmlPdf
            .toPDF[IO, Path](wkCfg, 8192, blocker, logger)(
              ExampleFiles.letter_de_html.readURL[IO](8192, blocker),
              storePdfHandler(dir.resolve("test.pdf"))
            )
            .map(p => assert(p.isNonEmpty && p.isPDF))
        }
        .unsafeRunSync()
  }

  test("convert office to pdf") {
    val cfg = SystemCommand.Config(
      "unoconv",
      Seq("-f", "pdf", "-o", "{{outfile}}", "{{infile}}"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      File
        .withTempDir[IO](target, "unoconv")
        .use { dir =>
          val ucCfg = UnoconvConfig(cfg, target)
          Unoconv
            .toPDF[IO, Path](ucCfg, 8192, blocker, logger)(
              ExampleFiles.examples_sample_docx.readURL[IO](8192, blocker),
              storePdfHandler(dir.resolve("test.pdf"))
            )
            .map(p => assert(p.isNonEmpty && p.isPDF))
        }
        .unsafeRunSync()
  }

  test("convert image to pdf") {
    val cfg = SystemCommand.Config(
      "tesseract",
      Seq("{{infile}}", "out", "-l", "deu", "pdf", "txt"),
      Duration.seconds(20)
    )

    if (!commandExists(cfg.program)) ignore(s"Command ${cfg.program} not found")
    else
      File
        .withTempDir[IO](target, "tesseract")
        .use { dir =>
          val tessCfg = TesseractConfig(cfg, target)
          Tesseract
            .toPDF[IO, (Path, Path)](tessCfg, Language.German, 8192, blocker, logger)(
              ExampleFiles.camera_letter_en_jpg.readURL[IO](8192, blocker),
              storePdfTxtHandler(dir.resolve("test.pdf"), dir.resolve("test.txt"))
            )
            .map {
              case (pdf, txt) =>
                assert(pdf.isNonEmpty && pdf.isPDF)
                assert(txt.isNonEmpty && txt.isPlainText)
            }
        }
        .unsafeRunSync()
  }

}
|
11
modules/extract/NOTICE
Normal file
@ -0,0 +1,11 @@
|
||||
The Java source files in docspell-extract are unmodified copies of
|
||||
those found in the Apache Tika parser project. It follows the
|
||||
NOTICE.txt file from Apache Tika parsers:
|
||||
|
||||
Apache Tika parsers
|
||||
Copyright 2007-2019 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
||||
|
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.odf;
|
||||
|
||||
import org.apache.tika.sax.ContentHandlerDecorator;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.AttributesImpl;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* Content handler decorator that:<ul>
|
||||
* <li>Maps old OpenOffice 1.0 Namespaces to the OpenDocument ones</li>
|
||||
* <li>Returns a fake DTD when parser requests OpenOffice DTD</li>
|
||||
* </ul>
|
||||
*/
|
||||
public class NSNormalizerContentHandler extends ContentHandlerDecorator {
|
||||
|
||||
private static final String OLD_NS =
|
||||
"http://openoffice.org/2000/";
|
||||
|
||||
private static final String NEW_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:";
|
||||
|
||||
private static final String DTD_PUBLIC_ID =
|
||||
"-//OpenOffice.org//DTD OfficeDocument 1.0//EN";
|
||||
|
||||
public NSNormalizerContentHandler(ContentHandler handler) {
|
||||
super(handler);
|
||||
}
|
||||
|
||||
private String mapOldNS(String ns) {
|
||||
if (ns != null && ns.startsWith(OLD_NS)) {
|
||||
return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0";
|
||||
} else {
|
||||
return ns;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(
|
||||
String namespaceURI, String localName, String qName,
|
||||
Attributes atts) throws SAXException {
|
||||
AttributesImpl natts = new AttributesImpl();
|
||||
for (int i = 0; i < atts.getLength(); i++) {
|
||||
natts.addAttribute(
|
||||
mapOldNS(atts.getURI(i)), atts.getLocalName(i),
|
||||
atts.getQName(i), atts.getType(i), atts.getValue(i));
|
||||
}
|
||||
super.startElement(mapOldNS(namespaceURI), localName, qName, atts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String namespaceURI, String localName, String qName)
|
||||
throws SAXException {
|
||||
super.endElement(mapOldNS(namespaceURI), localName, qName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startPrefixMapping(String prefix, String uri)
|
||||
throws SAXException {
|
||||
super.startPrefixMapping(prefix, mapOldNS(uri));
|
||||
}
|
||||
|
||||
/**
|
||||
* do not load any DTDs (may be requested by parser). Fake the DTD by
|
||||
* returning a empty string as InputSource
|
||||
*/
|
||||
@Override
|
||||
public InputSource resolveEntity(String publicId, String systemId)
|
||||
throws IOException, SAXException {
|
||||
if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd"))
|
||||
|| DTD_PUBLIC_ID.equals(publicId)) {
|
||||
return new InputSource(new StringReader(""));
|
||||
} else {
|
||||
return super.resolveEntity(publicId, systemId);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,606 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.odf;
|
||||
|
||||
import org.apache.commons.io.input.CloseShieldInputStream;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AbstractParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.sax.ElementMappingContentHandler;
|
||||
import org.apache.tika.sax.ElementMappingContentHandler.TargetElement;
|
||||
import org.apache.tika.sax.OfflineContentHandler;
|
||||
import org.apache.tika.sax.XHTMLContentHandler;
|
||||
import org.apache.tika.utils.XMLReaderUtils;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.AttributesImpl;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.Stack;
|
||||
|
||||
import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
|
||||
|
||||
/**
|
||||
* Parser for ODF <code>content.xml</code> files.
|
||||
*/
|
||||
public class OpenDocumentContentParser extends AbstractParser {
|
||||
private interface Style {
|
||||
}
|
||||
|
||||
private static class TextStyle implements Style {
|
||||
public boolean italic;
|
||||
public boolean bold;
|
||||
public boolean underlined;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TextStyle{" +
|
||||
"italic=" + italic +
|
||||
", bold=" + bold +
|
||||
", underlined=" + underlined +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
private static class ListStyle implements Style {
|
||||
public boolean ordered;
|
||||
|
||||
public String getTag() {
|
||||
return ordered ? "ol" : "ul";
|
||||
}
|
||||
}
|
||||
|
||||
private static final class OpenDocumentElementMappingContentHandler extends
|
||||
ElementMappingContentHandler {
|
||||
private static final char[] SPACE = new char[]{ ' '};
|
||||
private static final String CLASS = "class";
|
||||
private static final Attributes ANNOTATION_ATTRIBUTES = buildAttributes(CLASS, "annotation");
|
||||
private static final Attributes NOTE_ATTRIBUTES = buildAttributes(CLASS, "note");
|
||||
private static final Attributes NOTES_ATTRIBUTES = buildAttributes(CLASS, "notes");
|
||||
|
||||
private static Attributes buildAttributes(String key, String value) {
|
||||
AttributesImpl attrs = new AttributesImpl();
|
||||
attrs.addAttribute("", key, key, "CDATA", value);
|
||||
return attrs;
|
||||
}
|
||||
|
||||
private final ContentHandler handler;
|
||||
private final BitSet textNodeStack = new BitSet();
|
||||
private int nodeDepth = 0;
|
||||
private int completelyFiltered = 0;
|
||||
private Stack<String> headingStack = new Stack<String>();
|
||||
private Map<String, TextStyle> paragraphTextStyleMap = new HashMap<String, TextStyle>();
|
||||
private Map<String, TextStyle> textStyleMap = new HashMap<String, TextStyle>();
|
||||
private Map<String, ListStyle> listStyleMap = new HashMap<String, ListStyle>();
|
||||
private String currParagraphStyleName; //paragraph style name
|
||||
private TextStyle currTextStyle; //this is the text style for particular spans/paragraphs
|
||||
private String currTextStyleName;
|
||||
|
||||
private Stack<ListStyle> listStyleStack = new Stack<ListStyle>();
|
||||
private ListStyle listStyle;
|
||||
|
||||
// True if we are currently in the named style:
|
||||
private boolean curUnderlined;
|
||||
private boolean curBold;
|
||||
private boolean curItalic;
|
||||
|
||||
//have we written the start style tags
|
||||
//yet for the current text style
|
||||
boolean hasWrittenStartStyleTags = false;
|
||||
|
||||
private int pDepth = 0; //<p> can appear inside comments and other things that are already inside <p>
|
||||
//we need to track our pDepth and only output <p> if we're at the main level
|
||||
|
||||
|
||||
/**
 * Passes handler and mappings to the superclass and also keeps the
 * handler in a field of this class.
 */
private OpenDocumentElementMappingContentHandler(
        ContentHandler handler,
        Map<QName, TargetElement> mappings) {
    super(handler, mappings);
    this.handler = handler;
}
|
||||
|
||||
@Override
|
||||
public void characters(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
// only forward content of tags from text:-namespace
|
||||
if (completelyFiltered == 0 && nodeDepth > 0
|
||||
&& textNodeStack.get(nodeDepth - 1)) {
|
||||
if (!hasWrittenStartStyleTags) {
|
||||
updateStyleTags();
|
||||
hasWrittenStartStyleTags = true;
|
||||
}
|
||||
super.characters(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
// helper for checking tags which need complete filtering
|
||||
// (with sub-tags)
|
||||
private boolean needsCompleteFiltering(
|
||||
String namespaceURI, String localName) {
|
||||
if (TEXT_NS.equals(namespaceURI)) {
|
||||
return localName.endsWith("-template")
|
||||
|| localName.endsWith("-style");
|
||||
}
|
||||
return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
|
||||
}
|
||||
|
||||
// map the heading level to <hX> HTML tags
|
||||
private String getXHTMLHeaderTagName(Attributes atts) {
|
||||
String depthStr = atts.getValue(TEXT_NS, "outline-level");
|
||||
if (depthStr == null) {
|
||||
return "h1";
|
||||
}
|
||||
|
||||
int depth = Integer.parseInt(depthStr);
|
||||
if (depth >= 6) {
|
||||
return "h6";
|
||||
} else if (depth <= 1) {
|
||||
return "h1";
|
||||
} else {
|
||||
return "h" + depth;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a node is a text node
|
||||
*/
|
||||
private boolean isTextNode(String namespaceURI, String localName) {
|
||||
if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") && !localName.equals("page-count")) {
|
||||
return true;
|
||||
}
|
||||
if (SVG_NS.equals(namespaceURI)) {
|
||||
return "title".equals(localName) ||
|
||||
"desc".equals(localName);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void startList(String name) throws SAXException {
|
||||
String elementName = "ul";
|
||||
if (name != null) {
|
||||
ListStyle style = listStyleMap.get(name);
|
||||
elementName = style != null ? style.getTag() : "ul";
|
||||
listStyleStack.push(style);
|
||||
}
|
||||
handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
|
||||
}
|
||||
|
||||
private void endList() throws SAXException {
|
||||
String elementName = "ul";
|
||||
if (!listStyleStack.isEmpty()) {
|
||||
ListStyle style = listStyleStack.pop();
|
||||
elementName = style != null ? style.getTag() : "ul";
|
||||
}
|
||||
handler.endElement(XHTML, elementName, elementName);
|
||||
}
|
||||
|
||||
private void startSpan(String name) throws SAXException {
|
||||
if (name == null) {
|
||||
return;
|
||||
}
|
||||
currTextStyle = textStyleMap.get(name);
|
||||
hasWrittenStartStyleTags = false;
|
||||
}
|
||||
|
||||
private void startParagraph(String styleName) throws SAXException {
|
||||
if (pDepth == 0) {
|
||||
handler.startElement(XHTML, "p", "p", EMPTY_ATTRIBUTES);
|
||||
if (styleName != null) {
|
||||
currTextStyle = paragraphTextStyleMap.get(styleName);
|
||||
}
|
||||
hasWrittenStartStyleTags = false;
|
||||
} else {
|
||||
handler.characters(SPACE, 0, SPACE.length);
|
||||
}
|
||||
pDepth++;
|
||||
}
|
||||
|
||||
private void endParagraph() throws SAXException {
|
||||
closeStyleTags();
|
||||
if (pDepth == 1) {
|
||||
handler.endElement(XHTML, "p", "p");
|
||||
} else {
|
||||
handler.characters(SPACE, 0, SPACE.length);
|
||||
}
|
||||
pDepth--;
|
||||
|
||||
}
|
||||
|
||||
private void updateStyleTags() throws SAXException {
|
||||
|
||||
if (currTextStyle == null) {
|
||||
closeStyleTags();
|
||||
return;
|
||||
}
|
||||
if (currTextStyle.bold != curBold) {
|
||||
// Enforce nesting -- must close s and i tags
|
||||
if (curUnderlined) {
|
||||
handler.endElement(XHTML, "u", "u");
|
||||
curUnderlined = false;
|
||||
}
|
||||
if (curItalic) {
|
||||
handler.endElement(XHTML, "i", "i");
|
||||
curItalic = false;
|
||||
}
|
||||
if (currTextStyle.bold) {
|
||||
handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
|
||||
} else {
|
||||
handler.endElement(XHTML, "b", "b");
|
||||
}
|
||||
curBold = currTextStyle.bold;
|
||||
}
|
||||
|
||||
if (currTextStyle.italic != curItalic) {
|
||||
// Enforce nesting -- must close s tag
|
||||
if (curUnderlined) {
|
||||
handler.endElement(XHTML, "u", "u");
|
||||
curUnderlined = false;
|
||||
}
|
||||
if (currTextStyle.italic) {
|
||||
handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
|
||||
} else {
|
||||
handler.endElement(XHTML, "i", "i");
|
||||
}
|
||||
curItalic = currTextStyle.italic;
|
||||
}
|
||||
|
||||
if (currTextStyle.underlined != curUnderlined) {
|
||||
if (currTextStyle.underlined) {
|
||||
handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
|
||||
} else {
|
||||
handler.endElement(XHTML, "u", "u");
|
||||
}
|
||||
curUnderlined = currTextStyle.underlined;
|
||||
}
|
||||
}
|
||||
|
||||
private void endSpan() throws SAXException {
|
||||
updateStyleTags();
|
||||
}
|
||||
|
||||
private void closeStyleTags() throws SAXException {
|
||||
// Close any still open style tags
|
||||
if (curUnderlined) {
|
||||
handler.endElement(XHTML,"u", "u");
|
||||
curUnderlined = false;
|
||||
}
|
||||
if (curItalic) {
|
||||
handler.endElement(XHTML,"i", "i");
|
||||
curItalic = false;
|
||||
}
|
||||
if (curBold) {
|
||||
handler.endElement(XHTML,"b", "b");
|
||||
curBold = false;
|
||||
}
|
||||
currTextStyle = null;
|
||||
hasWrittenStartStyleTags = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(
|
||||
String namespaceURI, String localName, String qName,
|
||||
Attributes attrs) throws SAXException {
|
||||
// keep track of current node type. If it is a text node,
|
||||
// a bit at the current depth its set in textNodeStack.
|
||||
// characters() checks the top bit to determine, if the
|
||||
// actual node is a text node to print out nodeDepth contains
|
||||
// the depth of the current node and also marks top of stack.
|
||||
assert nodeDepth >= 0;
|
||||
|
||||
// Set styles
|
||||
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
|
||||
String family = attrs.getValue(STYLE_NS, "family");
|
||||
if ("text".equals(family)) {
|
||||
currTextStyle = new TextStyle();
|
||||
currTextStyleName = attrs.getValue(STYLE_NS, "name");
|
||||
} else if ("paragraph".equals(family)) {
|
||||
currTextStyle = new TextStyle();
|
||||
currParagraphStyleName = attrs.getValue(STYLE_NS, "name");
|
||||
}
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
|
||||
listStyle = new ListStyle();
|
||||
String name = attrs.getValue(STYLE_NS, "name");
|
||||
listStyleMap.put(name, listStyle);
|
||||
} else if (currTextStyle != null && STYLE_NS.equals(namespaceURI)
|
||||
&& "text-properties".equals(localName)) {
|
||||
String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
|
||||
if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
|
||||
currTextStyle.italic = true;
|
||||
}
|
||||
String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
|
||||
if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)
|
||||
|| (fontWeight != null && Character.isDigit(fontWeight.charAt(0))
|
||||
&& Integer.valueOf(fontWeight) > 500)) {
|
||||
currTextStyle.bold = true;
|
||||
}
|
||||
String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
|
||||
if (underlineStyle != null && !underlineStyle.equals("none")) {
|
||||
currTextStyle.underlined = true;
|
||||
}
|
||||
} else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
|
||||
if ("list-level-style-bullet".equals(localName)) {
|
||||
listStyle.ordered = false;
|
||||
} else if ("list-level-style-number".equals(localName)) {
|
||||
listStyle.ordered = true;
|
||||
}
|
||||
}
|
||||
|
||||
textNodeStack.set(nodeDepth++,
|
||||
isTextNode(namespaceURI, localName));
|
||||
// filter *all* content of some tags
|
||||
assert completelyFiltered >= 0;
|
||||
|
||||
if (needsCompleteFiltering(namespaceURI, localName)) {
|
||||
completelyFiltered++;
|
||||
}
|
||||
// call next handler if no filtering
|
||||
if (completelyFiltered == 0) {
|
||||
// special handling of text:h, that are directly passed
|
||||
// to incoming handler
|
||||
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
|
||||
final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
|
||||
handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
|
||||
startList(attrs.getValue(TEXT_NS, "style-name"));
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
|
||||
startSpan(attrs.getValue(TEXT_NS, "style-name"));
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
|
||||
startParagraph(attrs.getValue(TEXT_NS, "style-name"));
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) {
|
||||
handler.characters(SPACE, 0, 1);
|
||||
} else if ("annotation".equals(localName)) {
|
||||
closeStyleTags();
|
||||
handler.startElement(XHTML, "span", "p", ANNOTATION_ATTRIBUTES);
|
||||
} else if ("note".equals(localName)) {
|
||||
closeStyleTags();
|
||||
handler.startElement(XHTML, "span", "p", NOTE_ATTRIBUTES);
|
||||
} else if ("notes".equals(localName)) {
|
||||
closeStyleTags();
|
||||
handler.startElement(XHTML, "span", "p", NOTES_ATTRIBUTES);
|
||||
} else {
|
||||
super.startElement(namespaceURI, localName, qName, attrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(
|
||||
String namespaceURI, String localName, String qName)
|
||||
throws SAXException {
|
||||
if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
|
||||
if (currTextStyle != null && currTextStyleName != null) {
|
||||
textStyleMap.put(currTextStyleName, currTextStyle);
|
||||
currTextStyleName = null;
|
||||
currTextStyle = null;
|
||||
} else if (currTextStyle != null && currParagraphStyleName != null) {
|
||||
paragraphTextStyleMap.put(currParagraphStyleName, currTextStyle);
|
||||
currParagraphStyleName = null;
|
||||
currTextStyle = null;
|
||||
}
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
|
||||
listStyle = null;
|
||||
}
|
||||
|
||||
// call next handler if no filtering
|
||||
if (completelyFiltered == 0) {
|
||||
// special handling of text:h, that are directly passed
|
||||
// to incoming handler
|
||||
if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
|
||||
final String el = headingStack.pop();
|
||||
handler.endElement(XHTMLContentHandler.XHTML, el, el);
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
|
||||
endList();
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
|
||||
currTextStyle = null;
|
||||
hasWrittenStartStyleTags = false;
|
||||
} else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
|
||||
endParagraph();
|
||||
} else if ("annotation".equals(localName) || "note".equals(localName) ||
|
||||
"notes".equals(localName)) {
|
||||
closeStyleTags();
|
||||
handler.endElement("", localName, localName);
|
||||
} else {
|
||||
super.endElement(namespaceURI, localName, qName);
|
||||
}
|
||||
|
||||
// special handling of tabulators
|
||||
if (TEXT_NS.equals(namespaceURI)
|
||||
&& ("tab-stop".equals(localName)
|
||||
|| "tab".equals(localName))) {
|
||||
this.characters(TAB, 0, TAB.length);
|
||||
}
|
||||
}
|
||||
|
||||
// revert filter for *all* content of some tags
|
||||
if (needsCompleteFiltering(namespaceURI, localName)) {
|
||||
completelyFiltered--;
|
||||
}
|
||||
assert completelyFiltered >= 0;
|
||||
|
||||
// reduce current node depth
|
||||
nodeDepth--;
|
||||
assert nodeDepth >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startPrefixMapping(String prefix, String uri) {
|
||||
// remove prefix mappings as they should not occur in XHTML
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endPrefixMapping(String prefix) {
|
||||
// remove prefix mappings as they should not occur in XHTML
|
||||
}
|
||||
}
|
||||
|
||||
public static final String TEXT_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:text:1.0";
|
||||
|
||||
public static final String TABLE_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:table:1.0";
|
||||
|
||||
public static final String STYLE_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:style:1.0";
|
||||
|
||||
public static final String FORMATTING_OBJECTS_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
|
||||
|
||||
public static final String OFFICE_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:office:1.0";
|
||||
|
||||
public static final String SVG_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
|
||||
|
||||
public static final String PRESENTATION_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
|
||||
|
||||
public static final String DRAW_NS =
|
||||
"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
|
||||
|
||||
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
|
||||
|
||||
protected static final char[] TAB = new char[]{'\t'};
|
||||
|
||||
private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
|
||||
|
||||
/**
 * Mappings between ODF tag names and XHTML tag names
 * (including attributes). All other tag names/attributes are ignored
 * and left out from event stream.
 */
private static final HashMap<QName, TargetElement> MAPPINGS =
        new HashMap<QName, TargetElement>();

static {
    // Elements that map one-to-one onto an XHTML element without attributes,
    // given as {ODF namespace, ODF local name, XHTML element}.
    // text:h-tags are mapped specifically in startElement/endElement.
    final String[][] plainElements = {
            {TEXT_NS, "p", "p"},
            {TEXT_NS, "line-break", "br"},
            {TEXT_NS, "list-item", "li"},
            {TEXT_NS, "note", "span"},
            {OFFICE_NS, "annotation", "span"},
            {PRESENTATION_NS, "notes", "span"},
            {DRAW_NS, "object", "object"},
            {DRAW_NS, "text-box", "div"},
            {SVG_NS, "title", "span"},
            {SVG_NS, "desc", "span"},
            {TEXT_NS, "span", "span"},
    };
    for (String[] entry : plainElements) {
        MAPPINGS.put(new QName(entry[0], entry[1]), new TargetElement(XHTML, entry[2]));
    }

    // text:a becomes <a>, carrying over the xlink href and title attributes
    final HashMap<QName, QName> aAttsMapping = new HashMap<QName, QName>();
    aAttsMapping.put(new QName(XLINK_NS, "href"), new QName("href"));
    aAttsMapping.put(new QName(XLINK_NS, "title"), new QName("title"));
    MAPPINGS.put(new QName(TEXT_NS, "a"), new TargetElement(XHTML, "a", aAttsMapping));

    // create HTML tables from table:-tags
    MAPPINGS.put(new QName(TABLE_NS, "table"), new TargetElement(XHTML, "table"));
    // repeating of rows is ignored; for columns, see below!
    MAPPINGS.put(new QName(TABLE_NS, "table-row"), new TargetElement(XHTML, "tr"));

    // special mapping for rowspan/colspan attributes
    final HashMap<QName, QName> tableCellAttsMapping = new HashMap<QName, QName>();
    tableCellAttsMapping.put(
            new QName(TABLE_NS, "number-columns-spanned"), new QName("colspan"));
    tableCellAttsMapping.put(
            new QName(TABLE_NS, "number-rows-spanned"), new QName("rowspan"));
    /* TODO: The following is not correct, the cell should be repeated not spanned!
     * Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
     * Problems may occur when both spanning and repeating is given, which is not allowed by spec.
     * Cell spanning instead of repeating is not a problem, because OpenOffice uses it
     * only for empty cells.
     */
    tableCellAttsMapping.put(
            new QName(TABLE_NS, "number-columns-repeated"), new QName("colspan"));
    MAPPINGS.put(
            new QName(TABLE_NS, "table-cell"),
            new TargetElement(XHTML, "td", tableCellAttsMapping));
}
|
||||
|
||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||
return Collections.emptySet(); // not a top-level parser
|
||||
}
|
||||
|
||||
public void parse(
|
||||
InputStream stream, ContentHandler handler,
|
||||
Metadata metadata, ParseContext context)
|
||||
throws IOException, SAXException, TikaException {
|
||||
parseInternal(stream,
|
||||
new XHTMLContentHandler(handler, metadata),
|
||||
metadata, context);
|
||||
}
|
||||
|
||||
void parseInternal(
|
||||
InputStream stream, final ContentHandler handler,
|
||||
Metadata metadata, ParseContext context)
|
||||
throws IOException, SAXException, TikaException {
|
||||
|
||||
DefaultHandler dh = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);
|
||||
|
||||
|
||||
XMLReaderUtils.parseSAX(
|
||||
new CloseShieldInputStream(stream),
|
||||
new OfflineContentHandler(
|
||||
new NSNormalizerContentHandler(dh)),
|
||||
context);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,199 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.odf;
|
||||
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.DublinCore;
|
||||
import org.apache.tika.metadata.MSOffice;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Office;
|
||||
import org.apache.tika.metadata.OfficeOpenXMLCore;
|
||||
import org.apache.tika.metadata.PagedText;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.apache.tika.metadata.TikaCoreProperties;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.xml.AttributeDependantMetadataHandler;
|
||||
import org.apache.tika.parser.xml.AttributeMetadataHandler;
|
||||
import org.apache.tika.parser.xml.ElementMetadataHandler;
|
||||
import org.apache.tika.parser.xml.MetadataHandler;
|
||||
import org.apache.tika.parser.xml.XMLParser;
|
||||
import org.apache.tika.sax.TeeContentHandler;
|
||||
import org.apache.tika.sax.xpath.CompositeMatcher;
|
||||
import org.apache.tika.sax.xpath.Matcher;
|
||||
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
||||
import org.apache.tika.sax.xpath.XPathParser;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Parser for OpenDocument <code>meta.xml</code> files.
|
||||
*/
|
||||
public class OpenDocumentMetaParser extends XMLParser {
|
||||
/**
|
||||
* Serial version UID
|
||||
*/
|
||||
private static final long serialVersionUID = -8739250869531737584L;
|
||||
|
||||
private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
|
||||
private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
|
||||
|
||||
/**
|
||||
* @see OfficeOpenXMLCore#SUBJECT
|
||||
* @deprecated use OfficeOpenXMLCore#SUBJECT
|
||||
*/
|
||||
@Deprecated
|
||||
private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR =
|
||||
Property.composite(Office.INITIAL_AUTHOR,
|
||||
new Property[]{Property.externalText("initial-creator")});
|
||||
|
||||
private static ContentHandler getDublinCoreHandler(
|
||||
Metadata metadata, Property property, String element) {
|
||||
return new ElementMetadataHandler(
|
||||
DublinCore.NAMESPACE_URI_DC, element,
|
||||
metadata, property);
|
||||
}
|
||||
|
||||
private static ContentHandler getMeta(
|
||||
ContentHandler ch, Metadata md, Property property, String element) {
|
||||
Matcher matcher = new CompositeMatcher(
|
||||
META_XPATH.parse("//meta:" + element),
|
||||
META_XPATH.parse("//meta:" + element + "//text()"));
|
||||
ContentHandler branch =
|
||||
new MatchingContentHandler(new MetadataHandler(md, property), matcher);
|
||||
return new TeeContentHandler(ch, branch);
|
||||
}
|
||||
|
||||
private static ContentHandler getUserDefined(
|
||||
ContentHandler ch, Metadata md) {
|
||||
Matcher matcher = new CompositeMatcher(
|
||||
META_XPATH.parse("//meta:user-defined/@meta:name"),
|
||||
META_XPATH.parse("//meta:user-defined//text()"));
|
||||
// eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
|
||||
ContentHandler branch = new MatchingContentHandler(
|
||||
new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
|
||||
matcher);
|
||||
return new TeeContentHandler(ch, branch);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
private static ContentHandler getStatistic(
|
||||
ContentHandler ch, Metadata md, String name, String attribute) {
|
||||
Matcher matcher =
|
||||
META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
|
||||
ContentHandler branch = new MatchingContentHandler(
|
||||
new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
|
||||
return new TeeContentHandler(ch, branch);
|
||||
}
|
||||
|
||||
private static ContentHandler getStatistic(
|
||||
ContentHandler ch, Metadata md, Property property, String attribute) {
|
||||
Matcher matcher =
|
||||
META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
|
||||
ContentHandler branch = new MatchingContentHandler(
|
||||
new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
|
||||
return new TeeContentHandler(ch, branch);
|
||||
}
|
||||
|
||||
protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
|
||||
// We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
|
||||
// Process the Dublin Core Attributes
|
||||
ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.CONTRIBUTOR, "contributor"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.TYPE, "type"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.FORMAT, "format"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, "identifier"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, "language"),
|
||||
getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights"));
|
||||
|
||||
// Process the OO Meta Attributes
|
||||
ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
|
||||
// ODF uses dc:date for modified
|
||||
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
|
||||
DublinCore.NAMESPACE_URI_DC, "date",
|
||||
md, TikaCoreProperties.MODIFIED));
|
||||
|
||||
// ODF uses dc:subject for description
|
||||
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
|
||||
DublinCore.NAMESPACE_URI_DC, "subject",
|
||||
md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
|
||||
ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
|
||||
|
||||
ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");
|
||||
ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
|
||||
ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator");
|
||||
ch = getMeta(ch, md, Property.externalText("generator"), "generator");
|
||||
|
||||
// Process the user defined Meta Attributes
|
||||
ch = getUserDefined(ch, md);
|
||||
|
||||
// Process the OO Statistics Attributes
|
||||
ch = getStatistic(ch, md, Office.OBJECT_COUNT, "object-count");
|
||||
ch = getStatistic(ch, md, Office.IMAGE_COUNT, "image-count");
|
||||
ch = getStatistic(ch, md, Office.PAGE_COUNT, "page-count");
|
||||
ch = getStatistic(ch, md, PagedText.N_PAGES, "page-count");
|
||||
ch = getStatistic(ch, md, Office.TABLE_COUNT, "table-count");
|
||||
ch = getStatistic(ch, md, Office.PARAGRAPH_COUNT, "paragraph-count");
|
||||
ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
|
||||
ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
|
||||
|
||||
// Legacy, Tika-1.0 style attributes
|
||||
// TODO Remove these in Tika 2.0
|
||||
ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
|
||||
ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
|
||||
ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
|
||||
ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
|
||||
ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
|
||||
ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
|
||||
ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
|
||||
|
||||
// Legacy Statistics Attributes, replaced with real keys above
|
||||
// TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
|
||||
ch = getStatistic(ch, md, "nbPage", "page-count");
|
||||
ch = getStatistic(ch, md, "nbPara", "paragraph-count");
|
||||
ch = getStatistic(ch, md, "nbWord", "word-count");
|
||||
ch = getStatistic(ch, md, "nbCharacter", "character-count");
|
||||
ch = getStatistic(ch, md, "nbTab", "table-count");
|
||||
ch = getStatistic(ch, md, "nbObject", "object-count");
|
||||
ch = getStatistic(ch, md, "nbImg", "image-count");
|
||||
|
||||
// Normalise the rest
|
||||
ch = new NSNormalizerContentHandler(ch);
|
||||
return ch;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(
|
||||
InputStream stream, ContentHandler handler,
|
||||
Metadata metadata, ParseContext context)
|
||||
throws IOException, SAXException, TikaException {
|
||||
super.parse(stream, handler, metadata, context);
|
||||
// Copy subject to description for OO2
|
||||
String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
|
||||
if (odfSubject != null && !odfSubject.equals("") &&
|
||||
(metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
|
||||
metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,256 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.odf;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
||||
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
||||
import org.apache.tika.io.TikaInputStream;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.TikaCoreProperties;
|
||||
import org.apache.tika.metadata.TikaMetadataKeys;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AbstractParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.sax.EmbeddedContentHandler;
|
||||
import org.apache.tika.sax.EndDocumentShieldingContentHandler;
|
||||
import org.apache.tika.sax.XHTMLContentHandler;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
/**
|
||||
* OpenOffice parser
|
||||
*/
|
||||
public class OpenDocumentParser extends AbstractParser {
|
||||
|
||||
/**
|
||||
* Serial version UID
|
||||
*/
|
||||
private static final long serialVersionUID = -6410276875438618287L;
|
||||
|
||||
private static final Set<MediaType> SUPPORTED_TYPES =
|
||||
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
|
||||
MediaType.application("vnd.sun.xml.writer"),
|
||||
MediaType.application("vnd.oasis.opendocument.text"),
|
||||
MediaType.application("vnd.oasis.opendocument.graphics"),
|
||||
MediaType.application("vnd.oasis.opendocument.presentation"),
|
||||
MediaType.application("vnd.oasis.opendocument.spreadsheet"),
|
||||
MediaType.application("vnd.oasis.opendocument.chart"),
|
||||
MediaType.application("vnd.oasis.opendocument.image"),
|
||||
MediaType.application("vnd.oasis.opendocument.formula"),
|
||||
MediaType.application("vnd.oasis.opendocument.text-master"),
|
||||
MediaType.application("vnd.oasis.opendocument.text-web"),
|
||||
MediaType.application("vnd.oasis.opendocument.text-template"),
|
||||
MediaType.application("vnd.oasis.opendocument.graphics-template"),
|
||||
MediaType.application("vnd.oasis.opendocument.presentation-template"),
|
||||
MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
|
||||
MediaType.application("vnd.oasis.opendocument.chart-template"),
|
||||
MediaType.application("vnd.oasis.opendocument.image-template"),
|
||||
MediaType.application("vnd.oasis.opendocument.formula-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.text"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.graphics"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.presentation"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.chart"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.image"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.formula"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.text-master"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.text-web"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.text-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.chart-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.image-template"),
|
||||
MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
|
||||
|
||||
private static final String META_NAME = "meta.xml";
|
||||
|
||||
private Parser meta = new OpenDocumentMetaParser();
|
||||
|
||||
private Parser content = new OpenDocumentContentParser();
|
||||
|
||||
public Parser getMetaParser() {
|
||||
return meta;
|
||||
}
|
||||
|
||||
public void setMetaParser(Parser meta) {
|
||||
this.meta = meta;
|
||||
}
|
||||
|
||||
public Parser getContentParser() {
|
||||
return content;
|
||||
}
|
||||
|
||||
public void setContentParser(Parser content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||
return SUPPORTED_TYPES;
|
||||
}
|
||||
|
||||
public void parse(
|
||||
InputStream stream, ContentHandler baseHandler,
|
||||
Metadata metadata, ParseContext context)
|
||||
throws IOException, SAXException, TikaException {
|
||||
|
||||
// Open the Zip stream
|
||||
// Use a File if we can, and an already open zip is even better
|
||||
ZipFile zipFile = null;
|
||||
ZipInputStream zipStream = null;
|
||||
if (stream instanceof TikaInputStream) {
|
||||
TikaInputStream tis = (TikaInputStream) stream;
|
||||
Object container = ((TikaInputStream) stream).getOpenContainer();
|
||||
if (container instanceof ZipFile) {
|
||||
zipFile = (ZipFile) container;
|
||||
} else if (tis.hasFile()) {
|
||||
zipFile = new ZipFile(tis.getFile());
|
||||
} else {
|
||||
zipStream = new ZipInputStream(stream);
|
||||
}
|
||||
} else {
|
||||
zipStream = new ZipInputStream(stream);
|
||||
}
|
||||
|
||||
// Prepare to handle the content
|
||||
XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
|
||||
|
||||
// As we don't know which of the metadata or the content
|
||||
// we'll hit first, catch the endDocument call initially
|
||||
EndDocumentShieldingContentHandler handler =
|
||||
new EndDocumentShieldingContentHandler(xhtml);
|
||||
|
||||
if (zipFile != null) {
|
||||
try {
|
||||
handleZipFile(zipFile, metadata, context, handler);
|
||||
} finally {
|
||||
//Do we want to close silently == catch an exception here?
|
||||
zipFile.close();
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
handleZipStream(zipStream, metadata, context, handler);
|
||||
} finally {
|
||||
//Do we want to close silently == catch an exception here?
|
||||
zipStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
// Only now call the end document
|
||||
if (handler.getEndDocumentWasCalled()) {
|
||||
handler.reallyEndDocument();
|
||||
}
|
||||
}
|
||||
|
||||
private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
|
||||
ZipEntry entry = zipStream.getNextEntry();
|
||||
if (entry == null) {
|
||||
throw new IOException("No entries found in ZipInputStream");
|
||||
}
|
||||
do {
|
||||
handleZipEntry(entry, zipStream, metadata, context, handler);
|
||||
entry = zipStream.getNextEntry();
|
||||
} while (entry != null);
|
||||
}
|
||||
|
||||
private void handleZipFile(ZipFile zipFile, Metadata metadata,
|
||||
ParseContext context, EndDocumentShieldingContentHandler handler)
|
||||
throws IOException, TikaException, SAXException {
|
||||
// If we can, process the metadata first, then the
|
||||
// rest of the file afterwards (TIKA-1353)
|
||||
// Only possible to guarantee that when opened from a file not a stream
|
||||
|
||||
ZipEntry entry = zipFile.getEntry(META_NAME);
|
||||
if (entry != null) {
|
||||
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
|
||||
}
|
||||
|
||||
Enumeration<? extends ZipEntry> entries = zipFile.entries();
|
||||
while (entries.hasMoreElements()) {
|
||||
entry = entries.nextElement();
|
||||
if (!META_NAME.equals(entry.getName())) {
|
||||
handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
|
||||
}
|
||||
}
|
||||
}
|
||||
private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
|
||||
ParseContext context, EndDocumentShieldingContentHandler handler)
|
||||
throws IOException, SAXException, TikaException {
|
||||
if (entry == null) return;
|
||||
|
||||
if (entry.getName().equals("mimetype")) {
|
||||
String type = IOUtils.toString(zip, UTF_8);
|
||||
metadata.set(Metadata.CONTENT_TYPE, type);
|
||||
} else if (entry.getName().equals(META_NAME)) {
|
||||
meta.parse(zip, new DefaultHandler(), metadata, context);
|
||||
} else if (entry.getName().endsWith("content.xml")) {
|
||||
if (content instanceof OpenDocumentContentParser) {
|
||||
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
|
||||
} else {
|
||||
// Foreign content parser was set:
|
||||
content.parse(zip, handler, metadata, context);
|
||||
}
|
||||
} else if (entry.getName().endsWith("styles.xml")) {
|
||||
if (content instanceof OpenDocumentContentParser) {
|
||||
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
|
||||
} else {
|
||||
// Foreign content parser was set:
|
||||
content.parse(zip, handler, metadata, context);
|
||||
}
|
||||
} else {
|
||||
String embeddedName = entry.getName();
|
||||
//scrape everything under Thumbnails/ and Pictures/
|
||||
if (embeddedName.contains("Thumbnails/") ||
|
||||
embeddedName.contains("Pictures/")) {
|
||||
EmbeddedDocumentExtractor embeddedDocumentExtractor =
|
||||
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
|
||||
Metadata embeddedMetadata = new Metadata();
|
||||
embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, entry.getName());
|
||||
/* if (embeddedName.startsWith("Thumbnails/")) {
|
||||
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
|
||||
TikaCoreProperties.EmbeddedResourceType.THUMBNAIL);
|
||||
}*/
|
||||
if (embeddedName.contains("Pictures/")) {
|
||||
embeddedMetadata.set(TikaMetadataKeys.EMBEDDED_RESOURCE_TYPE,
|
||||
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
|
||||
}
|
||||
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
|
||||
embeddedDocumentExtractor.parseEmbedded(zip,
|
||||
new EmbeddedContentHandler(handler), embeddedMetadata, false);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Base class for SAX handlers that map SAX events into document metadata.
|
||||
*
|
||||
* @since Apache Tika 0.10
|
||||
*/
|
||||
class AbstractMetadataHandler extends DefaultHandler {
|
||||
|
||||
private final Metadata metadata;
|
||||
private final Property property;
|
||||
private final String name;
|
||||
|
||||
protected AbstractMetadataHandler(Metadata metadata, String name) {
|
||||
this.metadata = metadata;
|
||||
this.property = null;
|
||||
this.name = name;
|
||||
}
|
||||
protected AbstractMetadataHandler(Metadata metadata, Property property) {
|
||||
this.metadata = metadata;
|
||||
this.property = property;
|
||||
this.name = property.getName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given metadata value. The value is ignored if it is
|
||||
* <code>null</code> or empty. If the metadata entry already exists,
|
||||
* then the given value is appended to it with a comma as the separator.
|
||||
*
|
||||
* @param value metadata value
|
||||
*/
|
||||
protected void addMetadata(String value) {
|
||||
if (value != null && value.length() > 0) {
|
||||
if (metadata.isMultiValued(name)) {
|
||||
// Add the value, assuming it's not already there
|
||||
List<String> previous = Arrays.asList(metadata.getValues(name));
|
||||
if (!previous.contains(value)) {
|
||||
if (property != null) {
|
||||
metadata.add(property, value);
|
||||
} else {
|
||||
metadata.add(name, value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Set the value, assuming it's not already there
|
||||
String previous = metadata.get(name);
|
||||
if (previous != null && previous.length() > 0) {
|
||||
if (!previous.equals(value)) {
|
||||
if (property != null) {
|
||||
if (property.isMultiValuePermitted()) {
|
||||
metadata.add(property, value);
|
||||
} else {
|
||||
// Replace the existing value if isMultiValuePermitted is false
|
||||
metadata.set(property, value);
|
||||
}
|
||||
} else {
|
||||
metadata.add(name, value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (property != null) {
|
||||
metadata.set(property, value);
|
||||
} else {
|
||||
metadata.set(name, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
/**
|
||||
* This adds a Metadata entry for a given node.
|
||||
* The textual content of the node is used as the
|
||||
* value, and the Metadata name is taken from
|
||||
* an attribute, with a prefix if required.
|
||||
*/
|
||||
public class AttributeDependantMetadataHandler extends DefaultHandler {
|
||||
|
||||
private final Metadata metadata;
|
||||
|
||||
private final String nameHoldingAttribute;
|
||||
private final String namePrefix;
|
||||
private String name;
|
||||
|
||||
private final StringBuilder buffer = new StringBuilder();
|
||||
|
||||
public AttributeDependantMetadataHandler(Metadata metadata, String nameHoldingAttribute, String namePrefix) {
|
||||
this.metadata = metadata;
|
||||
this.nameHoldingAttribute = nameHoldingAttribute;
|
||||
this.namePrefix = namePrefix;
|
||||
}
|
||||
|
||||
public void addMetadata(String value) {
|
||||
if(name == null || name.length() == 0) {
|
||||
// We didn't find the attribute which holds the name
|
||||
return;
|
||||
}
|
||||
if (value.length() > 0) {
|
||||
String previous = metadata.get(name);
|
||||
if (previous != null && previous.length() > 0) {
|
||||
value = previous + ", " + value;
|
||||
}
|
||||
metadata.set(name, value);
|
||||
}
|
||||
}
|
||||
|
||||
public void endElement(String uri, String localName, String name) {
|
||||
addMetadata(buffer.toString());
|
||||
buffer.setLength(0);
|
||||
}
|
||||
|
||||
public void startElement(
|
||||
String uri, String localName, String name, Attributes attributes) {
|
||||
String rawName = attributes.getValue(nameHoldingAttribute);
|
||||
if (rawName != null) {
|
||||
if (namePrefix == null) {
|
||||
this.name = rawName;
|
||||
} else {
|
||||
this.name = namePrefix + rawName;
|
||||
}
|
||||
}
|
||||
// All other attributes are ignored
|
||||
}
|
||||
|
||||
|
||||
public void characters(char[] ch, int start, int length) {
|
||||
buffer.append(ch, start, length);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* SAX event handler that maps the contents of an XML attribute into
|
||||
* a metadata field.
|
||||
*
|
||||
* @since Apache Tika 0.10
|
||||
*/
|
||||
public class AttributeMetadataHandler extends AbstractMetadataHandler {
|
||||
|
||||
private final String uri;
|
||||
|
||||
private final String localName;
|
||||
|
||||
public AttributeMetadataHandler(
|
||||
String uri, String localName, Metadata metadata, String name) {
|
||||
super(metadata, name);
|
||||
this.uri = uri;
|
||||
this.localName = localName;
|
||||
}
|
||||
public AttributeMetadataHandler(
|
||||
String uri, String localName, Metadata metadata, Property property) {
|
||||
super(metadata, property);
|
||||
this.uri = uri;
|
||||
this.localName = localName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(
|
||||
String uri, String localName, String qName, Attributes attributes)
|
||||
throws SAXException {
|
||||
for (int i = 0; i < attributes.getLength(); i++) {
|
||||
if (attributes.getURI(i).equals(this.uri)
|
||||
&& attributes.getLocalName(i).equals(this.localName)) {
|
||||
addMetadata(attributes.getValue(i).trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.tika.metadata.DublinCore;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.apache.tika.metadata.TikaCoreProperties;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.sax.TeeContentHandler;
|
||||
import org.xml.sax.ContentHandler;
|
||||
|
||||
/**
|
||||
* Dublin Core metadata parser
|
||||
*/
|
||||
public class DcXMLParser extends XMLParser {
|
||||
|
||||
/** Serial version UID */
|
||||
private static final long serialVersionUID = 4905318835463880819L;
|
||||
|
||||
private static ContentHandler getDublinCoreHandler(
|
||||
Metadata metadata, Property property, String element) {
|
||||
return new ElementMetadataHandler(
|
||||
DublinCore.NAMESPACE_URI_DC, element,
|
||||
metadata, property);
|
||||
}
|
||||
|
||||
protected ContentHandler getContentHandler(
|
||||
ContentHandler handler, Metadata metadata, ParseContext context) {
|
||||
return new TeeContentHandler(
|
||||
super.getContentHandler(handler, metadata, context),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"),
|
||||
getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights"));
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,241 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.xml.sax.Attributes;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* SAX event handler that maps the contents of an XML element into
|
||||
* a metadata field.
|
||||
*
|
||||
* @since Apache Tika 0.10
|
||||
*/
|
||||
public class ElementMetadataHandler extends AbstractMetadataHandler {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ElementMetadataHandler.class);
|
||||
|
||||
private static final String LOCAL_NAME_RDF_BAG = "Bag";
|
||||
private static final String LOCAL_NAME_RDF_LI = "li";
|
||||
private static final String URI_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
||||
|
||||
private final String uri;
|
||||
|
||||
private final String localName;
|
||||
|
||||
private final Metadata metadata;
|
||||
|
||||
private final String name;
|
||||
private Property targetProperty;
|
||||
|
||||
private final boolean allowDuplicateValues;
|
||||
private final boolean allowEmptyValues;
|
||||
|
||||
/**
|
||||
* The buffer used to capture characters when inside a bag li element.
|
||||
*/
|
||||
private final StringBuilder bufferBagged = new StringBuilder();
|
||||
|
||||
/**
|
||||
* The buffer used to capture characters inside standard elements.
|
||||
*/
|
||||
private final StringBuilder bufferBagless = new StringBuilder();
|
||||
|
||||
/**
|
||||
* Whether or not the value was found in a standard element structure or inside a bag.
|
||||
*/
|
||||
private boolean isBagless = true;
|
||||
|
||||
private int matchLevel = 0;
|
||||
private int parentMatchLevel = 0;
|
||||
|
||||
/**
|
||||
* Constructor for string metadata keys.
|
||||
*
|
||||
* @param uri the uri of the namespace of the element
|
||||
* @param localName the local name of the element
|
||||
* @param metadata the Tika metadata object to populate
|
||||
* @param name the Tika metadata field key
|
||||
*/
|
||||
public ElementMetadataHandler(
|
||||
String uri, String localName, Metadata metadata, String name) {
|
||||
super(metadata, name);
|
||||
this.uri = uri;
|
||||
this.localName = localName;
|
||||
this.metadata = metadata;
|
||||
this.name = name;
|
||||
this.allowDuplicateValues = false;
|
||||
this.allowEmptyValues = false;
|
||||
LOG.trace("created simple handler for {}", this.name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor for string metadata keys which allows change of behavior
|
||||
* for duplicate and empty entry values.
|
||||
*
|
||||
* @param uri the uri of the namespace of the element
|
||||
* @param localName the local name of the element
|
||||
* @param metadata the Tika metadata object to populate
|
||||
* @param name the Tika metadata field key
|
||||
* @param allowDuplicateValues add duplicate values to the Tika metadata
|
||||
* @param allowEmptyValues add empty values to the Tika metadata
|
||||
*/
|
||||
public ElementMetadataHandler(
|
||||
String uri, String localName, Metadata metadata, String name, boolean allowDuplicateValues, boolean allowEmptyValues) {
|
||||
super(metadata, name);
|
||||
this.uri = uri;
|
||||
this.localName = localName;
|
||||
this.metadata = metadata;
|
||||
this.name = name;
|
||||
this.allowDuplicateValues = allowDuplicateValues;
|
||||
this.allowEmptyValues = allowEmptyValues;
|
||||
LOG.trace("created simple handler for {}", this.name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor for Property metadata keys.
|
||||
*
|
||||
* @param uri the uri of the namespace of the element
|
||||
* @param localName the local name of the element
|
||||
* @param metadata the Tika metadata object to populate
|
||||
* @param targetProperty the Tika metadata Property key
|
||||
*/
|
||||
public ElementMetadataHandler(
|
||||
String uri, String localName, Metadata metadata, Property targetProperty) {
|
||||
super(metadata, targetProperty);
|
||||
this.uri = uri;
|
||||
this.localName = localName;
|
||||
this.metadata = metadata;
|
||||
this.targetProperty = targetProperty;
|
||||
this.name = targetProperty.getName();
|
||||
this.allowDuplicateValues = false;
|
||||
this.allowEmptyValues = false;
|
||||
LOG.trace("created property handler for {}", this.name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor for Property metadata keys which allows change of behavior
|
||||
* for duplicate and empty entry values.
|
||||
*
|
||||
* @param uri the uri of the namespace of the element
|
||||
* @param localName the local name of the element
|
||||
* @param metadata the Tika metadata object to populate
|
||||
* @param targetProperty the Tika metadata Property key
|
||||
* @param allowDuplicateValues add duplicate values to the Tika metadata
|
||||
* @param allowEmptyValues add empty values to the Tika metadata
|
||||
*/
|
||||
public ElementMetadataHandler(
|
||||
String uri, String localName, Metadata metadata, Property targetProperty, boolean allowDuplicateValues, boolean allowEmptyValues) {
|
||||
super(metadata, targetProperty);
|
||||
this.uri = uri;
|
||||
this.localName = localName;
|
||||
this.metadata = metadata;
|
||||
this.targetProperty = targetProperty;
|
||||
this.name = targetProperty.getName();
|
||||
this.allowDuplicateValues = allowDuplicateValues;
|
||||
this.allowEmptyValues = allowEmptyValues;
|
||||
LOG.trace("created property handler for {}", this.name);
|
||||
}
|
||||
|
||||
protected boolean isMatchingParentElement(String uri, String localName) {
|
||||
return (uri.equals(this.uri) && localName.equals(this.localName));
|
||||
}
|
||||
|
||||
protected boolean isMatchingElement(String uri, String localName) {
|
||||
// match if we're inside the parent element or within some bag element
|
||||
return (uri.equals(this.uri) && localName.equals(this.localName)) ||
|
||||
(parentMatchLevel > 0 &&
|
||||
((uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_BAG)) ||
|
||||
(uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_LI))
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(
|
||||
String uri, String localName, String name, Attributes attributes) {
|
||||
if (isMatchingElement(uri, localName)) {
|
||||
matchLevel++;
|
||||
}
|
||||
if (isMatchingParentElement(uri, localName)) {
|
||||
parentMatchLevel++;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String uri, String localName, String name) {
|
||||
if (isMatchingParentElement(uri, localName)) {
|
||||
parentMatchLevel--;
|
||||
}
|
||||
if (isMatchingElement(uri, localName)) {
|
||||
matchLevel--;
|
||||
if (matchLevel == 2) {
|
||||
// we're inside a bag li element, add the bagged buffer
|
||||
addMetadata(bufferBagged.toString().trim());
|
||||
bufferBagged.setLength(0);
|
||||
isBagless = false;
|
||||
}
|
||||
if (matchLevel == 0 && isBagless) {
|
||||
String valueBagless = bufferBagless.toString();
|
||||
if (valueBagless.length() > 0 && !valueBagless.contains(LOCAL_NAME_RDF_BAG)) {
|
||||
// we're in a standard element, add the bagless buffer
|
||||
addMetadata(valueBagless.trim());
|
||||
bufferBagless.setLength(0);
|
||||
}
|
||||
isBagless = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void characters(char[] ch, int start, int length) {
|
||||
// We need to append to both buffers since we don't if we're inside a bag until we're done
|
||||
if (parentMatchLevel > 0 && matchLevel > 2) {
|
||||
bufferBagged.append(ch, start, length);
|
||||
}
|
||||
if (parentMatchLevel > 0 && matchLevel > 0) {
|
||||
bufferBagless.append(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ignorableWhitespace(char[] ch, int start, int length) {
|
||||
characters(ch, start, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addMetadata(String value) {
|
||||
LOG.trace("adding {}={}", name, value);
|
||||
if (targetProperty != null && targetProperty.isMultiValuePermitted()) {
|
||||
if ((value != null && value.length() > 0) || allowEmptyValues) {
|
||||
if (value == null || value.length() == 0 && allowEmptyValues) {
|
||||
value = "";
|
||||
}
|
||||
String[] previous = metadata.getValues(name);
|
||||
if (previous == null || !Arrays.asList(previous).contains(value) || allowDuplicateValues) {
|
||||
metadata.add(targetProperty, value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
super.addMetadata(value);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.commons.codec.binary.Base64;
|
||||
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
|
||||
import org.apache.tika.extractor.EmbeddedDocumentUtil;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.TikaMetadataKeys;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
public class FictionBookParser extends XMLParser {
|
||||
private static final long serialVersionUID = 4195954546491524374L;
|
||||
|
||||
private static final Set<MediaType> SUPPORTED_TYPES =
|
||||
Collections.singleton(MediaType.application("x-fictionbook+xml"));
|
||||
@Override
|
||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||
return SUPPORTED_TYPES;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata, ParseContext context) {
|
||||
return new BinaryElementsDataHandler(
|
||||
EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context), handler);
|
||||
}
|
||||
|
||||
private static class BinaryElementsDataHandler extends DefaultHandler {
|
||||
private static final String ELEMENT_BINARY = "binary";
|
||||
|
||||
private boolean binaryMode = false;
|
||||
private static final String ATTRIBUTE_ID = "id";
|
||||
|
||||
private final EmbeddedDocumentExtractor partExtractor;
|
||||
private final ContentHandler handler;
|
||||
private final StringBuilder binaryData = new StringBuilder();
|
||||
private Metadata metadata;
|
||||
private static final String ATTRIBUTE_CONTENT_TYPE = "content-type";
|
||||
|
||||
private BinaryElementsDataHandler(EmbeddedDocumentExtractor partExtractor, ContentHandler handler) {
|
||||
this.partExtractor = partExtractor;
|
||||
this.handler = handler;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
|
||||
binaryMode = ELEMENT_BINARY.equals(localName);
|
||||
if (binaryMode) {
|
||||
binaryData.setLength(0);
|
||||
metadata = new Metadata();
|
||||
|
||||
metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, attributes.getValue(ATTRIBUTE_ID));
|
||||
metadata.set(Metadata.CONTENT_TYPE, attributes.getValue(ATTRIBUTE_CONTENT_TYPE));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endElement(String uri, String localName, String qName) throws SAXException {
|
||||
if (binaryMode) {
|
||||
try {
|
||||
partExtractor.parseEmbedded(
|
||||
new ByteArrayInputStream(Base64.decodeBase64(binaryData.toString())),
|
||||
handler,
|
||||
metadata,
|
||||
true
|
||||
);
|
||||
} catch (IOException e) {
|
||||
throw new SAXException("IOException in parseEmbedded", e);
|
||||
}
|
||||
|
||||
binaryMode = false;
|
||||
binaryData.setLength(0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void characters(char[] ch, int start, int length) throws SAXException {
|
||||
if (!binaryMode) {
|
||||
handler.characters(ch, start, length);
|
||||
} else {
|
||||
binaryData.append(ch, start, length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
|
||||
handler.ignorableWhitespace(ch, start, length);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.Property;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
/**
|
||||
* This adds Metadata entries with a specified name for
|
||||
* the textual content of a node (if present), and
|
||||
* all attribute values passed through the matcher
|
||||
* (but not their names).
|
||||
*
|
||||
* @deprecated Use the {@link AttributeMetadataHandler} and
|
||||
* {@link ElementMetadataHandler} classes instead
|
||||
*/
|
||||
public class MetadataHandler extends DefaultHandler {
|
||||
|
||||
private final Metadata metadata;
|
||||
|
||||
private final Property property;
|
||||
private final String name;
|
||||
|
||||
private final StringBuilder buffer = new StringBuilder();
|
||||
|
||||
public MetadataHandler(Metadata metadata, String name) {
|
||||
this.metadata = metadata;
|
||||
this.property = null;
|
||||
this.name = name;
|
||||
}
|
||||
public MetadataHandler(Metadata metadata, Property property) {
|
||||
this.metadata = metadata;
|
||||
this.property = property;
|
||||
this.name = property.getName();
|
||||
}
|
||||
|
||||
public void addMetadata(String value) {
|
||||
if (value.length() > 0) {
|
||||
String previous = metadata.get(name);
|
||||
if (previous != null && previous.length() > 0) {
|
||||
value = previous + ", " + value;
|
||||
}
|
||||
|
||||
if (this.property != null) {
|
||||
metadata.set(property, value);
|
||||
} else {
|
||||
metadata.set(name, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void endElement(String uri, String localName, String name) {
|
||||
addMetadata(buffer.toString());
|
||||
buffer.setLength(0);
|
||||
}
|
||||
|
||||
public void startElement(
|
||||
String uri, String localName, String name, Attributes attributes) {
|
||||
for (int i = 0; i < attributes.getLength(); i++) {
|
||||
addMetadata(attributes.getValue(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void characters(char[] ch, int start, int length) {
|
||||
buffer.append(ch, start, length);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.tika.parser.xml;
|
||||
|
||||
import org.apache.commons.io.input.CloseShieldInputStream;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AbstractParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.sax.EmbeddedContentHandler;
|
||||
import org.apache.tika.sax.OfflineContentHandler;
|
||||
import org.apache.tika.sax.TaggedContentHandler;
|
||||
import org.apache.tika.sax.TextContentHandler;
|
||||
import org.apache.tika.sax.XHTMLContentHandler;
|
||||
import org.apache.tika.utils.XMLReaderUtils;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* XML parser.
|
||||
*/
|
||||
public class XMLParser extends AbstractParser {
|
||||
|
||||
/** Serial version UID */
|
||||
private static final long serialVersionUID = -6028836725280212837L;
|
||||
|
||||
private static final Set<MediaType> SUPPORTED_TYPES =
|
||||
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
|
||||
MediaType.application("xml"),
|
||||
MediaType.image("svg+xml"))));
|
||||
|
||||
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||
return SUPPORTED_TYPES;
|
||||
}
|
||||
|
||||
public void parse(
|
||||
InputStream stream, ContentHandler handler,
|
||||
Metadata metadata, ParseContext context)
|
||||
throws IOException, SAXException, TikaException {
|
||||
if (metadata.get(Metadata.CONTENT_TYPE) == null) {
|
||||
metadata.set(Metadata.CONTENT_TYPE, "application/xml");
|
||||
}
|
||||
|
||||
final XHTMLContentHandler xhtml =
|
||||
new XHTMLContentHandler(handler, metadata);
|
||||
xhtml.startDocument();
|
||||
xhtml.startElement("p");
|
||||
|
||||
TaggedContentHandler tagged = new TaggedContentHandler(handler);
|
||||
try {
|
||||
XMLReaderUtils.parseSAX(
|
||||
new CloseShieldInputStream(stream),
|
||||
new OfflineContentHandler(new EmbeddedContentHandler(
|
||||
getContentHandler(tagged, metadata, context))), context);
|
||||
} catch (SAXException e) {
|
||||
tagged.throwIfCauseOf(e);
|
||||
throw new TikaException("XML parse error", e);
|
||||
} finally {
|
||||
xhtml.endElement("p");
|
||||
xhtml.endDocument();
|
||||
}
|
||||
}
|
||||
|
||||
protected ContentHandler getContentHandler(
|
||||
ContentHandler handler, Metadata metadata, ParseContext context) {
|
||||
return new TextContentHandler(handler, true);
|
||||
}
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
package docspell.extract
|
||||
|
||||
import docspell.extract.ocr.OcrConfig
|
||||
|
||||
/** Configuration for text extraction.
  *
  * @param ocr settings for the OCR based extraction
  * @param pdf settings specific to PDF files
  */
case class ExtractConfig(
    ocr: OcrConfig,
    pdf: PdfConfig
)
|
@ -0,0 +1,39 @@
|
||||
package docspell.extract
|
||||
|
||||
import docspell.common.MimeType
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
/** Outcome of a text extraction attempt. */
sealed trait ExtractResult {

  /** The extracted text, if this result carries one. */
  def textOption: Option[String]
}
|
||||
|
||||
object ExtractResult {

  /** No extractor is available for the given mime type; carries no text. */
  case class UnsupportedFormat(mime: MimeType) extends ExtractResult {
    val textOption = None
  }

  /** Extraction was attempted but ended with `ex`; carries no text. */
  case class Failure(ex: Throwable) extends ExtractResult {
    val textOption = None
  }

  /** Extraction produced `text`. */
  case class Success(text: String) extends ExtractResult {
    val textOption = Some(text)
  }

  // Constructors that widen the result to `ExtractResult`.

  def unsupportedFormat(mt: MimeType): ExtractResult =
    UnsupportedFormat(mt)

  def failure(ex: Throwable): ExtractResult =
    Failure(ex)

  def success(text: String): ExtractResult =
    Success(text)

  /** Converts a `Try`: a failed one becomes [[Failure]], a successful one [[Success]]. */
  def fromTry(r: Try[String]): ExtractResult =
    r.fold(failure, success)

  /** Converts an `Either`: `Left` becomes [[Failure]], `Right` becomes [[Success]]. */
  def fromEither(e: Either[Throwable, String]): ExtractResult =
    e match {
      case Left(ex)    => failure(ex)
      case Right(text) => success(text)
    }
}
|
@ -0,0 +1,88 @@
|
||||
package docspell.extract
|
||||
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import docspell.common._
|
||||
import docspell.extract.ocr.{OcrType, TextExtract}
|
||||
import docspell.extract.odf.{OdfExtract, OdfType}
|
||||
import docspell.extract.poi.{PoiExtract, PoiType}
|
||||
import docspell.extract.rtf.RtfExtract
|
||||
import fs2.Stream
|
||||
import docspell.files.TikaMimetype
|
||||
import docspell.files.ImageSize
|
||||
|
||||
/** Extracts text from binary data inside the effect `F`. */
trait Extraction[F[_]] {

  /** Extracts the text of `data`.
    *
    * @param data     the file contents
    * @param dataType what is known about the type of `data`
    * @param lang     the language to use for extraction
    * @return the extraction outcome
    */
  def extractText(
      data: Stream[F, Byte],
      dataType: DataType,
      lang: Language
  ): F[ExtractResult]
}
|
||||
|
||||
object Extraction {

  /** Creates an [[Extraction]] that resolves the mime type of the data and
    * dispatches to the matching extractor.
    *
    * @param blocker passed to the PDF and OCR extractors
    * @param logger  receives progress and diagnostic messages
    * @param cfg     OCR and PDF settings
    */
  def create[F[_]: Sync: ContextShift](
      blocker: Blocker,
      logger: Logger[F],
      cfg: ExtractConfig
  ): Extraction[F] =
    new Extraction[F] {
      def extractText(
          data: Stream[F, Byte],
          dataType: DataType,
          lang: Language
      ): F[ExtractResult] =
        TikaMimetype.resolve(dataType, data).flatMap {
          case MimeType.pdf =>
            PdfExtract
              .get(data, blocker, lang, cfg.pdf.minTextLen, cfg.ocr, logger)
              .map(ExtractResult.fromEither)

          case PoiType(mt) =>
            PoiExtract.get(data, mt).map(ExtractResult.fromEither)

          case RtfExtract.rtfType =>
            RtfExtract.get(data).map(ExtractResult.fromEither)

          case OdfType(_) =>
            OdfExtract.get(data).map(ExtractResult.fromEither)

          case OcrType(mt) =>
            val doExtract = TextExtract
              .extractOCR(data, blocker, logger, lang.iso3, cfg.ocr)
              .compile
              .lastOrError
              .attempt
              .map(ExtractResult.fromEither)

            // Refuse images above the configured limit before starting OCR.
            ImageSize.get(data).flatMap {
              case Some(dim) =>
                if (dim.product > cfg.ocr.maxImageSize) {
                  logger.info(s"Image size (${dim.product}) is too large (max ${cfg.ocr.maxImageSize}).") *>
                    ExtractResult
                      .failure(
                        new Exception(
                          s"Image size (${dim.width}x${dim.height}) is too large (max ${cfg.ocr.maxImageSize})."
                        )
                      )
                      .pure[F]
                } else {
                  doExtract
                }
              case None =>
                logger.info(s"Cannot read image data from ${mt.asString}. Extracting anyways.") *>
                  doExtract
            }

          case OdfType.container =>
            logger.info(s"File detected as ${OdfType.container}. Try to read as OpenDocument file.") *>
              OdfExtract.get(data).map(ExtractResult.fromEither)

          case mt @ MimeType("text", sub) if !sub.contains("html") =>
            // The decoder emits one string per chunk. All of them must be
            // concatenated; taking only the last one would drop everything
            // but the final chunk of larger files. An empty stream yields "".
            logger.info(s"File detected as ${mt.asString}. Returning itself as text.") *>
              data.through(fs2.text.utf8Decode).compile.foldMonoid.map { txt =>
                ExtractResult.success(txt.trim)
              }

          case mt =>
            ExtractResult.unsupportedFormat(mt).pure[F]
        }
    }

}
|
@ -0,0 +1,3 @@
|
||||
package docspell.extract
|
||||
|
||||
/** Settings for PDF text extraction.
  *
  * @param minTextLen length threshold for text read from a PDF
  */
case class PdfConfig(
    minTextLen: Int
)
|
@ -0,0 +1,51 @@
|
||||
package docspell.extract
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
import fs2.Stream
|
||||
import docspell.common.{Language, Logger}
|
||||
import docspell.extract.ocr.{OcrConfig, TextExtract}
|
||||
import docspell.extract.pdfbox.PdfboxExtract
|
||||
|
||||
object PdfExtract {

  /** Extracts text from a PDF, first by stripping it with pdfbox and, if that
    * fails or yields fewer than `stripMinLen` characters, by running OCR.
    * When both texts are available, the longer one is returned (the stripped
    * text wins on equal length).
    *
    * @param in          the PDF contents
    * @param stripMinLen stripped text of at least this length is returned as is
    * @return the chosen text, or the error of the last attempted step
    */
  def get[F[_]: Sync: ContextShift](
      in: Stream[F, Byte],
      blocker: Blocker,
      lang: Language,
      stripMinLen: Int,
      ocrCfg: OcrConfig,
      logger: Logger[F]
  ): F[Either[Throwable, String]] = {

    val ocrText: F[String] =
      TextExtract.extractOCR(in, blocker, logger, lang.iso3, ocrCfg).compile.lastOrError

    def pickLonger(ocrStr: String, strippedStr: String): F[String] = {
      val preferOcr = ocrStr.length > strippedStr.length
      val message =
        if (preferOcr)
          s"Using OCR text, as it is longer (${ocrStr.length} > ${strippedStr.length})"
        else
          s"Using stripped text (not OCR), as it is longer (${strippedStr.length} > ${ocrStr.length})"
      val chosen = if (preferOcr) ocrStr else strippedStr
      logger.info(message) *> chosen.pure[F]
    }

    //maybe better: inspect the pdf and decide whether ocr or not
    val stripped: F[Either[Throwable, String]] =
      logger.debug("Trying to strip text from pdf using pdfbox.") *> PdfboxExtract.get[F](in)

    stripped.flatMap {
      case Left(ex) =>
        logger.info(
          s"Stripping text from PDF resulted in an error: ${ex.getMessage}. Trying with OCR. "
        ) >> ocrText.attempt

      case Right(str) if str.length >= stripMinLen =>
        str.pure[F].attempt

      case Right(str) =>
        logger.info(s"Stripped text from PDF is small (${str.length}). Trying with OCR.") *>
          ocrText.flatMap(ocrStr => pickLonger(ocrStr, str)).attempt
    }
  }
}
|
@ -1,28 +1,28 @@
|
||||
package docspell.text.ocr
|
||||
package docspell.extract.ocr
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.effect.{Blocker, ContextShift, Sync}
|
||||
import fs2.Stream
|
||||
import org.log4s._
|
||||
import docspell.common._
|
||||
|
||||
object Ocr {
|
||||
private[this] val logger = getLogger
|
||||
|
||||
/** Extract the text of all pages in the given pdf file.
|
||||
*/
|
||||
def extractPdf[F[_]: Sync: ContextShift](
|
||||
pdf: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
|
||||
runGhostscript(pdf, config, wd, blocker)
|
||||
.flatMap({ tmpImg =>
|
||||
runTesseractFile(tmpImg, blocker, lang, config)
|
||||
})
|
||||
config: OcrConfig
|
||||
): F[Option[String]] =
|
||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
|
||||
runGhostscript(pdf, config, wd, blocker, logger)
|
||||
.flatMap(tmpImg => runTesseractFile(tmpImg, blocker, logger, lang, config))
|
||||
.fold1(_ + "\n\n\n" + _)
|
||||
.compile
|
||||
.last
|
||||
}
|
||||
|
||||
/** Extract the text from the given image file
|
||||
@ -30,41 +30,45 @@ object Ocr {
|
||||
def extractImage[F[_]: Sync: ContextShift](
|
||||
img: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
config: OcrConfig
|
||||
): Stream[F, String] =
|
||||
runTesseractStdin(img, blocker, lang, config)
|
||||
runTesseractStdin(img, blocker, logger, lang, config)
|
||||
|
||||
def extractPdFFile[F[_]: Sync: ContextShift](
|
||||
pdf: Path,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
): Stream[F, String] =
|
||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf") { wd =>
|
||||
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker)
|
||||
.flatMap({ tif =>
|
||||
runTesseractFile(tif, blocker, lang, config)
|
||||
})
|
||||
config: OcrConfig
|
||||
): F[Option[String]] =
|
||||
File.withTempDir(config.ghostscript.workingDir, "extractpdf").use { wd =>
|
||||
runGhostscriptFile(pdf, config.ghostscript.command, wd, blocker, logger)
|
||||
.flatMap(tif => runTesseractFile(tif, blocker, logger, lang, config))
|
||||
.fold1(_ + "\n\n\n" + _)
|
||||
.compile
|
||||
.last
|
||||
}
|
||||
|
||||
def extractImageFile[F[_]: Sync: ContextShift](
|
||||
img: Path,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
config: OcrConfig
|
||||
): Stream[F, String] =
|
||||
runTesseractFile(img, blocker, lang, config)
|
||||
runTesseractFile(img, blocker, logger, lang, config)
|
||||
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
*/
|
||||
private[text] def runGhostscript[F[_]: Sync: ContextShift](
|
||||
private[extract] def runGhostscript[F[_]: Sync: ContextShift](
|
||||
pdf: Stream[F, Byte],
|
||||
cfg: Config,
|
||||
cfg: OcrConfig,
|
||||
wd: Path,
|
||||
blocker: Blocker
|
||||
blocker: Blocker,
|
||||
logger: Logger[F]
|
||||
): Stream[F, Path] = {
|
||||
val xargs =
|
||||
if (cfg.pageRange.begin > 0)
|
||||
@ -72,44 +76,37 @@ object Ocr {
|
||||
else cfg.ghostscript.command.args
|
||||
val cmd = cfg.ghostscript.command
|
||||
.copy(args = xargs)
|
||||
.mapArgs(
|
||||
replace(
|
||||
Map(
|
||||
"{{infile}}" -> "-",
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
.replace(
|
||||
Map(
|
||||
"{{infile}}" -> "-",
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
)
|
||||
SystemCommand
|
||||
.execSuccess(cmd, blocker, wd = Some(wd), stdin = pdf)
|
||||
.evalMap({ _ =>
|
||||
File.listFiles(pathEndsWith(".tif"), wd)
|
||||
})
|
||||
.execSuccess(cmd, blocker, logger, wd = Some(wd), stdin = pdf)
|
||||
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
|
||||
.flatMap(fs => Stream.emits(fs))
|
||||
}
|
||||
|
||||
/** Run ghostscript to extract all pdf pages into tiff files. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
*/
|
||||
private[text] def runGhostscriptFile[F[_]: Sync: ContextShift](
|
||||
private[extract] def runGhostscriptFile[F[_]: Sync: ContextShift](
|
||||
pdf: Path,
|
||||
ghostscript: Config.Command,
|
||||
ghostscript: SystemCommand.Config,
|
||||
wd: Path,
|
||||
blocker: Blocker
|
||||
blocker: Blocker,
|
||||
logger: Logger[F]
|
||||
): Stream[F, Path] = {
|
||||
val cmd = ghostscript.mapArgs(
|
||||
replace(
|
||||
Map(
|
||||
"{{infile}}" -> pdf.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
val cmd = ghostscript.replace(
|
||||
Map(
|
||||
"{{infile}}" -> pdf.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> "%d.tif"
|
||||
)
|
||||
)
|
||||
SystemCommand
|
||||
.execSuccess[F](cmd, blocker, wd = Some(wd))
|
||||
.evalMap({ _ =>
|
||||
File.listFiles(pathEndsWith(".tif"), wd)
|
||||
})
|
||||
.execSuccess[F](cmd, blocker, logger, wd = Some(wd))
|
||||
.evalMap(_ => File.listFiles(pathEndsWith(".tif"), wd))
|
||||
.flatMap(fs => Stream.emits(fs))
|
||||
}
|
||||
|
||||
@ -119,68 +116,63 @@ object Ocr {
|
||||
/** Run unpaper to optimize the image for ocr. The
|
||||
* files are stored to a temporary location on disk and returned.
|
||||
*/
|
||||
private[text] def runUnpaperFile[F[_]: Sync: ContextShift](
|
||||
private[extract] def runUnpaperFile[F[_]: Sync: ContextShift](
|
||||
img: Path,
|
||||
unpaper: Config.Command,
|
||||
unpaper: SystemCommand.Config,
|
||||
wd: Path,
|
||||
blocker: Blocker
|
||||
blocker: Blocker,
|
||||
logger: Logger[F]
|
||||
): Stream[F, Path] = {
|
||||
val targetFile = img.resolveSibling("u-" + img.getFileName.toString).toAbsolutePath
|
||||
val cmd = unpaper.mapArgs(
|
||||
replace(
|
||||
Map(
|
||||
"{{infile}}" -> img.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> targetFile.toString
|
||||
)
|
||||
val cmd = unpaper.replace(
|
||||
Map(
|
||||
"{{infile}}" -> img.toAbsolutePath.toString,
|
||||
"{{outfile}}" -> targetFile.toString
|
||||
)
|
||||
)
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(wd)).map(_ => targetFile).handleErrorWith {
|
||||
th =>
|
||||
SystemCommand
|
||||
.execSuccess[F](cmd, blocker, logger, wd = Some(wd))
|
||||
.map(_ => targetFile)
|
||||
.handleErrorWith { th =>
|
||||
logger
|
||||
.warn(s"Unpaper command failed: ${th.getMessage}. Using input file for text extraction.")
|
||||
Stream.emit(img)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted
|
||||
* text.
|
||||
*/
|
||||
private[text] def runTesseractFile[F[_]: Sync: ContextShift](
|
||||
private[extract] def runTesseractFile[F[_]: Sync: ContextShift](
|
||||
img: Path,
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
config: OcrConfig
|
||||
): Stream[F, String] =
|
||||
// tesseract cannot cope with absolute filenames
|
||||
// so use the parent as working dir
|
||||
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker).flatMap { uimg =>
|
||||
val cmd = config.tesseract.command.mapArgs(
|
||||
replace(Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)))
|
||||
)
|
||||
SystemCommand.execSuccess[F](cmd, blocker, wd = Some(uimg.getParent)).map(_.stdout)
|
||||
runUnpaperFile(img, config.unpaper.command, img.getParent, blocker, logger).flatMap { uimg =>
|
||||
val cmd = config.tesseract.command
|
||||
.replace(Map("{{file}}" -> uimg.getFileName.toString, "{{lang}}" -> fixLanguage(lang)))
|
||||
SystemCommand.execSuccess[F](cmd, blocker, logger, wd = Some(uimg.getParent)).map(_.stdout)
|
||||
}
|
||||
|
||||
/** Run tesseract on the given image file and return the extracted
|
||||
* text.
|
||||
*/
|
||||
private[text] def runTesseractStdin[F[_]: Sync: ContextShift](
|
||||
private[extract] def runTesseractStdin[F[_]: Sync: ContextShift](
|
||||
img: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
config: OcrConfig
|
||||
): Stream[F, String] = {
|
||||
val cmd = config.tesseract.command
|
||||
.mapArgs(replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang))))
|
||||
SystemCommand.execSuccess(cmd, blocker, stdin = img).map(_.stdout)
|
||||
.replace(Map("{{file}}" -> "stdin", "{{lang}}" -> fixLanguage(lang)))
|
||||
SystemCommand.execSuccess(cmd, blocker, logger, stdin = img).map(_.stdout)
|
||||
}
|
||||
|
||||
private def replace(repl: Map[String, String]): String => String =
|
||||
s =>
|
||||
repl.foldLeft(s) {
|
||||
case (res, (k, v)) =>
|
||||
res.replace(k, v)
|
||||
}
|
||||
|
||||
private def fixLanguage(lang: String): String =
|
||||
lang match {
|
||||
case "de" => "deu"
|
@ -0,0 +1,52 @@
|
||||
package docspell.extract.ocr
|
||||
|
||||
import java.nio.file.{Path, Paths}
|
||||
|
||||
import docspell.common._
|
||||
|
||||
/** Settings for OCR based text extraction.
  *
  * @param maxImageSize limit for the size of images handed to OCR
  * @param ghostscript  the ghostscript command and its working directory
  * @param pageRange    page range setting (see [[OcrConfig.PageRange]])
  * @param unpaper      the unpaper command
  * @param tesseract    the tesseract command
  */
case class OcrConfig(
    maxImageSize: Int,
    ghostscript: OcrConfig.Ghostscript,
    pageRange: OcrConfig.PageRange,
    unpaper: OcrConfig.Unpaper,
    tesseract: OcrConfig.Tesseract
)
|
||||
|
||||
object OcrConfig {

  case class PageRange(begin: Int)

  /** The ghostscript command together with the directory it works in. */
  case class Ghostscript(command: SystemCommand.Config, workingDir: Path)

  case class Tesseract(command: SystemCommand.Config)

  case class Unpaper(command: SystemCommand.Config)

  // `{{infile}}` and `{{outfile}}` are placeholders in the argument lists.
  private val ghostscriptCommand =
    SystemCommand.Config(
      "gs",
      Seq(
        "-dNOPAUSE",
        "-dBATCH",
        "-dSAFER",
        "-sDEVICE=tiffscaled8",
        "-sOutputFile={{outfile}}",
        "{{infile}}"
      ),
      Duration.seconds(30)
    )

  // A directory below the JVM's temporary directory.
  private val extractionDir =
    Paths.get(System.getProperty("java.io.tmpdir")).resolve("docspell-extraction")

  private val unpaperCommand =
    SystemCommand.Config("unpaper", Seq("{{infile}}", "{{outfile}}"), Duration.seconds(30))

  // `{{file}}` and `{{lang}}` are placeholders in the argument list.
  private val tesseractCommand =
    SystemCommand.Config("tesseract", Seq("{{file}}", "stdout", "-l", "{{lang}}"), Duration.minutes(1))

  /** Default settings using the `gs`, `unpaper` and `tesseract` commands. */
  val default = OcrConfig(
    maxImageSize = 3000 * 3000,
    pageRange = PageRange(10),
    ghostscript = Ghostscript(ghostscriptCommand, extractionDir),
    unpaper = Unpaper(unpaperCommand),
    tesseract = Tesseract(tesseractCommand)
  )
}
|
@ -0,0 +1,16 @@
|
||||
package docspell.extract.ocr
|
||||
|
||||
import docspell.common.MimeType
|
||||
|
||||
/** The mime types accepted for OCR, usable as a pattern extractor. */
object OcrType {

  val jpeg = MimeType.jpeg
  val png  = MimeType.png
  val tiff = MimeType.tiff
  val pdf  = MimeType.pdf

  val all = Set(jpeg, png, tiff, pdf)

  /** Matches `mt` only if it is one of [[all]]. */
  def unapply(mt: MimeType): Option[MimeType] =
    if (all.contains(mt)) Some(mt) else None
}
|
@ -1,7 +1,8 @@
|
||||
package docspell.text.ocr
|
||||
package docspell.extract.ocr
|
||||
|
||||
import cats.effect.{Blocker, ContextShift, Sync}
|
||||
import docspell.common.MimeType
|
||||
import docspell.common._
|
||||
import docspell.files._
|
||||
import fs2.Stream
|
||||
|
||||
object TextExtract {
|
||||
@ -9,28 +10,27 @@ object TextExtract {
|
||||
def extract[F[_]: Sync: ContextShift](
|
||||
in: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
config: OcrConfig
|
||||
): Stream[F, String] =
|
||||
extractOCR(in, blocker, lang, config)
|
||||
extractOCR(in, blocker, logger, lang, config)
|
||||
|
||||
def extractOCR[F[_]: Sync: ContextShift](
|
||||
in: Stream[F, Byte],
|
||||
blocker: Blocker,
|
||||
logger: Logger[F],
|
||||
lang: String,
|
||||
config: Config
|
||||
config: OcrConfig
|
||||
): Stream[F, String] =
|
||||
Stream
|
||||
.eval(TikaMimetype.detect(in))
|
||||
.eval(TikaMimetype.detect(in, MimeTypeHint.none))
|
||||
.flatMap({
|
||||
case mt if !config.isAllowed(mt) =>
|
||||
raiseError(s"File `$mt` not allowed")
|
||||
|
||||
case MimeType.pdf =>
|
||||
Ocr.extractPdf(in, blocker, lang, config)
|
||||
Stream.eval(Ocr.extractPdf(in, blocker, logger, lang, config)).unNoneTerminate
|
||||
|
||||
case mt if mt.primary == "image" =>
|
||||
Ocr.extractImage(in, blocker, lang, config)
|
||||
Ocr.extractImage(in, blocker, logger, lang, config)
|
||||
|
||||
case mt =>
|
||||
raiseError(s"File `$mt` not supported")
|
@ -0,0 +1,30 @@
|
||||
package docspell.extract.odf
|
||||
|
||||
import cats.effect._
|
||||
import cats.implicits._
|
||||
import fs2.Stream
|
||||
import java.io.{ByteArrayInputStream, InputStream}
|
||||
|
||||
import org.apache.tika.metadata.Metadata
|
||||
import org.apache.tika.parser.ParseContext
|
||||
import org.apache.tika.parser.odf.OpenDocumentParser
|
||||
import org.apache.tika.sax.BodyContentHandler
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
/** Text extraction from OpenDocument files using Tika's `OpenDocumentParser`. */
object OdfExtract {

  /** Reads all of `data` into memory and extracts its text.
    *
    * @return the trimmed text, or the exception thrown while parsing
    */
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    data.compile.to(Array).map(bytes => get(new ByteArrayInputStream(bytes)))

  /** Extracts the text from `is`. The stream is not closed here.
    *
    * The result type is stated explicitly because this public method is an
    * overload of `get`; it is the type that was previously inferred.
    *
    * @return the trimmed text, or the exception thrown while parsing
    */
  def get(is: InputStream): Either[Throwable, String] =
    Try {
      val handler  = new BodyContentHandler()
      val pctx     = new ParseContext()
      val meta     = new Metadata()
      val ooparser = new OpenDocumentParser()
      ooparser.parse(is, handler, meta, pctx)
      handler.toString.trim
    }.toEither

}
|
@ -0,0 +1,18 @@
|
||||
package docspell.extract.odf
|
||||
|
||||
import docspell.common.MimeType
|
||||
|
||||
/** The OpenDocument mime types, usable as a pattern extractor. */
object OdfType {

  val odt      = MimeType.application("vnd.oasis.opendocument.text")
  val ods      = MimeType.application("vnd.oasis.opendocument.spreadsheet")
  val odtAlias = MimeType.application("x-vnd.oasis.opendocument.text")
  val odsAlias = MimeType.application("x-vnd.oasis.opendocument.spreadsheet")

  // Not part of `all`, so `unapply` does not match it.
  val container = MimeType.zip

  val all = Set(odt, ods, odtAlias, odsAlias)

  /** Matches `mt` only if it is one of [[all]]. */
  def unapply(mt: MimeType): Option[MimeType] =
    if (all.contains(mt)) Some(mt) else None
}
|
@ -0,0 +1,34 @@
|
||||
package docspell.extract.pdfbox
|
||||
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.Sync
|
||||
import org.apache.pdfbox.pdmodel.PDDocument
|
||||
import org.apache.pdfbox.text.PDFTextStripper
|
||||
|
||||
import scala.util.{Try, Using}
|
||||
import fs2.Stream
|
||||
|
||||
/** Strips the text out of PDF documents using pdfbox. */
object PdfboxExtract {

  /** Reads all of `data` into memory and extracts the text of the PDF. */
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    data.compile.to(Array).map(bytes => withDocument(PDDocument.load(bytes)))

  /** Extracts the text of the PDF read from `is`. */
  def get(is: InputStream): Either[Throwable, String] =
    withDocument(PDDocument.load(is))

  /** Extracts the text of the PDF stored at `inFile`. */
  def get(inFile: Path): Either[Throwable, String] =
    withDocument(PDDocument.load(inFile.toFile))

  // Loads the document inside `Using`, so a failure while loading ends up in
  // the `Left` and the document is closed after reading.
  private def withDocument(load: => PDDocument): Either[Throwable, String] =
    Using(load)(readText).toEither.flatten

  private def readText(doc: PDDocument): Either[Throwable, String] =
    Try {
      val textStripper = new PDFTextStripper()
      textStripper.setAddMoreFormatting(true)
      textStripper.setLineSeparator("\n")
      textStripper.getText(doc).trim // trim here already
    }.toEither
}
|
@ -0,0 +1,88 @@
|
||||
package docspell.extract.poi
|
||||
|
||||
import java.io.{ByteArrayInputStream, InputStream}
|
||||
|
||||
import cats.data.EitherT
|
||||
import cats.implicits._
|
||||
import cats.effect.Sync
|
||||
import org.apache.poi.hssf.extractor.ExcelExtractor
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook
|
||||
import org.apache.poi.hwpf.extractor.WordExtractor
|
||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook
|
||||
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument
|
||||
import fs2.Stream
|
||||
|
||||
import scala.util.Try
|
||||
import docspell.common._
|
||||
import docspell.files.TikaMimetype
|
||||
|
||||
/** Text extraction from MS Office documents using Apache POI. */
object PoiExtract {

  /** Detects the mime type of `data` (using `hint`) and extracts accordingly. */
  def get[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[Either[Throwable, String]] =
    TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt))

  /** Extracts the text of `data` according to `mime`.
    *
    * For the generic `msoffice` and `ooxml` types the word format is tried
    * first and the spreadsheet format second. Any other type not listed in
    * the match results in a `Left`.
    */
  def get[F[_]: Sync](data: Stream[F, Byte], mime: MimeType): F[Either[Throwable, String]] =
    mime match {
      case PoiType.doc      => getDoc(data)
      case PoiType.xls      => getXls(data)
      case PoiType.xlsx     => getXlsx(data)
      case PoiType.docx     => getDocx(data)
      case PoiType.msoffice => firstOrElse(getDoc[F](data), getXls[F](data))
      case PoiType.ooxml    => firstOrElse(getDocx[F](data), getXlsx[F](data))
      case mt =>
        Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
    }

  def getDocx(is: InputStream): Either[Throwable, String] =
    Try {
      val extractor = new XWPFWordExtractor(new XWPFDocument(is))
      extractor.getText.trim
    }.toEither

  def getDoc(is: InputStream): Either[Throwable, String] =
    Try {
      val extractor = new WordExtractor(is)
      extractor.getText.trim
    }.toEither

  def getXlsx(is: InputStream): Either[Throwable, String] =
    Try {
      val extractor = new XSSFExcelExtractor(new XSSFWorkbook(is))
      extractor.getText.trim
    }.toEither

  def getXls(is: InputStream): Either[Throwable, String] =
    Try {
      val extractor = new ExcelExtractor(new HSSFWorkbook(is))
      extractor.getText.trim
    }.toEither

  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    inMemory(data)(is => getDocx(is))

  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    inMemory(data)(is => getDoc(is))

  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    inMemory(data)(is => getXlsx(is))

  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    inMemory(data)(is => getXls(is))

  // Runs `first`; only when it yields a `Left` is `second` run instead.
  private def firstOrElse[F[_]: Sync](
      first: F[Either[Throwable, String]],
      second: => F[Either[Throwable, String]]
  ): F[Either[Throwable, String]] =
    EitherT(first).recoverWith({ case _ => EitherT(second) }).value

  // Collects the whole stream into a byte array and applies `extract` to it.
  private def inMemory[F[_]: Sync](data: Stream[F, Byte])(
      extract: InputStream => Either[Throwable, String]
  ): F[Either[Throwable, String]] =
    data.compile.to(Array).map(bytes => extract(new ByteArrayInputStream(bytes)))

}
|
@ -0,0 +1,19 @@
|
||||
package docspell.extract.poi
|
||||
|
||||
import docspell.common.MimeType
|
||||
|
||||
/** The mime types handled via POI, usable as a pattern extractor. */
object PoiType {

  val msoffice = MimeType.application("x-tika-msoffice")
  val ooxml    = MimeType.application("x-tika-ooxml")
  val docx     = MimeType.application("vnd.openxmlformats-officedocument.wordprocessingml.document")
  val xlsx     = MimeType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet")
  val xls      = MimeType.application("vnd.ms-excel")
  val doc      = MimeType.application("msword")

  val all = Set(msoffice, ooxml, docx, xlsx, xls, doc)

  /** Matches `arg` only if it is one of [[all]]. */
  def unapply(arg: MimeType): Option[MimeType] =
    if (all.contains(arg)) Some(arg) else None

}
|
@ -0,0 +1,27 @@
|
||||
package docspell.extract.rtf
|
||||
|
||||
import java.io.{ByteArrayInputStream, InputStream}
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.Sync
|
||||
import docspell.common.MimeType
|
||||
import fs2.Stream
|
||||
import javax.swing.text.rtf.RTFEditorKit
|
||||
|
||||
import scala.util.Try
|
||||
|
||||
/** Text extraction from RTF documents using the JDK's `RTFEditorKit`. */
object RtfExtract {

  val rtfType = MimeType.application("rtf")

  /** Reads the RTF document from `is` and returns its trimmed text, or the
    * exception thrown while reading.
    */
  def get(is: InputStream): Either[Throwable, String] =
    Try {
      val editorKit = new RTFEditorKit()
      val document  = editorKit.createDefaultDocument()
      editorKit.read(is, document, 0)
      document.getText(0, document.getLength).trim
    }.toEither

  /** Reads all of `data` into memory and extracts its text. */
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
    data.compile.to(Array).map(bytes => get(new ByteArrayInputStream(bytes)))
}
|
@ -1,16 +1,19 @@
|
||||
package docspell.text.ocr
|
||||
package docspell.extract.ocr
|
||||
|
||||
import cats.effect.IO
|
||||
import docspell.text.TestFiles
|
||||
import docspell.common.Logger
|
||||
import docspell.files.TestFiles
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
object TextExtractionSuite extends SimpleTestSuite {
|
||||
import TestFiles._
|
||||
|
||||
val logger = Logger.log4s[IO](org.log4s.getLogger)
|
||||
|
||||
test("extract english pdf") {
|
||||
ignore()
|
||||
val text = TextExtract
|
||||
.extract[IO](letterSourceEN, blocker, "eng", Config.default)
|
||||
.extract[IO](letterSourceEN, blocker, logger, "eng", OcrConfig.default)
|
||||
.compile
|
||||
.lastOrError
|
||||
.unsafeRunSync()
|
||||
@ -21,7 +24,7 @@ object TextExtractionSuite extends SimpleTestSuite {
|
||||
ignore()
|
||||
val expect = TestFiles.letterDEText
|
||||
val extract = TextExtract
|
||||
.extract[IO](letterSourceDE, blocker, "deu", Config.default)
|
||||
.extract[IO](letterSourceDE, blocker, logger, "deu", OcrConfig.default)
|
||||
.compile
|
||||
.lastOrError
|
||||
.unsafeRunSync()
|
@ -0,0 +1,28 @@
|
||||
package docspell.extract.odf
|
||||
|
||||
import cats.effect._
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Checks `OdfExtract` against the example odt/ods files by comparing the
  * length of the extracted text with a known value, and by comparing the
  * `InputStream` based result with the fs2 `Stream` based one.
  */
object OdfExtractTest extends SimpleTestSuite {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  // example file -> expected number of characters in the extracted text
  val files = List(
    ExampleFiles.examples_sample_odt -> 6372,
    ExampleFiles.examples_sample_ods -> 717
  )

  test("test extract from odt") {
    files.foreach { case (file, len) =>
      val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
      // the stream is opened by this test, so it is closed here as well,
      // even when the extraction fails
      val str1 =
        try OdfExtract.get(is).fold(throw _, identity)
        finally is.close()
      assertEquals(str1.length, len)

      val data = file.readURL[IO](8192, blocker)
      val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity)
      assertEquals(str2, str1)
    }
  }

}
|
@ -0,0 +1,48 @@
|
||||
package docspell.extract.pdfbox
|
||||
|
||||
import cats.effect._
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Tests `PdfboxExtract` with the example letters (PDFs that contain text)
  * and with a scanned PDF for which an empty string is expected.
  */
object PdfboxExtractTest extends SimpleTestSuite {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  // pdf file -> text it is expected to contain
  val textPDFs = List(
    ExampleFiles.letter_de_pdf -> TestFiles.letterDEText,
    ExampleFiles.letter_en_pdf -> TestFiles.letterENText
  )

  test("extract text from text PDFs by inputstream") {
    textPDFs.foreach {
      case (file, txt) =>
        val url = file.toJavaUrl.fold(sys.error, identity)
        val in  = url.openStream()
        // close the stream opened above, also if the extraction fails
        val str =
          try PdfboxExtract.get(in).fold(throw _, identity)
          finally in.close()
        val received = removeFormatting(str)
        val expect   = removeFormatting(txt)
        assertEquals(received, expect)
    }
  }

  test("extract text from text PDFs via Stream") {
    textPDFs.foreach {
      case (file, txt) =>
        val data     = file.readURL[IO](8192, blocker)
        val str      = PdfboxExtract.get(data).unsafeRunSync().fold(throw _, identity)
        val received = removeFormatting(str)
        val expect   = removeFormatting(txt)
        assertEquals(received, expect)
    }
  }

  test("extract text from image PDFs") {
    val url = ExampleFiles.scanner_pdf13_pdf.toJavaUrl.fold(sys.error, identity)
    val in  = url.openStream()

    // close the stream opened above, also if the extraction fails
    val str =
      try PdfboxExtract.get(in).fold(throw _, identity)
      finally in.close()

    assertEquals(str, "")
  }

  /** Drops whitespace and the punctuation characters `; : . , -` and
    * lower-cases the rest, so that texts are compared by content only.
    */
  private def removeFormatting(str: String): String =
    str.replaceAll("[\\s;:.,\\-]+", "").toLowerCase
}
|
@ -0,0 +1,39 @@
|
||||
package docspell.extract.poi
|
||||
|
||||
import cats.effect._
|
||||
import docspell.common.MimeTypeHint
|
||||
import docspell.files.{ExampleFiles, TestFiles}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Runs `PoiExtract` on the example office files, once without any hint
  * and once with the file name as hint, and expects the same text of a
  * known length both times.
  */
object PoiExtractTest extends SimpleTestSuite {
  val blocker     = TestFiles.blocker
  implicit val CS = TestFiles.CS

  // example file -> expected number of characters in the extracted text
  val officeFiles = List(
    ExampleFiles.examples_sample_doc  -> 6241,
    ExampleFiles.examples_sample_docx -> 6179,
    ExampleFiles.examples_sample_xlsx -> 660,
    ExampleFiles.examples_sample_xls  -> 660
  )

  test("extract text from ms office files") {
    officeFiles.foreach {
      case (file, len) =>
        // extracts the text of `file` using the given hint; a failed
        // extraction is rethrown
        def extractWith(hint: MimeTypeHint) =
          PoiExtract
            .get[IO](file.readURL[IO](8192, blocker), hint)
            .unsafeRunSync()
            .fold(throw _, identity)

        val str1 = extractWith(MimeTypeHint.none)
        val str2 = extractWith(MimeTypeHint(Some(file.path.segments.last), None))

        assertEquals(str1, str2)
        assertEquals(str1.length, len)
    }
  }
}
|
@ -0,0 +1,14 @@
|
||||
package docspell.extract.rtf
|
||||
|
||||
import docspell.files.ExampleFiles
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
/** Extracts the text of the example rtf file through the `InputStream`
  * variant of `RtfExtract.get` and checks the length of the result.
  */
object RtfExtractTest extends SimpleTestSuite {

  test("extract text from rtf using java input-stream") {
    val file = ExampleFiles.examples_sample_rtf
    val is   = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity)
    // the stream is opened by this test, so it is closed here as well,
    // even when the extraction fails
    val str =
      try RtfExtract.get(is).fold(throw _, identity)
      finally is.close()
    assertEquals(str.length, 7342)
  }
}
|
@ -0,0 +1,9 @@
|
||||
package docspell.files
|
||||
|
||||
/** A pair of width and height values.
  *
  * @param width  the horizontal extent
  * @param height the vertical extent
  */
case class Dimension(width: Int, height: Int) {

  /** `width` multiplied by `height`. This is plain `Int` arithmetic, so
    * the result wraps around for very large values.
    */
  def product: Int =
    width * height

  /** Creates the corresponding `java.awt.Dimension`. */
  def toAwtDimension: java.awt.Dimension = {
    val awt = new java.awt.Dimension(width, height)
    awt
  }
}
|
61
modules/files/src/main/scala/docspell/files/ImageSize.scala
Normal file
@ -0,0 +1,61 @@
|
||||
package docspell.files
|
||||
|
||||
import java.io.{ByteArrayInputStream, InputStream}
|
||||
import java.nio.file.Path
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect._
|
||||
import fs2.Stream
|
||||
import javax.imageio.stream.{FileImageInputStream, ImageInputStream}
|
||||
import javax.imageio.{ImageIO, ImageReader}
|
||||
|
||||
import scala.jdk.CollectionConverters._
|
||||
import scala.util.{Try, Using}
|
||||
|
||||
/** Determines the dimension of an image by asking the `ImageReader`s
  * registered with `ImageIO` for width and height.
  *
  * Every `ImageInputStream` created in here is closed again once the
  * dimension has been read.
  */
object ImageSize {

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * Returns `None` if the file cannot be opened or no reader is able
    * to determine the size.
    */
  def get(file: Path): Option[Dimension] =
    Using(new FileImageInputStream(file.toFile))(getDimension).toOption.flatten

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * Returns `None` if no image stream can be created for `in` or no
    * reader is able to determine the size. Only the image stream created
    * here is closed; per the `javax.imageio.stream` documentation the
    * given source stream is not closed by that, so closing `in` remains
    * the responsibility of the caller.
    */
  def get(in: InputStream): Option[Dimension] =
    Option(ImageIO.createImageInputStream(in)).flatMap(readAndClose)

  /** Return the image size from its header without reading
    * the whole image into memory.
    *
    * Only the first 768 bytes of `data` are inspected. The effect fails
    * if no image stream can be created for these bytes.
    */
  def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] =
    data.take(768).compile.to(Array).map { ar =>
      val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar))
      if (iis == null) sys.error("no reader given for the array")
      else readAndClose(iis)
    }

  /** Reads the dimension from `in` and closes it afterwards, so that the
    * image stream (and whatever it caches) is released right away. An
    * error while closing results in `None`, in line with the `Path`
    * based variant.
    */
  private def readAndClose(in: ImageInputStream): Option[Dimension] =
    Using(in)(iis => getDimension(iis)).toOption.flatten

  /** Tries the readers that `ImageIO` offers for the stream one after
    * another and returns the first dimension that could be read. Every
    * reader that was tried is disposed.
    */
  private def getDimension(in: ImageInputStream): Option[Dimension] =
    ImageIO
      .getImageReaders(in)
      .asScala
      .to(LazyList) // lazy: stop asking readers after the first success
      .collectFirst(Function.unlift { reader =>
        val dim = getDimension(in, reader).toOption
        reader.dispose()
        dim
      })

  /** Reads width and height of the image at the reader's minimum index;
    * non-fatal errors are returned as `Left`.
    */
  private def getDimension(
      in: ImageInputStream,
      reader: ImageReader
  ): Either[Throwable, Dimension] =
    Try {
      reader.setInput(in)
      val width  = reader.getWidth(reader.getMinIndex)
      val height = reader.getHeight(reader.getMinIndex)
      Dimension(width, height)
    }.toEither
}
|
@ -1,13 +1,18 @@
|
||||
package docspell.text.ocr
|
||||
package docspell.files
|
||||
|
||||
import java.io.BufferedInputStream
|
||||
import java.nio.file.{Files, Path}
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.Sync
|
||||
import docspell.common.MimeType
|
||||
import docspell.common._
|
||||
import fs2.Stream
|
||||
import org.apache.tika.config.TikaConfig
|
||||
import org.apache.tika.metadata.{HttpHeaders, Metadata, TikaMetadataKeys}
|
||||
import org.apache.tika.mime.MediaType
|
||||
|
||||
import scala.util.Using
|
||||
|
||||
object TikaMimetype {
|
||||
private val tika = new TikaConfig().getDetector
|
||||
|
||||
@ -35,7 +40,20 @@ object TikaMimetype {
|
||||
/** Runs the detector on the given bytes; the hint is handed to it as
  * metadata and the detected type is converted to a `MimeType`.
  */
private def fromBytes(bv: Array[Byte], hint: MimeTypeHint): MimeType = {
  val in       = new java.io.ByteArrayInputStream(bv)
  val metadata = makeMetadata(hint)
  convert(tika.detect(in, metadata))
}
|
||||
|
||||
/** Detects the mime type from the first 1024 bytes of `data`, without
  * any additional hint.
  */
def detect[F[_]: Sync](data: Stream[F, Byte]): F[MimeType] = {
  val head = data.take(1024).compile.toVector
  head.map(prefix => fromBytes(prefix.toArray, MimeTypeHint.none))
}
|
||||
/** Detects the mime type from the first 64 bytes of `data`, taking the
  * given hint into account.
  *
  * NOTE(review): the hint-less overload inspects 1024 bytes; confirm that
  * a 64 byte prefix is sufficient for all formats of interest.
  */
def detect[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[MimeType] = {
  val head = data.take(64).compile.toVector
  head.map(prefix => fromBytes(prefix.toArray, hint))
}
|
||||
|
||||
/** Turns a `DataType` into a mime type: an exact type is returned as is,
  * for a hint the type is detected from `data` using that hint.
  */
def resolve[F[_]: Sync](dt: DataType, data: Stream[F, Byte]): F[MimeType] =
  dt match {
    case DataType.Exact(mimeType) =>
      Sync[F].pure(mimeType)
    case DataType.Hint(typeHint) =>
      detect(data, typeHint)
  }
|
||||
|
||||
/** Detects the mime type of a file, using its name as a hint.
  *
  * The file is opened inside the effect and closed again after the
  * detection; any error is raised in `F`.
  */
def detect[F[_]: Sync](file: Path): F[MimeType] = {
  val attempt = Sync[F].delay {
    val nameHint = MimeTypeHint.filename(file.getFileName.toString)
    val detected =
      Using(new BufferedInputStream(Files.newInputStream(file), 64)) { in =>
        convert(tika.detect(in, makeMetadata(nameHint)))
      }
    detected.toEither
  }
  attempt.rethrow
}
|
||||
}
|
BIN
modules/files/src/test/resources/bombs/20K-gray.jpeg
Normal file
After Width: | Height: | Size: 1.5 MiB |
BIN
modules/files/src/test/resources/bombs/20K-gray.png
Normal file
After Width: | Height: | Size: 48 KiB |
BIN
modules/files/src/test/resources/bombs/20K-rgb.jpeg
Normal file
After Width: | Height: | Size: 2.2 MiB |
BIN
modules/files/src/test/resources/bombs/20K-rgb.png
Normal file
After Width: | Height: | Size: 1.2 MiB |
BIN
modules/files/src/test/resources/camera/letter-en.jpg
Normal file
After Width: | Height: | Size: 456 KiB |
BIN
modules/files/src/test/resources/camera/letter-en.png
Normal file
After Width: | Height: | Size: 187 KiB |
BIN
modules/files/src/test/resources/camera/letter-en.tiff
Normal file
BIN
modules/files/src/test/resources/examples/sample.doc
Normal file
BIN
modules/files/src/test/resources/examples/sample.docx
Normal file
BIN
modules/files/src/test/resources/examples/sample.ods
Normal file
BIN
modules/files/src/test/resources/examples/sample.odt
Normal file
697
modules/files/src/test/resources/examples/sample.rtf
Normal file
@ -0,0 +1,697 @@
|
||||
{\rtf1\ansi\deff3\adeflang1025
|
||||
{\fonttbl{\f0\froman\fprq2\fcharset0 Times New Roman;}{\f1\froman\fprq2\fcharset2 Symbol;}{\f2\fswiss\fprq2\fcharset0 Arial;}{\f3\froman\fprq2\fcharset0 Liberation Serif{\*\falt Times New Roman};}{\f4\froman\fprq2\fcharset0 Liberation Sans{\*\falt Arial};}{\f5\froman\fprq2\fcharset0 Symbol;}{\f6\froman\fprq2\fcharset0 OpenSymbol{\*\falt Arial Unicode MS};}{\f7\froman\fprq2\fcharset0 DejaVu Sans;}{\f8\froman\fprq2\fcharset0 Open Sans{\*\falt Arial};}{\f9\fnil\fprq2\fcharset0 Droid Sans Fallback;}{\f10\fnil\fprq2\fcharset0 OpenSymbol{\*\falt Arial Unicode MS};}{\f11\fnil\fprq2\fcharset0 DejaVu Sans;}{\f12\fnil\fprq2\fcharset0 Open Sans{\*\falt Arial};}{\f13\fnil\fprq2\fcharset0 FreeSans;}{\f14\fnil\fprq2\fcharset0 Symbol;}}
|
||||
{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\red0\green0\blue10;\red0\green0\blue1;}
|
||||
{\stylesheet{\s0\snext0\ql\nowidctlpar\hyphpar0\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\kerning0\loch\f3\fs24\lang1033 Normal;}
|
||||
{\s1\sbasedon50\snext1\ql\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs36\alang1081\ab\loch\f4\fs36\lang1033 Heading 1;}
|
||||
{\s2\sbasedon50\snext2\ql\nowidctlpar\hyphpar0\sb200\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs32\alang1081\ab\loch\f4\fs32\lang1033 Heading 2;}
|
||||
{\s3\sbasedon50\snext3\ql\nowidctlpar\hyphpar0\sb140\sa120\keepn\ltrpar\cf15\b\dbch\af9\langfe2052\dbch\af13\afs28\alang1081\ab\loch\f4\fs28\lang1033 Heading 3;}
|
||||
{\*\cs15\snext15 WW8Num1z0;}
|
||||
{\*\cs16\snext16 WW8Num1z1;}
|
||||
{\*\cs17\snext17 WW8Num1z2;}
|
||||
{\*\cs18\snext18 WW8Num1z3;}
|
||||
{\*\cs19\snext19 WW8Num1z4;}
|
||||
{\*\cs20\snext20 WW8Num1z5;}
|
||||
{\*\cs21\snext21 WW8Num1z6;}
|
||||
{\*\cs22\snext22 WW8Num1z7;}
|
||||
{\*\cs23\snext23 WW8Num1z8;}
|
||||
{\*\cs24\snext24 WW8Num2z0;}
|
||||
{\*\cs25\snext25 WW8Num2z1;}
|
||||
{\*\cs26\snext26 WW8Num2z2;}
|
||||
{\*\cs27\snext27 WW8Num2z3;}
|
||||
{\*\cs28\snext28 WW8Num2z4;}
|
||||
{\*\cs29\snext29 WW8Num2z5;}
|
||||
{\*\cs30\snext30 WW8Num2z6;}
|
||||
{\*\cs31\snext31 WW8Num2z7;}
|
||||
{\*\cs32\snext32 WW8Num2z8;}
|
||||
{\*\cs33\snext33\dbch\af10\loch\f5 WW8Num3z0;}
|
||||
{\*\cs34\snext34\dbch\af10\loch\f6 WW8Num3z1;}
|
||||
{\*\cs35\snext35\dbch\af10\dbch\af10\loch\f6 Bullets;}
|
||||
{\*\cs36\snext36\cf9\ul\ulc0\langfe255\alang255\lang255 Internet Link;}
|
||||
{\*\cs37\snext37\cf13\ul\ulc0\langfe255\alang255\lang255 Visited Internet Link;}
|
||||
{\*\cs38\snext38\dbch\af14 ListLabel 1;}
|
||||
{\*\cs39\snext39\dbch\af10 ListLabel 2;}
|
||||
{\*\cs40\snext40\b0\dbch\af14\loch\f7\fs21 ListLabel 3;}
|
||||
{\*\cs41\snext41\dbch\af10 ListLabel 4;}
|
||||
{\*\cs42\snext42\dbch\af10 ListLabel 5;}
|
||||
{\*\cs43\snext43\dbch\af14 ListLabel 6;}
|
||||
{\*\cs44\snext44\dbch\af10 ListLabel 7;}
|
||||
{\*\cs45\snext45\dbch\af10 ListLabel 8;}
|
||||
{\*\cs46\snext46\dbch\af14 ListLabel 9;}
|
||||
{\*\cs47\snext47\dbch\af10 ListLabel 10;}
|
||||
{\*\cs48\snext48\dbch\af10 ListLabel 11;}
|
||||
{\*\cs49\snext49\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\loch\f7\fs21 ListLabel 12;}
|
||||
{\s50\sbasedon0\snext51\ql\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs28\alang1081\loch\f4\fs28\lang1033 Heading;}
|
||||
{\s51\sbasedon0\snext51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033 Text Body;}
|
||||
{\s52\sbasedon51\snext52\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033 List;}
|
||||
{\s53\sbasedon0\snext53\ql\nowidctlpar\hyphpar0\sb120\sa120\noline\ltrpar\cf17\i\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\ai\loch\f3\fs24\lang1033 Caption;}
|
||||
{\s54\sbasedon0\snext54\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033 Index;}
|
||||
{\s55\sbasedon0\snext55\ql\nowidctlpar\hyphpar0\li567\ri567\lin567\rin567\fi0\sb0\sa283\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033 Quotations;}
|
||||
{\s56\sbasedon50\snext56\qc\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs56\alang1081\ab\loch\f4\fs56\lang1033 Title;}
|
||||
{\s57\sbasedon50\snext57\qc\nowidctlpar\hyphpar0\sb60\sa120\keepn\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs36\alang1081\loch\f4\fs36\lang1033 Subtitle;}
|
||||
{\s58\sbasedon0\snext58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033 Table Contents;}
|
||||
{\s59\sbasedon58\snext59\qc\nowidctlpar\hyphpar0\noline\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\ab\loch\f3\fs24\lang1033 Table Heading;}
|
||||
}{\*\listtable{\list\listtemplateid1
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-432\li792}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-576\li936}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-720\li1080}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-864\li1224}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-1008\li1368}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-1152\li1512}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-1296\li1656}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-1440\li1800}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi-1584\li1944}\listid1}
|
||||
{\list\listtemplateid2
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u61623 ?;}{\levelnumbers;}\f15\b0\dbch\af14\fi-360\li720}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u9702 ?;}{\levelnumbers;}\f16\dbch\af10\fi-360\li1080}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u9642 ?;}{\levelnumbers;}\f16\dbch\af10\fi-360\li1440}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u61623 ?;}{\levelnumbers;}\f15\dbch\af14\fi-360\li1800}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u9702 ?;}{\levelnumbers;}\f16\dbch\af10\fi-360\li2160}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u9642 ?;}{\levelnumbers;}\f16\dbch\af10\fi-360\li2520}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u61623 ?;}{\levelnumbers;}\f15\dbch\af14\fi-360\li2880}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u9702 ?;}{\levelnumbers;}\f16\dbch\af10\fi-360\li3240}
|
||||
{\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\u9642 ?;}{\levelnumbers;}\f16\dbch\af10\fi-360\li3600}\listid2}
|
||||
{\list\listtemplateid3
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}
|
||||
{\listlevel\levelnfc255\leveljc0\levelstartat1\levelfollow2{\leveltext \'00;}{\levelnumbers;}\fi0\li0}\listid3}
|
||||
}{\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}{\listoverride\listid2\listoverridecount0\ls2}{\listoverride\listid3\listoverridecount0\ls3}}{\*\generator LibreOffice/6.0.7.3$Linux_X86_64 LibreOffice_project/00m0$Build-3}{\info{\creatim\yr2017\mo8\dy2\hr11\min9}{\revtim\yr2019\mo9\dy21\hr14\min2}{\printim\yr0\mo0\dy0\hr0\min0}}{\*\userprops}\deftab709
|
||||
\hyphauto0\viewscale100
|
||||
{\*\pgdsctbl
|
||||
{\pgdsc0\pgdscuse451\pgwsxn11906\pghsxn16838\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\pgdscnxt0 Default Style;}}
|
||||
\formshade{\*\pgdscno0}\paperh16838\paperw11906\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\sectunlocked1\pgndec\pgwsxn11906\pghsxn16838\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc\htmautsp
|
||||
{\*\ftnsep\chftnsep}\viewbksp1{\*\background{\shp{\*\shpinst{\sp{\sn shapeType}{\sv 1}}{\sp{\sn fillColor}{\sv 16777215}}}}}\pgndec\pard\plain \s56\qc\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs56\alang1081\ab\loch\f4\fs56\lang1033\qc\sb240\sa120{\cbpat8\cbpat8\fs21\rtlch \ltrch\loch
|
||||
Lorem ipsum }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \pard\plain \s1\ql\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs36\alang1081\ab\loch\f4\fs36\lang1033{\listtext\pard\plain }\ilvl0\ls1 \li792\ri0\lin792\rin0\fi-432\li0\ri0\lin0\rin0\fi-432\sb240\sa120\keepn{\rtlch \ltrch\loch
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc ac faucibus odio. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Vestibulum neque massa, scelerisque sit amet ligula eu, congue molestie mi. Praesent ut varius sem. Nullam at porttitor arcu, nec lacinia nisi. Ut ac dolor vitae odio interdum condimentum. }{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b\dbch\af11\ab\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Vivamus dapibus sodales ex, vitae malesuada ipsum cursus convallis. Maecenas sed egestas nulla, ac condimentum orci. }{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Mauris diam felis, vulputate ac suscipit et, iaculis non est. Curabitur semper arcu ac ligula semper, nec luctus nisl blandit. Integer lacinia ante ac libero lobortis imperdiet. }{\scaps0\caps0\cf1\expnd0\expndtw0\i\b0\dbch\af11\ai\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Nullam mollis convallis ipsum, ac accumsan nunc vehicula vitae. }{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Nulla eget justo in felis tristique fringilla. Morbi sit amet tortor quis risus auctor condimentum. Morbi in ullamcorper elit. Nulla iaculis tellus sit amet mauris tempus fringilla.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Maecenas mauris lectus, lobortis et purus mattis, blandit dictum tellus.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033{\listtext\pard\plain \b0\dbch\af14\loch\f7\fs21 \u61623\'3f\tab}\ilvl0\ls2 \li720\ri0\lin720\rin0\fi-360\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b\dbch\af11\ab\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Maecenas non lorem quis tellus placerat varius. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033{\listtext\pard\plain \b0\dbch\af14\loch\f7\fs21 \u61623\'3f\tab}\ilvl0\ls2 \li720\ri0\lin720\rin0\fi-360\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i\b0\dbch\af11\ai\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Nulla facilisi. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033{\listtext\pard\plain \b0\dbch\af14\loch\f7\fs21 \u61623\'3f\tab}\ilvl0\ls2 \li720\ri0\lin720\rin0\fi-360\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\ul\ulc0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Aenean congue fringilla justo ut aliquam. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033{\listtext\pard\plain \b0\dbch\af14\loch\f7\fs21 \u61623\'3f\tab}\ilvl0\ls2 \li720\ri0\lin720\rin0\fi-360\qj\widctlpar\sb0\sa225{{\field{\*\fldinst HYPERLINK "https://products.office.com/en-us/word" }{\fldrslt {\cs36\cf9\ul\ulc0\langfe255\alang255\lang255\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Mauris id ex erat. }{}}}\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Nunc vulputate neque vitae justo facilisis, non condimentum ante sagittis. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033{\listtext\pard\plain \b0\dbch\af14\loch\f7\fs21 \u61623\'3f\tab}\ilvl0\ls2 \li720\ri0\lin720\rin0\fi-360\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Morbi viverra semper lorem nec molestie. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033{\listtext\pard\plain \b0\dbch\af14\loch\f7\fs21 \u61623\'3f\tab}\ilvl0\ls2 \li720\ri0\lin720\rin0\fi-360\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Maecenas tincidunt est efficitur ligula euismod, sit amet ornare est vulputate.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
In non mauris justo. Duis vehicula mi vel mi pretium, a viverra erat efficitur. Cras aliquam est ac eros varius, id iaculis dui auctor. Duis pretium neque ligula, et pulvinar mi placerat et. Nulla nec nunc sit amet nunc posuere vestibulum. Ut id neque eget tortor mattis tristique. Donec ante est, blandit sit amet tristique vel, lacinia pulvinar arcu. Pellentesque scelerisque fermentum erat, id posuere justo pulvinar ut. Cras id eros sed enim aliquam lobortis. Sed lobortis nisl ut eros efficitur tincidunt. Cras justo mi, porttitor quis mattis vel, ultricies ut purus. Ut facilisis et lacus eu cursus.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
In eleifend velit vitae libero sollicitudin euismod. Fusce vitae vestibulum velit. Pellentesque vulputate lectus quis pellentesque commodo. Aliquam erat volutpat. Vestibulum in egestas velit. Pellentesque fermentum nisl vitae fringilla venenatis. Etiam id mauris vitae orci maximus ultricies. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \pard\plain \s1\ql\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs36\alang1081\ab\loch\f4\fs36\lang1033{\listtext\pard\plain }\ilvl0\ls1 \li792\ri0\lin792\rin0\fi-432\li0\ri0\lin0\rin0\fi-432\sb240\sa120\keepn{\rtlch \ltrch\loch
|
||||
Cras fringilla ipsum magna, in fringilla dui commodo a.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \trowd\trql\trleft53\ltrrow\trrh450\trpaddft3\trpaddt0\trpaddfl3\trpaddl0\trpaddfb3\trpaddb0\trpaddfr3\trpaddr0\clbrdrt\brdrs\brdrw5\brdrcf18\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx770\clbrdrt\brdrs\brdrw5\brdrcf18\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx6434\clbrdrt\brdrs\brdrw5\brdrcf18\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx7992\clbrdrt\brdrs\brdrw5\brdrcf18\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clbrdrr\brdrs\brdrw5\brdrcf18\clpadfr3\clpadr55\clcbpat8\cellx9690\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
\cell\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Lorem ipsum}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Lorem ipsum}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Lorem ipsum}\cell\row\pard\trowd\trql\trleft53\ltrrow\trpaddft3\trpaddt0\trpaddfl3\trpaddl0\trpaddfb3\trpaddb0\trpaddfr3\trpaddr0\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx770\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx6434\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx7992\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clbrdrr\brdrs\brdrw5\brdrcf18\clpadfr3\clpadr55\clcbpat8\cellx9690\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
1}\cell\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
In eleifend velit vitae libero sollicitudin euismod.}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Lorem}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
\cell\row\pard\trowd\trql\trleft53\ltrrow\trpaddft3\trpaddt0\trpaddfl3\trpaddl0\trpaddfb3\trpaddb0\trpaddfr3\trpaddr0\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx770\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx6434\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx7992\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clbrdrr\brdrs\brdrw5\brdrcf18\clpadfr3\clpadr55\clcbpat8\cellx9690\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
2}\cell\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Cras fringilla ipsum magna, in fringilla dui commodo a.}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Ipsum}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
\cell\row\pard\trowd\trql\trleft53\ltrrow\trpaddft3\trpaddt0\trpaddfl3\trpaddl0\trpaddfb3\trpaddb0\trpaddfr3\trpaddr0\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx770\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx6434\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx7992\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clbrdrr\brdrs\brdrw5\brdrcf18\clpadfr3\clpadr55\clcbpat8\cellx9690\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
3}\cell\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\ab\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Fusce vitae vestibulum velit. }\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Lorem}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
\cell\row\pard\trowd\trql\trleft53\ltrrow\trpaddft3\trpaddt0\trpaddfl3\trpaddl0\trpaddfb3\trpaddb0\trpaddfr3\trpaddr0\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx770\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx6434\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clpadfr3\clpadr55\clcbpat8\cellx7992\clpadfl3\clpadl55\clbrdrl\brdrs\brdrw5\brdrcf18\clpadft3\clpadt51\clbrdrb\brdrs\brdrw5\brdrcf18\clpadfb3\clpadb55\clbrdrr\brdrs\brdrw5\brdrcf18\clpadfr3\clpadr55\clcbpat8\cellx9690\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
4}\cell\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Etiam vehicula luctus fermentum.}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql{\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Ipsum}\cell\pard\plain \s58\ql\nowidctlpar\hyphpar0\noline\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\intbl\ql\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
\cell\row\pard\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af11\rtlch \ltrch\loch\fs21\loch\f7\hich\af7
|
||||
Etiam vehicula luctus fermentum. In vel metus congue, pulvinar lectus vel, fermentum dui. Maecenas ante orci, egestas ut aliquet sit amet, sagittis a magna. Aliquam ante quam, pellentesque ut dignissim quis, laoreet eget est. Aliquam erat volutpat. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut ullamcorper justo sapien, in cursus libero viverra eget. Vivamus auctor imperdiet urna, at pulvinar leo posuere laoreet. Suspendisse neque nisl, fringilla at iaculis scelerisque, ornare vel dolor. Ut et pulvinar nunc. Pellentesque fringilla mollis efficitur. Nullam venenatis commodo imperdiet. Morbi velit neque, semper quis lorem quis, efficitur dignissim ipsum. Ut ac lorem sed turpis imperdiet eleifend sit amet id sapien.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
Maecenas non lorem quis tellus placerat varius. Nulla facilisi. Aenean congue fringilla justo ut aliquam. Mauris id ex erat. Nunc vulputate neque vitae justo facilisis, non condimentum ante sagittis. Morbi viverra semper lorem nec molestie. Maecenas tincidunt est efficitur ligula euismod, sit amet ornare est vulputate.}
|
||||
\par \shpwr2\shpwrk3\shpbypara\shpbyignore\shptop0\shpbxcolumn\shpbxignore\shpleft2819\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
{\*\flymaincnt5\flyanchor0\flycntnt}{\shp{\*\shpinst\shpwr2\shpwrk3\shpbypara\shpbyignore\shptop0\shpbottom2660\shpbxcolumn\shpbxignore\shpleft2819\shpright6819{\sp{\sn shapeType}{\sv 75}}{\sp{\sn wzDescription}{\sv }}{\sp{\sn wzName}{\sv }}{\sp{\sn pib}{\sv {\pict\picscalex100\picscaley100\piccropl0\piccropr0\piccropt0\piccropb0\picw200\pich133\picwgoal4000\pichgoal2660\jpegblip
|
||||
ffd8ffe000104a46494600010101004800480000ffe20c584943435f50524f46494c4500010100000c484c696e6f021000006d6e74725247422058595a2007ce
|
||||
00020009000600310000616373704d5346540000000049454320735247420000000000000000000000000000f6d6000100000000d32d48502020000000000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000001163707274000001500000003364657363000001840000
|
||||
006c77747074000001f000000014626b707400000204000000147258595a00000218000000146758595a0000022c000000146258595a0000024000000014646d
|
||||
6e640000025400000070646d6464000002c400000088767565640000034c0000008676696577000003d4000000246c756d69000003f8000000146d6561730000
|
||||
040c0000002474656368000004300000000c725452430000043c0000080c675452430000043c0000080c625452430000043c0000080c7465787400000000436f
|
||||
70797269676874202863292031393938204865776c6574742d5061636b61726420436f6d70616e79000064657363000000000000001273524742204945433631
|
||||
3936362d322e31000000000000000000000012735247422049454336313936362d322e3100000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000058595a20000000000000f35100010000000116cc58595a20000000000000000000000000000000005859
|
||||
5a200000000000006fa2000038f50000039058595a2000000000000062990000b785000018da58595a2000000000000024a000000f840000b6cf646573630000
|
||||
00000000001649454320687474703a2f2f7777772e6965632e636800000000000000000000001649454320687474703a2f2f7777772e6965632e636800000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000000064657363000000000000002e4945432036313936362d
|
||||
322e312044656661756c742052474220636f6c6f7572207370616365202d207352474200000000000000000000002e4945432036313936362d322e3120446566
|
||||
61756c742052474220636f6c6f7572207370616365202d20735247420000000000000000000000000000000000000000000064657363000000000000002c5265
|
||||
666572656e63652056696577696e6720436f6e646974696f6e20696e2049454336313936362d322e3100000000000000000000002c5265666572656e63652056
|
||||
696577696e6720436f6e646974696f6e20696e2049454336313936362d322e310000000000000000000000000000000000000000000000000000766965770000
|
||||
00000013a4fe00145f2e0010cf140003edcc0004130b00035c9e0000000158595a2000000000004c09560050000000571fe76d65617300000000000000010000
|
||||
00000000000000000000000000000000028f0000000273696720000000004352542063757276000000000000040000000005000a000f00140019001e00230028
|
||||
002d00320037003b00400045004a004f00540059005e00630068006d00720077007c00810086008b00900095009a009f00a400a900ae00b200b700bc00c100c6
|
||||
00cb00d000d500db00e000e500eb00f000f600fb01010107010d01130119011f0125012b01320138013e0145014c0152015901600167016e0175017c0183018b
|
||||
0192019a01a101a901b101b901c101c901d101d901e101e901f201fa0203020c0214021d0226022f02380241024b0254025d02670271027a0284028e029802a2
|
||||
02ac02b602c102cb02d502e002eb02f50300030b03160321032d03380343034f035a03660372037e038a039603a203ae03ba03c703d303e003ec03f904060413
|
||||
0420042d043b0448045504630471047e048c049a04a804b604c404d304e104f004fe050d051c052b053a05490558056705770586059605a605b505c505d505e5
|
||||
05f6060606160627063706480659066a067b068c069d06af06c006d106e306f507070719072b073d074f076107740786079907ac07bf07d207e507f8080b081f
|
||||
08320846085a086e0882089608aa08be08d208e708fb09100925093a094f09640979098f09a409ba09cf09e509fb0a110a270a3d0a540a6a0a810a980aae0ac5
|
||||
0adc0af30b0b0b220b390b510b690b800b980bb00bc80be10bf90c120c2a0c430c5c0c750c8e0ca70cc00cd90cf30d0d0d260d400d5a0d740d8e0da90dc30dde
|
||||
0df80e130e2e0e490e640e7f0e9b0eb60ed20eee0f090f250f410f5e0f7a0f960fb30fcf0fec1009102610431061107e109b10b910d710f511131131114f116d
|
||||
118c11aa11c911e81207122612451264128412a312c312e31303132313431363138313a413c513e5140614271449146a148b14ad14ce14f01512153415561578
|
||||
159b15bd15e0160316261649166c168f16b216d616fa171d17411765178917ae17d217f7181b18401865188a18af18d518fa19201945196b199119b719dd1a04
|
||||
1a2a1a511a771a9e1ac51aec1b141b3b1b631b8a1bb21bda1c021c2a1c521c7b1ca31ccc1cf51d1e1d471d701d991dc31dec1e161e401e6a1e941ebe1ee91f13
|
||||
1f3e1f691f941fbf1fea20152041206c209820c420f0211c2148217521a121ce21fb22272255228222af22dd230a23382366239423c223f0241f244d247c24ab
|
||||
24da250925382568259725c725f726272657268726b726e827182749277a27ab27dc280d283f287128a228d429062938296b299d29d02a022a352a682a9b2acf
|
||||
2b022b362b692b9d2bd12c052c392c6e2ca22cd72d0c2d412d762dab2de12e162e4c2e822eb72eee2f242f5a2f912fc72ffe3035306c30a430db3112314a3182
|
||||
31ba31f2322a3263329b32d4330d3346337f33b833f1342b3465349e34d83513354d358735c235fd3637367236ae36e937243760379c37d738143850388c38c8
|
||||
39053942397f39bc39f93a363a743ab23aef3b2d3b6b3baa3be83c273c653ca43ce33d223d613da13de03e203e603ea03ee03f213f613fa23fe24023406440a6
|
||||
40e74129416a41ac41ee4230427242b542f7433a437d43c044034447448a44ce45124555459a45de4622466746ab46f04735477b47c04805484b489148d7491d
|
||||
496349a949f04a374a7d4ac44b0c4b534b9a4be24c2a4c724cba4d024d4a4d934ddc4e254e6e4eb74f004f494f934fdd5027507150bb51065150519b51e65231
|
||||
527c52c75313535f53aa53f65442548f54db5528557555c2560f565c56a956f75744579257e0582f587d58cb591a596959b85a075a565aa65af55b455b955be5
|
||||
5c355c865cd65d275d785dc95e1a5e6c5ebd5f0f5f615fb36005605760aa60fc614f61a261f56249629c62f06343639763eb6440649464e9653d659265e7663d
|
||||
669266e8673d679367e9683f689668ec6943699a69f16a486a9f6af76b4f6ba76bff6c576caf6d086d606db96e126e6b6ec46f1e6f786fd1702b708670e0713a
|
||||
719571f0724b72a67301735d73b87414747074cc7528758575e1763e769b76f8775677b37811786e78cc792a798979e77a467aa57b047b637bc27c217c817ce1
|
||||
7d417da17e017e627ec27f237f847fe5804780a8810a816b81cd8230829282f4835783ba841d848084e3854785ab860e867286d7873b879f8804886988ce8933
|
||||
899989fe8a648aca8b308b968bfc8c638cca8d318d988dff8e668ece8f368f9e9006906e90d6913f91a89211927a92e3934d93b69420948a94f4955f95c99634
|
||||
969f970a977597e0984c98b89924999099fc9a689ad59b429baf9c1c9c899cf79d649dd29e409eae9f1d9f8b9ffaa069a0d8a147a1b6a226a296a306a376a3e6
|
||||
a456a4c7a538a5a9a61aa68ba6fda76ea7e0a852a8c4a937a9a9aa1caa8fab02ab75abe9ac5cacd0ad44adb8ae2daea1af16af8bb000b075b0eab160b1d6b24b
|
||||
b2c2b338b3aeb425b49cb513b58ab601b679b6f0b768b7e0b859b8d1b94ab9c2ba3bbab5bb2ebba7bc21bc9bbd15bd8fbe0abe84beffbf7abff5c070c0ecc167
|
||||
c1e3c25fc2dbc358c3d4c451c4cec54bc5c8c646c6c3c741c7bfc83dc8bcc93ac9b9ca38cab7cb36cbb6cc35ccb5cd35cdb5ce36ceb6cf37cfb8d039d0bad13c
|
||||
d1bed23fd2c1d344d3c6d449d4cbd54ed5d1d655d6d8d75cd7e0d864d8e8d96cd9f1da76dafbdb80dc05dc8add10dd96de1cdea2df29dfafe036e0bde144e1cc
|
||||
e253e2dbe363e3ebe473e4fce584e60de696e71fe7a9e832e8bce946e9d0ea5beae5eb70ebfbec86ed11ed9cee28eeb4ef40efccf058f0e5f172f1fff28cf319
|
||||
f3a7f434f4c2f550f5def66df6fbf78af819f8a8f938f9c7fa57fae7fb77fc07fc98fd29fdbafe4bfedcff6dffffffdb00430005040404040305040404060505
|
||||
06080d0808070708100b0c090d131014131210121214171d1914161c1612121a231a1c1e1f212121141924272420261d202120ffdb0043010506060807080f08
|
||||
080f201512152020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020ffc2001108008500
|
||||
c803011100021101031101ffc4001c0000010501010100000000000000000000020001030405060708ffc4001a01000301010101000000000000000000000102
|
||||
0300040506ffda000c03010002100310000001cef90fb09a72952d6a75cbea4d4e63c8fb5e46b47b7ac13eefbbc600d1ab0076056ce0100416acafcf72f764c5
|
||||
b2c88cb7775e3dda7264f5754ed3360db03601aa1102d6c81bcfca398411c56ce32d9d4101c847bb834e8bbcea88916beadd3e3db6907534b4989384bd3c6b57
|
||||
b1dcfd09846af1ab3660044e582d96ce0b8c977982f6f3d37b5257036b9fabd4fd0f0a4558fa5e5acf3a5d3e2d4eecbbc3604bdbb9a4db0e6118183157c160f8
|
||||
ac5d5a347f15a74410725d6263afe6eaf48eef09d7074996aa0a7cea3eb705d52c4e9e4f68e0af54a83b33289472ae558e432c5c3514af8776343ced624d7f9f
|
||||
a3d1a27b2b798614fb94983918fcddbe1fd74c0e9e4d74deede75e4647644513222110b640a0d842bc5deb940e0149f97b7dab8d374f1985b3e945d821a0957c
|
||||
a376f95fa3c359e5ed1e7f5773006e8e5132b156216c81607917b60d2a294e7185b8dbd53cdbc926255e8bdaf1d6c2a6347c89f57cf3ea72e3db9b690fbf799d
|
||||
368cdd95ca265121608e056f34e8b516a62837675ea796fa5e67a4b9fa6ca6f41fa7f91e7bccf5f3d9f03ba16d27c2d5312d0b197b291d645264e9d652ba065a
|
||||
6af66b2a894f30e8e9c12dae94b894e8b92d67cef4adcab6267b3f67e7208d2bad7396f84dabbcb61277424f49d5a2d869a74074899200d18311192691ea47a9
|
||||
5453a1e6b75ca97edcd61e3ccf93ee36c8e00d09d1b4e85a117571eaa248c8656ab2e55467d044da33a326916bd36d293cf3aef21e9001471538fc3e857d5061
|
||||
015379d829436c2eff003e769c84390442da26d20d608ead92cbeb20a9b20c4ac919908ccf21e7fbce529d6557a79bb6bf0d20451dcace54ca995321c81d9308
|
||||
b1a28d9aadaef3d1759f31024aee19d779d799ee994ad49e7d274a9372b6996f64be8b7535802719f013a225866d9d73111632309e82d93054684df84e1f55ca
|
||||
46c80c88a8b2d8cb315972332915b886da9914b6cfb2c61c63c50cc0be311780b366c74acd95ca8b21e05945d642b3653c0b006d19138d710ce8c810c6224766
|
||||
0483163097af9b9971a73d2e08a8b2cb84a01b239078160632d85b44c076b2bae4d9958098f173833572f51cff00ffc400271000020202020005050101000000
|
||||
0000000102000304110512101314152120222330310632ffda0008010100010502af2d5695cb5d26526ce5d7af51597af22a09996abbf1aea178c707337fa6eb
|
||||
7a0b73ea42f9f6bc2db338bab55388bc6e341c7e34f6fc79edb8f3dab167b56346e130ccf65c5598bc7d78d67e9e62deb429d9dcdcabee7c44eb5b41e0cea82f
|
||||
e5f168193fe8ec2389cff5b87fb39bbbf3d66769b982bdb2291aa8c1e19950bb1eea4d76389c2e6fa4cf0db1fa9ce9390b7cce401d0ed17e4f169b71f00c1e0c
|
||||
36398c6eb1c7c3ec1e173c65e16e6ff4e5375aafa6c1779b15a24e213f10fa732916d194a68b9da70f9de93391c3afe9cf7d56ca1a5b88ad2ca1e995640338f4
|
||||
e947d2dfcff418718cdce073fd462fe9cfb3ee8cc00bed16b5184ad70b854bea84f553b09dd67710b4cda45d466d268c99c6659c4cda6e4b2bee27713b4dcd89
|
||||
b13719beccdb8b65f9c105b9165cd8f474984377dcdf954f835408b0148d7f90afcbdd64a33b2ae4e4b1fb5c38d768386b8cc3c6cfc756f702455c896ae96ea4
|
||||
6a7c18287f36c53d2c3d71f27cc46d59735552a0dce3ff00e06364315c6b84f4f6c366e7c464ada3a56b2fed282cb120eb0d9d51722d8320c39680faca8c3954
|
||||
439b54f5f5cb6faecacd8b3ad6d0e35261c1130686088ee816c46f0dcdc0e561b0c2e67dd2f6b02d64ec749f6cd88f5abcb53cbf1dee323b0eb9ca557334b4e4
|
||||
7508c138dfe7c19d04d389b84fcf710dc04acb5a3a58a2d606b1c960ac1cb7193de38b9ef5c64f7bc09ef58f0729483ef16caf90e46d38eb94cb563574c2cb37
|
||||
3e3c35353460b277967f31f1bcf643522e552f90130eae9e8717630f1c4f4f8f3c8a27934cf2699e5d027998c91b35046cdc833d466f9899d5c5bd1e6c78ee6e
|
||||
6ccdec16962069d6043111a00c225b6082f58b6033737a9e609e66e7e49f93c3e2796860362c195608996a63d8e5aab3ecdf81f99a86a0604eb06a09a9d60dac
|
||||
5bf5058ad3e3c373b426769b9b3e0c81a75759e75cb036fe9fecd6a0fa089f316c6116f9d819a9a3e3b80cdc30995b180fd0209a83e8d4226a76222d8606dc33
|
||||
535e061319a7ffc40029110002020103030402020300000000000000010211031012131420210430314122324051425271ffda0008010301013f01e75b459059
|
||||
50b244e58d8e71334d367a56a8f54ed32bd9c58b77c8b11b628b123d5cabc1063f5733abc875990eb6675923ac97f473a7f4473a5f44b2a6bdac3e116596633d
|
||||
4cae4458f48c1cbe05e95ff90a38e3f08cb15fb445dd6597d88b2cb3e206577310f4f4f3db224a8911ff0051c6bb6cb2cb2cb225963647cb32ba88c43d22c8cb
|
||||
7c2c648fdd595d97a5e965912d9bcdc6247a97f88fb704e9d13f0c9331ce9d0c631f7c74a2a8c534677e6bba24ff002858c6467ba23efbd13132cbbf04511c5c
|
||||
96ce94e94dacd8cd8c51313fa322a6331ca98fbdca84c4597645511661f102f48e46998da912499e2bc11b26ace338f56cdcc7214af4cdfa98dfe3aa1310b241
|
||||
7d9cd0fece7c7fd8b1d1ff0004e421c91bc721b1f91c46b47a58dd884c421116bec9414be096392d28a1c14858d0e28a4645e0b1fb5626290d9e4de5c5fc95ae
|
||||
e1e48af937c5fd925e28716b4a36336338d9c6718e2977d965a290c64e7b476fe48cf69d448ea247348e591c9239246f917236c99c7238cd838b2bb28a1bd18f
|
||||
4a286b4dc5eb456946d28a36238cd86c66dd1fbf5a5695a228da5e8fddb2fd9bfe4fffc400271100020201030305010101010000000000000102110312132110
|
||||
3031042022415132714061ffda0008010201013f019fa66e64bd3325e9e42f4f33624912c523d2637147ac5f230aa5da9cabc0db63b12ae9815cac92349b66d2
|
||||
364d9364a92fb256fc9cf6a4ad92545144f846055024210e5a4d6df81636fcb3f87a5f7631268d2513e59055148911e993f48cac4ccb1b565f6a86254868d232
|
||||
ae5d18844910e1d084648e8976e86b8a1e2fc1c4998d7cafa3285d26ab921cab1232c3547b715cf4b387e4c985a22bd9432460953a174cd0d2fb504328aa272e
|
||||
192969e0dc1e435a3711ad0e478766376ba648ea4557bd2b1aa20b828aa2465f06456c6868960543e04acc78235726648417831c8dc46ea2528485a0f81a50a0
|
||||
38d74c5fd13fe855d19465f234c7063848795b3fd383511ff492ff00d28a170596722b39341b6cd2fecd2726a66b3273e08b6852470596597d71792bd898bd96
|
||||
7c075fa3a18d53e95eca349a19a24478628d9b4cdb34234c4f81aa3fa5c054fc15d1f5a1a348e2fa210a3621ad46cc4d98fe9b31fd3661fa6d43f4d981b58cdb
|
||||
c68bc68de88f39ba9fd8a66b2d76132cd46a2c691a0aaeb451c0ebaea685919b86b1648fd8e4bebda99e7ad96793495dab351657bafdb7d290e3daa1aff81c7b
|
||||
147fffc40037100001020304080305090100000000000001000203112110223132041213203041517105618123339192a134404450627282b1d1e1ffda000801
|
||||
0100063f02ca665652b036628de46ab1512478545576b1574ea8b5bf3599164591645902c817bb0aeb648b982a78520713bbf4dc9b96799f24760c97741e7373
|
||||
e2b61f4aee3021e75dc737a84e6bacd471f671388544f2a6e39dd0296eed00b263109b337db43c329d10733357a96877533de2d4e866c13371f42811c222d98c
|
||||
100534746efed982d0c71bcda70836caad935426cb9a975aee636b9a5399635d3ba6850703c09a701c954ad56299c56b72689a23a53708aa2f72f6508a94766a
|
||||
29b44d652a8d280ab82a43430015ec6dd6d6a5ae76ccd55682d73bcd4f665642b2158aaaab66a90fe8a90dc7b291d0dcab064b0540aac0b2a92aacca86c9b5f5
|
||||
1c955640561257627c56a8a96a93db3dcc563662b321227d148cfe3b955564c750ba85fed0aeaa8d3eaae43791d94e2682f1e6da2d6911e4f0b5ccbd13a2c8c8
|
||||
d2da1b3c9557557184a996482241527c6d4eed2bed7f0615efe21ed08afc43bb4357745d31dfc153c374b3e8a70fc0e2cfd02bbe06ff009d4a1781fab9ca7a4b
|
||||
2143fd2caabbae7f7bcbbfb5592af00a9c58a18cef5283585a02019a4ecc04369126eea1722b92e4b95b8aff008aec37395c820775388dd76f4064af35d0fb85
|
||||
4703c19852598ac4aa2a99aad15372eae4ab6cc2bb10faabc26ab4536b95ec787457adc38375df70afe49fffc400281000030001030304020301010000000000
|
||||
00011121314161105171208191a1b1c1d1e1f030f1ffda0008010100013f21350b67f11946af629e589cb0d081f4acc914840cd036363f53549a37b9ed0742d9
|
||||
e0c759bac6d3c1a175adf8e8a2b47c1d90e30dbb3e0ffc01cc35bf88fd043766e9b0d8dfaf4e0e06f8285d2b3f22302d122bd28bb22e479a8adb2119c3656eca
|
||||
70bd98d8fa31b2fa9b8a97ab4518af4e93ca159ccfd06f18572c323472e5c2f86216d32946c6fd7745a275309a2a63078105ee6226d8e84ea1f6b086e6658ca9
|
||||
fa0084bd6fa6af819836dcc29c2d09794e8f83225facb4fb18a242e8f52b830431e0cc3178301a55a8bff1e56c15aa16bc19a7034b0cb2acaaf96265213a2d0c
|
||||
4b69af49b2756a8eee784c5e9a5296a3928c2da0b29b753b2468b979c9f8fd0bbc4f725af495d188b04d56a1ca58b81b35f041265a68e53948ee41cc5b71a255
|
||||
884c1806301d12d87f717baee3235941b99fd0584f032aa3a450d6178d847715eb0813685b98bf73f8cc6a19914d9b197535dd94eee4516a84eb44b0ab7b12ec
|
||||
79379c8af2dab52376f68d207112435bd1de64a455e4d60909b20f818a310d01f608f7c2217bc93486bf9670508741b16bf186f59291865d19195028b2b1e9ba
|
||||
c861b0740b3ca1bf94bb9a7b78b1cdae105ac41b6d77625c15ba34a8636868a3a411455e4677e84dbe45802a1acd72317f627da8e1468ecf71e97eaef23cab16
|
||||
a269e4b5ca9cdb007a0978359f935135b27ec5e169921e40370a497217ba29065ca94d6b50e74dec7414cd062fd435ea9e4d29a0ccd42d7036af2356ccbe25c0
|
||||
e32cb6fd0423aff8f8124c27a7bc099f426bf7326a2a5b90abadbeed3f826c2e025f26507ee37c96db35eff900ac4d5b1c1d092d887aa38b185387ba1a771ee6
|
||||
a9fd1859181558e893171b3a256b125772824cad36ce61a1af8a33dd5ec7f9412fb7d1e1f92aad7e4e27b5185f14a1f7d21e4c1e95271feb22ef8f627956cc57
|
||||
b929ca409cb0f28bd4c8f587d0471911453f229334e480d8c6c1c9c80d4744e2860778d0f413e469968124da57b8db6a36211f7583631db23169f03b81c8c98a
|
||||
3268522b794f27784bb191289b1da44190c5f5cd121a73237664482920cfa0d2e8347a1c827dce00dc23d6e4904c793429204fa3a15137b928d223c1cb8878c3
|
||||
a375a31a87ca141f507eb2b746ba309485d48592040c2d0be9d88431a8c21878e48fffda000c030100020003000000106b0631a8f6e44e0c76cfc6a197c34b9c
|
||||
d5a5e0fb54038412dabf4d200da8ebcf66ea2642070e2ecb9e6ef3daaf46c64184b704900e32fd4ed133cd5d6d80b812412cbe809ae198e77100949233ff002e
|
||||
20ddb23398c9b0917cb5da4fe182bd6b94f1d5ab100351287c7902e5b68139144dd8dc3e3dfbc7e8199d0098cd819fcd9cdd48ab2249280e782fc18ff9527a96
|
||||
29a4292210f465cc52715aba0ea39c6ebfffc400271101010100020202010304030000000000010011213110415161207191f03081b1d1c1e1f1ffda00080103
|
||||
01013f104389cc21c966bb8b6ddacdbb30e6e1760317682083f226bd220c3823aa4ead38b03fa0ff009f083a09fabc1fa37d65a708bdc374189867bb39820fcb
|
||||
21cfe2c672e5900fd5f13966f2139790bd8fdce02e184441e37f003e0359e01702e4ce9c8e66d0fae3c5cd93bee548c266afa7fcda31820ba967f002116b1cae
|
||||
5c401e28190d75f05360df682ce5bdd2504f71e0b2ccdf0d8847cc03a472e6757b9b1c1fa7efff0050b2596db9974cd62e4b9b74dc65294b2db6c30ca1b2cbe4
|
||||
4c0b7112c96d89e3708f64e79653ee52cb2cb6db107cc11db166992afecff3f7956a1babebb1ec9336f6adf2571cf4ca665996d806b6c693e2316f81e27b2e27
|
||||
df3fbc4610c28c4625c382f62678cfce4c08632d91c7814eed516ba5b08788d679e0e667cc46407d2c3d6ec246905c6cb4d5847521f5690c0507cc07b9c81663
|
||||
a10cb0624b3af0294a224575e37773bee79e463d486a41f57c39f8eed92b64cb2db6cbe46123ede5053211d42e98e82dac664b938bb865bb0e522a771236dfe3
|
||||
fead3ff1ff0057f2e7c1ab5768ce7af1b16c30afb453e47ac1acd36777399d38253d4bfa95f536c296deaf84c27647cdb21c42ecf036186356bc0bb2b94e4891
|
||||
ebc0697dac3259b6885e00f86acbddadf56d90109ee594b2ccccc925996dbe0b3c02d41f3620b84060782ca6d999667c659e062134f231110c2b65999966dfe8
|
||||
0c30dbe06186dbffc4002511010101000203000104020300000000000100111031214151a120618191b1d1c1e1f1ffda0008010201013f10eede090f86cde188
|
||||
f522993dcc8e692bb9c16df12cf19659270fe33c87715e178409e6daf81ff5c06bb6036d03d36becfb6c7460fbac5870f1965924927058274c709b8fdff88c61
|
||||
1f6ba664e8d58599e72cb2c9249b4f3fa20e05fd99fdff00d42110cf08009c5e3fd91b2db780b2cb2c9261f1c26cce08e118897cf01c3727e45e2d9cf32a1e9b
|
||||
620820b2cb2c9274842e92463c30bf86e144125a31eac10e2ebbb20fb11041671964969772ed7a45b89642fde18390e9790f00bce9d30701071965925dec2d3d
|
||||
465aca26c40fe5261b2e15a0dc9f00b098237252c6218638c25584cb1b133bb116c3a7db6794dff3e61acb6789312be7b00796cbdd9f6f1c09e586e9d41eac5b
|
||||
3d65e1e5115347867833a3f69cee5f0caf575accfd96badbcbb82d59d3b0e6c1c3f90fa12f9914a60dd1bc935ead1eecedfb26b5f6bc026df0c2b82a19698585
|
||||
90e5919a5911213de98888076cabb603a8b599fd5e4e136193931d3abc7026143f46c91a93c197af01f6ff008ff71ee7f25876fe4b07b3fbbc38fe565ece5384
|
||||
895f778982cfc5ea58c2f58bcac1e0bacf51f68fb22d8c3ff75fbbf982f7f9b56afe63af927d30fa9ec43f4dfbad271e999725d8b320d9cee17636a7ee0bdcff
|
||||
0036bd4bedc6c6a71df05e9c9d46f623ea2e4c16c84478eb852d8ec6c42a9da7e251df1bfa326ce0642f76271645bc1e235c78b3e43f6268cb4a489c6fe878ce
|
||||
05267031c8f1bceb0c360c1271b6f19044ffc400261001000202010305000203000000000000010011213141516171108191a1b1c1e1d1f0f1ffda0008010100
|
||||
013f106a73081d65206d754f9e528f6a31aa8a2417d25049ef0c54150cf760d9b8d3da69f12ff497a1083061b1dc4ea2a1babdc7d60e1e5f78b969955bb8c17c
|
||||
acb2b91f134fd21e710016fb200a3e385383f69b3f8517b53f647b0a1e84b0b8b059901a9b0b2e817ea0bea420f58a4c3823486dfc8f7b6b65611c16e282ad41
|
||||
19940776a3fb25a306218e8a7955411a7ba1a06ca6d7f04d18d4e0852ac688bd07d443d2e7a08075ac0f2d1f8ca2cd46d45cb20eb0ea2cf8c67f89627917bebe
|
||||
aa0b187042585d2186a5dc2c05e652c28971c152e3858611136465f4192199999832e0c2b9ac4c7a49ceabfbb950b8fb6287494617479963f2574800f68ac954
|
||||
09783a605f03907112399429a85b12565a615e6ce630c36f41a95a8b2e5c194a3a6fd414000f55b9bb0b1981613acc65db29de86f67f83efd16c4a48220950f5
|
||||
a554099b5972455371298c1781e18004036730bc1bf55972e5cb9443a7de56098841cc5d79911e91f4e6acc5aa03f90fe7a54a8461182d89b65ce86c88ac30c8
|
||||
962710ac42a5729c3e8692e2e22cbf51a0b0ccd9542cadc0c4036cca42a44a7c4bcd6c1b7eae5460cf6b81f16f79c28c18870d546bd23acdccb13307c59ab942
|
||||
42df18cb2f1820de29e6186122303dc09c65bc23d6947183282b2f451084e2a5f8a9f76e2b35d6ae38b4d5e04afce655b61ae513f0fd967e5575d453f631516f
|
||||
a06e2ba8e405e298e834be60758d11b85310e3710b2a106d48a8b5ce5091017820850bd584af37a038250c9e59596f726ea68f8ce66dd656058a7497b36d406e
|
||||
1a45a95728226bd721cc1c415d8a5825059302b022e3761d0387bc52e2594ab58b51531a9530f78d4a08232c15fb91101744b218b788540f7a181e68f50f961b
|
||||
bf662621fad162bb827544c5ae521f1a1a5ea092d4a4ea471431d259e9f80cc415172d6a16e501a5973f3a992b7804b4546570a2e2cfd90ce1e4e2c4bf8b9a97
|
||||
8aadf785dd6bb12985f45e21535f2c3c08f0c7758f94e2bf12ed50ee131c55d897e308bb1fd4b6fcf65528176bde07af9a5e501ed0ce068e122ceed9f83679d4
|
||||
6b0484094ecde6215300ad41d7fac4c933b6781b8a42f217e046258aacdbe496e20bb1b672d388e62054001cda95de51511558e3748665ca8cf4581969ced4f6
|
||||
96590f78a6d5792e3fc119468afa89b9d8b648bb674da00d95e0b8ab869188f4bd40fdf68003e61d3460a2b59a8ad18ca6fd930ccbdc7e88a63c95fccb74bc0b
|
||||
f6156475c2cd48991b175b90edbadb71e0081581c15742794109341b05e5d07b0cbe97b5ff00b595ed53722e09798bc63f0090b180f6250d08e82cb0643ce670
|
||||
d18cb7c29cc23b12b657de39758e344c8103a071e61775a1ff00d599a4401dd36e4ed070095f113350f1275bf92a9156bfe728b1ef7fc41f47b7fa4ca01ef7fe
|
||||
100cfe12ff006da75f88970b4ea2fc852a4e88fb86a7466d622be521be5ab98523ac7f0b21648ed4e52a591cd2d805f9964ca8e0b83e503d79966f50dc0bd26f
|
||||
a83d1a96885ec6f8978b8b16912248d6b1122abc434dfd095a498e8c54ee864a3bd457b116f09ed2fb21c54ab168350ff12b27cebea53d81051043975300bb51
|
||||
999345092c84f28330c75876435eb8260bc01e605c2e183044b8a60458c572e63c13da353dc403097c4a76107883c1021a22e6611790992753c896e44cf5aba9
|
||||
1fa216094904236f31846e62ba865704e489c910b51299c4af5348458c449885f246505b4251fc881b090738778210c41070d44bc45e6386483d677263151c16
|
||||
41ed0eb10c36466a0ed1259166a013094d88a6c83e210c4b2c6e12e67040992585826462a3ac5a84b89527ffd9}}}}}
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s2\ql\nowidctlpar\hyphpar0\sb200\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs32\alang1081\ab\loch\f4\fs32\lang1033{\listtext\pard\plain }\ilvl1\ls1 \li936\ri0\lin936\rin0\fi-576\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{{\*\bkmkstart __DdeLink__109_736781840}\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8{\*\bkmkend __DdeLink__109_736781840}
|
||||
Maecenas non lorem quis tellus placerat varius. Nulla facilisi. Aenean congue fringilla justo ut aliquam. Mauris id ex erat. Nunc vulputate neque vitae justo facilisis, non condimentum ante sagittis. Morbi viverra semper lorem nec molestie. Maecenas tincidunt est efficitur ligula euismod, sit amet ornare est vulputate.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
In non mauris justo. Duis vehicula mi vel mi pretium, a viverra erat efficitur. Cras aliquam est ac eros varius, id iaculis dui auctor. Duis pretium neque ligula, et pulvinar mi placerat et. Nulla nec nunc sit amet nunc posuere vestibulum. Ut id neque eget tortor mattis tristique. Donec ante est, blandit sit amet tristique vel, lacinia pulvinar arcu. Pellentesque scelerisque fermentum erat, id posuere justo pulvinar ut. Cras id eros sed enim aliquam lobortis. Sed lobortis nisl ut eros efficitur tincidunt. Cras justo mi, porttitor quis mattis vel, ultricies ut purus. Ut facilisis et lacus eu cursus.}{\rtlch \ltrch\loch
|
||||
In eleifend velit vitae libero sollicitudin euismod. }
|
||||
\par \shpwr2\shpwrk3\shpbypara\shpbyignore\shptop0\shpbxcolumn\shpbxignore\shpleft2819\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
{\*\flymaincnt5\flyanchor0\flycntnt}{\shp{\*\shpinst\shpwr2\shpwrk3\shpbypara\shpbyignore\shptop0\shpbottom2660\shpbxcolumn\shpbxignore\shpleft2819\shpright6819{\sp{\sn shapeType}{\sv 75}}{\sp{\sn wzDescription}{\sv }}{\sp{\sn wzName}{\sv }}{\sp{\sn pib}{\sv {\pict\picscalex100\picscaley100\piccropl0\piccropr0\piccropt0\piccropb0\picw200\pich133\picwgoal4000\pichgoal2660\jpegblip
|
||||
ffd8ffe000104a46494600010101004800480000ffe20c584943435f50524f46494c4500010100000c484c696e6f021000006d6e74725247422058595a2007ce
|
||||
00020009000600310000616373704d5346540000000049454320735247420000000000000000000000000000f6d6000100000000d32d48502020000000000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000001163707274000001500000003364657363000001840000
|
||||
006c77747074000001f000000014626b707400000204000000147258595a00000218000000146758595a0000022c000000146258595a0000024000000014646d
|
||||
6e640000025400000070646d6464000002c400000088767565640000034c0000008676696577000003d4000000246c756d69000003f8000000146d6561730000
|
||||
040c0000002474656368000004300000000c725452430000043c0000080c675452430000043c0000080c625452430000043c0000080c7465787400000000436f
|
||||
70797269676874202863292031393938204865776c6574742d5061636b61726420436f6d70616e79000064657363000000000000001273524742204945433631
|
||||
3936362d322e31000000000000000000000012735247422049454336313936362d322e3100000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000058595a20000000000000f35100010000000116cc58595a20000000000000000000000000000000005859
|
||||
5a200000000000006fa2000038f50000039058595a2000000000000062990000b785000018da58595a2000000000000024a000000f840000b6cf646573630000
|
||||
00000000001649454320687474703a2f2f7777772e6965632e636800000000000000000000001649454320687474703a2f2f7777772e6965632e636800000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000000064657363000000000000002e4945432036313936362d
|
||||
322e312044656661756c742052474220636f6c6f7572207370616365202d207352474200000000000000000000002e4945432036313936362d322e3120446566
|
||||
61756c742052474220636f6c6f7572207370616365202d20735247420000000000000000000000000000000000000000000064657363000000000000002c5265
|
||||
666572656e63652056696577696e6720436f6e646974696f6e20696e2049454336313936362d322e3100000000000000000000002c5265666572656e63652056
|
||||
696577696e6720436f6e646974696f6e20696e2049454336313936362d322e310000000000000000000000000000000000000000000000000000766965770000
|
||||
00000013a4fe00145f2e0010cf140003edcc0004130b00035c9e0000000158595a2000000000004c09560050000000571fe76d65617300000000000000010000
|
||||
00000000000000000000000000000000028f0000000273696720000000004352542063757276000000000000040000000005000a000f00140019001e00230028
|
||||
002d00320037003b00400045004a004f00540059005e00630068006d00720077007c00810086008b00900095009a009f00a400a900ae00b200b700bc00c100c6
|
||||
00cb00d000d500db00e000e500eb00f000f600fb01010107010d01130119011f0125012b01320138013e0145014c0152015901600167016e0175017c0183018b
|
||||
0192019a01a101a901b101b901c101c901d101d901e101e901f201fa0203020c0214021d0226022f02380241024b0254025d02670271027a0284028e029802a2
|
||||
02ac02b602c102cb02d502e002eb02f50300030b03160321032d03380343034f035a03660372037e038a039603a203ae03ba03c703d303e003ec03f904060413
|
||||
0420042d043b0448045504630471047e048c049a04a804b604c404d304e104f004fe050d051c052b053a05490558056705770586059605a605b505c505d505e5
|
||||
05f6060606160627063706480659066a067b068c069d06af06c006d106e306f507070719072b073d074f076107740786079907ac07bf07d207e507f8080b081f
|
||||
08320846085a086e0882089608aa08be08d208e708fb09100925093a094f09640979098f09a409ba09cf09e509fb0a110a270a3d0a540a6a0a810a980aae0ac5
|
||||
0adc0af30b0b0b220b390b510b690b800b980bb00bc80be10bf90c120c2a0c430c5c0c750c8e0ca70cc00cd90cf30d0d0d260d400d5a0d740d8e0da90dc30dde
|
||||
0df80e130e2e0e490e640e7f0e9b0eb60ed20eee0f090f250f410f5e0f7a0f960fb30fcf0fec1009102610431061107e109b10b910d710f511131131114f116d
|
||||
118c11aa11c911e81207122612451264128412a312c312e31303132313431363138313a413c513e5140614271449146a148b14ad14ce14f01512153415561578
|
||||
159b15bd15e0160316261649166c168f16b216d616fa171d17411765178917ae17d217f7181b18401865188a18af18d518fa19201945196b199119b719dd1a04
|
||||
1a2a1a511a771a9e1ac51aec1b141b3b1b631b8a1bb21bda1c021c2a1c521c7b1ca31ccc1cf51d1e1d471d701d991dc31dec1e161e401e6a1e941ebe1ee91f13
|
||||
1f3e1f691f941fbf1fea20152041206c209820c420f0211c2148217521a121ce21fb22272255228222af22dd230a23382366239423c223f0241f244d247c24ab
|
||||
24da250925382568259725c725f726272657268726b726e827182749277a27ab27dc280d283f287128a228d429062938296b299d29d02a022a352a682a9b2acf
|
||||
2b022b362b692b9d2bd12c052c392c6e2ca22cd72d0c2d412d762dab2de12e162e4c2e822eb72eee2f242f5a2f912fc72ffe3035306c30a430db3112314a3182
|
||||
31ba31f2322a3263329b32d4330d3346337f33b833f1342b3465349e34d83513354d358735c235fd3637367236ae36e937243760379c37d738143850388c38c8
|
||||
39053942397f39bc39f93a363a743ab23aef3b2d3b6b3baa3be83c273c653ca43ce33d223d613da13de03e203e603ea03ee03f213f613fa23fe24023406440a6
|
||||
40e74129416a41ac41ee4230427242b542f7433a437d43c044034447448a44ce45124555459a45de4622466746ab46f04735477b47c04805484b489148d7491d
|
||||
496349a949f04a374a7d4ac44b0c4b534b9a4be24c2a4c724cba4d024d4a4d934ddc4e254e6e4eb74f004f494f934fdd5027507150bb51065150519b51e65231
|
||||
527c52c75313535f53aa53f65442548f54db5528557555c2560f565c56a956f75744579257e0582f587d58cb591a596959b85a075a565aa65af55b455b955be5
|
||||
5c355c865cd65d275d785dc95e1a5e6c5ebd5f0f5f615fb36005605760aa60fc614f61a261f56249629c62f06343639763eb6440649464e9653d659265e7663d
|
||||
669266e8673d679367e9683f689668ec6943699a69f16a486a9f6af76b4f6ba76bff6c576caf6d086d606db96e126e6b6ec46f1e6f786fd1702b708670e0713a
|
||||
719571f0724b72a67301735d73b87414747074cc7528758575e1763e769b76f8775677b37811786e78cc792a798979e77a467aa57b047b637bc27c217c817ce1
|
||||
7d417da17e017e627ec27f237f847fe5804780a8810a816b81cd8230829282f4835783ba841d848084e3854785ab860e867286d7873b879f8804886988ce8933
|
||||
899989fe8a648aca8b308b968bfc8c638cca8d318d988dff8e668ece8f368f9e9006906e90d6913f91a89211927a92e3934d93b69420948a94f4955f95c99634
|
||||
969f970a977597e0984c98b89924999099fc9a689ad59b429baf9c1c9c899cf79d649dd29e409eae9f1d9f8b9ffaa069a0d8a147a1b6a226a296a306a376a3e6
|
||||
a456a4c7a538a5a9a61aa68ba6fda76ea7e0a852a8c4a937a9a9aa1caa8fab02ab75abe9ac5cacd0ad44adb8ae2daea1af16af8bb000b075b0eab160b1d6b24b
|
||||
b2c2b338b3aeb425b49cb513b58ab601b679b6f0b768b7e0b859b8d1b94ab9c2ba3bbab5bb2ebba7bc21bc9bbd15bd8fbe0abe84beffbf7abff5c070c0ecc167
|
||||
c1e3c25fc2dbc358c3d4c451c4cec54bc5c8c646c6c3c741c7bfc83dc8bcc93ac9b9ca38cab7cb36cbb6cc35ccb5cd35cdb5ce36ceb6cf37cfb8d039d0bad13c
|
||||
d1bed23fd2c1d344d3c6d449d4cbd54ed5d1d655d6d8d75cd7e0d864d8e8d96cd9f1da76dafbdb80dc05dc8add10dd96de1cdea2df29dfafe036e0bde144e1cc
|
||||
e253e2dbe363e3ebe473e4fce584e60de696e71fe7a9e832e8bce946e9d0ea5beae5eb70ebfbec86ed11ed9cee28eeb4ef40efccf058f0e5f172f1fff28cf319
|
||||
f3a7f434f4c2f550f5def66df6fbf78af819f8a8f938f9c7fa57fae7fb77fc07fc98fd29fdbafe4bfedcff6dffffffdb00430005040404040305040404060505
|
||||
06080d0808070708100b0c090d131014131210121214171d1914161c1612121a231a1c1e1f212121141924272420261d202120ffdb0043010506060807080f08
|
||||
080f201512152020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020ffc2001108008500
|
||||
c803011100021101031101ffc4001c0000010501010100000000000000000000020001030405060708ffc4001a01000301010101000000000000000000000102
|
||||
0300040506ffda000c03010002100310000001cef90fb09a72952d6a75cbea4d4e63c8fb5e46b47b7ac13eefbbc600d1ab0076056ce0100416acafcf72f764c5
|
||||
b2c88cb7775e3dda7264f5754ed3360db03601aa1102d6c81bcfca398411c56ce32d9d4101c847bb834e8bbcea88916beadd3e3db6907534b4989384bd3c6b57
|
||||
b1dcfd09846af1ab3660044e582d96ce0b8c977982f6f3d37b5257036b9fabd4fd0f0a4558fa5e5acf3a5d3e2d4eecbbc3604bdbb9a4db0e6118183157c160f8
|
||||
ac5d5a347f15a74410725d6263afe6eaf48eef09d7074996aa0a7cea3eb705d52c4e9e4f68e0af54a83b33289472ae558e432c5c3514af8776343ced624d7f9f
|
||||
a3d1a27b2b798614fb94983918fcddbe1fd74c0e9e4d74deede75e4647644513222110b640a0d842bc5deb940e0149f97b7dab8d374f1985b3e945d821a0957c
|
||||
a376f95fa3c359e5ed1e7f5773006e8e5132b156216c81607917b60d2a294e7185b8dbd53cdbc926255e8bdaf1d6c2a6347c89f57cf3ea72e3db9b690fbf799d
|
||||
368cdd95ca265121608e056f34e8b516a62837675ea796fa5e67a4b9fa6ca6f41fa7f91e7bccf5f3d9f03ba16d27c2d5312d0b197b291d645264e9d652ba065a
|
||||
6af66b2a894f30e8e9c12dae94b894e8b92d67cef4adcab6267b3f67e7208d2bad7396f84dabbcb61277424f49d5a2d869a74074899200d18311192691ea47a9
|
||||
5453a1e6b75ca97edcd61e3ccf93ee36c8e00d09d1b4e85a117571eaa248c8656ab2e55467d044da33a326916bd36d293cf3aef21e9001471538fc3e857d5061
|
||||
015379d829436c2eff003e769c84390442da26d20d608ead92cbeb20a9b20c4ac919908ccf21e7fbce529d6557a79bb6bf0d20451dcace54ca995321c81d9308
|
||||
b1a28d9aadaef3d1759f31024aee19d779d799ee994ad49e7d274a9372b6996f64be8b7535802719f013a225866d9d73111632309e82d93054684df84e1f55ca
|
||||
46c80c88a8b2d8cb315972332915b886da9914b6cfb2c61c63c50cc0be311780b366c74acd95ca8b21e05945d642b3653c0b006d19138d710ce8c810c6224766
|
||||
0483163097af9b9971a73d2e08a8b2cb84a01b239078160632d85b44c076b2bae4d9958098f173833572f51cff00ffc400271000020202020005050101000000
|
||||
0000000102000304110512101314152120222330310632ffda0008010100010502af2d5695cb5d26526ce5d7af51597af22a09996abbf1aea178c707337fa6eb
|
||||
7a0b73ea42f9f6bc2db338bab55388bc6e341c7e34f6fc79edb8f3dab167b56346e130ccf65c5598bc7d78d67e9e62deb429d9dcdcabee7c44eb5b41e0cea82f
|
||||
e5f168193fe8ec2389cff5b87fb39bbbf3d66769b982bdb2291aa8c1e19950bb1eea4d76389c2e6fa4cf0db1fa9ce9390b7cce401d0ed17e4f169b71f00c1e0c
|
||||
36398c6eb1c7c3ec1e173c65e16e6ff4e5375aafa6c1779b15a24e213f10fa732916d194a68b9da70f9de93391c3afe9cf7d56ca1a5b88ad2ca1e995640338f4
|
||||
e947d2dfcff418718cdce073fd462fe9cfb3ee8cc00bed16b5184ad70b854bea84f553b09dd67710b4cda45d466d268c99c6659c4cda6e4b2bee27713b4dcd89
|
||||
b13719beccdb8b65f9c105b9165cd8f474984377dcdf954f835408b0148d7f90afcbdd64a33b2ae4e4b1fb5c38d768386b8cc3c6cfc756f702455c896ae96ea4
|
||||
6a7c18287f36c53d2c3d71f27cc46d59735552a0dce3ff00e06364315c6b84f4f6c366e7c464ada3a56b2fed282cb120eb0d9d51722d8320c39680faca8c3954
|
||||
439b54f5f5cb6faecacd8b3ad6d0e35261c1130686088ee816c46f0dcdc0e561b0c2e67dd2f6b02d64ec749f6cd88f5abcb53cbf1dee323b0eb9ca557334b4e4
|
||||
7508c138dfe7c19d04d389b84fcf710dc04acb5a3a58a2d606b1c960ac1cb7193de38b9ef5c64f7bc09ef58f0729483ef16caf90e46d38eb94cb563574c2cb37
|
||||
3e3c35353460b277967f31f1bcf643522e552f90130eae9e8717630f1c4f4f8f3c8a27934cf2699e5d027998c91b35046cdc833d466f9899d5c5bd1e6c78ee6e
|
||||
6ccdec16962069d6043111a00c225b6082f58b6033737a9e609e66e7e49f93c3e2796860362c195608996a63d8e5aab3ecdf81f99a86a0604eb06a09a9d60dac
|
||||
5bf5058ad3e3c373b426769b9b3e0c81a75759e75cb036fe9fecd6a0fa089f316c6116f9d819a9a3e3b80cdc30995b180fd0209a83e8d4226a76222d8606dc33
|
||||
535e061319a7ffc40029110002020103030402020300000000000000010211031012131420210430314122324051425271ffda0008010301013f01e75b459059
|
||||
50b244e58d8e71334d367a56a8f54ed32bd9c58b77c8b11b628b123d5cabc1063f5733abc875990eb6675923ac97f473a7f4473a5f44b2a6bdac3e116596633d
|
||||
4cae4458f48c1cbe05e95ff90a38e3f08cb15fb445dd6597d88b2cb3e206577310f4f4f3db224a8911ff0051c6bb6cb2cb2cb225963647cb32ba88c43d22c8cb
|
||||
7c2c648fdd595d97a5e965912d9bcdc6247a97f88fb704e9d13f0c9331ce9d0c631f7c74a2a8c534677e6bba24ff002858c6467ba23efbd13132cbbf04511c5c
|
||||
96ce94e94dacd8cd8c51313fa322a6331ca98fbdca84c4597645511661f102f48e46998da912499e2bc11b26ace338f56cdcc7214af4cdfa98dfe3aa1310b241
|
||||
7d9cd0fece7c7fd8b1d1ff0004e421c91bc721b1f91c46b47a58dd884c421116bec9414be096392d28a1c14858d0e28a4645e0b1fb5626290d9e4de5c5fc95ae
|
||||
e1e48af937c5fd925e28716b4a36336338d9c6718e2977d965a290c64e7b476fe48cf69d448ea247348e591c9239246f917236c99c7238cd838b2bb28a1bd18f
|
||||
4a286b4dc5eb456946d28a36238cd86c66dd1fbf5a5695a228da5e8fddb2fd9bfe4fffc400271100020201030305010101010000000000000102110312132110
|
||||
3031042022415132714061ffda0008010201013f019fa66e64bd3325e9e42f4f33624912c523d2637147ac5f230aa5da9cabc0db63b12ae9815cac92349b66d2
|
||||
364d9364a92fb256fc9cf6a4ad92545144f846055024210e5a4d6df81636fcb3f87a5f7631268d2513e59055148911e993f48cac4ccb1b565f6a86254868d232
|
||||
ae5d18844910e1d084648e8976e86b8a1e2fc1c4998d7cafa3285d26ab921cab1232c3547b715cf4b387e4c985a22bd9432460953a174cd0d2fb504328aa272e
|
||||
192969e0dc1e435a3711ad0e478766376ba648ea4557bd2b1aa20b828aa2465f06456c6868960543e04acc78235726648417831c8dc46ea2528485a0f81a50a0
|
||||
38d74c5fd13fe855d19465f234c7063848795b3fd383511ff492ff00d28a170596722b39341b6cd2fecd2726a66b3273e08b6852470596597d71792bd898bd96
|
||||
7c075fa3a18d53e95eca349a19a24478628d9b4cdb34234c4f81aa3fa5c054fc15d1f5a1a348e2fa210a3621ad46cc4d98fe9b31fd3661fa6d43f4d981b58cdb
|
||||
c68bc68de88f39ba9fd8a66b2d76132cd46a2c691a0aaeb451c0ebaea685919b86b1648fd8e4bebda99e7ad96793495dab351657bafdb7d290e3daa1aff81c7b
|
||||
147fffc40037100001020304080305090100000000000001000203112110223132041213203041517105618123339192a134404450627282b1d1e1ffda000801
|
||||
0100063f02ca665652b036628de46ab1512478545576b1574ea8b5bf3599164591645902c817bb0aeb648b982a78520713bbf4dc9b96799f24760c97741e7373
|
||||
e2b61f4aee3021e75dc737a84e6bacd471f671388544f2a6e39dd0296eed00b263109b337db43c329d10733357a96877533de2d4e866c13371f42811c222d98c
|
||||
100534746efed982d0c71bcda70836caad935426cb9a975aee636b9a5399635d3ba6850703c09a701c954ad56299c56b72689a23a53708aa2f72f6508a94766a
|
||||
29b44d652a8d280ab82a43430015ec6dd6d6a5ae76ccd55682d73bcd4f665642b2158aaaab66a90fe8a90dc7b291d0dcab064b0540aac0b2a92aacca86c9b5f5
|
||||
1c955640561257627c56a8a96a93db3dcc563662b321227d148cfe3b955564c750ba85fed0aeaa8d3eaae43791d94e2682f1e6da2d6911e4f0b5ccbd13a2c8c8
|
||||
d2da1b3c9557557184a996482241527c6d4eed2bed7f0615efe21ed08afc43bb4357745d31dfc153c374b3e8a70fc0e2cfd02bbe06ff009d4a1781fab9ca7a4b
|
||||
2143fd2caabbae7f7bcbbfb5592af00a9c58a18cef5283585a02019a4ecc04369126eea1722b92e4b95b8aff008aec37395c820775388dd76f4064af35d0fb85
|
||||
4703c19852598ac4aa2a99aad15372eae4ab6cc2bb10faabc26ab4536b95ec787457adc38375df70afe49fffc400281000030001030304020301010000000000
|
||||
00011121314161105171208191a1b1c1d1e1f030f1ffda0008010100013f21350b67f11946af629e589cb0d081f4acc914840cd036363f53549a37b9ed0742d9
|
||||
e0c759bac6d3c1a175adf8e8a2b47c1d90e30dbb3e0ffc01cc35bf88fd043766e9b0d8dfaf4e0e06f8285d2b3f22302d122bd28bb22e479a8adb2119c3656eca
|
||||
70bd98d8fa31b2fa9b8a97ab4518af4e93ca159ccfd06f18572c323472e5c2f86216d32946c6fd7745a275309a2a63078105ee6226d8e84ea1f6b086e6658ca9
|
||||
fa0084bd6fa6af819836dcc29c2d09794e8f83225facb4fb18a242e8f52b830431e0cc3178301a55a8bff1e56c15aa16bc19a7034b0cb2acaaf96265213a2d0c
|
||||
4b69af49b2756a8eee784c5e9a5296a3928c2da0b29b753b2468b979c9f8fd0bbc4f725af495d188b04d56a1ca58b81b35f041265a68e53948ee41cc5b71a255
|
||||
884c1806301d12d87f717baee3235941b99fd0584f032aa3a450d6178d847715eb0813685b98bf73f8cc6a19914d9b197535dd94eee4516a84eb44b0ab7b12ec
|
||||
79379c8af2dab52376f68d207112435bd1de64a455e4d60909b20f818a310d01f608f7c2217bc93486bf9670508741b16bf186f59291865d19195028b2b1e9ba
|
||||
c861b0740b3ca1bf94bb9a7b78b1cdae105ac41b6d77625c15ba34a8636868a3a411455e4677e84dbe45802a1acd72317f627da8e1468ecf71e97eaef23cab16
|
||||
a269e4b5ca9cdb007a0978359f935135b27ec5e169921e40370a497217ba29065ca94d6b50e74dec7414cd062fd435ea9e4d29a0ccd42d7036af2356ccbe25c0
|
||||
e32cb6fd0423aff8f8124c27a7bc099f426bf7326a2a5b90abadbeed3f826c2e025f26507ee37c96db35eff900ac4d5b1c1d092d887aa38b185387ba1a771ee6
|
||||
a9fd1859181558e893171b3a256b125772824cad36ce61a1af8a33dd5ec7f9412fb7d1e1f92aad7e4e27b5185f14a1f7d21e4c1e95271feb22ef8f627956cc57
|
||||
b929ca409cb0f28bd4c8f587d0471911453f229334e480d8c6c1c9c80d4744e2860778d0f413e469968124da57b8db6a36211f7583631db23169f03b81c8c98a
|
||||
3268522b794f27784bb191289b1da44190c5f5cd121a73237664482920cfa0d2e8347a1c827dce00dc23d6e4904c793429204fa3a15137b928d223c1cb8878c3
|
||||
a375a31a87ca141f507eb2b746ba309485d48592040c2d0be9d88431a8c21878e48fffda000c030100020003000000106b0631a8f6e44e0c76cfc6a197c34b9c
|
||||
d5a5e0fb54038412dabf4d200da8ebcf66ea2642070e2ecb9e6ef3daaf46c64184b704900e32fd4ed133cd5d6d80b812412cbe809ae198e77100949233ff002e
|
||||
20ddb23398c9b0917cb5da4fe182bd6b94f1d5ab100351287c7902e5b68139144dd8dc3e3dfbc7e8199d0098cd819fcd9cdd48ab2249280e782fc18ff9527a96
|
||||
29a4292210f465cc52715aba0ea39c6ebfffc400271101010100020202010304030000000000010011213110415161207191f03081b1d1c1e1f1ffda00080103
|
||||
01013f104389cc21c966bb8b6ddacdbb30e6e1760317682083f226bd220c3823aa4ead38b03fa0ff009f083a09fabc1fa37d65a708bdc374189867bb39820fcb
|
||||
21cfe2c672e5900fd5f13966f2139790bd8fdce02e184441e37f003e0359e01702e4ce9c8e66d0fae3c5cd93bee548c266afa7fcda31820ba967f002116b1cae
|
||||
5c401e28190d75f05360df682ce5bdd2504f71e0b2ccdf0d8847cc03a472e6757b9b1c1fa7efff0050b2596db9974cd62e4b9b74dc65294b2db6c30ca1b2cbe4
|
||||
4c0b7112c96d89e3708f64e79653ee52cb2cb6db107cc11db166992afecff3f7956a1babebb1ec9336f6adf2571cf4ca665996d806b6c693e2316f81e27b2e27
|
||||
df3fbc4610c28c4625c382f62678cfce4c08632d91c7814eed516ba5b08788d679e0e667cc46407d2c3d6ec246905c6cb4d5847521f5690c0507cc07b9c81663
|
||||
a10cb0624b3af0294a224575e37773bee79e463d486a41f57c39f8eed92b64cb2db6cbe46123ede5053211d42e98e82dac664b938bb865bb0e522a771236dfe3
|
||||
fead3ff1ff0057f2e7c1ab5768ce7af1b16c30afb453e47ac1acd36777399d38253d4bfa95f536c296deaf84c27647cdb21c42ecf036186356bc0bb2b94e4891
|
||||
ebc0697dac3259b6885e00f86acbddadf56d90109ee594b2ccccc925996dbe0b3c02d41f3620b84060782ca6d999667c659e062134f231110c2b65999966dfe8
|
||||
0c30dbe06186dbffc4002511010101000203000104020300000000000100111031214151a120618191b1d1c1e1f1ffda0008010201013f10eede090f86cde188
|
||||
f522993dcc8e692bb9c16df12cf19659270fe33c87715e178409e6daf81ff5c06bb6036d03d36becfb6c7460fbac5870f1965924927058274c709b8fdff88c61
|
||||
1f6ba664e8d58599e72cb2c9249b4f3fa20e05fd99fdff00d42110cf08009c5e3fd91b2db780b2cb2c9261f1c26cce08e118897cf01c3727e45e2d9cf32a1e9b
|
||||
620820b2cb2c9274842e92463c30bf86e144125a31eac10e2ebbb20fb11041671964969772ed7a45b89642fde18390e9790f00bce9d30701071965925dec2d3d
|
||||
465aca26c40fe5261b2e15a0dc9f00b098237252c6218638c25584cb1b133bb116c3a7db6794dff3e61acb6789312be7b00796cbdd9f6f1c09e586e9d41eac5b
|
||||
3d65e1e5115347867833a3f69cee5f0caf575accfd96badbcbb82d59d3b0e6c1c3f90fa12f9914a60dd1bc935ead1eecedfb26b5f6bc026df0c2b82a19698585
|
||||
90e5919a5911213de98888076cabb603a8b599fd5e4e136193931d3abc7026143f46c91a93c197af01f6ff008ff71ee7f25876fe4b07b3fbbc38fe565ece5384
|
||||
895f778982cfc5ea58c2f58bcac1e0bacf51f68fb22d8c3ff75fbbf982f7f9b56afe63af927d30fa9ec43f4dfbad271e999725d8b320d9cee17636a7ee0bdcff
|
||||
0036bd4bedc6c6a71df05e9c9d46f623ea2e4c16c84478eb852d8ec6c42a9da7e251df1bfa326ce0642f76271645bc1e235c78b3e43f6268cb4a489c6fe878ce
|
||||
05267031c8f1bceb0c360c1271b6f19044ffc400261001000202010305000203000000000000010011213141516171108191a1b1c1e1d1f0f1ffda0008010100
|
||||
013f106a73081d65206d754f9e528f6a31aa8a2417d25049ef0c54150cf760d9b8d3da69f12ff497a1083061b1dc4ea2a1babdc7d60e1e5f78b969955bb8c17c
|
||||
acb2b91f134fd21e710016fb200a3e385383f69b3f8517b53f647b0a1e84b0b8b059901a9b0b2e817ea0bea420f58a4c3823486dfc8f7b6b65611c16e282ad41
|
||||
19940776a3fb25a306218e8a7955411a7ba1a06ca6d7f04d18d4e0852ac688bd07d443d2e7a08075ac0f2d1f8ca2cd46d45cb20eb0ea2cf8c67f89627917bebe
|
||||
aa0b187042585d2186a5dc2c05e652c28971c152e3858611136465f4192199999832e0c2b9ac4c7a49ceabfbb950b8fb6287494617479963f2574800f68ac954
|
||||
09783a605f03907112399429a85b12565a615e6ce630c36f41a95a8b2e5c194a3a6fd414000f55b9bb0b1981613acc65db29de86f67f83efd16c4a48220950f5
|
||||
a554099b5972455371298c1781e18004036730bc1bf55972e5cb9443a7de56098841cc5d79911e91f4e6acc5aa03f90fe7a54a8461182d89b65ce86c88ac30c8
|
||||
962710ac42a5729c3e8692e2e22cbf51a0b0ccd9542cadc0c4036cca42a44a7c4bcd6c1b7eae5460cf6b81f16f79c28c18870d546bd23acdccb13307c59ab942
|
||||
42df18cb2f1820de29e6186122303dc09c65bc23d6947183282b2f451084e2a5f8a9f76e2b35d6ae38b4d5e04afce655b61ae513f0fd967e5575d453f631516f
|
||||
a06e2ba8e405e298e834be60758d11b85310e3710b2a106d48a8b5ce5091017820850bd584af37a038250c9e59596f726ea68f8ce66dd656058a7497b36d406e
|
||||
1a45a95728226bd721cc1c415d8a5825059302b022e3761d0387bc52e2594ab58b51531a9530f78d4a08232c15fb91101744b218b788540f7a181e68f50f961b
|
||||
bf662621fad162bb827544c5ae521f1a1a5ea092d4a4ea471431d259e9f80cc415172d6a16e501a5973f3a992b7804b4546570a2e2cfd90ce1e4e2c4bf8b9a97
|
||||
8aadf785dd6bb12985f45e21535f2c3c08f0c7758f94e2bf12ed50ee131c55d897e308bb1fd4b6fcf65528176bde07af9a5e501ed0ce068e122ceed9f83679d4
|
||||
6b0484094ecde6215300ad41d7fac4c933b6781b8a42f217e046258aacdbe496e20bb1b672d388e62054001cda95de51511558e3748665ca8cf4581969ced4f6
|
||||
96590f78a6d5792e3fc119468afa89b9d8b648bb674da00d95e0b8ab869188f4bd40fdf68003e61d3460a2b59a8ad18ca6fd930ccbdc7e88a63c95fccb74bc0b
|
||||
f6156475c2cd48991b175b90edbadb71e0081581c15742794109341b05e5d07b0cbe97b5ff00b595ed53722e09798bc63f0090b180f6250d08e82cb0643ce670
|
||||
d18cb7c29cc23b12b657de39758e344c8103a071e61775a1ff00d599a4401dd36e4ed070095f113350f1275bf92a9156bfe728b1ef7fc41f47b7fa4ca01ef7fe
|
||||
100cfe12ff006da75f88970b4ea2fc852a4e88fb86a7466d622be521be5ab98523ac7f0b21648ed4e52a591cd2d805f9964ca8e0b83e503d79966f50dc0bd26f
|
||||
a83d1a96885ec6f8978b8b16912248d6b1122abc434dfd095a498e8c54ee864a3bd457b116f09ed2fb21c54ab168350ff12b27cebea53d81051043975300bb51
|
||||
999345092c84f28330c75876435eb8260bc01e605c2e183044b8a60458c572e63c13da353dc403097c4a76107883c1021a22e6611790992753c896e44cf5aba9
|
||||
1fa216094904236f31846e62ba865704e489c910b51299c4af5348458c449885f246505b4251fc881b090738778210c41070d44bc45e6386483d677263151c16
|
||||
41ed0eb10c36466a0ed1259166a013094d88a6c83e210c4b2c6e12e67040992585826462a3ac5a84b89527ffd9}}}}}
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
Fusce vitae vestibulum velit. Pellentesque vulputate lectus quis pellentesque commodo. Aliquam erat volutpat. Vestibulum in egestas velit. Pellentesque fermentum nisl vitae fringilla venenatis. Etiam id mauris vitae orci maximus ultricies. Cras fringilla ipsum magna, in fringilla dui commodo a.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
Etiam vehicula luctus fermentum. In vel metus congue, pulvinar lectus vel, fermentum dui. Maecenas ante orci, egestas ut aliquet sit amet, sagittis a magna. Aliquam ante quam, pellentesque ut dignissim quis, laoreet eget est. Aliquam erat volutpat. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut ullamcorper justo sapien, in cursus libero viverra eget. Vivamus auctor imperdiet urna, at pulvinar leo posuere laoreet. Suspendisse neque nisl, fringilla at iaculis scelerisque, ornare vel dolor. Ut et pulvinar nunc. Pellentesque fringilla mollis efficitur. Nullam venenatis commodo imperdiet. Morbi velit neque, semper quis lorem quis, efficitur dignissim ipsum. Ut ac lorem sed turpis imperdiet eleifend sit amet id sapien.}
|
||||
\par \pard\plain \s1\ql\nowidctlpar\hyphpar0\sb240\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs36\alang1081\ab\loch\f4\fs36\lang1033{\listtext\pard\plain }\ilvl0\ls1 \li792\ri0\lin792\rin0\fi-432{\rtlch \ltrch\loch
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
Nunc ac faucibus odio. Vestibulum neque massa, scelerisque sit amet ligula eu, congue molestie mi. Praesent ut varius sem. Nullam at porttitor arcu, nec lacinia nisi. Ut ac dolor vitae odio interdum condimentum. Vivamus dapibus sodales ex, vitae malesuada ipsum cursus convallis. Maecenas sed egestas nulla, ac condimentum orci. Mauris diam felis, vulputate ac suscipit et, iaculis non est. Curabitur semper arcu ac ligula semper, nec luctus nisl blandit. Integer lacinia ante ac libero lobortis imperdiet. Nullam mollis convallis ipsum, ac accumsan nunc vehicula vitae. Nulla eget justo in felis tristique fringilla. Morbi sit amet tortor quis risus auctor condimentum. Morbi in ullamcorper elit. Nulla iaculis tellus sit amet mauris tempus fringilla.}
|
||||
\par \pard\plain \s2\ql\nowidctlpar\hyphpar0\sb200\sa120\keepn\ltrpar\cf17\b\dbch\af9\langfe2052\dbch\af13\afs32\alang1081\ab\loch\f4\fs32\lang1033{\listtext\pard\plain }\ilvl1\ls1 \li936\ri0\lin936\rin0\fi-576{\rtlch \ltrch\loch
|
||||
Maecenas mauris lectus, lobortis et purus mattis, blandit dictum tellus. }
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
Maecenas non lorem quis tellus placerat varius. Nulla facilisi. Aenean congue fringilla justo ut aliquam. Mauris id ex erat. Nunc vulputate neque vitae justo facilisis, non condimentum ante sagittis. Morbi viverra semper lorem nec molestie. Maecenas tincidunt est efficitur ligula euismod, sit amet ornare est vulputate.}
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \shpwr2\shpwrk3\shpbypara\shpbyignore\shptop0\shpbxcolumn\shpbxignore\shpleft2819\pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
{\*\flymaincnt5\flyanchor0\flycntnt}{\shp{\*\shpinst\shpwr2\shpwrk3\shpbypara\shpbyignore\shptop0\shpbottom2660\shpbxcolumn\shpbxignore\shpleft2819\shpright6819{\sp{\sn shapeType}{\sv 75}}{\sp{\sn wzDescription}{\sv }}{\sp{\sn wzName}{\sv }}{\sp{\sn pib}{\sv {\pict\picscalex100\picscaley100\piccropl0\piccropr0\piccropt0\piccropb0\picw200\pich133\picwgoal4000\pichgoal2660\jpegblip
|
||||
ffd8ffe000104a46494600010101004800480000ffe20c584943435f50524f46494c4500010100000c484c696e6f021000006d6e74725247422058595a2007ce
|
||||
00020009000600310000616373704d5346540000000049454320735247420000000000000000000000000000f6d6000100000000d32d48502020000000000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000001163707274000001500000003364657363000001840000
|
||||
006c77747074000001f000000014626b707400000204000000147258595a00000218000000146758595a0000022c000000146258595a0000024000000014646d
|
||||
6e640000025400000070646d6464000002c400000088767565640000034c0000008676696577000003d4000000246c756d69000003f8000000146d6561730000
|
||||
040c0000002474656368000004300000000c725452430000043c0000080c675452430000043c0000080c625452430000043c0000080c7465787400000000436f
|
||||
70797269676874202863292031393938204865776c6574742d5061636b61726420436f6d70616e79000064657363000000000000001273524742204945433631
|
||||
3936362d322e31000000000000000000000012735247422049454336313936362d322e3100000000000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000058595a20000000000000f35100010000000116cc58595a20000000000000000000000000000000005859
|
||||
5a200000000000006fa2000038f50000039058595a2000000000000062990000b785000018da58595a2000000000000024a000000f840000b6cf646573630000
|
||||
00000000001649454320687474703a2f2f7777772e6965632e636800000000000000000000001649454320687474703a2f2f7777772e6965632e636800000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000000064657363000000000000002e4945432036313936362d
|
||||
322e312044656661756c742052474220636f6c6f7572207370616365202d207352474200000000000000000000002e4945432036313936362d322e3120446566
|
||||
61756c742052474220636f6c6f7572207370616365202d20735247420000000000000000000000000000000000000000000064657363000000000000002c5265
|
||||
666572656e63652056696577696e6720436f6e646974696f6e20696e2049454336313936362d322e3100000000000000000000002c5265666572656e63652056
|
||||
696577696e6720436f6e646974696f6e20696e2049454336313936362d322e310000000000000000000000000000000000000000000000000000766965770000
|
||||
00000013a4fe00145f2e0010cf140003edcc0004130b00035c9e0000000158595a2000000000004c09560050000000571fe76d65617300000000000000010000
|
||||
00000000000000000000000000000000028f0000000273696720000000004352542063757276000000000000040000000005000a000f00140019001e00230028
|
||||
002d00320037003b00400045004a004f00540059005e00630068006d00720077007c00810086008b00900095009a009f00a400a900ae00b200b700bc00c100c6
|
||||
00cb00d000d500db00e000e500eb00f000f600fb01010107010d01130119011f0125012b01320138013e0145014c0152015901600167016e0175017c0183018b
|
||||
0192019a01a101a901b101b901c101c901d101d901e101e901f201fa0203020c0214021d0226022f02380241024b0254025d02670271027a0284028e029802a2
|
||||
02ac02b602c102cb02d502e002eb02f50300030b03160321032d03380343034f035a03660372037e038a039603a203ae03ba03c703d303e003ec03f904060413
|
||||
0420042d043b0448045504630471047e048c049a04a804b604c404d304e104f004fe050d051c052b053a05490558056705770586059605a605b505c505d505e5
|
||||
05f6060606160627063706480659066a067b068c069d06af06c006d106e306f507070719072b073d074f076107740786079907ac07bf07d207e507f8080b081f
|
||||
08320846085a086e0882089608aa08be08d208e708fb09100925093a094f09640979098f09a409ba09cf09e509fb0a110a270a3d0a540a6a0a810a980aae0ac5
|
||||
0adc0af30b0b0b220b390b510b690b800b980bb00bc80be10bf90c120c2a0c430c5c0c750c8e0ca70cc00cd90cf30d0d0d260d400d5a0d740d8e0da90dc30dde
|
||||
0df80e130e2e0e490e640e7f0e9b0eb60ed20eee0f090f250f410f5e0f7a0f960fb30fcf0fec1009102610431061107e109b10b910d710f511131131114f116d
|
||||
118c11aa11c911e81207122612451264128412a312c312e31303132313431363138313a413c513e5140614271449146a148b14ad14ce14f01512153415561578
|
||||
159b15bd15e0160316261649166c168f16b216d616fa171d17411765178917ae17d217f7181b18401865188a18af18d518fa19201945196b199119b719dd1a04
|
||||
1a2a1a511a771a9e1ac51aec1b141b3b1b631b8a1bb21bda1c021c2a1c521c7b1ca31ccc1cf51d1e1d471d701d991dc31dec1e161e401e6a1e941ebe1ee91f13
|
||||
1f3e1f691f941fbf1fea20152041206c209820c420f0211c2148217521a121ce21fb22272255228222af22dd230a23382366239423c223f0241f244d247c24ab
|
||||
24da250925382568259725c725f726272657268726b726e827182749277a27ab27dc280d283f287128a228d429062938296b299d29d02a022a352a682a9b2acf
|
||||
2b022b362b692b9d2bd12c052c392c6e2ca22cd72d0c2d412d762dab2de12e162e4c2e822eb72eee2f242f5a2f912fc72ffe3035306c30a430db3112314a3182
|
||||
31ba31f2322a3263329b32d4330d3346337f33b833f1342b3465349e34d83513354d358735c235fd3637367236ae36e937243760379c37d738143850388c38c8
|
||||
39053942397f39bc39f93a363a743ab23aef3b2d3b6b3baa3be83c273c653ca43ce33d223d613da13de03e203e603ea03ee03f213f613fa23fe24023406440a6
|
||||
40e74129416a41ac41ee4230427242b542f7433a437d43c044034447448a44ce45124555459a45de4622466746ab46f04735477b47c04805484b489148d7491d
|
||||
496349a949f04a374a7d4ac44b0c4b534b9a4be24c2a4c724cba4d024d4a4d934ddc4e254e6e4eb74f004f494f934fdd5027507150bb51065150519b51e65231
|
||||
527c52c75313535f53aa53f65442548f54db5528557555c2560f565c56a956f75744579257e0582f587d58cb591a596959b85a075a565aa65af55b455b955be5
|
||||
5c355c865cd65d275d785dc95e1a5e6c5ebd5f0f5f615fb36005605760aa60fc614f61a261f56249629c62f06343639763eb6440649464e9653d659265e7663d
|
||||
669266e8673d679367e9683f689668ec6943699a69f16a486a9f6af76b4f6ba76bff6c576caf6d086d606db96e126e6b6ec46f1e6f786fd1702b708670e0713a
|
||||
719571f0724b72a67301735d73b87414747074cc7528758575e1763e769b76f8775677b37811786e78cc792a798979e77a467aa57b047b637bc27c217c817ce1
|
||||
7d417da17e017e627ec27f237f847fe5804780a8810a816b81cd8230829282f4835783ba841d848084e3854785ab860e867286d7873b879f8804886988ce8933
|
||||
899989fe8a648aca8b308b968bfc8c638cca8d318d988dff8e668ece8f368f9e9006906e90d6913f91a89211927a92e3934d93b69420948a94f4955f95c99634
|
||||
969f970a977597e0984c98b89924999099fc9a689ad59b429baf9c1c9c899cf79d649dd29e409eae9f1d9f8b9ffaa069a0d8a147a1b6a226a296a306a376a3e6
|
||||
a456a4c7a538a5a9a61aa68ba6fda76ea7e0a852a8c4a937a9a9aa1caa8fab02ab75abe9ac5cacd0ad44adb8ae2daea1af16af8bb000b075b0eab160b1d6b24b
|
||||
b2c2b338b3aeb425b49cb513b58ab601b679b6f0b768b7e0b859b8d1b94ab9c2ba3bbab5bb2ebba7bc21bc9bbd15bd8fbe0abe84beffbf7abff5c070c0ecc167
|
||||
c1e3c25fc2dbc358c3d4c451c4cec54bc5c8c646c6c3c741c7bfc83dc8bcc93ac9b9ca38cab7cb36cbb6cc35ccb5cd35cdb5ce36ceb6cf37cfb8d039d0bad13c
|
||||
d1bed23fd2c1d344d3c6d449d4cbd54ed5d1d655d6d8d75cd7e0d864d8e8d96cd9f1da76dafbdb80dc05dc8add10dd96de1cdea2df29dfafe036e0bde144e1cc
|
||||
e253e2dbe363e3ebe473e4fce584e60de696e71fe7a9e832e8bce946e9d0ea5beae5eb70ebfbec86ed11ed9cee28eeb4ef40efccf058f0e5f172f1fff28cf319
|
||||
f3a7f434f4c2f550f5def66df6fbf78af819f8a8f938f9c7fa57fae7fb77fc07fc98fd29fdbafe4bfedcff6dffffffdb00430005040404040305040404060505
|
||||
06080d0808070708100b0c090d131014131210121214171d1914161c1612121a231a1c1e1f212121141924272420261d202120ffdb0043010506060807080f08
|
||||
080f201512152020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020202020ffc2001108008500
|
||||
c803011100021101031101ffc4001c0000010501010100000000000000000000020001030405060708ffc4001a01000301010101000000000000000000000102
|
||||
0300040506ffda000c03010002100310000001cef90fb09a72952d6a75cbea4d4e63c8fb5e46b47b7ac13eefbbc600d1ab0076056ce0100416acafcf72f764c5
|
||||
b2c88cb7775e3dda7264f5754ed3360db03601aa1102d6c81bcfca398411c56ce32d9d4101c847bb834e8bbcea88916beadd3e3db6907534b4989384bd3c6b57
|
||||
b1dcfd09846af1ab3660044e582d96ce0b8c977982f6f3d37b5257036b9fabd4fd0f0a4558fa5e5acf3a5d3e2d4eecbbc3604bdbb9a4db0e6118183157c160f8
|
||||
ac5d5a347f15a74410725d6263afe6eaf48eef09d7074996aa0a7cea3eb705d52c4e9e4f68e0af54a83b33289472ae558e432c5c3514af8776343ced624d7f9f
|
||||
a3d1a27b2b798614fb94983918fcddbe1fd74c0e9e4d74deede75e4647644513222110b640a0d842bc5deb940e0149f97b7dab8d374f1985b3e945d821a0957c
|
||||
a376f95fa3c359e5ed1e7f5773006e8e5132b156216c81607917b60d2a294e7185b8dbd53cdbc926255e8bdaf1d6c2a6347c89f57cf3ea72e3db9b690fbf799d
|
||||
368cdd95ca265121608e056f34e8b516a62837675ea796fa5e67a4b9fa6ca6f41fa7f91e7bccf5f3d9f03ba16d27c2d5312d0b197b291d645264e9d652ba065a
|
||||
6af66b2a894f30e8e9c12dae94b894e8b92d67cef4adcab6267b3f67e7208d2bad7396f84dabbcb61277424f49d5a2d869a74074899200d18311192691ea47a9
|
||||
5453a1e6b75ca97edcd61e3ccf93ee36c8e00d09d1b4e85a117571eaa248c8656ab2e55467d044da33a326916bd36d293cf3aef21e9001471538fc3e857d5061
|
||||
015379d829436c2eff003e769c84390442da26d20d608ead92cbeb20a9b20c4ac919908ccf21e7fbce529d6557a79bb6bf0d20451dcace54ca995321c81d9308
|
||||
b1a28d9aadaef3d1759f31024aee19d779d799ee994ad49e7d274a9372b6996f64be8b7535802719f013a225866d9d73111632309e82d93054684df84e1f55ca
|
||||
46c80c88a8b2d8cb315972332915b886da9914b6cfb2c61c63c50cc0be311780b366c74acd95ca8b21e05945d642b3653c0b006d19138d710ce8c810c6224766
|
||||
0483163097af9b9971a73d2e08a8b2cb84a01b239078160632d85b44c076b2bae4d9958098f173833572f51cff00ffc400271000020202020005050101000000
|
||||
0000000102000304110512101314152120222330310632ffda0008010100010502af2d5695cb5d26526ce5d7af51597af22a09996abbf1aea178c707337fa6eb
|
||||
7a0b73ea42f9f6bc2db338bab55388bc6e341c7e34f6fc79edb8f3dab167b56346e130ccf65c5598bc7d78d67e9e62deb429d9dcdcabee7c44eb5b41e0cea82f
|
||||
e5f168193fe8ec2389cff5b87fb39bbbf3d66769b982bdb2291aa8c1e19950bb1eea4d76389c2e6fa4cf0db1fa9ce9390b7cce401d0ed17e4f169b71f00c1e0c
|
||||
36398c6eb1c7c3ec1e173c65e16e6ff4e5375aafa6c1779b15a24e213f10fa732916d194a68b9da70f9de93391c3afe9cf7d56ca1a5b88ad2ca1e995640338f4
|
||||
e947d2dfcff418718cdce073fd462fe9cfb3ee8cc00bed16b5184ad70b854bea84f553b09dd67710b4cda45d466d268c99c6659c4cda6e4b2bee27713b4dcd89
|
||||
b13719beccdb8b65f9c105b9165cd8f474984377dcdf954f835408b0148d7f90afcbdd64a33b2ae4e4b1fb5c38d768386b8cc3c6cfc756f702455c896ae96ea4
|
||||
6a7c18287f36c53d2c3d71f27cc46d59735552a0dce3ff00e06364315c6b84f4f6c366e7c464ada3a56b2fed282cb120eb0d9d51722d8320c39680faca8c3954
|
||||
439b54f5f5cb6faecacd8b3ad6d0e35261c1130686088ee816c46f0dcdc0e561b0c2e67dd2f6b02d64ec749f6cd88f5abcb53cbf1dee323b0eb9ca557334b4e4
|
||||
7508c138dfe7c19d04d389b84fcf710dc04acb5a3a58a2d606b1c960ac1cb7193de38b9ef5c64f7bc09ef58f0729483ef16caf90e46d38eb94cb563574c2cb37
|
||||
3e3c35353460b277967f31f1bcf643522e552f90130eae9e8717630f1c4f4f8f3c8a27934cf2699e5d027998c91b35046cdc833d466f9899d5c5bd1e6c78ee6e
|
||||
6ccdec16962069d6043111a00c225b6082f58b6033737a9e609e66e7e49f93c3e2796860362c195608996a63d8e5aab3ecdf81f99a86a0604eb06a09a9d60dac
|
||||
5bf5058ad3e3c373b426769b9b3e0c81a75759e75cb036fe9fecd6a0fa089f316c6116f9d819a9a3e3b80cdc30995b180fd0209a83e8d4226a76222d8606dc33
|
||||
535e061319a7ffc40029110002020103030402020300000000000000010211031012131420210430314122324051425271ffda0008010301013f01e75b459059
|
||||
50b244e58d8e71334d367a56a8f54ed32bd9c58b77c8b11b628b123d5cabc1063f5733abc875990eb6675923ac97f473a7f4473a5f44b2a6bdac3e116596633d
|
||||
4cae4458f48c1cbe05e95ff90a38e3f08cb15fb445dd6597d88b2cb3e206577310f4f4f3db224a8911ff0051c6bb6cb2cb2cb225963647cb32ba88c43d22c8cb
|
||||
7c2c648fdd595d97a5e965912d9bcdc6247a97f88fb704e9d13f0c9331ce9d0c631f7c74a2a8c534677e6bba24ff002858c6467ba23efbd13132cbbf04511c5c
|
||||
96ce94e94dacd8cd8c51313fa322a6331ca98fbdca84c4597645511661f102f48e46998da912499e2bc11b26ace338f56cdcc7214af4cdfa98dfe3aa1310b241
|
||||
7d9cd0fece7c7fd8b1d1ff0004e421c91bc721b1f91c46b47a58dd884c421116bec9414be096392d28a1c14858d0e28a4645e0b1fb5626290d9e4de5c5fc95ae
|
||||
e1e48af937c5fd925e28716b4a36336338d9c6718e2977d965a290c64e7b476fe48cf69d448ea247348e591c9239246f917236c99c7238cd838b2bb28a1bd18f
|
||||
4a286b4dc5eb456946d28a36238cd86c66dd1fbf5a5695a228da5e8fddb2fd9bfe4fffc400271100020201030305010101010000000000000102110312132110
|
||||
3031042022415132714061ffda0008010201013f019fa66e64bd3325e9e42f4f33624912c523d2637147ac5f230aa5da9cabc0db63b12ae9815cac92349b66d2
|
||||
364d9364a92fb256fc9cf6a4ad92545144f846055024210e5a4d6df81636fcb3f87a5f7631268d2513e59055148911e993f48cac4ccb1b565f6a86254868d232
|
||||
ae5d18844910e1d084648e8976e86b8a1e2fc1c4998d7cafa3285d26ab921cab1232c3547b715cf4b387e4c985a22bd9432460953a174cd0d2fb504328aa272e
|
||||
192969e0dc1e435a3711ad0e478766376ba648ea4557bd2b1aa20b828aa2465f06456c6868960543e04acc78235726648417831c8dc46ea2528485a0f81a50a0
|
||||
38d74c5fd13fe855d19465f234c7063848795b3fd383511ff492ff00d28a170596722b39341b6cd2fecd2726a66b3273e08b6852470596597d71792bd898bd96
|
||||
7c075fa3a18d53e95eca349a19a24478628d9b4cdb34234c4f81aa3fa5c054fc15d1f5a1a348e2fa210a3621ad46cc4d98fe9b31fd3661fa6d43f4d981b58cdb
|
||||
c68bc68de88f39ba9fd8a66b2d76132cd46a2c691a0aaeb451c0ebaea685919b86b1648fd8e4bebda99e7ad96793495dab351657bafdb7d290e3daa1aff81c7b
|
||||
147fffc40037100001020304080305090100000000000001000203112110223132041213203041517105618123339192a134404450627282b1d1e1ffda000801
|
||||
0100063f02ca665652b036628de46ab1512478545576b1574ea8b5bf3599164591645902c817bb0aeb648b982a78520713bbf4dc9b96799f24760c97741e7373
|
||||
e2b61f4aee3021e75dc737a84e6bacd471f671388544f2a6e39dd0296eed00b263109b337db43c329d10733357a96877533de2d4e866c13371f42811c222d98c
|
||||
100534746efed982d0c71bcda70836caad935426cb9a975aee636b9a5399635d3ba6850703c09a701c954ad56299c56b72689a23a53708aa2f72f6508a94766a
|
||||
29b44d652a8d280ab82a43430015ec6dd6d6a5ae76ccd55682d73bcd4f665642b2158aaaab66a90fe8a90dc7b291d0dcab064b0540aac0b2a92aacca86c9b5f5
|
||||
1c955640561257627c56a8a96a93db3dcc563662b321227d148cfe3b955564c750ba85fed0aeaa8d3eaae43791d94e2682f1e6da2d6911e4f0b5ccbd13a2c8c8
|
||||
d2da1b3c9557557184a996482241527c6d4eed2bed7f0615efe21ed08afc43bb4357745d31dfc153c374b3e8a70fc0e2cfd02bbe06ff009d4a1781fab9ca7a4b
|
||||
2143fd2caabbae7f7bcbbfb5592af00a9c58a18cef5283585a02019a4ecc04369126eea1722b92e4b95b8aff008aec37395c820775388dd76f4064af35d0fb85
|
||||
4703c19852598ac4aa2a99aad15372eae4ab6cc2bb10faabc26ab4536b95ec787457adc38375df70afe49fffc400281000030001030304020301010000000000
|
||||
00011121314161105171208191a1b1c1d1e1f030f1ffda0008010100013f21350b67f11946af629e589cb0d081f4acc914840cd036363f53549a37b9ed0742d9
|
||||
e0c759bac6d3c1a175adf8e8a2b47c1d90e30dbb3e0ffc01cc35bf88fd043766e9b0d8dfaf4e0e06f8285d2b3f22302d122bd28bb22e479a8adb2119c3656eca
|
||||
70bd98d8fa31b2fa9b8a97ab4518af4e93ca159ccfd06f18572c323472e5c2f86216d32946c6fd7745a275309a2a63078105ee6226d8e84ea1f6b086e6658ca9
|
||||
fa0084bd6fa6af819836dcc29c2d09794e8f83225facb4fb18a242e8f52b830431e0cc3178301a55a8bff1e56c15aa16bc19a7034b0cb2acaaf96265213a2d0c
|
||||
4b69af49b2756a8eee784c5e9a5296a3928c2da0b29b753b2468b979c9f8fd0bbc4f725af495d188b04d56a1ca58b81b35f041265a68e53948ee41cc5b71a255
|
||||
884c1806301d12d87f717baee3235941b99fd0584f032aa3a450d6178d847715eb0813685b98bf73f8cc6a19914d9b197535dd94eee4516a84eb44b0ab7b12ec
|
||||
79379c8af2dab52376f68d207112435bd1de64a455e4d60909b20f818a310d01f608f7c2217bc93486bf9670508741b16bf186f59291865d19195028b2b1e9ba
|
||||
c861b0740b3ca1bf94bb9a7b78b1cdae105ac41b6d77625c15ba34a8636868a3a411455e4677e84dbe45802a1acd72317f627da8e1468ecf71e97eaef23cab16
|
||||
a269e4b5ca9cdb007a0978359f935135b27ec5e169921e40370a497217ba29065ca94d6b50e74dec7414cd062fd435ea9e4d29a0ccd42d7036af2356ccbe25c0
|
||||
e32cb6fd0423aff8f8124c27a7bc099f426bf7326a2a5b90abadbeed3f826c2e025f26507ee37c96db35eff900ac4d5b1c1d092d887aa38b185387ba1a771ee6
|
||||
a9fd1859181558e893171b3a256b125772824cad36ce61a1af8a33dd5ec7f9412fb7d1e1f92aad7e4e27b5185f14a1f7d21e4c1e95271feb22ef8f627956cc57
|
||||
b929ca409cb0f28bd4c8f587d0471911453f229334e480d8c6c1c9c80d4744e2860778d0f413e469968124da57b8db6a36211f7583631db23169f03b81c8c98a
|
||||
3268522b794f27784bb191289b1da44190c5f5cd121a73237664482920cfa0d2e8347a1c827dce00dc23d6e4904c793429204fa3a15137b928d223c1cb8878c3
|
||||
a375a31a87ca141f507eb2b746ba309485d48592040c2d0be9d88431a8c21878e48fffda000c030100020003000000106b0631a8f6e44e0c76cfc6a197c34b9c
|
||||
d5a5e0fb54038412dabf4d200da8ebcf66ea2642070e2ecb9e6ef3daaf46c64184b704900e32fd4ed133cd5d6d80b812412cbe809ae198e77100949233ff002e
|
||||
20ddb23398c9b0917cb5da4fe182bd6b94f1d5ab100351287c7902e5b68139144dd8dc3e3dfbc7e8199d0098cd819fcd9cdd48ab2249280e782fc18ff9527a96
|
||||
29a4292210f465cc52715aba0ea39c6ebfffc400271101010100020202010304030000000000010011213110415161207191f03081b1d1c1e1f1ffda00080103
|
||||
01013f104389cc21c966bb8b6ddacdbb30e6e1760317682083f226bd220c3823aa4ead38b03fa0ff009f083a09fabc1fa37d65a708bdc374189867bb39820fcb
|
||||
21cfe2c672e5900fd5f13966f2139790bd8fdce02e184441e37f003e0359e01702e4ce9c8e66d0fae3c5cd93bee548c266afa7fcda31820ba967f002116b1cae
|
||||
5c401e28190d75f05360df682ce5bdd2504f71e0b2ccdf0d8847cc03a472e6757b9b1c1fa7efff0050b2596db9974cd62e4b9b74dc65294b2db6c30ca1b2cbe4
|
||||
4c0b7112c96d89e3708f64e79653ee52cb2cb6db107cc11db166992afecff3f7956a1babebb1ec9336f6adf2571cf4ca665996d806b6c693e2316f81e27b2e27
|
||||
df3fbc4610c28c4625c382f62678cfce4c08632d91c7814eed516ba5b08788d679e0e667cc46407d2c3d6ec246905c6cb4d5847521f5690c0507cc07b9c81663
|
||||
a10cb0624b3af0294a224575e37773bee79e463d486a41f57c39f8eed92b64cb2db6cbe46123ede5053211d42e98e82dac664b938bb865bb0e522a771236dfe3
|
||||
fead3ff1ff0057f2e7c1ab5768ce7af1b16c30afb453e47ac1acd36777399d38253d4bfa95f536c296deaf84c27647cdb21c42ecf036186356bc0bb2b94e4891
|
||||
ebc0697dac3259b6885e00f86acbddadf56d90109ee594b2ccccc925996dbe0b3c02d41f3620b84060782ca6d999667c659e062134f231110c2b65999966dfe8
|
||||
0c30dbe06186dbffc4002511010101000203000104020300000000000100111031214151a120618191b1d1c1e1f1ffda0008010201013f10eede090f86cde188
|
||||
f522993dcc8e692bb9c16df12cf19659270fe33c87715e178409e6daf81ff5c06bb6036d03d36becfb6c7460fbac5870f1965924927058274c709b8fdff88c61
|
||||
1f6ba664e8d58599e72cb2c9249b4f3fa20e05fd99fdff00d42110cf08009c5e3fd91b2db780b2cb2c9261f1c26cce08e118897cf01c3727e45e2d9cf32a1e9b
|
||||
620820b2cb2c9274842e92463c30bf86e144125a31eac10e2ebbb20fb11041671964969772ed7a45b89642fde18390e9790f00bce9d30701071965925dec2d3d
|
||||
465aca26c40fe5261b2e15a0dc9f00b098237252c6218638c25584cb1b133bb116c3a7db6794dff3e61acb6789312be7b00796cbdd9f6f1c09e586e9d41eac5b
|
||||
3d65e1e5115347867833a3f69cee5f0caf575accfd96badbcbb82d59d3b0e6c1c3f90fa12f9914a60dd1bc935ead1eecedfb26b5f6bc026df0c2b82a19698585
|
||||
90e5919a5911213de98888076cabb603a8b599fd5e4e136193931d3abc7026143f46c91a93c197af01f6ff008ff71ee7f25876fe4b07b3fbbc38fe565ece5384
|
||||
895f778982cfc5ea58c2f58bcac1e0bacf51f68fb22d8c3ff75fbbf982f7f9b56afe63af927d30fa9ec43f4dfbad271e999725d8b320d9cee17636a7ee0bdcff
|
||||
0036bd4bedc6c6a71df05e9c9d46f623ea2e4c16c84478eb852d8ec6c42a9da7e251df1bfa326ce0642f76271645bc1e235c78b3e43f6268cb4a489c6fe878ce
|
||||
05267031c8f1bceb0c360c1271b6f19044ffc400261001000202010305000203000000000000010011213141516171108191a1b1c1e1d1f0f1ffda0008010100
|
||||
013f106a73081d65206d754f9e528f6a31aa8a2417d25049ef0c54150cf760d9b8d3da69f12ff497a1083061b1dc4ea2a1babdc7d60e1e5f78b969955bb8c17c
|
||||
acb2b91f134fd21e710016fb200a3e385383f69b3f8517b53f647b0a1e84b0b8b059901a9b0b2e817ea0bea420f58a4c3823486dfc8f7b6b65611c16e282ad41
|
||||
19940776a3fb25a306218e8a7955411a7ba1a06ca6d7f04d18d4e0852ac688bd07d443d2e7a08075ac0f2d1f8ca2cd46d45cb20eb0ea2cf8c67f89627917bebe
|
||||
aa0b187042585d2186a5dc2c05e652c28971c152e3858611136465f4192199999832e0c2b9ac4c7a49ceabfbb950b8fb6287494617479963f2574800f68ac954
|
||||
09783a605f03907112399429a85b12565a615e6ce630c36f41a95a8b2e5c194a3a6fd414000f55b9bb0b1981613acc65db29de86f67f83efd16c4a48220950f5
|
||||
a554099b5972455371298c1781e18004036730bc1bf55972e5cb9443a7de56098841cc5d79911e91f4e6acc5aa03f90fe7a54a8461182d89b65ce86c88ac30c8
|
||||
962710ac42a5729c3e8692e2e22cbf51a0b0ccd9542cadc0c4036cca42a44a7c4bcd6c1b7eae5460cf6b81f16f79c28c18870d546bd23acdccb13307c59ab942
|
||||
42df18cb2f1820de29e6186122303dc09c65bc23d6947183282b2f451084e2a5f8a9f76e2b35d6ae38b4d5e04afce655b61ae513f0fd967e5575d453f631516f
|
||||
a06e2ba8e405e298e834be60758d11b85310e3710b2a106d48a8b5ce5091017820850bd584af37a038250c9e59596f726ea68f8ce66dd656058a7497b36d406e
|
||||
1a45a95728226bd721cc1c415d8a5825059302b022e3761d0387bc52e2594ab58b51531a9530f78d4a08232c15fb91101744b218b788540f7a181e68f50f961b
|
||||
bf662621fad162bb827544c5ae521f1a1a5ea092d4a4ea471431d259e9f80cc415172d6a16e501a5973f3a992b7804b4546570a2e2cfd90ce1e4e2c4bf8b9a97
|
||||
8aadf785dd6bb12985f45e21535f2c3c08f0c7758f94e2bf12ed50ee131c55d897e308bb1fd4b6fcf65528176bde07af9a5e501ed0ce068e122ceed9f83679d4
|
||||
6b0484094ecde6215300ad41d7fac4c933b6781b8a42f217e046258aacdbe496e20bb1b672d388e62054001cda95de51511558e3748665ca8cf4581969ced4f6
|
||||
96590f78a6d5792e3fc119468afa89b9d8b648bb674da00d95e0b8ab869188f4bd40fdf68003e61d3460a2b59a8ad18ca6fd930ccbdc7e88a63c95fccb74bc0b
|
||||
f6156475c2cd48991b175b90edbadb71e0081581c15742794109341b05e5d07b0cbe97b5ff00b595ed53722e09798bc63f0090b180f6250d08e82cb0643ce670
|
||||
d18cb7c29cc23b12b657de39758e344c8103a071e61775a1ff00d599a4401dd36e4ed070095f113350f1275bf92a9156bfe728b1ef7fc41f47b7fa4ca01ef7fe
|
||||
100cfe12ff006da75f88970b4ea2fc852a4e88fb86a7466d622be521be5ab98523ac7f0b21648ed4e52a591cd2d805f9964ca8e0b83e503d79966f50dc0bd26f
|
||||
a83d1a96885ec6f8978b8b16912248d6b1122abc434dfd095a498e8c54ee864a3bd457b116f09ed2fb21c54ab168350ff12b27cebea53d81051043975300bb51
|
||||
999345092c84f28330c75876435eb8260bc01e605c2e183044b8a60458c572e63c13da353dc403097c4a76107883c1021a22e6611790992753c896e44cf5aba9
|
||||
1fa216094904236f31846e62ba865704e489c910b51299c4af5348458c449885f246505b4251fc881b090738778210c41070d44bc45e6386483d677263151c16
|
||||
41ed0eb10c36466a0ed1259166a013094d88a6c83e210c4b2c6e12e67040992585826462a3ac5a84b89527ffd9}}}}}
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225\rtlch \ltrch\loch
|
||||
|
||||
\par \pard\plain \s51\sl288\slmult1\ql\nowidctlpar\hyphpar0\sb0\sa140\ltrpar\cf17\dbch\af9\langfe2052\dbch\af13\afs24\alang1081\loch\f3\fs24\lang1033\qj\widctlpar\sb0\sa225{\scaps0\caps0\cf1\expnd0\expndtw0\i0\b0\dbch\af12\rtlch \ltrch\loch\fs21\loch\f8\hich\af8
|
||||
Nunc ac faucibus odio. Vestibulum neque massa, scelerisque sit amet ligula eu, congue molestie mi. Praesent ut varius sem. Nullam at porttitor arcu, nec lacinia nisi. Ut ac dolor vitae odio interdum condimentum. Vivamus dapibus sodales ex, vitae malesuada ipsum cursus convallis. Maecenas sed egestas nulla, ac condimentum orci. Mauris diam felis, vulputate ac suscipit et, iaculis non est. Curabitur semper arcu ac ligula semper, nec luctus nisl blandit. Integer lacinia ante ac libero lobortis imperdiet. Nullam mollis convallis ipsum, ac accumsan nunc vehicula vitae. }
|
||||
\par }
|
BIN
modules/files/src/test/resources/examples/sample.xls
Normal file
BIN
modules/files/src/test/resources/examples/sample.xlsx
Normal file
30
modules/files/src/test/resources/letter-de.html
Executable file
@ -0,0 +1,30 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<style>
|
||||
body {
|
||||
padding: 2em 5em;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<pre>
|
||||
<code>
|
||||
Max Mustermann
|
||||
Lilienweg 21
|
||||
12345 Nebendorf
|
||||
E-Mail: max.muster@gmail.com
|
||||
</code>
|
||||
</pre>
|
||||
<p>Max Mustermann, Lilienweg 21, 12345 Nebendorf</p>
|
||||
<p>EasyCare AG<br> Abteilung Buchhaltung<br> Ackerweg 12<br> 12346 Ulmen<br></p>
|
||||
<p>Nebendorf, 3. September 2019</p>
|
||||
<h2>Sehr geehrte Damen und Herren</h2>
|
||||
<p>hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum <em>nächstmöglichen</em> Termin.</p>
|
||||
<p>Bitte senden Sie mir innerhalb der gesetzlichen Frist von <strong>14 Tagen</strong> eine Kündigungsbestätigung zu.</p>
|
||||
<p>Vielen Dank im Vorraus!</p>
|
||||
<p>Mit freundlichen Grüßen</p>
|
||||
<p>Max Mustermann</p>
|
||||
</body>
|
||||
</html>
|
29
modules/files/src/test/resources/letter-de.md
Normal file
@ -0,0 +1,29 @@
|
||||
Max Mustermann
|
||||
Lilienweg 21
|
||||
12345 Nebendorf
|
||||
E-Mail: max.muster@gmail.com
|
||||
|
||||
Max Mustermann, Lilienweg 21, 12345 Nebendorf
|
||||
|
||||
|
||||
EasyCare AG<br>
|
||||
Abteilung Buchhaltung<br>
|
||||
Ackerweg 12<br>
|
||||
12346 Ulmen<br>
|
||||
|
||||
|
||||
Nebendorf, 3. September 2019
|
||||
## Sehr geehrte Damen und Herren,
|
||||
|
||||
hiermit kündige ich meine Mitgliedschaft in der Kranken- und
|
||||
Pflegeversicherung zum *nächstmöglichen* Termin.
|
||||
|
||||
Bitte senden Sie mir innerhalb der gesetzlichen Frist von **14 Tagen**
|
||||
eine Kündigungsbestätigung zu.
|
||||
|
||||
|
||||
Vielen Dank im Vorraus!
|
||||
|
||||
Mit freundlichen Grüßen
|
||||
|
||||
Max Mustermann
|
30
modules/files/src/test/resources/letter-de.txt
Normal file
@ -0,0 +1,30 @@
|
||||
Max Mustermann
|
||||
|
||||
Lilienweg 21
|
||||
|
||||
12345 Nebendorf
|
||||
|
||||
E-Mail: max.muster@gmail.com
|
||||
|
||||
Max Mustermann, Lilienweg 21, 12345 Nebendorf
|
||||
|
||||
EasyCare AG
|
||||
Abteilung Buchhaltung
|
||||
Ackerweg 12
|
||||
|
||||
12346 Ulmen
|
||||
|
||||
Nebendorf, 3. September 2019
|
||||
Sehr geehrte Damen und Herren,
|
||||
|
||||
hiermit kündige ich meine Mitgliedschaft in der Kranken- und Pflegeversicherung zum
|
||||
nächstmöglichen Termin.
|
||||
|
||||
Bitte senden Sie mir innerhalb der gesetzlichen Frist von 14 Tagen eine Kündigungsbe-
|
||||
stätigung zu.
|
||||
|
||||
Vielen Dank im Vorraus!
|
||||
|
||||
Mit freundlichen Grüßen
|
||||
|
||||
Max Mustermann
|
38
modules/files/src/test/resources/letter-en.txt
Normal file
@ -0,0 +1,38 @@
|
||||
Derek Jeter
|
||||
|
||||
123 Elm Ave.
|
||||
|
||||
Treesville, ON M1N 2P3
|
||||
November 7, 2016
|
||||
|
||||
Derek Jeter, 123 Elm Ave., Treesville, ON M1N 2P3, November 7, 2016
|
||||
|
||||
Mr. M. Leaf
|
||||
|
||||
Chief of Syrup Production
|
||||
Old Sticky Pancake Company
|
||||
456 Maple Lane
|
||||
|
||||
Forest, ON 7W8 9Y0
|
||||
|
||||
Hemptown, September 3, 2019
|
||||
Dear Mr. Leaf,
|
||||
|
||||
Let me begin by thanking you for your past contributions to our Little League baseball
|
||||
team. Your sponsorship aided in the purchase of ten full uniforms and several pieces of
|
||||
baseball equipment for last year’s season.
|
||||
|
||||
Next month, our company is planning an employee appreciation pancake breakfast hon-
|
||||
oring retired employees for their past years of service and present employees for their
|
||||
loyalty and dedication in spite of the current difficult economic conditions.
|
||||
|
||||
We would like to place an order with your company for 25 pounds of pancake mix and
|
||||
five gallons of maple syrup. We hope you will be able to provide these products in the
|
||||
bulk quantities we require.
|
||||
|
||||
As you are a committed corporate sponsor and long-time associate, we hope that you
|
||||
will be able to join us for breakfast on December 12, 2016.
|
||||
|
||||
Respectfully yours,
|
||||
|
||||
Derek Jeter
|
@ -3,12 +3,12 @@
|
||||
<withJansi>true</withJansi>
|
||||
|
||||
<encoder>
|
||||
<pattern>[%thread] %highlight(%-5level) %cyan(%logger{15}) - %msg %n</pattern>
|
||||
<pattern>%highlight(%-5level) %cyan(%logger{15}) - %msg %n</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<logger name="docspell" level="debug" />
|
||||
<root level="INFO">
|
||||
<root level="error">
|
||||
<appender-ref ref="STDOUT" />
|
||||
</root>
|
||||
</configuration>
|
BIN
modules/files/src/test/resources/scanner/jfif.jpg
Executable file
After Width: | Height: | Size: 235 KiB |
BIN
modules/files/src/test/resources/scanner/pdf13.pdf
Executable file
BIN
modules/files/src/test/resources/scanner/pdfa14.pdf
Executable file
@ -0,0 +1,14 @@
|
||||
package docspell.files
|
||||
|
||||
import docspell.common._
|
||||
|
||||
/** Mixin for the generated `ExampleFiles` object: turns the name of a test
  * resource into a `LenientUri` pointing at it on the classpath.
  */
trait ExampleFilesSupport {

  /** Resolves `resource` against the classpath root.
    *
    * @param resource resource path without a leading slash; a `/` is prepended
    *                 before the lookup
    * @return the resource location converted with `LenientUri.fromJava`
    * @throws java.lang.RuntimeException (raised through `sys.error`) when the
    *                                    lookup yields no resource
    */
  def createUrl(resource: String): LenientUri = {
    // getResource returns null for a missing resource; Option(...) maps that to None.
    val location = Option(getClass.getResource("/" + resource))
    location
      .map(found => LenientUri.fromJava(found))
      .getOrElse(sys.error(s"Resource '$resource' not found"))
  }

}
|
@ -0,0 +1,46 @@
|
||||
package docspell.files
|
||||
|
||||
import cats.implicits._
|
||||
import cats.effect.{Blocker, IO}
|
||||
import minitest.SimpleTestSuite
|
||||
|
||||
import scala.concurrent.ExecutionContext
|
||||
import scala.util.Using
|
||||
|
||||
/** Verifies that `ImageSize.get` reports the expected dimensions for the
  * example images, once reading from a `java.io.InputStream` and once reading
  * from a byte stream evaluated in `IO`.
  */
object ImageSizeTest extends SimpleTestSuite {
  val blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Implicit values carry an explicit type so implicit resolution does not
  // depend on the inferred type of the right-hand side.
  implicit val CS: cats.effect.ContextShift[IO] = IO.contextShift(ExecutionContext.global)

  //tiff files are not supported on the jdk by default
  //requires an external library
  // Each entry pairs an example image with the dimension `ImageSize.get` must report.
  val files = List(
    ExampleFiles.camera_letter_en_jpg -> Dimension(1695, 2378),
    ExampleFiles.camera_letter_en_png -> Dimension(1695, 2378),
    // ExampleFiles.camera_letter_en_tiff -> Dimension(1695, 2378),
    ExampleFiles.scanner_jfif_jpg -> Dimension(2480, 3514),
    // NOTE(review): the "bombs" files are presumably tiny files declaring huge
    // dimensions, to make sure only the header is inspected -- confirm.
    ExampleFiles.bombs_20K_gray_jpeg -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_gray_png -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_rgb_jpeg -> Dimension(20000, 20000),
    ExampleFiles.bombs_20K_rgb_png -> Dimension(20000, 20000)
  )

  test("get sizes from input-stream") {
    files.foreach {
      case (uri, expect) =>
        // A uri that cannot be converted to a java URL aborts the test with its error message.
        val url = uri.toJavaUrl.fold(sys.error, identity)
        // Using.resource closes the stream even when the assertion fails.
        Using.resource(url.openStream()) { in =>
          val dim = ImageSize.get(in)
          assertEquals(dim, expect.some)
        }
    }
  }

  test("get sizes from stream") {
    files.foreach {
      case (uri, expect) =>
        val stream = uri.readURL[IO](8192, blocker)
        val dim = ImageSize.get(stream).unsafeRunSync()
        assertEquals(dim, expect.some)
    }
  }
}
|
25
modules/files/src/test/scala/docspell/files/Playing.scala
Normal file
@ -0,0 +1,25 @@
|
||||
package docspell.files
|
||||
|
||||
import cats.effect.{Blocker, ExitCode, IO, IOApp}
|
||||
import docspell.common.MimeTypeHint
|
||||
|
||||
import scala.concurrent.ExecutionContext
|
||||
|
||||
/** Scratch entry point for trying out `TikaMimetype.detect` by hand on the
  * RTF example file, once with a filename hint and once without any hint.
  */
object Playing extends IOApp {
  val blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  /** Detects the mime type of the sample RTF twice and prints both results as
    * a tuple.
    *
    * The detections are composed into the returned `IO` instead of being run
    * with `unsafeRunSync()` inside an `IO { ... }` block, so the `IOApp`
    * runtime executes them and no thread blocks on a nested run.
    *
    * @param args command line arguments; not used
    * @return `ExitCode.Success` after both results have been printed
    */
  def run(args: List[String]): IO[ExitCode] = {
    //val ods = ExampleFiles.examples_sample_ods.readURL[IO](8192, blocker)
    //val odt = ExampleFiles.examples_sample_odt.readURL[IO](8192, blocker)
    val rtf = ExampleFiles.examples_sample_rtf.readURL[IO](8192, blocker)

    // The last path segment of the example uri serves as the filename hint.
    val nameHint =
      MimeTypeHint.filename(ExampleFiles.examples_sample_rtf.path.segments.last)

    for {
      withHint    <- TikaMimetype.detect(rtf, nameHint)
      withoutHint <- TikaMimetype.detect(rtf, MimeTypeHint.none)
      _           <- IO(println((withHint, withoutHint)))
    } yield ExitCode.Success
  }
}
|
29
modules/files/src/test/scala/docspell/files/TestFiles.scala
Normal file
@ -0,0 +1,29 @@
|
||||
package docspell.files
|
||||
|
||||
import cats.effect.{Blocker, IO}
|
||||
import fs2.Stream
|
||||
|
||||
import scala.concurrent.ExecutionContext
|
||||
|
||||
/** Shared test fixtures: the example letters as byte streams of their PDF
  * resources and the text read from the matching `.txt` resources.
  */
object TestFiles {
  val blocker = Blocker.liftExecutionContext(ExecutionContext.global)

  // Implicit values carry an explicit type so implicit resolution does not
  // depend on the inferred type of the right-hand side.
  implicit val CS: cats.effect.ContextShift[IO] = IO.contextShift(ExecutionContext.global)

  /** Bytes of the German example letter PDF, read in 8 KiB chunks. */
  val letterSourceDE: Stream[IO, Byte] =
    ExampleFiles.letter_de_pdf
      .readURL[IO](8 * 1024, blocker)

  /** Bytes of the English example letter PDF, read in 8 KiB chunks. */
  val letterSourceEN: Stream[IO, Byte] =
    ExampleFiles.letter_en_pdf
      .readURL[IO](8 * 1024, blocker)

  // The text fixtures are lazy: each file is read synchronously on first access only.
  lazy val letterDEText =
    ExampleFiles.letter_de_txt
      .readText[IO](8 * 1024, blocker)
      .unsafeRunSync()

  lazy val letterENText =
    ExampleFiles.letter_en_txt
      .readText[IO](8 * 1024, blocker)
      .unsafeRunSync()
}
|