From 8143a4edccde20c146e3b180d598603dfafc27d1 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sun, 16 Feb 2020 21:37:26 +0100 Subject: [PATCH] Adding extraction primitives --- build.sbt | 4 +- .../scala/docspell/analysis/TestFiles.scala | 21 - .../docspell/analysis/date/DateFindSpec.scala | 2 +- .../analysis/nlp/TextAnalyserSuite.scala | 38 +- .../docspell/convert/flexmark/Markdown.scala | 69 ++ .../convert/flexmark/MarkdownConfig.scala | 3 + modules/extract/NOTICE | 11 + .../odf/NSNormalizerContentHandler.java | 99 +++ .../parser/odf/OpenDocumentContentParser.java | 606 ++++++++++++++++++ .../parser/odf/OpenDocumentMetaParser.java | 199 ++++++ .../tika/parser/odf/OpenDocumentParser.java | 256 ++++++++ .../parser/xml/AbstractMetadataHandler.java | 93 +++ .../AttributeDependantMetadataHandler.java | 82 +++ .../parser/xml/AttributeMetadataHandler.java | 61 ++ .../apache/tika/parser/xml/DcXMLParser.java | 60 ++ .../parser/xml/ElementMetadataHandler.java | 241 +++++++ .../tika/parser/xml/FictionBookParser.java | 114 ++++ .../tika/parser/xml/MetadataHandler.java | 85 +++ .../org/apache/tika/parser/xml/XMLParser.java | 90 +++ .../docspell/extract/ExtractResult.scala | 29 + .../docspell/extract/odf/OdfExtract.scala | 30 + .../extract/pdfbox/PdfboxExtract.scala | 34 + .../docspell/extract/poi/PoiExtract.scala | 85 +++ .../scala/docspell/extract/poi/PoiTypes.scala | 16 + .../docspell/extract/rtf/RtfExtract.scala | 24 + .../extract/ocr/TextExtractionSuite.scala | 13 +- .../docspell/extract/odf/OdfExtractTest.scala | 28 + .../extract/pdfbox/PdfboxExtractTest.scala | 48 ++ .../docspell/extract/poi/PoiExtractTest.scala | 39 ++ .../docspell/extract/rtf/RtfExtractTest.scala | 14 + .../main/scala/docspell/files/Dimension.scala | 7 + .../main/scala/docspell/files/ImageSize.scala | 61 ++ .../src/test/resources/bombs/20K-gray.jpeg | Bin 0 -> 1562661 bytes .../src/test/resources/bombs/20K-gray.png | Bin 0 -> 48829 bytes .../src/test/resources/bombs/20K-rgb.jpeg | Bin 0 -> 2344037 
bytes .../src/test/resources/bombs/20K-rgb.png | Bin 0 -> 1207693 bytes .../files/src/test/resources/letter-en.txt | 6 +- .../src/test/resources/logback-test.xml} | 4 +- .../scala/docspell/files/ImageSizeTest.scala | 46 ++ .../scala/docspell/files}/TestFiles.scala | 13 +- modules/microsite/docs/dev/adr.md | 5 + .../docs/dev/adr/0006_more-file-types.md | 18 +- .../docs/dev/adr/0011_extract_text.md | 77 +++ .../docs/dev/adr/img/process-files.png | Bin 0 -> 50465 bytes .../microsite/docs/dev/adr/process-files.puml | 43 ++ project/Dependencies.scala | 40 +- 46 files changed, 2731 insertions(+), 83 deletions(-) delete mode 100644 modules/analysis/src/test/scala/docspell/analysis/TestFiles.scala create mode 100644 modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala create mode 100644 modules/convert/src/main/scala/docspell/convert/flexmark/MarkdownConfig.scala create mode 100644 modules/extract/NOTICE create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/AbstractMetadataHandler.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeDependantMetadataHandler.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeMetadataHandler.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/ElementMetadataHandler.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java create mode 100644 
modules/extract/src/main/java/org/apache/tika/parser/xml/MetadataHandler.java create mode 100644 modules/extract/src/main/java/org/apache/tika/parser/xml/XMLParser.java create mode 100644 modules/extract/src/main/scala/docspell/extract/ExtractResult.scala create mode 100644 modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala create mode 100644 modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala create mode 100644 modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala create mode 100644 modules/extract/src/main/scala/docspell/extract/poi/PoiTypes.scala create mode 100644 modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala create mode 100644 modules/extract/src/test/scala/docspell/extract/odf/OdfExtractTest.scala create mode 100644 modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala create mode 100644 modules/extract/src/test/scala/docspell/extract/poi/PoiExtractTest.scala create mode 100644 modules/extract/src/test/scala/docspell/extract/rtf/RtfExtractTest.scala create mode 100644 modules/files/src/main/scala/docspell/files/Dimension.scala create mode 100644 modules/files/src/main/scala/docspell/files/ImageSize.scala create mode 100644 modules/files/src/test/resources/bombs/20K-gray.jpeg create mode 100644 modules/files/src/test/resources/bombs/20K-gray.png create mode 100644 modules/files/src/test/resources/bombs/20K-rgb.jpeg create mode 100644 modules/files/src/test/resources/bombs/20K-rgb.png rename modules/{extract/src/test/resources/logback.xml => files/src/test/resources/logback-test.xml} (71%) create mode 100644 modules/files/src/test/scala/docspell/files/ImageSizeTest.scala rename modules/{extract/src/test/scala/docspell/extract => files/src/test/scala/docspell/files}/TestFiles.scala (72%) create mode 100644 modules/microsite/docs/dev/adr/0011_extract_text.md create mode 100644 modules/microsite/docs/dev/adr/img/process-files.png create mode 100644 
modules/microsite/docs/dev/adr/process-files.puml diff --git a/build.sbt b/build.sbt index afe1e09c..9ddd50a1 100644 --- a/build.sbt +++ b/build.sbt @@ -205,7 +205,9 @@ val extract = project.in(file("modules/extract")). libraryDependencies ++= Dependencies.fs2 ++ Dependencies.pdfbox ++ - Dependencies.poi + Dependencies.poi ++ + Dependencies.commonsIO ++ + Dependencies.julOverSlf4j ).dependsOn(common, files % "compile->compile;test->test") val convert = project.in(file("modules/convert")). diff --git a/modules/analysis/src/test/scala/docspell/analysis/TestFiles.scala b/modules/analysis/src/test/scala/docspell/analysis/TestFiles.scala deleted file mode 100644 index c01d6ad1..00000000 --- a/modules/analysis/src/test/scala/docspell/analysis/TestFiles.scala +++ /dev/null @@ -1,21 +0,0 @@ -package docspell.analysis - -import cats.effect.{Blocker, IO} -import docspell.files._ - -import scala.concurrent.ExecutionContext - -object TestFiles { - val blocker = Blocker.liftExecutionContext(ExecutionContext.global) - implicit val CS = IO.contextShift(ExecutionContext.global) - - lazy val letterDEText = - ExampleFiles.letter_de_txt - .readText[IO](16 * 1024, blocker) - .unsafeRunSync - - lazy val letterENText = - ExampleFiles.letter_en_txt - .readText[IO](16 * 1024, blocker) - .unsafeRunSync -} diff --git a/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala b/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala index 30f0b5bc..5ffc853f 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/date/DateFindSpec.scala @@ -1,6 +1,6 @@ package docspell.analysis.date -import docspell.analysis.TestFiles +import docspell.files.TestFiles import minitest.SimpleTestSuite import docspell.common.Language diff --git a/modules/analysis/src/test/scala/docspell/analysis/nlp/TextAnalyserSuite.scala 
b/modules/analysis/src/test/scala/docspell/analysis/nlp/TextAnalyserSuite.scala index 7c0f150d..cb932cf4 100644 --- a/modules/analysis/src/test/scala/docspell/analysis/nlp/TextAnalyserSuite.scala +++ b/modules/analysis/src/test/scala/docspell/analysis/nlp/TextAnalyserSuite.scala @@ -1,7 +1,7 @@ package docspell.analysis.nlp import minitest.SimpleTestSuite -import docspell.analysis.TestFiles +import docspell.files.TestFiles import docspell.common._ object TextAnalyserSuite extends SimpleTestSuite { @@ -12,25 +12,23 @@ object TextAnalyserSuite extends SimpleTestSuite { NerLabel("Derek", NerTag.Person, 0, 5), NerLabel("Jeter", NerTag.Person, 6, 11), NerLabel("Treesville", NerTag.Person, 27, 37), - NerLabel("Derek", NerTag.Person, 69, 74), - NerLabel("Jeter", NerTag.Person, 75, 80), - NerLabel("Treesville", NerTag.Location, 96, 106), - NerLabel("M.", NerTag.Person, 142, 144), - NerLabel("Leat", NerTag.Person, 145, 149), - NerLabel("Syrup", NerTag.Organization, 160, 165), - NerLabel("Production", NerTag.Organization, 166, 176), - NerLabel("Old", NerTag.Organization, 177, 180), - NerLabel("Sticky", NerTag.Organization, 181, 187), - NerLabel("Pancake", NerTag.Organization, 188, 195), - NerLabel("Company", NerTag.Organization, 196, 203), - NerLabel("Maple", NerTag.Location, 208, 213), - NerLabel("Lane", NerTag.Location, 214, 218), - NerLabel("Forest", NerTag.Location, 220, 226), - NerLabel("Hemptown", NerTag.Location, 241, 249), - NerLabel("Little", NerTag.Organization, 349, 355), - NerLabel("League", NerTag.Organization, 356, 362), - NerLabel("Derek", NerTag.Person, 1119, 1124), - NerLabel("Jeter", NerTag.Person, 1125, 1130) + NerLabel("Derek", NerTag.Person, 68, 73), + NerLabel("Jeter", NerTag.Person, 74, 79), + NerLabel("Treesville", NerTag.Location, 95, 105), + NerLabel("Syrup", NerTag.Organization, 159, 164), + NerLabel("Production", NerTag.Organization, 165, 175), + NerLabel("Old", NerTag.Organization, 176, 179), + NerLabel("Sticky", NerTag.Organization, 180, 186), + 
NerLabel("Pancake", NerTag.Organization, 187, 194),
+      NerLabel("Company", NerTag.Organization, 195, 202),
+      NerLabel("Maple", NerTag.Location, 207, 212),
+      NerLabel("Lane", NerTag.Location, 213, 217),
+      NerLabel("Forest", NerTag.Location, 219, 225),
+      NerLabel("Hemptown", NerTag.Location, 239, 247),
+      NerLabel("Little", NerTag.Organization, 347, 353),
+      NerLabel("League", NerTag.Organization, 354, 360),
+      NerLabel("Derek", NerTag.Person, 1117, 1122),
+      NerLabel("Jeter", NerTag.Person, 1123, 1128)
     )
     assertEquals(labels, expect)
   }
diff --git a/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala b/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala
new file mode 100644
index 00000000..94b32811
--- /dev/null
+++ b/modules/convert/src/main/scala/docspell/convert/flexmark/Markdown.scala
@@ -0,0 +1,89 @@
+package docspell.convert.flexmark
+
+import java.io.{InputStream, InputStreamReader}
+import java.nio.charset.StandardCharsets
+import java.util
+
+import cats.effect.Sync
+import cats.implicits._
+import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension
+import com.vladsch.flexmark.ext.tables.TablesExtension
+import com.vladsch.flexmark.html.HtmlRenderer
+import com.vladsch.flexmark.parser.Parser
+import com.vladsch.flexmark.util.data.{DataKey, MutableDataSet}
+import fs2.Stream
+
+import scala.util.Try
+
+/** Renders markdown text into a standalone HTML document using flexmark. */
+object Markdown {
+
+  /** Reads UTF-8 encoded markdown from `is` and renders it to HTML.
+    *
+    * Exceptions thrown while reading, parsing or rendering are returned as
+    * `Left`. The stream is not closed by this method.
+    */
+  def toHtml(is: InputStream, cfg: MarkdownConfig): Either[Throwable, String] = {
+    val p = createParser()
+    val r = createRenderer()
+    Try {
+      val reader = new InputStreamReader(is, StandardCharsets.UTF_8)
+      val doc    = p.parseReader(reader)
+      wrapHtml(r.render(doc), cfg)
+    }.toEither
+  }
+
+  /** Renders the markdown text `md` to an HTML document. */
+  def toHtml(md: String, cfg: MarkdownConfig): String = {
+    val p   = createParser()
+    val r   = createRenderer()
+    val doc = p.parse(md)
+    wrapHtml(r.render(doc), cfg)
+  }
+
+  /** Decodes `data` as UTF-8, joins it into one string and renders that. */
+  def toHtml[F[_]: Sync](data: Stream[F, Byte], cfg: MarkdownConfig): F[String] =
+    data.through(fs2.text.utf8Decode).compile.foldMonoid.map(str => toHtml(str, cfg))
+
+  /** Wraps the rendered `body` into a complete HTML page.
+    *
+    * The page is joined from separate lines instead of an interpolated
+    * string with `stripMargin`: `stripMargin` runs after interpolation and
+    * would also cut a leading `|` off every line of `body`.
+    *
+    * NOTE(review): the markup lines of the previous template were blank; the
+    * skeleton below, including the style element holding `cfg.internalCss`,
+    * is a reconstruction -- confirm it against the intended output.
+    */
+  private def wrapHtml(body: String, cfg: MarkdownConfig): String =
+    List(
+      "<!DOCTYPE html>",
+      "<html>",
+      "<head>",
+      """<meta charset="utf-8"/>""",
+      "<style>",
+      cfg.internalCss,
+      "</style>",
+      "</head>",
+      "<body>",
+      body,
+      "</body>",
+      "</html>",
+      ""
+    ).mkString("\n")
+
+  /** Creates a parser with the table and strikethrough extensions enabled. */
+  private def createParser(): Parser = {
+    val opts = new MutableDataSet()
+    // NOTE(review): the cast presumably reconciles the Java generic signature of `set` with the list type -- confirm
+    opts.set(
+      Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]],
+      util.Arrays.asList(TablesExtension.create(), StrikethroughExtension.create())
+    )
+    Parser.builder(opts).build()
+  }
+
+  /** Creates an HTML renderer from an empty option set. */
+  private def createRenderer(): HtmlRenderer =
+    HtmlRenderer.builder(new MutableDataSet()).build()
+}
diff --git a/modules/convert/src/main/scala/docspell/convert/flexmark/MarkdownConfig.scala b/modules/convert/src/main/scala/docspell/convert/flexmark/MarkdownConfig.scala
new file mode 100644
index 00000000..3d0a5ab3
--- /dev/null
+++ b/modules/convert/src/main/scala/docspell/convert/flexmark/MarkdownConfig.scala
@@ -0,0 +1,3 @@
+package docspell.convert.flexmark
+
+case class MarkdownConfig(internalCss: String)
diff --git a/modules/extract/NOTICE b/modules/extract/NOTICE
new file mode 100644
index 00000000..05ccbbcc
--- /dev/null
+++ b/modules/extract/NOTICE
@@ -0,0 +1,11 @@
+The Java source files in docspell-extract are unmodified copies of
+those found in the Apache Tika parser project. It follows the
+NOTICE.txt file from Apache Tika parsers:
+
+Apache Tika parsers
+Copyright 2007-2019 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+ + diff --git a/modules/extract/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java b/modules/extract/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java new file mode 100644 index 00000000..80b2301c --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.odf; + +import org.apache.tika.sax.ContentHandlerDecorator; +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Locale; + +/** + * Content handler decorator that: + */ +public class NSNormalizerContentHandler extends ContentHandlerDecorator { + + private static final String OLD_NS = + "http://openoffice.org/2000/"; + + private static final String NEW_NS = + "urn:oasis:names:tc:opendocument:xmlns:"; + + private static final String DTD_PUBLIC_ID = + "-//OpenOffice.org//DTD OfficeDocument 1.0//EN"; + + public NSNormalizerContentHandler(ContentHandler handler) { + super(handler); + } + + private String mapOldNS(String ns) { + if (ns != null && ns.startsWith(OLD_NS)) { + return NEW_NS + ns.substring(OLD_NS.length()) + ":1.0"; + } else { + return ns; + } + } + + @Override + public void startElement( + String namespaceURI, String localName, String qName, + Attributes atts) throws SAXException { + AttributesImpl natts = new AttributesImpl(); + for (int i = 0; i < atts.getLength(); i++) { + natts.addAttribute( + mapOldNS(atts.getURI(i)), atts.getLocalName(i), + atts.getQName(i), atts.getType(i), atts.getValue(i)); + } + super.startElement(mapOldNS(namespaceURI), localName, qName, atts); + } + + @Override + public void endElement(String namespaceURI, String localName, String qName) + throws SAXException { + super.endElement(mapOldNS(namespaceURI), localName, qName); + } + + @Override + public void startPrefixMapping(String prefix, String uri) + throws SAXException { + super.startPrefixMapping(prefix, mapOldNS(uri)); + } + + /** + * do not load any DTDs (may be requested by parser). 
Fake the DTD by + * returning a empty string as InputSource + */ + @Override + public InputSource resolveEntity(String publicId, String systemId) + throws IOException, SAXException { + if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd")) + || DTD_PUBLIC_ID.equals(publicId)) { + return new InputSource(new StringReader("")); + } else { + return super.resolveEntity(publicId, systemId); + } + } + +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java b/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java new file mode 100644 index 00000000..066f3e95 --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java @@ -0,0 +1,606 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.odf; + +import org.apache.commons.io.input.CloseShieldInputStream; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.ElementMappingContentHandler; +import org.apache.tika.sax.ElementMappingContentHandler.TargetElement; +import org.apache.tika.sax.OfflineContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.utils.XMLReaderUtils; +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; +import org.xml.sax.helpers.DefaultHandler; + +import javax.xml.namespace.QName; +import java.io.IOException; +import java.io.InputStream; +import java.util.BitSet; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import static org.apache.tika.sax.XHTMLContentHandler.XHTML; + +/** + * Parser for ODF content.xml files. + */ +public class OpenDocumentContentParser extends AbstractParser { + private interface Style { + } + + private static class TextStyle implements Style { + public boolean italic; + public boolean bold; + public boolean underlined; + + @Override + public String toString() { + return "TextStyle{" + + "italic=" + italic + + ", bold=" + bold + + ", underlined=" + underlined + + '}'; + } + } + + private static class ListStyle implements Style { + public boolean ordered; + + public String getTag() { + return ordered ? 
"ol" : "ul"; + } + } + + private static final class OpenDocumentElementMappingContentHandler extends + ElementMappingContentHandler { + private static final char[] SPACE = new char[]{ ' '}; + private static final String CLASS = "class"; + private static final Attributes ANNOTATION_ATTRIBUTES = buildAttributes(CLASS, "annotation"); + private static final Attributes NOTE_ATTRIBUTES = buildAttributes(CLASS, "note"); + private static final Attributes NOTES_ATTRIBUTES = buildAttributes(CLASS, "notes"); + + private static Attributes buildAttributes(String key, String value) { + AttributesImpl attrs = new AttributesImpl(); + attrs.addAttribute("", key, key, "CDATA", value); + return attrs; + } + + private final ContentHandler handler; + private final BitSet textNodeStack = new BitSet(); + private int nodeDepth = 0; + private int completelyFiltered = 0; + private Stack headingStack = new Stack(); + private Map paragraphTextStyleMap = new HashMap(); + private Map textStyleMap = new HashMap(); + private Map listStyleMap = new HashMap(); + private String currParagraphStyleName; //paragraph style name + private TextStyle currTextStyle; //this is the text style for particular spans/paragraphs + private String currTextStyleName; + + private Stack listStyleStack = new Stack(); + private ListStyle listStyle; + + // True if we are currently in the named style: + private boolean curUnderlined; + private boolean curBold; + private boolean curItalic; + + //have we written the start style tags + //yet for the current text style + boolean hasWrittenStartStyleTags = false; + + private int pDepth = 0; //

<p> can appear inside comments and other things that are already inside <p>
 + //we need to track our pDepth and only output <p>

if we're at the main level + + + private OpenDocumentElementMappingContentHandler(ContentHandler handler, + Map mappings) { + super(handler, mappings); + this.handler = handler; + } + + @Override + public void characters(char[] ch, int start, int length) + throws SAXException { + // only forward content of tags from text:-namespace + if (completelyFiltered == 0 && nodeDepth > 0 + && textNodeStack.get(nodeDepth - 1)) { + if (!hasWrittenStartStyleTags) { + updateStyleTags(); + hasWrittenStartStyleTags = true; + } + super.characters(ch, start, length); + } + } + + // helper for checking tags which need complete filtering + // (with sub-tags) + private boolean needsCompleteFiltering( + String namespaceURI, String localName) { + if (TEXT_NS.equals(namespaceURI)) { + return localName.endsWith("-template") + || localName.endsWith("-style"); + } + return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName); + } + + // map the heading level to HTML tags + private String getXHTMLHeaderTagName(Attributes atts) { + String depthStr = atts.getValue(TEXT_NS, "outline-level"); + if (depthStr == null) { + return "h1"; + } + + int depth = Integer.parseInt(depthStr); + if (depth >= 6) { + return "h6"; + } else if (depth <= 1) { + return "h1"; + } else { + return "h" + depth; + } + } + + /** + * Check if a node is a text node + */ + private boolean isTextNode(String namespaceURI, String localName) { + if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") && !localName.equals("page-count")) { + return true; + } + if (SVG_NS.equals(namespaceURI)) { + return "title".equals(localName) || + "desc".equals(localName); + } + return false; + } + + private void startList(String name) throws SAXException { + String elementName = "ul"; + if (name != null) { + ListStyle style = listStyleMap.get(name); + elementName = style != null ? 
style.getTag() : "ul"; + listStyleStack.push(style); + } + handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES); + } + + private void endList() throws SAXException { + String elementName = "ul"; + if (!listStyleStack.isEmpty()) { + ListStyle style = listStyleStack.pop(); + elementName = style != null ? style.getTag() : "ul"; + } + handler.endElement(XHTML, elementName, elementName); + } + + private void startSpan(String name) throws SAXException { + if (name == null) { + return; + } + currTextStyle = textStyleMap.get(name); + hasWrittenStartStyleTags = false; + } + + private void startParagraph(String styleName) throws SAXException { + if (pDepth == 0) { + handler.startElement(XHTML, "p", "p", EMPTY_ATTRIBUTES); + if (styleName != null) { + currTextStyle = paragraphTextStyleMap.get(styleName); + } + hasWrittenStartStyleTags = false; + } else { + handler.characters(SPACE, 0, SPACE.length); + } + pDepth++; + } + + private void endParagraph() throws SAXException { + closeStyleTags(); + if (pDepth == 1) { + handler.endElement(XHTML, "p", "p"); + } else { + handler.characters(SPACE, 0, SPACE.length); + } + pDepth--; + + } + + private void updateStyleTags() throws SAXException { + + if (currTextStyle == null) { + closeStyleTags(); + return; + } + if (currTextStyle.bold != curBold) { + // Enforce nesting -- must close s and i tags + if (curUnderlined) { + handler.endElement(XHTML, "u", "u"); + curUnderlined = false; + } + if (curItalic) { + handler.endElement(XHTML, "i", "i"); + curItalic = false; + } + if (currTextStyle.bold) { + handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES); + } else { + handler.endElement(XHTML, "b", "b"); + } + curBold = currTextStyle.bold; + } + + if (currTextStyle.italic != curItalic) { + // Enforce nesting -- must close s tag + if (curUnderlined) { + handler.endElement(XHTML, "u", "u"); + curUnderlined = false; + } + if (currTextStyle.italic) { + handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES); + } else { + 
handler.endElement(XHTML, "i", "i"); + } + curItalic = currTextStyle.italic; + } + + if (currTextStyle.underlined != curUnderlined) { + if (currTextStyle.underlined) { + handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES); + } else { + handler.endElement(XHTML, "u", "u"); + } + curUnderlined = currTextStyle.underlined; + } + } + + private void endSpan() throws SAXException { + updateStyleTags(); + } + + private void closeStyleTags() throws SAXException { + // Close any still open style tags + if (curUnderlined) { + handler.endElement(XHTML,"u", "u"); + curUnderlined = false; + } + if (curItalic) { + handler.endElement(XHTML,"i", "i"); + curItalic = false; + } + if (curBold) { + handler.endElement(XHTML,"b", "b"); + curBold = false; + } + currTextStyle = null; + hasWrittenStartStyleTags = false; + } + + @Override + public void startElement( + String namespaceURI, String localName, String qName, + Attributes attrs) throws SAXException { + // keep track of current node type. If it is a text node, + // a bit at the current depth its set in textNodeStack. + // characters() checks the top bit to determine, if the + // actual node is a text node to print out nodeDepth contains + // the depth of the current node and also marks top of stack. 
+ assert nodeDepth >= 0; + + // Set styles + if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) { + String family = attrs.getValue(STYLE_NS, "family"); + if ("text".equals(family)) { + currTextStyle = new TextStyle(); + currTextStyleName = attrs.getValue(STYLE_NS, "name"); + } else if ("paragraph".equals(family)) { + currTextStyle = new TextStyle(); + currParagraphStyleName = attrs.getValue(STYLE_NS, "name"); + } + } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) { + listStyle = new ListStyle(); + String name = attrs.getValue(STYLE_NS, "name"); + listStyleMap.put(name, listStyle); + } else if (currTextStyle != null && STYLE_NS.equals(namespaceURI) + && "text-properties".equals(localName)) { + String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style"); + if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) { + currTextStyle.italic = true; + } + String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight"); + if ("bold".equals(fontWeight) || "bolder".equals(fontWeight) + || (fontWeight != null && Character.isDigit(fontWeight.charAt(0)) + && Integer.valueOf(fontWeight) > 500)) { + currTextStyle.bold = true; + } + String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style"); + if (underlineStyle != null && !underlineStyle.equals("none")) { + currTextStyle.underlined = true; + } + } else if (listStyle != null && TEXT_NS.equals(namespaceURI)) { + if ("list-level-style-bullet".equals(localName)) { + listStyle.ordered = false; + } else if ("list-level-style-number".equals(localName)) { + listStyle.ordered = true; + } + } + + textNodeStack.set(nodeDepth++, + isTextNode(namespaceURI, localName)); + // filter *all* content of some tags + assert completelyFiltered >= 0; + + if (needsCompleteFiltering(namespaceURI, localName)) { + completelyFiltered++; + } + // call next handler if no filtering + if (completelyFiltered == 0) { + // special handling of text:h, that are directly passed + // to 
incoming handler + if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) { + final String el = headingStack.push(getXHTMLHeaderTagName(attrs)); + handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES); + } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) { + startList(attrs.getValue(TEXT_NS, "style-name")); + } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) { + startSpan(attrs.getValue(TEXT_NS, "style-name")); + } else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) { + startParagraph(attrs.getValue(TEXT_NS, "style-name")); + } else if (TEXT_NS.equals(namespaceURI) && "s".equals(localName)) { + handler.characters(SPACE, 0, 1); + } else if ("annotation".equals(localName)) { + closeStyleTags(); + handler.startElement(XHTML, "span", "p", ANNOTATION_ATTRIBUTES); + } else if ("note".equals(localName)) { + closeStyleTags(); + handler.startElement(XHTML, "span", "p", NOTE_ATTRIBUTES); + } else if ("notes".equals(localName)) { + closeStyleTags(); + handler.startElement(XHTML, "span", "p", NOTES_ATTRIBUTES); + } else { + super.startElement(namespaceURI, localName, qName, attrs); + } + } + } + + @Override + public void endElement( + String namespaceURI, String localName, String qName) + throws SAXException { + if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) { + if (currTextStyle != null && currTextStyleName != null) { + textStyleMap.put(currTextStyleName, currTextStyle); + currTextStyleName = null; + currTextStyle = null; + } else if (currTextStyle != null && currParagraphStyleName != null) { + paragraphTextStyleMap.put(currParagraphStyleName, currTextStyle); + currParagraphStyleName = null; + currTextStyle = null; + } + } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) { + listStyle = null; + } + + // call next handler if no filtering + if (completelyFiltered == 0) { + // special handling of text:h, that are directly passed + // to incoming handler + if 
(TEXT_NS.equals(namespaceURI) && "h".equals(localName)) { + final String el = headingStack.pop(); + handler.endElement(XHTMLContentHandler.XHTML, el, el); + } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) { + endList(); + } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) { + currTextStyle = null; + hasWrittenStartStyleTags = false; + } else if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) { + endParagraph(); + } else if ("annotation".equals(localName) || "note".equals(localName) || + "notes".equals(localName)) { + closeStyleTags(); + handler.endElement("", localName, localName); + } else { + super.endElement(namespaceURI, localName, qName); + } + + // special handling of tabulators + if (TEXT_NS.equals(namespaceURI) + && ("tab-stop".equals(localName) + || "tab".equals(localName))) { + this.characters(TAB, 0, TAB.length); + } + } + + // revert filter for *all* content of some tags + if (needsCompleteFiltering(namespaceURI, localName)) { + completelyFiltered--; + } + assert completelyFiltered >= 0; + + // reduce current node depth + nodeDepth--; + assert nodeDepth >= 0; + } + + @Override + public void startPrefixMapping(String prefix, String uri) { + // remove prefix mappings as they should not occur in XHTML + } + + @Override + public void endPrefixMapping(String prefix) { + // remove prefix mappings as they should not occur in XHTML + } + } + + public static final String TEXT_NS = + "urn:oasis:names:tc:opendocument:xmlns:text:1.0"; + + public static final String TABLE_NS = + "urn:oasis:names:tc:opendocument:xmlns:table:1.0"; + + public static final String STYLE_NS = + "urn:oasis:names:tc:opendocument:xmlns:style:1.0"; + + public static final String FORMATTING_OBJECTS_NS = + "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"; + + public static final String OFFICE_NS = + "urn:oasis:names:tc:opendocument:xmlns:office:1.0"; + + public static final String SVG_NS = + 
"urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"; + + public static final String PRESENTATION_NS = + "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"; + + public static final String DRAW_NS = + "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"; + + public static final String XLINK_NS = "http://www.w3.org/1999/xlink"; + + protected static final char[] TAB = new char[]{'\t'}; + + private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl(); + + /** + * Mappings between ODF tag names and XHTML tag names + * (including attributes). All other tag names/attributes are ignored + * and left out from event stream. + */ + private static final HashMap MAPPINGS = + new HashMap(); + + static { + // general mappings of text:-tags + MAPPINGS.put( + new QName(TEXT_NS, "p"), + new TargetElement(XHTML, "p")); + // text:h-tags are mapped specifically in startElement/endElement + MAPPINGS.put( + new QName(TEXT_NS, "line-break"), + new TargetElement(XHTML, "br")); + MAPPINGS.put( + new QName(TEXT_NS, "list-item"), + new TargetElement(XHTML, "li")); + MAPPINGS.put( + new QName(TEXT_NS, "note"), + new TargetElement(XHTML, "span")); + MAPPINGS.put( + new QName(OFFICE_NS, "annotation"), + new TargetElement(XHTML, "span")); + MAPPINGS.put( + new QName(PRESENTATION_NS, "notes"), + new TargetElement(XHTML, "span")); + MAPPINGS.put( + new QName(DRAW_NS, "object"), + new TargetElement(XHTML, "object")); + MAPPINGS.put( + new QName(DRAW_NS, "text-box"), + new TargetElement(XHTML, "div")); + MAPPINGS.put( + new QName(SVG_NS, "title"), + new TargetElement(XHTML, "span")); + MAPPINGS.put( + new QName(SVG_NS, "desc"), + new TargetElement(XHTML, "span")); + MAPPINGS.put( + new QName(TEXT_NS, "span"), + new TargetElement(XHTML, "span")); + + final HashMap aAttsMapping = + new HashMap(); + aAttsMapping.put( + new QName(XLINK_NS, "href"), + new QName("href")); + aAttsMapping.put( + new QName(XLINK_NS, "title"), + new QName("title")); + MAPPINGS.put( + new QName(TEXT_NS, 
"a"), + new TargetElement(XHTML, "a", aAttsMapping)); + + // create HTML tables from table:-tags + MAPPINGS.put( + new QName(TABLE_NS, "table"), + new TargetElement(XHTML, "table")); + // repeating of rows is ignored; for columns, see below! + MAPPINGS.put( + new QName(TABLE_NS, "table-row"), + new TargetElement(XHTML, "tr")); + // special mapping for rowspan/colspan attributes + final HashMap tableCellAttsMapping = + new HashMap(); + tableCellAttsMapping.put( + new QName(TABLE_NS, "number-columns-spanned"), + new QName("colspan")); + tableCellAttsMapping.put( + new QName(TABLE_NS, "number-rows-spanned"), + new QName("rowspan")); + /* TODO: The following is not correct, the cell should be repeated not spanned! + * Code generates a HTML cell, spanning all repeated columns, to make the cell look correct. + * Problems may occur when both spanning and repeating is given, which is not allowed by spec. + * Cell spanning instead of repeating is not a problem, because OpenOffice uses it + * only for empty cells. 
+ */ + tableCellAttsMapping.put( + new QName(TABLE_NS, "number-columns-repeated"), + new QName("colspan")); + MAPPINGS.put( + new QName(TABLE_NS, "table-cell"), + new TargetElement(XHTML, "td", tableCellAttsMapping)); + } + + public Set getSupportedTypes(ParseContext context) { + return Collections.emptySet(); // not a top-level parser + } + + public void parse( + InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + parseInternal(stream, + new XHTMLContentHandler(handler, metadata), + metadata, context); + } + + void parseInternal( + InputStream stream, final ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + + DefaultHandler dh = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS); + + + XMLReaderUtils.parseSAX( + new CloseShieldInputStream(stream), + new OfflineContentHandler( + new NSNormalizerContentHandler(dh)), + context); + } + +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java b/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java new file mode 100644 index 00000000..11922d7d --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.odf; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.DublinCore; +import org.apache.tika.metadata.MSOffice; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Office; +import org.apache.tika.metadata.OfficeOpenXMLCore; +import org.apache.tika.metadata.PagedText; +import org.apache.tika.metadata.Property; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.xml.AttributeDependantMetadataHandler; +import org.apache.tika.parser.xml.AttributeMetadataHandler; +import org.apache.tika.parser.xml.ElementMetadataHandler; +import org.apache.tika.parser.xml.MetadataHandler; +import org.apache.tika.parser.xml.XMLParser; +import org.apache.tika.sax.TeeContentHandler; +import org.apache.tika.sax.xpath.CompositeMatcher; +import org.apache.tika.sax.xpath.Matcher; +import org.apache.tika.sax.xpath.MatchingContentHandler; +import org.apache.tika.sax.xpath.XPathParser; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Parser for OpenDocument meta.xml files. 
+ */ +public class OpenDocumentMetaParser extends XMLParser { + /** + * Serial version UID + */ + private static final long serialVersionUID = -8739250869531737584L; + + private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"; + private static final XPathParser META_XPATH = new XPathParser("meta", META_NS); + + /** + * @see OfficeOpenXMLCore#SUBJECT + * @deprecated use OfficeOpenXMLCore#SUBJECT + */ + @Deprecated + private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR = + Property.composite(Office.INITIAL_AUTHOR, + new Property[]{Property.externalText("initial-creator")}); + + private static ContentHandler getDublinCoreHandler( + Metadata metadata, Property property, String element) { + return new ElementMetadataHandler( + DublinCore.NAMESPACE_URI_DC, element, + metadata, property); + } + + private static ContentHandler getMeta( + ContentHandler ch, Metadata md, Property property, String element) { + Matcher matcher = new CompositeMatcher( + META_XPATH.parse("//meta:" + element), + META_XPATH.parse("//meta:" + element + "//text()")); + ContentHandler branch = + new MatchingContentHandler(new MetadataHandler(md, property), matcher); + return new TeeContentHandler(ch, branch); + } + + private static ContentHandler getUserDefined( + ContentHandler ch, Metadata md) { + Matcher matcher = new CompositeMatcher( + META_XPATH.parse("//meta:user-defined/@meta:name"), + META_XPATH.parse("//meta:user-defined//text()")); + // eg Text1 becomes custom:Info1=Text1 + ContentHandler branch = new MatchingContentHandler( + new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX), + matcher); + return new TeeContentHandler(ch, branch); + } + + @Deprecated + private static ContentHandler getStatistic( + ContentHandler ch, Metadata md, String name, String attribute) { + Matcher matcher = + META_XPATH.parse("//meta:document-statistic/@meta:" + attribute); + ContentHandler branch = new 
MatchingContentHandler( + new AttributeMetadataHandler(META_NS, attribute, md, name), matcher); + return new TeeContentHandler(ch, branch); + } + + private static ContentHandler getStatistic( + ContentHandler ch, Metadata md, Property property, String attribute) { + Matcher matcher = + META_XPATH.parse("//meta:document-statistic/@meta:" + attribute); + ContentHandler branch = new MatchingContentHandler( + new AttributeMetadataHandler(META_NS, attribute, md, property), matcher); + return new TeeContentHandler(ch, branch); + } + + protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) { + // We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date + // Process the Dublin Core Attributes + ch = new TeeContentHandler(super.getContentHandler(ch, md, context), + getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"), + getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"), + getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"), + getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"), + getDublinCoreHandler(md, TikaCoreProperties.CONTRIBUTOR, "contributor"), + getDublinCoreHandler(md, TikaCoreProperties.TYPE, "type"), + getDublinCoreHandler(md, TikaCoreProperties.FORMAT, "format"), + getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, "identifier"), + getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, "language"), + getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights")); + + // Process the OO Meta Attributes + ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date"); + // ODF uses dc:date for modified + ch = new TeeContentHandler(ch, new ElementMetadataHandler( + DublinCore.NAMESPACE_URI_DC, "date", + md, TikaCoreProperties.MODIFIED)); + + // ODF uses dc:subject for description + ch = new TeeContentHandler(ch, new ElementMetadataHandler( + DublinCore.NAMESPACE_URI_DC, "subject", + md, 
TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT)); + ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword"); + + ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration"); + ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles"); + ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator"); + ch = getMeta(ch, md, Property.externalText("generator"), "generator"); + + // Process the user defined Meta Attributes + ch = getUserDefined(ch, md); + + // Process the OO Statistics Attributes + ch = getStatistic(ch, md, Office.OBJECT_COUNT, "object-count"); + ch = getStatistic(ch, md, Office.IMAGE_COUNT, "image-count"); + ch = getStatistic(ch, md, Office.PAGE_COUNT, "page-count"); + ch = getStatistic(ch, md, PagedText.N_PAGES, "page-count"); + ch = getStatistic(ch, md, Office.TABLE_COUNT, "table-count"); + ch = getStatistic(ch, md, Office.PARAGRAPH_COUNT, "paragraph-count"); + ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count"); + ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count"); + + // Legacy, Tika-1.0 style attributes + // TODO Remove these in Tika 2.0 + ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count"); + ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count"); + ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count"); + ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count"); + ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count"); + ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count"); + ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count"); + + // Legacy Statistics Attributes, replaced with real keys above + // TODO Remove these shortly, eg after Tika 1.1 (TIKA-770) + ch = getStatistic(ch, md, "nbPage", "page-count"); + ch = getStatistic(ch, md, "nbPara", "paragraph-count"); + ch = getStatistic(ch, md, "nbWord", "word-count"); + ch = getStatistic(ch, md, 
"nbCharacter", "character-count"); + ch = getStatistic(ch, md, "nbTab", "table-count"); + ch = getStatistic(ch, md, "nbObject", "object-count"); + ch = getStatistic(ch, md, "nbImg", "image-count"); + + // Normalise the rest + ch = new NSNormalizerContentHandler(ch); + return ch; + } + + @Override + public void parse( + InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + super.parse(stream, handler, metadata, context); + // Copy subject to description for OO2 + String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT); + if (odfSubject != null && !odfSubject.equals("") && + (metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) { + metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject); + } + } + +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java new file mode 100644 index 00000000..6ba5281f --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.odf; + +import org.apache.commons.io.IOUtils; +import org.apache.tika.exception.TikaException; +import org.apache.tika.extractor.EmbeddedDocumentExtractor; +import org.apache.tika.extractor.EmbeddedDocumentUtil; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.metadata.TikaMetadataKeys; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.EmbeddedContentHandler; +import org.apache.tika.sax.EndDocumentShieldingContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.Set; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipInputStream; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * OpenOffice parser + */ +public class OpenDocumentParser extends AbstractParser { + + /** + * Serial version UID + */ + private static final long serialVersionUID = -6410276875438618287L; + + private static final Set SUPPORTED_TYPES = + Collections.unmodifiableSet(new HashSet(Arrays.asList( + MediaType.application("vnd.sun.xml.writer"), + MediaType.application("vnd.oasis.opendocument.text"), + MediaType.application("vnd.oasis.opendocument.graphics"), + MediaType.application("vnd.oasis.opendocument.presentation"), + MediaType.application("vnd.oasis.opendocument.spreadsheet"), + 
MediaType.application("vnd.oasis.opendocument.chart"), + MediaType.application("vnd.oasis.opendocument.image"), + MediaType.application("vnd.oasis.opendocument.formula"), + MediaType.application("vnd.oasis.opendocument.text-master"), + MediaType.application("vnd.oasis.opendocument.text-web"), + MediaType.application("vnd.oasis.opendocument.text-template"), + MediaType.application("vnd.oasis.opendocument.graphics-template"), + MediaType.application("vnd.oasis.opendocument.presentation-template"), + MediaType.application("vnd.oasis.opendocument.spreadsheet-template"), + MediaType.application("vnd.oasis.opendocument.chart-template"), + MediaType.application("vnd.oasis.opendocument.image-template"), + MediaType.application("vnd.oasis.opendocument.formula-template"), + MediaType.application("x-vnd.oasis.opendocument.text"), + MediaType.application("x-vnd.oasis.opendocument.graphics"), + MediaType.application("x-vnd.oasis.opendocument.presentation"), + MediaType.application("x-vnd.oasis.opendocument.spreadsheet"), + MediaType.application("x-vnd.oasis.opendocument.chart"), + MediaType.application("x-vnd.oasis.opendocument.image"), + MediaType.application("x-vnd.oasis.opendocument.formula"), + MediaType.application("x-vnd.oasis.opendocument.text-master"), + MediaType.application("x-vnd.oasis.opendocument.text-web"), + MediaType.application("x-vnd.oasis.opendocument.text-template"), + MediaType.application("x-vnd.oasis.opendocument.graphics-template"), + MediaType.application("x-vnd.oasis.opendocument.presentation-template"), + MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"), + MediaType.application("x-vnd.oasis.opendocument.chart-template"), + MediaType.application("x-vnd.oasis.opendocument.image-template"), + MediaType.application("x-vnd.oasis.opendocument.formula-template")))); + + private static final String META_NAME = "meta.xml"; + + private Parser meta = new OpenDocumentMetaParser(); + + private Parser content = new OpenDocumentContentParser(); 
+ + public Parser getMetaParser() { + return meta; + } + + public void setMetaParser(Parser meta) { + this.meta = meta; + } + + public Parser getContentParser() { + return content; + } + + public void setContentParser(Parser content) { + this.content = content; + } + + public Set getSupportedTypes(ParseContext context) { + return SUPPORTED_TYPES; + } + + public void parse( + InputStream stream, ContentHandler baseHandler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + + // Open the Zip stream + // Use a File if we can, and an already open zip is even better + ZipFile zipFile = null; + ZipInputStream zipStream = null; + if (stream instanceof TikaInputStream) { + TikaInputStream tis = (TikaInputStream) stream; + Object container = ((TikaInputStream) stream).getOpenContainer(); + if (container instanceof ZipFile) { + zipFile = (ZipFile) container; + } else if (tis.hasFile()) { + zipFile = new ZipFile(tis.getFile()); + } else { + zipStream = new ZipInputStream(stream); + } + } else { + zipStream = new ZipInputStream(stream); + } + + // Prepare to handle the content + XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata); + + // As we don't know which of the metadata or the content + // we'll hit first, catch the endDocument call initially + EndDocumentShieldingContentHandler handler = + new EndDocumentShieldingContentHandler(xhtml); + + if (zipFile != null) { + try { + handleZipFile(zipFile, metadata, context, handler); + } finally { + //Do we want to close silently == catch an exception here? + zipFile.close(); + } + } else { + try { + handleZipStream(zipStream, metadata, context, handler); + } finally { + //Do we want to close silently == catch an exception here? 
+ zipStream.close(); + } + } + + // Only now call the end document + if (handler.getEndDocumentWasCalled()) { + handler.reallyEndDocument(); + } + } + + private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException { + ZipEntry entry = zipStream.getNextEntry(); + if (entry == null) { + throw new IOException("No entries found in ZipInputStream"); + } + do { + handleZipEntry(entry, zipStream, metadata, context, handler); + entry = zipStream.getNextEntry(); + } while (entry != null); + } + + private void handleZipFile(ZipFile zipFile, Metadata metadata, + ParseContext context, EndDocumentShieldingContentHandler handler) + throws IOException, TikaException, SAXException { + // If we can, process the metadata first, then the + // rest of the file afterwards (TIKA-1353) + // Only possible to guarantee that when opened from a file not a stream + + ZipEntry entry = zipFile.getEntry(META_NAME); + if (entry != null) { + handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler); + } + + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { + entry = entries.nextElement(); + if (!META_NAME.equals(entry.getName())) { + handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler); + } + } + } + private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata, + ParseContext context, EndDocumentShieldingContentHandler handler) + throws IOException, SAXException, TikaException { + if (entry == null) return; + + if (entry.getName().equals("mimetype")) { + String type = IOUtils.toString(zip, UTF_8); + metadata.set(Metadata.CONTENT_TYPE, type); + } else if (entry.getName().equals(META_NAME)) { + meta.parse(zip, new DefaultHandler(), metadata, context); + } else if (entry.getName().endsWith("content.xml")) { + if (content instanceof OpenDocumentContentParser) { + 
((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context); + } else { + // Foreign content parser was set: + content.parse(zip, handler, metadata, context); + } + } else if (entry.getName().endsWith("styles.xml")) { + if (content instanceof OpenDocumentContentParser) { + ((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context); + } else { + // Foreign content parser was set: + content.parse(zip, handler, metadata, context); + } + } else { + String embeddedName = entry.getName(); + //scrape everything under Thumbnails/ and Pictures/ + if (embeddedName.contains("Thumbnails/") || + embeddedName.contains("Pictures/")) { + EmbeddedDocumentExtractor embeddedDocumentExtractor = + EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); + Metadata embeddedMetadata = new Metadata(); + embeddedMetadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, entry.getName()); + /* if (embeddedName.startsWith("Thumbnails/")) { + embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, + TikaCoreProperties.EmbeddedResourceType.THUMBNAIL); + }*/ + if (embeddedName.contains("Pictures/")) { + embeddedMetadata.set(TikaMetadataKeys.EMBEDDED_RESOURCE_TYPE, + TikaCoreProperties.EmbeddedResourceType.INLINE.toString()); + } + if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) { + embeddedDocumentExtractor.parseEmbedded(zip, + new EmbeddedContentHandler(handler), embeddedMetadata, false); + } + } + + } + } +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/AbstractMetadataHandler.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/AbstractMetadataHandler.java new file mode 100644 index 00000000..cbff35e7 --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/AbstractMetadataHandler.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.xml.sax.helpers.DefaultHandler; + +import java.util.Arrays; +import java.util.List; + +/** + * Base class for SAX handlers that map SAX events into document metadata. + * + * @since Apache Tika 0.10 + */ +class AbstractMetadataHandler extends DefaultHandler { + + private final Metadata metadata; + private final Property property; + private final String name; + + protected AbstractMetadataHandler(Metadata metadata, String name) { + this.metadata = metadata; + this.property = null; + this.name = name; + } + protected AbstractMetadataHandler(Metadata metadata, Property property) { + this.metadata = metadata; + this.property = property; + this.name = property.getName(); + } + + /** + * Adds the given metadata value. The value is ignored if it is + * null or empty. If the metadata entry already exists, + * then the given value is appended to it with a comma as the separator. 
+ * + * @param value metadata value + */ + protected void addMetadata(String value) { + if (value != null && value.length() > 0) { + if (metadata.isMultiValued(name)) { + // Add the value, assuming it's not already there + List previous = Arrays.asList(metadata.getValues(name)); + if (!previous.contains(value)) { + if (property != null) { + metadata.add(property, value); + } else { + metadata.add(name, value); + } + } + } else { + // Set the value, assuming it's not already there + String previous = metadata.get(name); + if (previous != null && previous.length() > 0) { + if (!previous.equals(value)) { + if (property != null) { + if (property.isMultiValuePermitted()) { + metadata.add(property, value); + } else { + // Replace the existing value if isMultiValuePermitted is false + metadata.set(property, value); + } + } else { + metadata.add(name, value); + } + } + } else { + if (property != null) { + metadata.set(property, value); + } else { + metadata.set(name, value); + } + } + } + } + } +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeDependantMetadataHandler.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeDependantMetadataHandler.java new file mode 100644 index 00000000..c1795fad --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeDependantMetadataHandler.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.tika.metadata.Metadata; +import org.xml.sax.Attributes; +import org.xml.sax.helpers.DefaultHandler; + +/** + * This adds a Metadata entry for a given node. + * The textual content of the node is used as the + * value, and the Metadata name is taken from + * an attribute, with a prefix if required. + */ +public class AttributeDependantMetadataHandler extends DefaultHandler { + + private final Metadata metadata; + + private final String nameHoldingAttribute; + private final String namePrefix; + private String name; + + private final StringBuilder buffer = new StringBuilder(); + + public AttributeDependantMetadataHandler(Metadata metadata, String nameHoldingAttribute, String namePrefix) { + this.metadata = metadata; + this.nameHoldingAttribute = nameHoldingAttribute; + this.namePrefix = namePrefix; + } + + public void addMetadata(String value) { + if(name == null || name.length() == 0) { + // We didn't find the attribute which holds the name + return; + } + if (value.length() > 0) { + String previous = metadata.get(name); + if (previous != null && previous.length() > 0) { + value = previous + ", " + value; + } + metadata.set(name, value); + } + } + + public void endElement(String uri, String localName, String name) { + addMetadata(buffer.toString()); + buffer.setLength(0); + } + + public void startElement( + String uri, String localName, String name, Attributes attributes) { + String rawName = attributes.getValue(nameHoldingAttribute); + if (rawName != null) { + if (namePrefix == null) { + 
this.name = rawName; + } else { + this.name = namePrefix + rawName; + } + } + // All other attributes are ignored + } + + + public void characters(char[] ch, int start, int length) { + buffer.append(ch, start, length); + } + +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeMetadataHandler.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeMetadataHandler.java new file mode 100644 index 00000000..dba5e4cb --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/AttributeMetadataHandler.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; + +/** + * SAX event handler that maps the contents of an XML attribute into + * a metadata field. 
+ * + * @since Apache Tika 0.10 + */ +public class AttributeMetadataHandler extends AbstractMetadataHandler { + + private final String uri; + + private final String localName; + + public AttributeMetadataHandler( + String uri, String localName, Metadata metadata, String name) { + super(metadata, name); + this.uri = uri; + this.localName = localName; + } + public AttributeMetadataHandler( + String uri, String localName, Metadata metadata, Property property) { + super(metadata, property); + this.uri = uri; + this.localName = localName; + } + + @Override + public void startElement( + String uri, String localName, String qName, Attributes attributes) + throws SAXException { + for (int i = 0; i < attributes.getLength(); i++) { + if (attributes.getURI(i).equals(this.uri) + && attributes.getLocalName(i).equals(this.localName)) { + addMetadata(attributes.getValue(i).trim()); + } + } + } + +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java new file mode 100644 index 00000000..5999773e --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.tika.metadata.DublinCore; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.TeeContentHandler; +import org.xml.sax.ContentHandler; + +/** + * Dublin Core metadata parser + */ +public class DcXMLParser extends XMLParser { + + /** Serial version UID */ + private static final long serialVersionUID = 4905318835463880819L; + + private static ContentHandler getDublinCoreHandler( + Metadata metadata, Property property, String element) { + return new ElementMetadataHandler( + DublinCore.NAMESPACE_URI_DC, element, + metadata, property); + } + + protected ContentHandler getContentHandler( + ContentHandler handler, Metadata metadata, ParseContext context) { + return new TeeContentHandler( + super.getContentHandler(handler, metadata, context), + getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"), + getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"), + getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"), + getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"), + getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"), + getDublinCoreHandler(metadata, TikaCoreProperties.CONTRIBUTOR, "contributor"), + getDublinCoreHandler(metadata, TikaCoreProperties.CREATED, "date"), + getDublinCoreHandler(metadata, TikaCoreProperties.TYPE, "type"), + getDublinCoreHandler(metadata, TikaCoreProperties.FORMAT, "format"), + getDublinCoreHandler(metadata, TikaCoreProperties.IDENTIFIER, "identifier"), + getDublinCoreHandler(metadata, TikaCoreProperties.LANGUAGE, "language"), + getDublinCoreHandler(metadata, TikaCoreProperties.RIGHTS, "rights")); + } + +} diff --git 
a/modules/extract/src/main/java/org/apache/tika/parser/xml/ElementMetadataHandler.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/ElementMetadataHandler.java new file mode 100644 index 00000000..d7a81dc4 --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/ElementMetadataHandler.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.xml.sax.Attributes; + +import java.util.Arrays; + +/** + * SAX event handler that maps the contents of an XML element into + * a metadata field. 
+ * + * @since Apache Tika 0.10 + */ +public class ElementMetadataHandler extends AbstractMetadataHandler { + private static final Logger LOG = LoggerFactory.getLogger(ElementMetadataHandler.class); + + private static final String LOCAL_NAME_RDF_BAG = "Bag"; + private static final String LOCAL_NAME_RDF_LI = "li"; + private static final String URI_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + + private final String uri; + + private final String localName; + + private final Metadata metadata; + + private final String name; + private Property targetProperty; + + private final boolean allowDuplicateValues; + private final boolean allowEmptyValues; + + /** + * The buffer used to capture characters when inside a bag li element. + */ + private final StringBuilder bufferBagged = new StringBuilder(); + + /** + * The buffer used to capture characters inside standard elements. + */ + private final StringBuilder bufferBagless = new StringBuilder(); + + /** + * Whether or not the value was found in a standard element structure or inside a bag. + */ + private boolean isBagless = true; + + private int matchLevel = 0; + private int parentMatchLevel = 0; + + /** + * Constructor for string metadata keys. + * + * @param uri the uri of the namespace of the element + * @param localName the local name of the element + * @param metadata the Tika metadata object to populate + * @param name the Tika metadata field key + */ + public ElementMetadataHandler( + String uri, String localName, Metadata metadata, String name) { + super(metadata, name); + this.uri = uri; + this.localName = localName; + this.metadata = metadata; + this.name = name; + this.allowDuplicateValues = false; + this.allowEmptyValues = false; + LOG.trace("created simple handler for {}", this.name); + } + + /** + * Constructor for string metadata keys which allows change of behavior + * for duplicate and empty entry values. 
+ * + * @param uri the uri of the namespace of the element + * @param localName the local name of the element + * @param metadata the Tika metadata object to populate + * @param name the Tika metadata field key + * @param allowDuplicateValues add duplicate values to the Tika metadata + * @param allowEmptyValues add empty values to the Tika metadata + */ + public ElementMetadataHandler( + String uri, String localName, Metadata metadata, String name, boolean allowDuplicateValues, boolean allowEmptyValues) { + super(metadata, name); + this.uri = uri; + this.localName = localName; + this.metadata = metadata; + this.name = name; + this.allowDuplicateValues = allowDuplicateValues; + this.allowEmptyValues = allowEmptyValues; + LOG.trace("created simple handler for {}", this.name); + } + + /** + * Constructor for Property metadata keys. + * + * @param uri the uri of the namespace of the element + * @param localName the local name of the element + * @param metadata the Tika metadata object to populate + * @param targetProperty the Tika metadata Property key + */ + public ElementMetadataHandler( + String uri, String localName, Metadata metadata, Property targetProperty) { + super(metadata, targetProperty); + this.uri = uri; + this.localName = localName; + this.metadata = metadata; + this.targetProperty = targetProperty; + this.name = targetProperty.getName(); + this.allowDuplicateValues = false; + this.allowEmptyValues = false; + LOG.trace("created property handler for {}", this.name); + } + + /** + * Constructor for Property metadata keys which allows change of behavior + * for duplicate and empty entry values. 
+ * + * @param uri the uri of the namespace of the element + * @param localName the local name of the element + * @param metadata the Tika metadata object to populate + * @param targetProperty the Tika metadata Property key + * @param allowDuplicateValues add duplicate values to the Tika metadata + * @param allowEmptyValues add empty values to the Tika metadata + */ + public ElementMetadataHandler( + String uri, String localName, Metadata metadata, Property targetProperty, boolean allowDuplicateValues, boolean allowEmptyValues) { + super(metadata, targetProperty); + this.uri = uri; + this.localName = localName; + this.metadata = metadata; + this.targetProperty = targetProperty; + this.name = targetProperty.getName(); + this.allowDuplicateValues = allowDuplicateValues; + this.allowEmptyValues = allowEmptyValues; + LOG.trace("created property handler for {}", this.name); + } + + protected boolean isMatchingParentElement(String uri, String localName) { + return (uri.equals(this.uri) && localName.equals(this.localName)); + } + + protected boolean isMatchingElement(String uri, String localName) { + // match if we're inside the parent element or within some bag element + return (uri.equals(this.uri) && localName.equals(this.localName)) || + (parentMatchLevel > 0 && + ((uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_BAG)) || + (uri.equals(URI_RDF) && localName.equals(LOCAL_NAME_RDF_LI)) + ) + ); + } + + @Override + public void startElement( + String uri, String localName, String name, Attributes attributes) { + if (isMatchingElement(uri, localName)) { + matchLevel++; + } + if (isMatchingParentElement(uri, localName)) { + parentMatchLevel++; + } + } + + @Override + public void endElement(String uri, String localName, String name) { + if (isMatchingParentElement(uri, localName)) { + parentMatchLevel--; + } + if (isMatchingElement(uri, localName)) { + matchLevel--; + if (matchLevel == 2) { + // we're inside a bag li element, add the bagged buffer + 
addMetadata(bufferBagged.toString().trim());
+                bufferBagged.setLength(0);
+                isBagless = false;
+            }
+            if (matchLevel == 0 && isBagless) {
+                String valueBagless = bufferBagless.toString();
+                if (valueBagless.length() > 0 && !valueBagless.contains(LOCAL_NAME_RDF_BAG)) {
+                    // we're in a standard element, add the bagless buffer
+                    addMetadata(valueBagless.trim());
+                    bufferBagless.setLength(0);
+                }
+                isBagless = true;
+            }
+        }
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length) {
+        // We need to append to both buffers since we don't know whether we're inside a bag until we're done
+        if (parentMatchLevel > 0 && matchLevel > 2) {
+            bufferBagged.append(ch, start, length);
+        }
+        if (parentMatchLevel > 0 && matchLevel > 0) {
+            bufferBagless.append(ch, start, length);
+        }
+    }
+
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length) {
+        characters(ch, start, length);
+    }
+
+    @Override
+    protected void addMetadata(String value) {
+        LOG.trace("adding {}={}", name, value);
+        if (targetProperty != null && targetProperty.isMultiValuePermitted()) {
+            if ((value != null && value.length() > 0) || allowEmptyValues) {
+                if (value == null || value.length() == 0 && allowEmptyValues) { // note: && binds tighter than ||
+                    value = "";
+                }
+                String[] previous = metadata.getValues(name);
+                if (previous == null || !Arrays.asList(previous).contains(value) || allowDuplicateValues) {
+                    metadata.add(targetProperty, value);
+                }
+            }
+        } else {
+            super.addMetadata(value);
+        }
+    }
+}
diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java
new file mode 100644
index 00000000..1f396901
--- /dev/null
+++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.xml;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Set;
+
+public class FictionBookParser extends XMLParser {
+    private static final long serialVersionUID = 4195954546491524374L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.application("x-fictionbook+xml"));
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    protected ContentHandler getContentHandler(ContentHandler handler, Metadata metadata, ParseContext context) {
+        return new BinaryElementsDataHandler(
+                EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context), handler);
+    }
+
+    private static class BinaryElementsDataHandler extends DefaultHandler {
+        private static final String ELEMENT_BINARY = "binary";
+
+        private boolean binaryMode = false; // set by a "binary" start tag, cleared again in endElement
+        private static final String ATTRIBUTE_ID = "id";
+
+        private final EmbeddedDocumentExtractor partExtractor;
+        private final ContentHandler handler;
+        private final StringBuilder binaryData = new StringBuilder(); // character data collected in binary mode
+        private Metadata metadata;
+        private static final String ATTRIBUTE_CONTENT_TYPE = "content-type";
+
+        private BinaryElementsDataHandler(EmbeddedDocumentExtractor partExtractor, ContentHandler handler) {
+            this.partExtractor = partExtractor;
+            this.handler = handler;
+        }
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
+            binaryMode = ELEMENT_BINARY.equals(localName);
+            if (binaryMode) {
+                binaryData.setLength(0);
+                metadata = new Metadata();
+
+                metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, attributes.getValue(ATTRIBUTE_ID));
+                metadata.set(Metadata.CONTENT_TYPE, attributes.getValue(ATTRIBUTE_CONTENT_TYPE));
+            }
+        }
+
+        @Override
+        public void endElement(String uri, String localName, String qName) throws SAXException {
+            if (binaryMode) {
+                try {
+                    partExtractor.parseEmbedded(
+                            new ByteArrayInputStream(Base64.decodeBase64(binaryData.toString())),
+                            handler,
+                            metadata,
+                            true
+                    );
+                } catch (IOException e) {
+                    throw new SAXException("IOException in parseEmbedded", e);
+                }
+
+                binaryMode = false;
+                binaryData.setLength(0);
+            }
+        }
+
+        @Override
+        public void characters(char[] ch, int start, int length) throws SAXException {
+            if (!binaryMode) {
+                handler.characters(ch, start, length);
+            } else {
+                binaryData.append(ch, start, length); // buffered here, Base64-decoded in endElement
+            }
+        }
+
+        @Override
+        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+            handler.ignorableWhitespace(ch, start, length);
+        }
+    }
+}
diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/MetadataHandler.java
b/modules/extract/src/main/java/org/apache/tika/parser/xml/MetadataHandler.java new file mode 100644 index 00000000..3fee00a3 --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/MetadataHandler.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.Property; +import org.xml.sax.Attributes; +import org.xml.sax.helpers.DefaultHandler; + +/** + * This adds Metadata entries with a specified name for + * the textual content of a node (if present), and + * all attribute values passed through the matcher + * (but not their names). 
+ * + * @deprecated Use the {@link AttributeMetadataHandler} and + * {@link ElementMetadataHandler} classes instead + */ +public class MetadataHandler extends DefaultHandler { + + private final Metadata metadata; + + private final Property property; + private final String name; + + private final StringBuilder buffer = new StringBuilder(); + + public MetadataHandler(Metadata metadata, String name) { + this.metadata = metadata; + this.property = null; + this.name = name; + } + public MetadataHandler(Metadata metadata, Property property) { + this.metadata = metadata; + this.property = property; + this.name = property.getName(); + } + + public void addMetadata(String value) { + if (value.length() > 0) { + String previous = metadata.get(name); + if (previous != null && previous.length() > 0) { + value = previous + ", " + value; + } + + if (this.property != null) { + metadata.set(property, value); + } else { + metadata.set(name, value); + } + } + } + + public void endElement(String uri, String localName, String name) { + addMetadata(buffer.toString()); + buffer.setLength(0); + } + + public void startElement( + String uri, String localName, String name, Attributes attributes) { + for (int i = 0; i < attributes.getLength(); i++) { + addMetadata(attributes.getValue(i)); + } + } + + + public void characters(char[] ch, int start, int length) { + buffer.append(ch, start, length); + } + +} diff --git a/modules/extract/src/main/java/org/apache/tika/parser/xml/XMLParser.java b/modules/extract/src/main/java/org/apache/tika/parser/xml/XMLParser.java new file mode 100644 index 00000000..e247a6c4 --- /dev/null +++ b/modules/extract/src/main/java/org/apache/tika/parser/xml/XMLParser.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.xml; + +import org.apache.commons.io.input.CloseShieldInputStream; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.EmbeddedContentHandler; +import org.apache.tika.sax.OfflineContentHandler; +import org.apache.tika.sax.TaggedContentHandler; +import org.apache.tika.sax.TextContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.apache.tika.utils.XMLReaderUtils; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * XML parser. 
+ */
+public class XMLParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -6028836725280212837L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                MediaType.application("xml"),
+                MediaType.image("svg+xml"))));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        if (metadata.get(Metadata.CONTENT_TYPE) == null) { // fall back to application/xml when no type was set
+            metadata.set(Metadata.CONTENT_TYPE, "application/xml");
+        }
+
+        final XHTMLContentHandler xhtml =
+            new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.startElement("p");
+
+        TaggedContentHandler tagged = new TaggedContentHandler(handler);
+        try {
+            XMLReaderUtils.parseSAX(
+                    new CloseShieldInputStream(stream),
+                    new OfflineContentHandler(new EmbeddedContentHandler(
+                            getContentHandler(tagged, metadata, context))), context);
+        } catch (SAXException e) {
+            tagged.throwIfCauseOf(e);
+            throw new TikaException("XML parse error", e);
+        } finally { // the surrounding "p" element and the document are always closed
+            xhtml.endElement("p");
+            xhtml.endDocument();
+        }
+    }
+
+    protected ContentHandler getContentHandler(
+            ContentHandler handler, Metadata metadata, ParseContext context) {
+        return new TextContentHandler(handler, true);
+    }
+}
diff --git a/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala b/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala
new file mode 100644
index 00000000..6c05d56a
--- /dev/null
+++ b/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala
@@ -0,0 +1,29 @@
+package docspell.extract
+
+import docspell.common.MimeType
+
+import scala.util.Try
+
+sealed trait ExtractResult {
+
+  def textOption: Option[String]
+
+}
+
+object ExtractResult {
+
+  case class UnsupportedFormat(mime: MimeType) extends ExtractResult {
+    val
textOption = None
+  }
+  case class Failure(ex: Throwable) extends ExtractResult {
+    val textOption = None
+  }
+  case class Success(text: String) extends ExtractResult {
+    val textOption = Some(text)
+  }
+
+  def fromTry(r: Try[String]): ExtractResult =
+    r.fold(Failure.apply, Success.apply)
+
+
+}
diff --git a/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala b/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala
new file mode 100644
index 00000000..ae3ac66d
--- /dev/null
+++ b/modules/extract/src/main/scala/docspell/extract/odf/OdfExtract.scala
@@ -0,0 +1,39 @@
+package docspell.extract.odf
+
+import cats.effect._
+import cats.implicits._
+import fs2.Stream
+import java.io.{ByteArrayInputStream, InputStream}
+
+import org.apache.tika.metadata.Metadata
+import org.apache.tika.parser.ParseContext
+import org.apache.tika.parser.odf.OpenDocumentParser
+import org.apache.tika.sax.BodyContentHandler
+
+import scala.util.Try
+
+/** Extracts the text of OpenDocument files using the Tika `OpenDocumentParser`. */
+object OdfExtract {
+
+  /** Loads all bytes of `data` into memory and extracts the text from them. */
+  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get)
+
+  /** Parses the document read from `is` and returns its trimmed text.
+    *
+    * Exceptions thrown while parsing are returned as a `Left`.
+    */
+  def get(is: InputStream): Either[Throwable, String] =
+    Try {
+      // -1 disables the default write limit of 100k characters of the handler,
+      // which would otherwise make the extraction of larger documents fail.
+      val handler  = new BodyContentHandler(-1)
+      val pctx     = new ParseContext()
+      val meta     = new Metadata()
+      val ooparser = new OpenDocumentParser()
+      ooparser.parse(is, handler, meta, pctx)
+      handler.toString.trim
+    }.toEither
+
+}
diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
new file mode 100644
index 00000000..c935100c
--- /dev/null
+++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala
@@ -0,0 +1,34 @@
+package docspell.extract.pdfbox
+
+import java.io.InputStream
+import java.nio.file.Path
+
+import cats.implicits._
+import cats.effect.Sync
+import org.apache.pdfbox.pdmodel.PDDocument
+import org.apache.pdfbox.text.PDFTextStripper
+
+import scala.util.{Try, Using}
+import fs2.Stream
+
+object PdfboxExtract {
+
+  def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+    data.compile.to(Array).map { bytes =>
+      Using(PDDocument.load(bytes))(readText).toEither.flatten
+    }
+
+  def get(is: InputStream): Either[Throwable, String] =
+    Using(PDDocument.load(is))(readText).toEither.flatten
+
+  def get(inFile: Path): Either[Throwable, String] =
+    Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten
+
+  private def readText(doc: PDDocument): Either[Throwable, String] =
+    Try {
+      val stripper = new PDFTextStripper()
+      stripper.setAddMoreFormatting(true)
+      stripper.setLineSeparator("\n")
+      stripper.getText(doc).trim // trim here already
+    }.toEither
+}
diff --git a/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala b/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala
new file mode 100644
index 00000000..68e1de18
--- /dev/null
+++ b/modules/extract/src/main/scala/docspell/extract/poi/PoiExtract.scala
@@ -0,0 +1,95 @@
+package docspell.extract.poi
+
+import java.io.{ByteArrayInputStream, InputStream}
+
+import cats.data.EitherT
+import cats.implicits._
+import cats.effect.Sync
+import org.apache.poi.hssf.extractor.ExcelExtractor
+import org.apache.poi.hssf.usermodel.HSSFWorkbook
+import org.apache.poi.hwpf.extractor.WordExtractor
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor
+import org.apache.poi.xssf.usermodel.XSSFWorkbook
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor
+import org.apache.poi.xwpf.usermodel.XWPFDocument
+import fs2.Stream
+
+import scala.util.{Try, Using}
+import docspell.common._
+import docspell.files.TikaMimetype
+
+/** Extracts plain text from MS Office files (doc, docx, xls, xlsx) using Apache POI. */
+object PoiExtract {
+
+  /** Detects the mime type of `data` (using `hint`) and runs the matching extractor.
+    *
+    * For the generic `PoiTypes.msoffice` and `PoiTypes.ooxml` types the word
+    * extractor is tried first and the excel extractor is used if that fails.
+    * Every other mime type results in a `Left`.
+    */
+  def get[F[_]: Sync](data: Stream[F, Byte], hint: MimeTypeHint): F[Either[Throwable, String]] =
+    TikaMimetype.detect(data, hint).flatMap {
+      case PoiTypes.doc =>
+        getDoc(data)
+      case PoiTypes.xls =>
+        getXls(data)
+      case PoiTypes.xlsx =>
+        getXlsx(data)
+      case PoiTypes.docx =>
+        getDocx(data)
+      case PoiTypes.msoffice =>
+        EitherT(getDoc[F](data))
+          .recoverWith({
+            case _ => EitherT(getXls[F](data))
+          })
+          .value
+      case PoiTypes.ooxml =>
+        EitherT(getDocx[F](data))
+          .recoverWith({
+            case _ => EitherT(getXlsx[F](data))
+          })
+          .value
+      case mt =>
+        Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}")))
+    }
+
+  /** Returns the trimmed text of the docx document read from `is`. */
+  def getDocx(is: InputStream): Either[Throwable, String] =
+    extractText(new XWPFWordExtractor(new XWPFDocument(is)))(_.getText)
+
+  /** Returns the trimmed text of the doc document read from `is`. */
+  def getDoc(is: InputStream): Either[Throwable, String] =
+    extractText(new WordExtractor(is))(_.getText)
+
+  /** Returns the trimmed text of the xlsx workbook read from `is`. */
+  def getXlsx(is: InputStream): Either[Throwable, String] =
+    extractText(new XSSFExcelExtractor(new XSSFWorkbook(is)))(_.getText)
+
+  /** Returns the trimmed text of the xls workbook read from `is`. */
+  def getXls(is: InputStream): Either[Throwable, String] =
+    extractText(new ExcelExtractor(new HSSFWorkbook(is)))(_.getText)
+
+  /** Loads all bytes of `data` into memory and extracts the text of the docx document. */
+  def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx)
+
+  /** Loads all bytes of `data` into memory and extracts the text of the doc document. */
+  def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc)
+
+  /** Loads all bytes of `data` into memory and extracts the text of the xlsx workbook. */
+  def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx)
+
+  /** Loads all bytes of `data` into memory and extracts the text of the xls workbook. */
+  def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] =
+    data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls)
+
+  /** Runs `text` on the extractor created by `open` and closes the extractor afterwards.
+    *
+    * Exceptions thrown while opening, reading or closing are returned as a `Left`.
+    */
+  private def extractText[A <: AutoCloseable](open: => A)(text: A => String): Either[Throwable, String] =
+    Try(Using.resource(open)(ex => text(ex).trim)).toEither
+
+}
diff --git a/modules/extract/src/main/scala/docspell/extract/poi/PoiTypes.scala b/modules/extract/src/main/scala/docspell/extract/poi/PoiTypes.scala
new file mode 100644
index 00000000..f3795fc5
--- /dev/null
+++ b/modules/extract/src/main/scala/docspell/extract/poi/PoiTypes.scala
@@ -0,0 +1,16 @@
+package docspell.extract.poi
+
+import docspell.common.MimeType
+
+object PoiTypes {
+
+  val msoffice = MimeType.application("x-tika-msoffice")
+  val
ooxml = MimeType.application("x-tika-ooxml") + val docx = MimeType.application("vnd.openxmlformats-officedocument.wordprocessingml.document") + val xlsx = MimeType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet") + val xls = MimeType.application("vnd.ms-excel") + val doc = MimeType.application("msword") + + val all = Set(msoffice, ooxml, docx, xlsx, xls, doc) + +} diff --git a/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala b/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala new file mode 100644 index 00000000..e2b5757b --- /dev/null +++ b/modules/extract/src/main/scala/docspell/extract/rtf/RtfExtract.scala @@ -0,0 +1,24 @@ +package docspell.extract.rtf + +import java.io.{ByteArrayInputStream, InputStream} + +import cats.implicits._ +import cats.effect.Sync +import fs2.Stream +import javax.swing.text.rtf.RTFEditorKit + +import scala.util.Try + +object RtfExtract { + + def get(is: InputStream): Either[Throwable, String] = + Try { + val kit = new RTFEditorKit() + val doc = kit.createDefaultDocument() + kit.read(is, doc, 0) + doc.getText(0, doc.getLength).trim + }.toEither + + def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, String]] = + data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get) +} diff --git a/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala b/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala index 0f400a13..8033200a 100644 --- a/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala +++ b/modules/extract/src/test/scala/docspell/extract/ocr/TextExtractionSuite.scala @@ -1,9 +1,7 @@ package docspell.extract.ocr import cats.effect.IO -import docspell.common._ -import docspell.files._ -import docspell.extract.TestFiles +import docspell.files.TestFiles import minitest.SimpleTestSuite object TextExtractionSuite extends SimpleTestSuite { @@ -30,13 +28,4 @@ object TextExtractionSuite extends 
SimpleTestSuite { assertEquals(extract.trim, expect.trim) } - - test("find mimetypes") { - ExampleFiles. - all.foreach { url => - TikaMimetype.detect(url.readURL[IO](8192, blocker), MimeTypeHint.none). - map(mt => println(url.asString + ": " + mt.asString)). - unsafeRunSync - } - } } diff --git a/modules/extract/src/test/scala/docspell/extract/odf/OdfExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/odf/OdfExtractTest.scala new file mode 100644 index 00000000..00189e10 --- /dev/null +++ b/modules/extract/src/test/scala/docspell/extract/odf/OdfExtractTest.scala @@ -0,0 +1,28 @@ +package docspell.extract.odf + +import cats.effect._ +import docspell.files.{ExampleFiles, TestFiles} +import minitest.SimpleTestSuite + +object OdfExtractTest extends SimpleTestSuite { + val blocker = TestFiles.blocker + implicit val CS = TestFiles.CS + + val files = List( + ExampleFiles.examples_sample_odt -> 6372, + ExampleFiles.examples_sample_ods -> 717 + ) + + test("test extract from odt") { + files.foreach { case (file, len) => + val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity) + val str1 = OdfExtract.get(is).fold(throw _, identity) + assertEquals(str1.length, len) + + val data = file.readURL[IO](8192, blocker) + val str2 = OdfExtract.get[IO](data).unsafeRunSync().fold(throw _, identity) + assertEquals(str2, str1) + } + } + +} diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala new file mode 100644 index 00000000..4d06be76 --- /dev/null +++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala @@ -0,0 +1,48 @@ +package docspell.extract.pdfbox + +import cats.effect._ +import docspell.files.{ExampleFiles, TestFiles} +import minitest.SimpleTestSuite + +object PdfboxExtractTest extends SimpleTestSuite { + val blocker = TestFiles.blocker + implicit val CS = TestFiles.CS + + val textPDFs = List( + 
ExampleFiles.letter_de_pdf -> TestFiles.letterDEText, + ExampleFiles.letter_en_pdf -> TestFiles.letterENText + ) + + test("extract text from text PDFs by inputstream") { + textPDFs.foreach { + case (file, txt) => + val url = file.toJavaUrl.fold(sys.error, identity) + val str = PdfboxExtract.get(url.openStream()).fold(throw _, identity) + val received = removeFormatting(str) + val expect = removeFormatting(txt) + assertEquals(received, expect) + } + } + + test("extract text from text PDFs via Stream") { + textPDFs.foreach { + case (file, txt) => + val data = file.readURL[IO](8192, blocker) + val str = PdfboxExtract.get(data).unsafeRunSync().fold(throw _, identity) + val received = removeFormatting(str) + val expect = removeFormatting(txt) + assertEquals(received, expect) + } + } + + test("extract text from image PDFs") { + val url = ExampleFiles.scanner_pdf13_pdf.toJavaUrl.fold(sys.error, identity) + + val str = PdfboxExtract.get(url.openStream()).fold(throw _, identity) + + assertEquals(str, "") + } + + private def removeFormatting(str: String): String = + str.replaceAll("[\\s;:.,\\-]+", "").toLowerCase +} diff --git a/modules/extract/src/test/scala/docspell/extract/poi/PoiExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/poi/PoiExtractTest.scala new file mode 100644 index 00000000..002755bc --- /dev/null +++ b/modules/extract/src/test/scala/docspell/extract/poi/PoiExtractTest.scala @@ -0,0 +1,39 @@ +package docspell.extract.poi + +import cats.effect._ +import docspell.common.MimeTypeHint +import docspell.files.{ExampleFiles, TestFiles} +import minitest.SimpleTestSuite + +object PoiExtractTest extends SimpleTestSuite { + val blocker = TestFiles.blocker + implicit val CS = TestFiles.CS + + val officeFiles = List( + ExampleFiles.examples_sample_doc -> 6241, + ExampleFiles.examples_sample_docx -> 6179, + ExampleFiles.examples_sample_xlsx -> 660, + ExampleFiles.examples_sample_xls -> 660 + ) + + test("extract text from ms office files") { + 
officeFiles.foreach { + case (file, len) => + val str1 = PoiExtract + .get[IO](file.readURL[IO](8192, blocker), MimeTypeHint.none) + .unsafeRunSync() + .fold(throw _, identity) + + val str2 = PoiExtract + .get[IO]( + file.readURL[IO](8192, blocker), + MimeTypeHint(Some(file.path.segments.last), None) + ) + .unsafeRunSync() + .fold(throw _, identity) + + assertEquals(str1, str2) + assertEquals(str1.length, len) + } + } +} diff --git a/modules/extract/src/test/scala/docspell/extract/rtf/RtfExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/rtf/RtfExtractTest.scala new file mode 100644 index 00000000..699af486 --- /dev/null +++ b/modules/extract/src/test/scala/docspell/extract/rtf/RtfExtractTest.scala @@ -0,0 +1,14 @@ +package docspell.extract.rtf + +import docspell.files.ExampleFiles +import minitest.SimpleTestSuite + +object RtfExtractTest extends SimpleTestSuite { + + test("extract text from rtf using java input-stream") { + val file = ExampleFiles.examples_sample_rtf + val is = file.toJavaUrl.map(_.openStream()).fold(sys.error, identity) + val str = RtfExtract.get(is).fold(throw _, identity) + assertEquals(str.length, 7342) + } +} diff --git a/modules/files/src/main/scala/docspell/files/Dimension.scala b/modules/files/src/main/scala/docspell/files/Dimension.scala new file mode 100644 index 00000000..2d1a1f4b --- /dev/null +++ b/modules/files/src/main/scala/docspell/files/Dimension.scala @@ -0,0 +1,7 @@ +package docspell.files + +case class Dimension(width: Int, height: Int) { + + def toAwtDimension: java.awt.Dimension = + new java.awt.Dimension(width, height) +} diff --git a/modules/files/src/main/scala/docspell/files/ImageSize.scala b/modules/files/src/main/scala/docspell/files/ImageSize.scala new file mode 100644 index 00000000..21cd0180 --- /dev/null +++ b/modules/files/src/main/scala/docspell/files/ImageSize.scala @@ -0,0 +1,61 @@ +package docspell.files + +import java.io.{ByteArrayInputStream, InputStream} +import java.nio.file.Path + +import 
cats.implicits._ +import cats.effect._ +import fs2.Stream +import javax.imageio.stream.{FileImageInputStream, ImageInputStream} +import javax.imageio.{ImageIO, ImageReader} + +import scala.jdk.CollectionConverters._ +import scala.util.{Try, Using} + +object ImageSize { + + /** Return the image size from its header without reading + * the whole image into memory. + */ + def get(file: Path): Option[Dimension] = + Using(new FileImageInputStream(file.toFile))(getDimension).toOption.flatten + + /** Return the image size from its header without reading + * the whole image into memory. + */ + def get(in: InputStream): Option[Dimension] = + Option(ImageIO.createImageInputStream(in)).flatMap(getDimension) + + /** Return the image size from its header without reading + * the whole image into memory. + */ + def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] = { + data.take(768).compile.to(Array).map(ar => { + val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar)) + if (iis == null) sys.error("no reader given for the array") + else getDimension(iis) + }) + } + + private def getDimension(in: ImageInputStream): Option[Dimension] = + ImageIO + .getImageReaders(in) + .asScala + .to(LazyList) + .collectFirst(Function.unlift { reader => + val dim = getDimension(in, reader).toOption + reader.dispose() + dim + }) + + private def getDimension( + in: ImageInputStream, + reader: ImageReader + ): Either[Throwable, Dimension] = + Try { + reader.setInput(in) + val width = reader.getWidth(reader.getMinIndex) + val height = reader.getHeight(reader.getMinIndex) + Dimension(width, height) + }.toEither +} diff --git a/modules/files/src/test/resources/bombs/20K-gray.jpeg b/modules/files/src/test/resources/bombs/20K-gray.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..4804bb10eb4fa51614ff20c66d2c0a8e9f416bbd GIT binary patch literal 1562661 zcmeIuJxT*%5CG8G!rhHoF={Lf8?&eog%C6@sGwK~0WESRkK$EQcnE(QZ}J(fOy>dK 
zOf@qwGraVczM}Uuf1F2_MP#dt^cfFP=6R8Cltoc=I^|~7->SOZ>R@+oyFVO^4u^xf zZce7-=4f(U*QZx!lk)||L%{!QLftk-s<3b zq>m_nez}jA<@!WLfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk 
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ 
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ 
z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 V2oNAZfB*pk1PBoLm%wv+{{~-G8ms^S literal 0 HcmV?d00001 diff --git a/modules/files/src/test/resources/bombs/20K-gray.png b/modules/files/src/test/resources/bombs/20K-gray.png new file mode 100644 index 0000000000000000000000000000000000000000..66d8b0a48453386fd831308af9ca3f80dd0482b6 GIT binary patch literal 48829 zcmeI*v1`;&90%}swS$~qJqH)N-0a$=9ZGFIZG&*sa&K_b4t3FXk%A!NS@FQZ;vgM# zcInVgf;#CS2>k=Jor+TnMX*B$(Vmw;`Uf~BpFk24lEClt-t;B>-t*bkbgg=^8bYWg zGtIdWT8(tC^lyZ%GY?<&PXkGFvi)G^@9^?!2=%k+)xqaV`b6jD@#fFd>B}|(1PBly zK!5-N0t5&UAV8qd0wWWxy*wA_Y(G7fMF)-Ew`;S}Mt}eT0t5&UAV7cs0RjXF6kK59 z($SMV7l`xch6d9Vr1R(2@MOUYR~`WZ1PBlyK!5-N0t5&UAP|?jYu%Gv;Mw?SJ^MT2 z@bTgx90CLg5FkK+009C72oNAZpy&d>e~o?Y=K@1%e}Q=6X!mlK3w-UpJKrvP>FOgu zfB*pk1PBlyK!5-N0t8}pZsB$Q$G~?D2FwTo1PBlyK!5-N0t5&UAW$lSsT+-%JQvVl zpj1PzSONqH5FkK+009C72oN9;lg%G1c`l&AfC)i>009C72oNAZfB*pk1j;3Fr@g

lGlxyr&OMn0Y0t5&UAV7cs0RjZJ-+!s*xj@ulz=R+`fB*pk1PBlyK!5-N0;LkT z{^n6~k_%`sP^z(4ECB)p2oNAZfB*pk1PBmV>aKM|sNKs}DttRW+DKO_tRME4uMA{s q7CI0hK!5-N0t5&UAV7e?{|bDHcVno0xPS9S`i10btGO|Cd*vU@or7us literal 0 HcmV?d00001 diff --git a/modules/files/src/test/resources/bombs/20K-rgb.jpeg b/modules/files/src/test/resources/bombs/20K-rgb.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a4ef7bf6826ff9beeef63357d09e38b750d24aed GIT binary patch literal 2344037 zcmeIu%Ps>^00!W5G-f(FjSeNqlqw05Mi@0ojd#j&|MXydbUc|%w)W01&#H^VlS%b;NX)V<@8oOU?pn27ZdZR!{TPb&bT}Qh z;wa1&u~o$SDXe@yHT+D{)cs!b_SrP;YHTvwCh6De;#b@J`!(dP*xb`9!c};P)%*4A zMl%8g2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF 
z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs 
z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U 
zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N 
z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly 
zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk 
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ 
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 l2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF{7WF#FP~7IE8_qF literal 0 HcmV?d00001 diff --git a/modules/files/src/test/resources/bombs/20K-rgb.png b/modules/files/src/test/resources/bombs/20K-rgb.png new file mode 100644 index 0000000000000000000000000000000000000000..cf332e531e7dde48aa547f44a0a856ee1c4f6364 GIT binary patch literal 1207693 zcmeI*U#MU8zvuV0jg8}J5;}6Io5&v0R*UDR?zE2M7^J2uHDV7T^aYL%*Z*U`d}8gofAGnF>tDF;AN$b%#3#HQkKf_tRs&zs0v6xH0v6E{ zu=uYf1&e42T0~30B3c3#(NeI8mY_wn1T3N@U=b|^i)aa2L`%RTS^^f)Qm}}YphdI< zETSb~5iJFaXbDZ+0_qO-+$M)_q=f5neCf}A78)v zyYux~IsLu=;@Sm`>-*0?|M#w4_qe|QZ2yfvy1pNH?MLwm3wHbt3s`(f3s`&)3s^); z!6I6M7SR&0h?amwv=l6&C1?>X0gGq}SVT*~B3gnL(GswTmViaH6fB}8Xb~*|i)aa0 zL`%UUT7nkQ60nGtfJL+vETSc75iJ4hr)X_oc=(fhS3l^$OK*O6`-68Jd+2&^Bm9#S z*Z=o1{NMlj_5Xbg7ytfW;O}Gjlht37alQZH6Bgw79Tu?ok`}NAzJ~=Zq9tGvEdh&Y z30Oo+!6I6M7SR&0h?amwv=l6&C1?>X0gGq}SVT*~B3gnL(GswTmViaH6fB}8Xb~*| zi)aa0L`%UUT7nkQ60nGtfVC5?i`-7SR&4h?amwv;-`orC<>)L5pY!SVT*}+Pl#@ef6`W2Uk)bpF97Ct3wY_DHlcp>j{ON{!^1XeB`9nrM|8$urSPfT}go zsyN~U(TaVlEzznt;seo&ed;aIsy)IJ(F%F$CDE!q!V}R7d735BYCxjn9*)-LnJaJa zABOFxPoDbmy=S&=c`S0WcU(@gUCb$$lS~$8_`CDYV!@aTh6|9!SNI+tB^G?-dw2oT z^a|g@6Gi?&v~r{A4Wczso@kXC$urSPfXX$|Dm9X4qLlzuYob+g#0R1k`&3(^RdK`zq80np zTcTBageRgE^3==Ue6-e&J^#eMVYs+?_XjV2{^5mvch;X@e&TXcZZyBa<)qmnecHp9 zlTw@R1#VP!B>sSC`8NHAX!R!UglM@m{eo!qChml2xirRtXc;Kph-i5-#++ywDBg%@ zc{0|VXjv-GjA%J9)|hBnD$a~(IWXp!XqhbbkZ9R1=9FleEcTFS*)H~!XxT32lxUeO z_K;}VF6NYInJmtbXgM&}m}prl&TNlI>$Qys-d`Jr)9Y`2`O%fHF0b9@FgasxV>vM9 znA=z;i#_BvmhECrfAhDo%+D$q@(Bkx?TSjbBGH*jM>@@pN;e?Uk!VFa&5md_AmNc{ zg*^3=Xw@F!iD-p9^^$1S9`T82#Xi-RXjL5XfoR1()s|>g9La%bB|znxXq6htGto+b z$~Dm{HL_=-l?fGlqE%pIk3=gID)vOHz(^m7R!TI#K(uCy^r?;3r9&HEtbWkufycjo z>YL}T4!5R1L-_)ilS+-`nafEDP`TdYmy=40{Fxh-xzYRv(V8vNC!&=S%`XtG*&=-+ zS}9TS0?{fkvPYto2^D*yRbXU~L@N_2_e87INS=vS0#vSvR;iIZ6RiZOS`)2`BR&wV 
z*r(bOt%@T)5Utp!-V&|aBRmnUkf&Y}t=c0z5v`D?SrV-VBsvnUNT=Bmtp+4I60KqD zl{cPU5v}jP>)U%?xbMvN&5>8Ll4>usuk!VFa?T%=*BEgwx1vu@BXtg52nP>$#-HK>+B>vzx7p;H%pFQ}|uy+6U ziARn8mTHgiM6^Pl zdP%fukMKmaLY`(xv>K4;NVFoIW=FIdkmyLXBAs?ev|5qid;>*m`@+MY+`Iav9(d`^ z?{0taj$;qq6#TmD6&ICikMP7rrI4pya#5-Fh)>+EjD4yt(W*G&1JR0osx8r~IFbX= zN`T5W(JD2PXQGt=m209^YGltuD-$aAM61Ba9*I^aRP2dXfssBEt(0hffoRPZ=@ZdP ziRKrG)@+eK5v|;4dV^?96!`FNfo^gv{1vZW#du!xp`MYI$wq9te%Edh&Y30Oo+!6I6M7SR&0h?amwv=l6&C1?>X z0gGq}SVT*~B3gnL(GswTmViaH6fB}8Xb~*|>-P|?(^o$`dT{ki`1suUA0GYm(xJ1< zemPuDTITfATU<^Oum&zC30mB!6tIYvfJL+fETW}g5iLQBXbD(EOTZ#p3Kr24w1}2~ zMYIGgqNQLFEkTQD30Oo+z#>`-7SR&4h?amwv;-`orC<>)L5pY!SVT*}B3cR-(c014 zJagsk{ll>R^vP2{zW2=5Esrfx2rJZ+v4_8-~{nNRPwan?KxA>A4u=tV|u((31 zU=b}ri)aa0L`%RTS_&4?610exfJL+fETW}g5iLQBXbD(EOTZ#p3Kr24w1}2~MYIGg zqNQLFEkTQD30Oo+z#>`-7SR&4h?anL14e88*z-^9Tm27XY~KCBi=TgZVc(tkr^Dr> z+-QD-zuI87NT0ZzloHJ^aHDdzNS}ySN>sc+vw3M62S64@4{WskTI`;)oAKEB2|kM631)Ped!^sh32n_6Scz zE97aGM5_UbjzlZcX?8@b0f~-8E7ECqM5`4E&O|G~X;(z65P5Cmf%n&j;q>}jUw(Au ztIKP*6+83s2E55({yM86cXcX`_`vUQ>{D-lcfLL=C+^z`T6_-+SbPr)SX>fTu!xqR zMYIGgq9tGvEd`5c30g!;z#>`#7SU3$h?by5v;-`oC14RP1&e42T0~30B3c3#(NeI8 zmY_wn1T3N@U=b|^i)aa2akMTS+W2DilWiV&{OhN_dG6|PYw|O5IVk}u*IZ62HIip8 zCnZ4Tnj4j+M)pj!GNEEmv@#+t`&*Sd=E!D&5rNk1|&N2JsjyYJEGNqgh!$k^3+SBReOXdq80Mg zOQKbK#3!N^`&3(^RdK`zq80m8TcTBQBnP6E0F`T^Rca*9L@NO**F>w-$exK-CRFT+ zR)LW{60Jb_yeNl+w>cv)tk5z zqUF-`3!>GVxD%q~(ijV(WuSN?qUFgLbE0LScq5|a$yjruWvMtbqUFF?W1?lLI5VQ< zz?fsAWwO{qqGh|7Q=(|M)-n56=&4_ivwg(WD%kOR-UQJ& zaaZ2Viav2Z;Yg?1QRxOGI#TIKr`b{I1|&QZt&pc)60O=JJQ1ysr(P1R+9N&@t=Om9 z60M3OJ`k?Ja17nVf zmdRodiI(kRPKlPuVh@Ry?P5=fmhECriI&M?4~drTVor&c$>I!&mIGsriI%0}%!rl) zV~vTHrQ*$qmM3G(iI#!ljfj>fW6X(`f#QycmP^wwh*od1nrxoA^7j5=*naxtsUP2a zX6u&6%(n*a>ew#kl)E}6i!)quSI7CG1NZGb8Dq}(uz})@_#XCTtU2GqmWneYS`Lgg zCR&z?Gb36Kj5#J+CW}2JTDFThC0ZtnJtSJTi#;V;wu?C>S|*DT9%44BU%oOH6~h?iZ>%#o{TXkS_X`1ywy_T6bQa5>3R@n&33@??xTmy-+>Z^VsCPsW%NEd#|J5iOUd zUl6U{#GMc=m!@A3t=`0+5G~)P+Yqge#2*kX-=^CTt&SuZ5Ul{GT@kHTBsdeT0H<9M 
ztyUyD6Rk+6*%7SPY;-uD*?BecF7& zmWnf@(hiI{rqU*hJ*3jMi#a7)CW}2JTDFTlC0e$NIVD;qi#;S-wu?C>S|*D#Bw7xP zH6~h?iZdfx4vaM>T9%48BU+w}F(+CEiZ>!!o{TXkS_XpO(u>9f?2Sa*}V;ZMdA& zk@y2{RQfjkhG_LB?u2N$H2s2T^(O9wXt^}Tf@m2i-iT;4p z&Wva|FxHr8St`zqXgM(Em}r?S_K;}VF6NYInJo5@XxT3IlxW#5=9FleEcTFS*)Hak zXqha|kZ3tD)|hBnD$a~(IWX3kXjv-LZ0nUbo?RW*_uuvHJulpMX8Y!8^Ya6DbsCW9 z$X%UCr`aiYb($YKaNn*KiOzfvM>@@p@8Jd{I`Tan=`=f{)qsRYq80MgOQKbKgeRgE z^3+SBReQuIq80m8TcTBQ#0R1k`&3(^RdFN-qLl!ZYob+ZB+o=E0V>x-tJKJziB=|5 z?1@%^kv$TvOsLostpX!`Bw8ua`~uOMEz&2Vl@iS_5UtrFf7%1l+P?7cC-<%-4!rc{ zceg)y$FYZQs#v(3RB^-yE+@r4)t1Xi6-RR5Mr8t2u8CHukvtQv1gKmStx_X>h*oYi zy+O1liUkiuYXQ>u3eg%R7JMXH3y{WFh}I~v+Fq|@$*Rx1*miB^Equ83AE5}b)vfYYsr zR!8Cwh?Z~DZHQJ!;tzjS^^f)60nGtfJL+vETSc75iJ3W zXbD(EOTi*qg4Q02*7~vMpV&7H7dP+z;Kk2Bys+=ipP9IvBz(DWIZ46da+09MjY#du!xp`MYI$wq9te%Edh&Y30Oo+!6I6M7SR&0h?amwv=l6&C1?>X z0gGq}SVT*~B3gnL(GswTmViaH6fB}8Xc4W2Sg&n7@c!B`oL+zH%a5*nb$RW!Yd?gEkL5{`FJeJa=`tb#>UY|GUD1`1C55lO~G%fy+s`(ews4Dkq8s4@7GL z()bF|8YLEdBw7oQ##e~eD6!-t(OROk^A6Dxu!xp`MYIGg{?9`Ni)aa2L`%RTS^^f) zQm}}YphdIw7O?mp7O=P^EMO5W1&e42T0~30 zB3c3#(NeI8mY_wn1T3N@U=b|^i)aa2L`%RTS^^f)Qm}}YphdII1jE#Ic! 
z5Ut+Coe(XTre6@P-o%{{Etkev5G@168xbu}#+VZ=1H~H=El={nGr1q#u^hX zOU0QHEeFON6D^a)9`4C#ZJxRE_WsqMwSW5LsUP2aX6u&6Y@2+_<)qwbdV|YJ6UBlD z{%V5-NaHKqs2n90d?Z>6kj7Vt)+n*$Bhgx-wDS(p60nGtfJL+fti5;*T;ZwTC0eyd zcp_RMPrW2swMTs7-?U<%YD=^#j`%>dVxMYDv?`9|K(rE|a!s^KjpUhVB|znxXq6h- zGttU~iapUPFtSIYl?fGlqE%p|k3=ganqSzX(ON(D{1f|z;o|1qAH4YahZpwUIg2Py zTuxH7xSS+t?a9kYf=~4-H!3TR_&~H`pL$EQYLD)e?YW+ zn|?#IdJ}g-v|O5gL9}`kcS5vW8e>7U3>0rfv^*JOzGtKL+QtL#uMNZL^|!wK=*m}@ z*KRX#(h--FQljDoE+-Wj+2bC)oRqQHdu~+PF6NYInJmtbXgM&}m}prl&Wva|FxHr8 zSt{O)Xn8WmoM;&+-iT;%0Ht@4h>%-M@X}k>gJWpYpugO`rVySp`pg z!U0aZ;=iI=k?72SMMXNz?l0%RqAnfU_+s@FZytF3>!-eX?&@%B^G{iF1b`e^2W0(tM&bNeS6Of_nq0kS%CZbRj#fGWPdqVSA>7y&a2$UQq1_07PR=1 z7O=QNDqs;U0c#*yf)>#du!xp`MYIGgqNQLFEkTQD30Oo+z#>`-7SR&4h?amwv;-`o zrC<>)L5pY!SVT*}B3cR-(Gs+XmViaH1T3PZVBHYW+P?7cC-<&?)dyaB^Sj$0yyMtI zHz}XO>Rm1;31D1K60r94jS^^f)60nGtf+s6!<>M#5fAyW$H;$|+c*6~GlSBFyuU_U4%?e`t4hvXZFch%3U?^Y_ zEd`5c30g!;z#>`#7SU3$h?by5v;-`oC14RP1&e42T0~30B3c3#(NeI8mY_wn1T3N@ zU=b|^i)aa2L`%RTS^^f)Qm}}YphdI`#7SU3$h?by5v;?dhDq8Euo_}KBFkIZc`-2xh|M0@TJC}YUTuxdlb?v=BIi~yzsm8#X)@JzMTNZ_ppG)_ppG)C1C}N zXbD`#7SU3$h?by5v;-`oC14RP z1&e42T0~30B3c3#(NeI8mZ0TD>(Zf(FIGRz=7Gn*e(Ia&t`4^@`Z>9cwTS8Gm$;23 zU~wBuz}iK(v6g+-e8S6|etwHe3tCiKz@pLu7SR&0_*=6SETSc75iJ3WXbD(EOTi*q zf)>#du!xp`MYI$wq9te%Edh&Y30Oo+!6I6M7SR&0h?amwv=l6&C1?>X0n3lp)+=v3 zyZVXN_uuvHJulpMX8Yy^KP!LmeF4(=3U_rzi3K0MyE+Sg=)iruB}(Hvd=HNj3qJBa zyZ~u@h40}}V!=nEwE$^)g=kF_`NItrt(X4l+kZB!-M@X}k>gM1zx1Eq;CFke)atz( z>UVqTubbcDrBeJ33sS3B`IivEi+>3bu=tZo3KsvhBxn&W0gGq}SVT*~B3gnL(GswT zmViaH6fB}8Xb~*|i)aa0L`%WC0i(5j;o(p2U40G*UV8Jp+aJ8+*h4p^pAh$UQlj|< z?(NJL>63qNC;g!VckdP$O|S4hJW=Eid=KYF(;IvbPZSFt_?tTxAdRmOtx;mZN20X= zX?%rfjS@>f60IdlJMR!J0gGq}SVT*};{QBUu!xqRMYIGgq9tGvEd`5c30g!;z#>`# z7SU3$h?by5v;-`oC14RP1#8zu>*D2iw+^oaUOs-}`&Zw2edEYl!f^xMW%RaKUh*Snxo!79fqU5Uo*S!AGLC0BL-MXpIs}J`$}ZN;~fm zEdh&Y30Oo+z~cWrRIrGaphdI-5#njvie7Qa(O+{)b0Dy>#eo{LA7tR_s%4xs6qEBnSS4VFFaH zcinBQ@{_Zk`GhSMXGWzR7;8+WEfsG@r9ByAPP7aZZ$z{_8Dmbg3>0@nv|O5gL9}`k 
zcS5vWntnmFdJ}&_w0xUxL$o>)e?YW+n{Gq2I+9>Ov;v%VMYLLx;7qgvoOVUDT9N2X zv?85mN3h$u}`%nS`|lpAX>3cwIx~=M{*!q2~fEvTBSzvOtcc9a!s^K zjqI6dWkSWCXcZXQBhku)iapUPFw#e&l@iS_5UtrFeIi;Z(fk6@nl17tqLmv>ZxF4C zB7Y!SxzY5-u8r1f8xOp{HVmiN-}>^SD_>n+yKVZEn}-|tCP(utEsosPDKN4}eup!m za?b_BQX_fhf?)zwu8CHukvtQv1gKgQt%@T)5Utp!+7hjbBR&wV*r(nSt=c0z5v`D? zUJ|X^BRmnUkf&J^tp+4I60Jz5*%7SCna(tAG6Ffycjo>YL}T4!1hL7H(rX zFxHscSeA-2<2IH9V~uz1Z7l1Py*A?$_GGL%m9|uz8I^WmtTC0gRGb;na$w9c(K1== zA><&zUCb%bGFhA<(Q;s{G10PAoEgz_V5~9G zvQ)eo(eh-BIngpuyb;mzWQ;k{GEm$R(Q;|}1<~qF+zHWgY5K*|Xl=dn##++ywDBg%@c{0YFXc;K(h-kSq{eo!qChml2 zxitNPX!R!kglPFT-G*p&B>sSC`8M5#XmupPfM^9c?TTo%BEgwx1vu@BXtg5InP^2i z&F+Se*7k*mKe>1Hi$3tuo8R63;2p;vx~cgLt{u6nW1x5=?&^3l#(e2r9pi@%1NZGr z7JJC|u>=O7wu?O_TDFThC0ZtnJtSJTi#a7)CW|v9S`LggCR&z?Gb36K zj5Q`&mWnqcTAqwCCt3!IHzHb|j4>x#28uf(S}sk$AX>eNJ0V&wO~1I2qxCQUoB!4S zGpyaeed3YhPxim^oe96&E=|9|JnEwX; zYw6`FRc3`YAmkliyXU1hi z2gV!|EtAC_5-r=soDwaQ#U2tZ+r^#|E!)MM5-pR(9uh6v#hel?lf@YlEeFOL6D>={ znGr1q#u^hXOU0WJElzgv!e%B|M-v3o&Vv{PcI!h+x}9112;L^UowA@MK2Oe z_#F;#+7%ZJTaoC@1;a?E*>SCt3wY`be}=qWJ})HCv=lL@OnlUm#kuMgH{5MQiiSmACf~!}ilBPyP7bGh4Sj zR{!F-x>9?Dr(f*qO8cweHdZSVocWRtaM~4L(yd5z<_c+~)9i>=0}>sHR;1JHh*kp< z9*I`SQ!j~D?Gc`cR>)H?iB|0qpNLlMQ*DV>#StHfR_s%4iB`pt9Eer|RIZ6usgXPr ztpuoC6RlDsdnQ_$P_ZXk1xEHrv@)S$PqYe*^pR+#MDq(oYqm(Ah*nB8zwn!h*7~vM zpV&7H7dP+z;Kk2Bys+=i`EyuJ|NbAANA3BSyU9`iy11NFdxR%`heMub$(M8k5*@iw z8R;}TqSb&zN1_$!v^%2JiUen(72vchqScB7XQCD0bSt9Ok@y3m<=b={qScZ31ES^I z^c$kpo46CA<yD<-k~D zqGhQ#Got0dm}8=4ve?64Hd?Q3Jn;V7Fq~e0>&uU>e06#4w)q$Jo4Ls`|B{D+yE>&t z^33mW0#vQJU|4a)2QC=KKGl|JRUGkwXvIGDmT1);;fZL4JoS=j)gIxAXoWn@l4vy` z(UE9HI?aw~H6YQEXhk~hj%c+a!I@|UIPHpPwIacpXazXkifDBt{(xxtHr|`dZNf5xI&s6O>YpbiDJP6(OQ5szCyG{ zi3J~t)&ivQ6{0mtEcr;ZmMHDKL$m}eq9tGvEdgsFT7nkQ60nGtfJL+fETW}g5iLQB zXbD(EOTZ#p3Kr24w1}2~MYIGgqNQN{wxYH5${WwFKD+h(cYS-$3-_JbzIo?U`#7SU3$ zeq+(vzVPrT_pUy_124V#-R%$FaqOX+e)?p8TQ@m&et-C*9KshWzrz9+7Yqd~E*J_} zTrgCyh?by5v;-`oC14RP1&e42T0~30B3c3#(NeI8mY_wn1T3N@U=b|^i)aa2L`%RT zS^^f)Qm}}YphdIzsQ%5pZNaOcV6E(vbH+> 
z#%^-_Y|I~;6~5N^9Tu>-U?^a5!BD{Bf}w&%v;-}pC14RP0gGrUSVT+EB3c3#(GswT zmV!mJ1TCT^U=b|=i)blWL`%>jS^^f)60nGtfS%l!D<`5zwr^wOcT!@$)Q;kWHqxw>-g*T-!v;dhBIX#tBbX#tBXqzV?% z610A=(fTj{umApE8`kdMKJm!$C!=2zf4@?s)9m>2Za|{r@Ab<&`UmqR9qBYXzN8zF z=*X9Jq|@y9*USbaJQA&tr(P1R+9Ny>t&pc)60O=JJ`t_hr`i&&iX%P{t=Om960M3O zIS{P`s9Y1RQX_dLS_x3OCR(LN_Dr-gp<++83XJS=&qZtV%$2wI55xA;Cr|zO-ZNXb zJjOMa>}#ah|4P?b_?tV5--G;-FX`N9dV??NiDJP6e;LgJr12H5la3M#J`$}3NaHI+ zYm`{>k!USZ+Ifd)30Oo+z#>`#7JqO@!6I6M7SR&0h?amwv=l6&C1?>X0gGq}SVT*~ zB3gnL(GswTmViaH6fB}8Xb~*|>o*&%ONTbTSbd(G2Oj_Wsc)XUI@~IJ()FucPO3e^ z6PJ@ho_e|GE+_E`*IxkPi7TWbPqU=b4M=pP(veQHBU%kebR=4lPP-#otw?YtS^-YG zB3i9Ta3)#-PPZaj9f?06TE0!UAzB@YKOkDZO}`;py@@*^S}sk$AX>eNJ0V&wjj`-7SR&4h?amwv;-`orC<>)L5pY!SVT*} zB3cR-(Gs+XmVmVrt?dgBe{%2YhaGt7&F^l1@Qz~--6Z@R^Q+v|nJv;M?&_pO^9x)s zoGtPvE*R!U(;Gx6kj7Vt)+n*$Bhgx- zwDS(p60nGtfJL+ftbu3=T0~30B3c3#(GswTmV!mJ1TCT^U=b|=i)blWL`%>jS^^f) z60r7ev@TwLckA#<>gD4nzJK+d*Ef!=DQr9Ma#u${)L5pY!SVT*}B3cR-(Gs+XmViaH1T3PZU=b}ri)aa0L`%RTS_&4?610ex zfJL+fETW}g5iLQBXbD(EOTgO8(K>zgv!e%BRv(``|HGr7UOIGEar^mQE++|QTuu_O z_U`2*1!;Vj8`-7SR&4h?amwv;-`orC<>)L5pY!SVT*}B3cR-(Gs+XmViaH1T3PZVD0H> zZJxRE_Wohme){C8AK!as>z2n9xohuoIY|)Xa*}|xmoFzNOh3QNjY`3cXbD(EOTZ#p z3Kr24w1}2~MYIGgqNQLFEkTQD30Oo+z#>`-7SR&4h?amwv;-`orC<>)L5pY!SVT*} zB3cR-(Gs+XmViaH1T3PZVD0f}tsi^-iG9Oxar5pEUi|#S3;XU=>{dVS@Bg9p1IoAg zL$jqu^1P?->LfPZnhS;&uU> ze06#4HtRF>W?WA4WUTq`&Rf%Evps%SXBpD?78eXhi3K0|9$tVny~6kKM3Fxbt=wpO zgJ?|@`2*3)jpjFq)@+eJ5v`PHet~Gs7U>hwN{NaWh*p7-Jrb=YL}T4!0&` z<(ivWrAG41&8!5dTyrz4)JUFp#m%hbbF9{U!WBnypwbCYxu(*kM)FLh6QFWUv`UTa znP_D~#hz#t7}+Dy%7ltN(JC;~N1~My%`XtG*&=-+S}D=|0@0c+@+YE|8%=Kzt%)Lk zAX>T6^ajzIC>A^rtp!NqD@1FQSn!c(EkGJyAzGuvl8;1diPFwHL`%RTS^}08t*uwy zcy{$utna_;+k0NP@67hii+?^YCoQHL1}-NFSX@pLu((kvVDUd3rC<>)L5pY!SVT*} zB3cR-(Gs+XmViaH1T3PZU=b}ri)aa0L`%RTS_&4?610exfJL+fETW}g5iLQBXbD(E zOTZ#p3Kr24w1}2~MYOJAZC`l!lY3Vm54`l|ceg)y$FYZQT1d!UorO@>UgNHgfMwm) zS^UGfZ?~9g82BC*u=pMpu=pMpum++fXb~*|i)aa0L`%RTS_&4?610exfJL+fETW}g 
z5iLQBXbD(EOTZ#p3Kr24w1}2~MYIGgqNQLFEkTQD30Oo+z#>`-)-H`#7SU3$h?by5v;-`oC14RP1&e42T0~30B3c3#(NeI8mY_wn1T3N@U=b|^i)aa2 zL`%RTS^^f)8nI4a{p{$$)#2lF=YM$g(@TfWj*I9cx3MCfX2)%;1|&T4$D%`?dbx{k zW7VIScj6QFWQ;kLHc-3~mG)$;IhD3loEgz_V5~9GvQ(TI(Q;tSG0`$v>><&zUCb%b zGFj{)(Xw6aDbccB%qh__S?nRvvR%w6(K1<_A<=SRtTEBDRGb;na$u}6(Xv#$8PW1& zj5*OVP`nY*@??xT(K1ln5z%sK`h_2@%`;ct-oN@E#(4VVsUP2aX6u&6`XAg*xSZ6A z1ZOTM1vuU6cjvp&;sF;77gG%b-@^hH-@^hH7Yr3Fq9te%Edh&Y30Oo+!6I6M7SR&0 zh?amwv=l6&C1?>X0gGq}SVT*~B3gnL(GswTmViaH6fB}8Xb~*|i)aa0L`%UUT7uTD zi`M$F=bzX&3>P=={@}&WKfJK-&J2UQI+;+p=dMnvkv#i%b&?;rT65nnH=5qydw8N) z@WA)*0;KU3zK2JN1s{pl0;KU3qBTk^`AD>uDDAvMv;-`oC14RP0gL}xP6dl-30g!; zz#>`#7SU3$h?by5v;-`oC14RP1&e42T0~30B3c3#(NeI8mY_wn1gu>ct=Bdlcz8(*w`?B;>TzkceQ=dKR7dOy)mbN+;3$Wt%5ja7SuCvIbfJoR!H-o~mw z!S=)_+=@hJDjn%GJ1X6PL`Nze=`=f{)qsRYq80MgOQKbKgeRgE^3+SBReQuIq80m8 zTcTBQ#0R1k`&3(^RdFN-qLl!ZYob+ZB+o=E0V>x-tJKJziB=|5?1@%^kv$TvOsLos ztpX!`Bw8ua`~uOMEz&2Vl@iS_)U%?xbMvN&GQfLp17;ik@y4d z>i9O@CcCTC{eWV?eY*l9edK#MC7NIO#lDCC)u;aHKOffa-#+ok@h9hh{7+AOc{d>8 zkuUF%r(XVIU*7dUcHm380SS+MNryc3k}v7nBRuhMT_I1sBwDpcd?H%0Pqigl6-Rs^ zTCq>HC0Z3nav)j>P`M^rrAG2hv=X3lO|(jl?3rj~LdBkF6&Tqg(aMC1J<%#K(nq3| z63s6Vt=S@dB3dcY`~uOMEykzq3lD#C@9J>ir8mF3{lPnqJ#^E!={|CArz7zP+}rVO zx=ns>r~4uEfV+3IMf$||a7r}4!1wTMkv{P~oDvl;5Um0udn8(!P_ZXk1xEHrv@)S` zPqa#nS|*D#Bw7xPH6~h?iZdfx4vaM>T9%48BU+w}F(+CEiZ>!!o{TXkS_X6wS{+F+AX+BQ>8qa|J-FI@ zeD3@YkA8aT&{^}Xf!kQNi#g>smdWA_xsBz(SmRxL8_W7+ug&;`JsD$8r41BsM5R3$ zYfhyt6=z1Y92jd%v@8{8MzkClb4;{M7JEpvY!`D%v`iLzNVIGhdrGuy7jsIqOcr}c zv}_l1O0-NCXGpXh7;8+lEEQ))v>X^~OtdT&Z$`8{8Dmbg3>0rfv^*JOPP7aZcSN*Y zntrh~TAOFCyuE)Iwx2$E>c{t<*}CPi{-+uSE+<(k&Wy`R4vabGa+1kn54llkyO>j= zWwO{qqGh|-Q=(yD<-k~DqGhRgGot0m z7;~a!pm-yq<;fUxqGh1CBckQf^b4ZZo46CA<1B z%eUz^M5`n52Sm%a={H2HH*qIK%cbcTM5{M(C%ZmcuWdZ={@O5{UVrP$kFI=mdF{5A zg3C#*NOb0MQl!)DxSZ60L`QB^Mmo)oXf+_=k!XcH^^$1S9^r{-g*^3=Xw@F^iD<<> z)s|>g9Pxo@#Xi-RXjL4^foLT_<(g=f8p$)!N`T5W(JD2vXQGt}6?>vpU}TR(D-$aA zM619^ABk2Ct3wY`be}=qWJ})HCv=lL@OnlUr?j9 
z^~xL1u733T{=2@t=Y{*uY~MWpRI4X0CsiErfy+s;PqpQ8QpFJ;xKSDV)LWuedxR&V z74pTuwtH^006!8?vUbkn$5edMlAfss9OS0@uH_Uc`o;)f0c z_w6c<_`vsY>{D&|94p z)$HQscef6&CNCd9@%^jsyuNW{ZCZ0tX`;vb~fX`)#0!0pNfNaHI+Ym`{< zk!USI8ebt=qr{SrL~DuC&O1a)z#>`#7SR&0_*?K4ETSc75iJ3WXbD(EOTi*qf)>#d zu!xp`MYI$wq9te%Edh&Y30Oo+!6I6M7SR&0h?amww3c9s?2%|?LdBkF6&Tqg(aMC%J<%#Pl4qio0F`T^Rca*9L@NQR)dVxM|Tv}%v=M6^PldP%fukMKmaLY`(xv>K4;NVFoIW=FIdkmyLXBAs?ev|5qi zOtb==avp zV5E;kD9U`b4x+qWJ})HCyCQL@PI%-XK~NMgBmva--=DqBT)0cpzE}kj7Vt z)+n*yBhgxbG`>Q#Mu{aKiPjROop*?qfJL+fETSb~4Ma=OB3c3#(GswTmViaH6fB}8 zXc4W&Sg&n7@c!B`oL+zH%a5*nb$RW!)!z&8H~whx5AR;&u1-hd54fx2+jJW)7={nGr1q#vBtZlf@nqE!)MM5-pR(9uh6v#hwx^+r^v`EtAC_5-r=soDwaQ z#TgPU2gVu`Elb6j5iJMC8t=JiT{^V!#R_8cz~f&(_04lvhg-RvWPM$DGcG52GRBW6ik+ZmBpkqUFF?W1?lLI5VQ!yIY4>FT8yG#P_eh^ZLe-)!z(p zo&Vu-k^sl$BmryBT~6W?7QQ+Lu8;~^R9e8I(gGII60nGtf~s_7SR&0h?asyv;-}pC14RP0qdt| zoxb|n(SxfW^zpg#KRo*Br9)?xpP+n~yE>&t^2}YG1gKhb!LZ_p4_q*eeX1?dsyN~U z(TaWQEzzny!V}R7dFmz6sy)IJ(F%E*CDCd?q9f6YbebK}YCxhR(Ta519noqPY+n(eiEj4bkdN+zHWgY5E1x>P_4U(Q;{w z1<^84ywM(w*5;WjZ|@(5?Wa$k`tiMIwr+XM^NHu&)rozoEq8S)j^x0f^iP1wH5Uv^ zjpUhVB|znxXq6h-GttU~iapUPFtSIYl?fGlqE%p|k3=ganqMGVvqkzuv{It^1)?=u z~s_7SR&0h?asyv;-}pC14RP0gGrU zSVT+EB3c3#(GswTmV!mJ1g$w*uWdZ={@O5{UVrP$kFI=mdF?jgXW4m`i%N#StHfR_s%6iB|0q zo`_b+Q!j~D?Gc`cR>;#ViBHC0Z3nav)j>P`M^rrAG2hv=X3lO|(jl z?3rj~LdBkF6&Tqg(aMC1J<%#K(nq3|63s6Vt=S@dB3dcY`~uOME%K+|ezdk;dE?pD z=efTBu5a&o;l4B5H|L*p^#+%dDvtQT<)qlB+V1JgNtMKc12-xcAWg3jt%)LkAX>T6 z^ajzIDDnrQl^e}(5UtrFeIi;Z(fk6@nk~{NqLmUAFA%K)BYPxTnNYDOS_MY-NVGDc za!<5MjpUhVB|znxXq6htGto+bsx{H7IN}4*ihZgr(W*G&1JR0o>MhZ#J;D>w3VG@! z(W*Vd(_W9(_JxN(xpyUW;H5XeyZymCjy-f!$ki^njn#?-XKrH!IPHqtSglBO{_Wq! 
ziatAI$0ux{cq1z9$ry7gZJ>A~D(%TwbE0LbI5VQj=WwO{qqGh|-Q=(yD<-k~DqGhRg zGot0m7;~a!pm-yq<;fUxqGh1Cqc&O>FTcBWc=h(n$4`9!>N~G*99eUJXusfcQg7l; zxSZtD^ozZIIjI+kCfulubebK}YCxhR(Ta4M9noq)!XwcNdFmz6sy)IJ(F%F$CDE!q z;uFz|eX1?dsyN~U(TaVlEzzntk^|97fXX$|Dm9X4qLl!ZYob+ZWY0t^6DsyZtH8(} ziB=|5?1@%^kv__^t>N_5&yF5kNqv0o{11!9f{)zQS%5UX(%#jX77HG@Z?^zxe1-4fQDVVIzK0heO|S4hJW=EiL@PI% z-XK~NMgBmva-;bTqBUEjPedyvnqMGVvqkzuv{Itt1)^18WRFBE6DsyZtH8(}iB=|5 z?uk~ZkvtQv1gKmStx_X-CRzzlwI*5>M|>b!u}`%nS`|lpAX>3cy(L<;M|dJyAy2*B zHPPBUbLH**!?6AI$x}bR_srHUkJX>2f8uhIZ_{nKoYav711={8IPHoXm90o{CRzbb zyCPbxNOUGzkxsKCS`A2aBwCS9vm;s!NO&Y#Ay2&|TD3=bB3dC&y(C(-M|>h$u}`%n zS`|lpAX>3cwIx~=M{*!q2~fEvTBSzvOtcc9a!s^KjqI6dWkSWCXcZXQBhku)iapUP zFw#e&l@iS_5Urmq){i~^#J*v;xOw*nFMj^vg?)FE6Vh5G|LcUl6U{#Geo?-=^CTt&YSW5G~)P z+YqgeBp48_0H<9MtyUyB6RiNJT@kHTBsvqVNT=Bmtp+4I60Jz5*%7SzWkf&Y} zt=c0z5v`D?UJ|X^BR&zW*r(bOt%@T)c+q-op%K$Km4o1+Wp%n9y$JG@k?9w z$nW+tr=Q-s5q`IYU+&MZ@;fY$@jEPF@h>3)*1*4n2wFr-z#>`#7SR&0h?asyv;-}p zC1CBcXk9wA@x|)1-#qa6*H3-(+|}XM1r*%dS%5UY!o8hQV!=o6-p+y_I&k-HiPHEE z-@~KCf{%O;FF+b!;d^+LSn!c(EkK%HAzBkf{y?;Hqv;KzHBsabL@PI%-ymAEMfyav zQlj|)U%?xbMvN&Gn~ioVY@2 zpm-y0V|g;hoZDCiiZ|M2x3RoWbDr}F&ldR;mClW(H>mVPkv~xB+-Q1(XiXFg9*EWg zr12G^HA*b_NVFCpjjs@`QDVtQqP0Y6=N+OYU=b|=i)aa0{Eut#{jPtm>1q4t&CviZi3q4vaOX(w2%dqtXtHIVM^ri#;S-wu?C> zS|*Dx#28uT#TAqwCCt3!IJ0e;xO}`*oy@@*^S}sk$AX>eNKZ&Dt@$$P{hgU!N<>M#5 zfAyW$H;%0NKhtW%U7e7pUUFBb_6SemU7hepu9p09tW2ob^F3T(WRH9gXF|oE@8JR? 
zeI!~b(fk6@nk~{NqLmWOFA%NSB7Y)UxzY3n(V8go2cnf5O>YpbiDJP6(OQ5szCyG{ zi3J~t)&ivQ6{0mtEcr;ZmMHDKL$m}eq9tGvEdh)Fk9Y-(XbDz2oiPkm>^Z7koW+i)AJBMAoFdk=8h z)vmma)qaxYnNPUXNS>*50#vT4bg7X&Q|U~o*b}V+BYPxTnNYDOS_MY>NVHO-`30gi zTcl4!D9U{zSBLqv;KzHBsabL@PI%-XK~N#exT-wE$^+g=mcu3qBI91xVv7 zL~E2-@{wpQQQCQjXbD(EOTZ#p0v3OHg@UytTIC4shBPm2O3XGtmlg+7;1i zMWQp&igcPC(P}`VBhiXLt;tJ>nD5ihZgr(W*G& z1JR0osx8r~IFbX=N`T5W(JD2PXQGt=m209^YGltuD-$aAM61Ba9+ySywT%bfUmJ$g z>u-Je(Uq?*uia+hA1H~QjJ?zr-3%-YY z6L&(iT$+ACw0aYNLbQCFZbP&>5`RFne4B1Vv^tVtK(qp!c15&Wk>E_U0-Sb5v|5qq zOtd1MW=FIdkmyLXBAsSOv>K4`NVGzpdP%fukMKmaLY{g_v}%v|M6_a`YD=^#j`%>d zVxMZetD|-4(8d?5&vx^`<6l4Z&2v|WTdPlfJaAWMlvwbQyE+Sy##ffz)fwNeKXTu$ z_J~h>563>$mha(;BR=pw9Q#yTqE&Gu2cne#m209^Y9!A@D*-CkM61-uo{3f_RP2dX zfss8DtxTxc6RiRxeI!~b(fk6@nk~{NqLmWOFA%NSB7Y)UxzY3n(V8go2cnf5O>Ypb ziDJP6(OQ5szCyG{i3J~t)&ivQ6{0mtEcv**qqX(Q8_%v3*7x7_?L9BtcV_!$gXCAg zjdk?ioBoes?f&f(j~su}`1NI@U44zk`sDe9EfsI}tABZWzp!A=?{I+At@s`8Nc;iU zNqw7c!*$Y*#2*kX-=^OXt=`0)5G|LcUl6U{#GMc=m&RBSEd#|H5iL)~m=i4n#TyYV zPsW-PElb6j5iJMC8WSx`#hDQ;2gV!|EtAC_5-r=soDwaQ#U2tZ+r^#|E!)MM5-pR( z9uh6v#hel?ljW!3_JxN(xp(z+;H5XeyZymCjy-f!_bxZHIud`t%`D%h+i)|hBMAn( z`(~E$iSr2?DBg%ldosqHN*gHNh)R1h)|_ZrD$a~(IWX3kXjv-GjA%J9=9p-iEcTFS z*)HakXqhbbkZ9R1_LOMZF6NYInJo5@XxT32lxUeO&X8z1FxHr8St`zqXgM&}m}prl z-i&B@GRB-}87SU}Xn8WmoM;&+?nsH&#mnz*9bWxRFCRbg{j2Z1zHwx&`DwYF)PO`s zE+<7g&5p}S4M=$8MrFuTFNs#|5uS)v$Wt$gR_zg=h*s=VZHZRJ5g&+F>{D%tR>hGV zh*knru8CHukvtQv1gKmStx_X>h*oYiy+O1liUkiuD?~bd^|PY~S8sfL?)(pretPN9 z+4^nn>eL?LiMu)>PqS3+>NG!e;J#fe5}f%S4shBP-@~m)bmn_F(rI=?s{x6QL@Uy1 zc0{WI36Df8x- ztJFxIiBGuy5+Hwh095$M)J(%qy(s1b2+Kvh!5PTjD4yt(W*G&1JR0o>MhZ# zJ;D>w3VG@!(W*Vd6VVEJnkCU{K%yhjigcPC(P}`VBhiX<+8xnqMS?TY3UJyL(P~A4 zGtmlgx)sstNc;iO@@={e(dtP20nzep`VG*50#vQ3bj1-LsC4X8ZHZRJ5g&+F>{D-vR_zg+Fq|@$*Rx1*miB^Equ83AE5}b)vfYYsrR!8Cwh?Z~DZHQJ!;tz!#g60ioMC1?>X0gGq}SVT*}B3cR-(Gs+XmViaH1T3PZU=b}ri)aa0L`%RT zS_&4?610exfJL+fETW}g5iLQBXbD(EOTZ#p3Kr24w20OstV@SBzF57udEoJ{pZeyx ztHZ53KZ?6Li}-$iiMu)i*7e-g8Gm~2+l>-SKJq=hL}}+8zJ~=YzJ~=Yq9tGrL`%>j 
zS^^f)60nGtfJL+vETSc75iJ3WXbD(EOTi*qf)>#du!xp`MYI$wq9te%Edh&Y30Oo+ z!6I6M7SR&0h?ao0r=qp>${WwFY}WVR_3b?`+;?XC=AWSWQ^La63zw4wEG{PrSlp-- zu((mFU=b}ri)aa0L`%RTS_&4?610exfJL+fETW}g5iLQBXbD(EOTZ#p3Kr24w1}2~ zMYIGgqNQLFEkTQD30Oo+z#>`-7SR&4h?amww3cFRUwHVFdsiwY8uQ=DmWnsy|M$a_ zG3G?eK=DRI%abwYM9V;NM?}k|=@&$+H*qIK%cbcTM5{OPCq&D)={9>TS{E`-7SR&4h?amwv;-`orC<>)L5pY!SVT*}B3cR- z(Gs+XmViaH1T3PZU=b}ri)aa0L`%RTS_&4?610exfJL+ftiM>a{^qggpV&7H7dP+z z;Kk2Bys+=iv)6wIg1@^&_-*6wE)lT!3zrlu{=y|ei)aa0L`%RTS_&4?610exfJL+f zETW}g5iLQBXbD(EOTZ#p3Kr24w1}2~MYIGgqNQLFEkTQD30Oo+z#>`-7SR&4h?amw zv;-`orC{yJXk9wA@x=;h^T6X@KlROXSBF~_vGQI1f|gPvd*&}_$%Kmi9{USg_}`}~ ze(&=~{u-^^XnKQ6PZSFtsPqD)@f9jPN-X$Dv=$(ZuMn+KV#!CMwM1#>9ik;*5iJ3W zXbD*S&$TF6L`%>jS^^f)60nGtfB+>CNwMfAEfD4_)8?AO?T_O!#`?u8x4k z1w#Rg3x)y~7Yr3Fq9te%Edh&Y30Oo+!6I6M7SR&0h?amwv=l6&C1?>X0gGq}SVT*~ zB3gnL(GswTmViaH6fB}8Xb~*|i)aa0L`%UUT7nkQ60r7ev@TwLckA#<>gD4nzJK+d z*Ef!=32fu5Tp=AL7JTG3)&ivQ6>ejV5(_^5mTzM%`1JZ$_=I~Ce?p~wn{Gp;I}(3D zrG1-jL$o@QU_i72oOVUDT9M#Pv;v%VMYLLx=uEUCon}Y08j$Env?85mN3dVxM|Tv}%v=M6^Pl zdP%fukMKmaLY`*1r=zub=E~dqhhh8alc#=s@0qPz9&2#YBX@ODqWJ~x>dY4DQ*l=( zeYbmo`*t0PKj3@Vx9K)~4|gQ~fbU`7rr!{)-o%{{EtjTW5Ut+Coe(XT##j(71H~H= zEl={nGr1q#vBtZlf@nqE!)MM5-pR(9uh6v z#hwx^+r^v`EtAC_5-r=soDwaQ#TgPU2gVxjf@rNDd;W=i!*Fr)?hju4{KE_T?zBEn zf5zn`-=^DeIjJKF27CH)Qs82)xKU}c*h8XayO>j=WwJO!qUFF?W1?lLI5VQGVxD%q~()0_W)tmSeqUGCk8=}>b z_yeNl+jJYE)sX}Pq7~q@E27nk1ZScZ;Iu2E)rv&t-+#2u9{z9s_hHyLa^jKWPujl{ zzPxQ0b9%#kdHnBQSb#hdXZ?a3H(zK0DIZ$z{_8Dmbg3>0@nv|O5gL9}`k zcS5vWntnmFdJ}&_w0xUxL$o>)e?YW+n{Gq2I+9>Ov;v%VMYLLx;7qgvoOVUDT9N2X zv?85mN3S$ EfAIS|ZU6uP literal 0 HcmV?d00001 diff --git a/modules/files/src/test/resources/letter-en.txt b/modules/files/src/test/resources/letter-en.txt index 79bcca36..b7051bc4 100644 --- a/modules/files/src/test/resources/letter-en.txt +++ b/modules/files/src/test/resources/letter-en.txt @@ -2,18 +2,18 @@ Derek Jeter 123 Elm Ave. 
-Treesville, ON MI1N 2P3 +Treesville, ON M1N 2P3 November 7, 2016 Derek Jeter, 123 Elm Ave., Treesville, ON M1N 2P3, November 7, 2016 -Mr. M. Leat +Mr. M. Leaf Chief of Syrup Production Old Sticky Pancake Company 456 Maple Lane -Forest, ON 7TW8 9Y0 +Forest, ON 7W8 9Y0 Hemptown, September 3, 2019 Dear Mr. Leaf, diff --git a/modules/extract/src/test/resources/logback.xml b/modules/files/src/test/resources/logback-test.xml similarity index 71% rename from modules/extract/src/test/resources/logback.xml rename to modules/files/src/test/resources/logback-test.xml index 5b0b6a44..fdc4bdf7 100644 --- a/modules/extract/src/test/resources/logback.xml +++ b/modules/files/src/test/resources/logback-test.xml @@ -3,12 +3,12 @@ true - [%thread] %highlight(%-5level) %cyan(%logger{15}) - %msg %n + %highlight(%-5level) %cyan(%logger{15}) - %msg %n - + diff --git a/modules/files/src/test/scala/docspell/files/ImageSizeTest.scala b/modules/files/src/test/scala/docspell/files/ImageSizeTest.scala new file mode 100644 index 00000000..ac3bce6b --- /dev/null +++ b/modules/files/src/test/scala/docspell/files/ImageSizeTest.scala @@ -0,0 +1,46 @@ +package docspell.files + +import cats.implicits._ +import cats.effect.{Blocker, IO} +import minitest.SimpleTestSuite + +import scala.concurrent.ExecutionContext +import scala.util.Using + +object ImageSizeTest extends SimpleTestSuite { + val blocker = Blocker.liftExecutionContext(ExecutionContext.global) + implicit val CS = IO.contextShift(ExecutionContext.global) + + //tiff files are not supported on the jdk by default + //requires an external library + val files = List( + ExampleFiles.camera_letter_en_jpg -> Dimension(1695, 2378), + ExampleFiles.camera_letter_en_png -> Dimension(1695, 2378), +// ExampleFiles.camera_letter_en_tiff -> Dimension(1695, 2378), + ExampleFiles.scanner_jfif_jpg -> Dimension(2480, 3514), + ExampleFiles.bombs_20K_gray_jpeg -> Dimension(20000, 20000), + ExampleFiles.bombs_20K_gray_png -> Dimension(20000, 20000), + 
ExampleFiles.bombs_20K_rgb_jpeg -> Dimension(20000, 20000), + ExampleFiles.bombs_20K_rgb_png -> Dimension(20000, 20000) + ) + + test("get sizes from input-stream") { + files.foreach { + case (uri, expect) => + val url = uri.toJavaUrl.fold(sys.error, identity) + Using.resource(url.openStream()) { in => + val dim = ImageSize.get(in) + assertEquals(dim, expect.some) + } + } + } + + test("get sizes from stream") { + files.foreach { + case (uri, expect) => + val stream = uri.readURL[IO](8192, blocker) + val dim = ImageSize.get(stream).unsafeRunSync() + assertEquals(dim, expect.some) + } + } +} diff --git a/modules/extract/src/test/scala/docspell/extract/TestFiles.scala b/modules/files/src/test/scala/docspell/files/TestFiles.scala similarity index 72% rename from modules/extract/src/test/scala/docspell/extract/TestFiles.scala rename to modules/files/src/test/scala/docspell/files/TestFiles.scala index 9c5637e3..1ee01c9a 100644 --- a/modules/extract/src/test/scala/docspell/extract/TestFiles.scala +++ b/modules/files/src/test/scala/docspell/files/TestFiles.scala @@ -1,8 +1,7 @@ -package docspell.extract +package docspell.files -import fs2.Stream import cats.effect.{Blocker, IO} -import docspell.files._ +import fs2.Stream import scala.concurrent.ExecutionContext @@ -12,19 +11,19 @@ object TestFiles { val letterSourceDE: Stream[IO, Byte] = ExampleFiles.letter_de_pdf - .readURL[IO](16 * 1024, blocker) + .readURL[IO](8 * 1024, blocker) val letterSourceEN: Stream[IO, Byte] = ExampleFiles.letter_en_pdf - .readURL[IO](16 * 1024, blocker) + .readURL[IO](8 * 1024, blocker) lazy val letterDEText = ExampleFiles.letter_de_txt - .readText[IO](16 * 1024, blocker) + .readText[IO](8 * 1024, blocker) .unsafeRunSync lazy val letterENText = ExampleFiles.letter_en_txt - .readText[IO](16 * 1024, blocker) + .readText[IO](8 * 1024, blocker) .unsafeRunSync } diff --git a/modules/microsite/docs/dev/adr.md b/modules/microsite/docs/dev/adr.md index 43840acb..285571da 100644 --- 
a/modules/microsite/docs/dev/adr.md +++ b/modules/microsite/docs/dev/adr.md @@ -11,3 +11,8 @@ title: ADRs - [0004 ISO8601 vs Unix](adr/0004_iso8601vsEpoch) - [0005 Job Executor](adr/0005_job-executor) - [0006 More File Types](adr/0006_more-file-types) + - [0007 Convert HTML](adr/0007_convert_html_files) + - [0008 Convert Text](adr/0008_convert_plain_text) + - [0009 Convert Office Files](adr/0009_convert_office_docs) + - [0010 Convert Image Files](adr/0010_convert_image_files) + - [0011 Extract Text](adr/0011_extract_text) diff --git a/modules/microsite/docs/dev/adr/0006_more-file-types.md b/modules/microsite/docs/dev/adr/0006_more-file-types.md index 6c433051..08a7104b 100644 --- a/modules/microsite/docs/dev/adr/0006_more-file-types.md +++ b/modules/microsite/docs/dev/adr/0006_more-file-types.md @@ -112,7 +112,7 @@ If conversion is not supported for the input file, it is skipped. If conversion fails, the error is propagated to let the retry mechanism take care. -### What types? +#### What types? Which file types should be supported? At a first step, all major office documents, common images, plain text (i.e. markdown) and html @@ -123,6 +123,12 @@ There is always the preference to use jvm internal libraries in order to be more platform independent and to reduce external dependencies. But this is not always possible (like doing OCR). +

+ +
+ +#### Conversion + - Office documents (`doc`, `docx`, `xls`, `xlsx`, `odt`, `ods`): unoconv (see [ADR 9](0009_convert_office_docs)) - HTML (`html`): wkhtmltopdf (see [ADR 7](0007_convert_html_files)) @@ -130,9 +136,19 @@ But this is not always possible (like doing OCR). - Images (`jpg`, `png`, `tif`): Tesseract (see [ADR 10](0010_convert_image_files)) +#### Text Extraction + +- Office documents (`doc`, `docx`, `xls`, `xlsx`): Apache Poi +- Office documents (`odt`, `ods`): Apache Tika (including the sources) +- HTML: not supported, extract text from converted PDF +- Images (`jpg`, `png`, `tif`): Tesseract +- Text/Markdown: n.a. +- PDF: Apache PDFBox or Tesseract + ## Links * [Convert HTML Files](0007_convert_html_files) * [Convert Plain Text](0008_convert_plain_text) * [Convert Office Documents](0009_convert_office_docs) * [Convert Image Files](0010_convert_image_files) +* [Extract Text from Files](0011_extract_text) diff --git a/modules/microsite/docs/dev/adr/0011_extract_text.md b/modules/microsite/docs/dev/adr/0011_extract_text.md new file mode 100644 index 00000000..c90736b6 --- /dev/null +++ b/modules/microsite/docs/dev/adr/0011_extract_text.md @@ -0,0 +1,77 @@ +--- +layout: docs +title: Extract Text from Files +--- + +# Extract Text from Files + +## Context and Problem Statement + +With support for more file types there must be a way to extract text +from all of them. It is better to extract text from the source files, +in contrast to extracting the text from the converted pdf file. + +There are multiple options and multiple file types. Again, the top +priority is to use a java/scala library to reduce external +dependencies. + +## Considered Options + +### MS Office Documents + +There is only one library I know: [Apache +POI](https://poi.apache.org/). It supports `doc(x)` and `xls(x)`. +However, it doesn't support open-document format (odt and ods).
+ +### OpenDocument Format + +There are two libraries: + +- [Apache Tika Parser](https://tika.apache.org/) +- [ODFToolkit](https://github.com/tdf/odftoolkit) + +*Tika:* The tika-parsers package contains an opendocument parser for +extracting text. But it has a huge dependency tree, since it is a +super-package containing a parser for almost every common file type. + +*ODF Toolkit:* This depends on [Apache Jena](https://jena.apache.org) +and also pulls in quite a few dependencies (though not as many as +tika-parser). It is not too bad, since it is a library for +manipulating opendocument files. But all I need is to extract +text. I created tests that extracted text from my odt/ods files. It +worked at first sight, but running the tests in a loop resulted in +strange nullpointer exceptions (it only worked the first run). + +### Richtext + +Richtext is supported by the jdk (using `RTFEditorKit` from +swing). + +### PDF + +For "image" pdf files, tesseract is used. For "text" PDF files, the +library [Apache PDFBox](https://pdfbox.apache.org) can be used. + +There also is [iText](https://github.com/itext/itext7) with an AGPL +license. + +### Images + +For images and "image" PDF files, there is already tesseract in place. + +### HTML + +HTML must be converted into a PDF file before text can be extracted. + +### Text/Markdown + +These files can be used as-is, obviously. + + +## Decision Outcome + +- MS Office files: POI library +- Open Document files: Tika, but integrating the few source files that + make up the open document parser. Due to its huge dependency tree, + the library is not added. +- PDF: Apache PDFBox. I know this library better than itext.
diff --git a/modules/microsite/docs/dev/adr/img/process-files.png b/modules/microsite/docs/dev/adr/img/process-files.png new file mode 100644 index 0000000000000000000000000000000000000000..455b1a137388047463cf19c6ca9710eb9fad30f6 GIT binary patch literal 50465 zcmbTebzGEP7dEQ2(x7w;(jp*TQUZc>r?hkm2m^?;v~+iuG)PHzcjthVz#uW?+4#KA z`+n!I?{^M=;K&U3z4zK{UF%xcS~tOp@=}_1tXnmIp^GPN~zG;}sKp)&eJ^}*TMo}Z1)-rCUC`J;{X3u8MQ z+?RZ0;0n(yR5hLd{rQ83;4*G0{wjJ_3$Jk?BBS1#uoy*alZHoYFQ3nbi|ZvP%(Elo z_iORbX~?*)SHn^y6mR-zTY;6^;Vluxc&{uSD7J^NIVr zz*|S6QP(FrtTy>4lKFftjjKy{zL22`qu}dWwn?^rEdtH&Fa(@HzjbXqtXs zd3WUPU6Q#nvhoDB7uBWAZiend?lAxEx;~=XakKspAGm~OoRZ5t1n|+GiAtku+bp}l zJxMP|oERCW3t3hT}-cK4M6I}5Lm@wdD8Rwn|k@hNX zvX4`@JPO)6;-=)Lm`iY>%U=ohoh(uC@IU*xmfLgtV14CHg^1#U=Yt2IAIM0Es=CeZ zr+Ulj&Lo~kPueu;Tuv)DebuOVqH#RE99EOb5msXSb)+MW<>jVN_;%&`&HV;};Fn?}aKxlvNlFeJEYumldg|)N{zvCI zJ-rlR$Hwt$>(i%Cc^_WKrlxz}ergheT)Z400#|3E1Q-6bBq%JStUNxJEoCf!-QCkO z^9lUws+}uKzz2OjJ#zU6-h~jbtb+S*gSn7(*z#FHYux4Ih@PrREMAsd|MH^~ z(H-tWNqJ*O$MJ1mOR}Zw9&+_5;xCe|2&hUcFCHDsEQvvV2ap|Wy<$eTRd49Sz-BXH zxMFBy@8Ca+)QZ1L^7Z}rxOa56J5`K~jLfWF^3y7j4ml2L&pTbFSA5B1yHGo1ju6Xf zK1@qbKj3|=71HH+FwX^_$(4;iQpaL=XAZ1WB)$VqViHyK?c4sjD)Wk^1D6e*$+Iw0 zp4z&)5jO5Ph2{43@rem2m;Q$XT*kNW-o<*C8noWF`=blE>?;1q1Z!kitDsRT5>UVBWXfK3eWb@7O=_rY16xIDxp?k?#AE|?LX*HdV`d$vV~~zqz?%di!9;1wzhBh{ZJCu_|g;M0H+R}+#r~C_aSR8 zWK<9mbr~~c7Jb)#6H~H|eAf1m$7cRX5KPpWaCoV*1SaaB#-Wzg8BP5|I*Fe$JqOs1 zT2&Z)TVqO}9hfTCklf6mv_u`_a|@dGU$v%j%U4K`%j0vKUvBp3GS5lQtzMlwaM{TV z3QJ8*EiEm5YuDf3uS!aY^&-t_ID|}UfmTyb@4%%o*8#)MRKVq@=rwaXgO-X4{<7GQ z%~swdh(Fom{2UAyEvSz3?>N&~1rk>D$B$L>-$72h!$>P|KYf|`_9|zL-s0Z*|N3!# z|F&=KVHQLB2nL;0I9c19>nU5+Qv2 z2N%a1FJD)@?Hzy2cEREz;Bi*6i}oW1#>m&1mX$H?Zc(9mMAA|u^vOb7R~-(?vs2>R zkb=`n&d$;I@jkQ;Wa^NOe@k*7b%4BVl+AI*P5{q3E96V#%Sh-eMsZM!w1Ha*ecArjn)oRK*+QU<6WQAXkuS%lVQ3|=gLW25g1-n@G3={Lj z0b^ztj__CtxLnHoZ&%3B>a9|(s;$vXaZ4@ahnID(hx$7Q4K4@THSar` z>cWbD{NSgflN1j3v@Zbb5eu_jY4N=LW6|`~*pIPqGHG^sex6D8pwDHWO^?o3P1xZ( zA}ZGE!Z9oF^I_eXLG#b)`u+52j~vwa0$L*nD=Rrt@RY}vms>vk*AsB?UT`d8UFOF< 
zru}!L(j_#{e&x^|0GO#{x=a)DNZ|bMTZ2{hFAbHIzoqM^f}i~j9JFBK;Z%YA?%}oa zE~Cst5a@rA^4_KYFR$*y?%pH+bDjTxul}ZpzgPMH{^~yD{#n8Q=T{*0&~6b?Qo1%S z+2Mg}$lC9W=O**m!NO(kt%Aq7x}~SCUTZOtCvcxhe`;13NdlPfi>5EvYwQ+aya&Ue zPo``_LVCX!-5wqbeB7Np0jmiz*xlU?rxaPUP1*fTa{nv_vT_DFg)CKI6YS;9y z*~`hxb0~ldBMn?!xM^w4y=6OkZT1&ogBTOi(w3{tN61|l9WIUxBYHY8fMxOWF0@M3 z7u8#i!|bltu)PyfQm*$5?)t5%{raWQg0brCR%$g_Kq6b+&;;AzU)BYSz}g$=?RC4m zIWGrUXDyIY`Jc69eO|KdVTJ$MnHYThEg{sM{nx5Sxjs}bfHn$ukC}-{dHSAI++UUf zpl8oeeMZJ-Em6F7RCT<2x^BN2UJS zPf1xtWu`=nQ%Gp2gzC`%hAFs+SexJArZZ{XmZfp%DgPV4R0Q}=wft&zmMKggK z%xVdj)qmV0aQSJlpBNJ}3Bm(Cp1oMch(lP|>wL-8ZlSicyc~PF$HCL@p5sJqEH*d+ zLj^h-P@818{oR4FJe$X=%-ue`e0h%WKDrj*fOXBG(5-d~v5n(kI1A>%i-qI`-OlKH9E|76?l1r2rPN?cp1($^w-9BJUg*|;_qw_K`t@<$ zLbJoho1m258QCq!Uj;gjj2JKOo{N0#|F@dMEL3S(4#O_sRfVflmB_n*h}0$YXC zzlHHWl=z9$&Sf<%?X4uXo)!}m!vdqH85B?^uF`p{)2nJW1?_DC#Xa!Ew0-w ze0$B~yRfD@fRyRcYNxsq;N9jfLmh$G>HHse0dJ|-%puK3K%HxJJ#0E33m)-H=T3fC zRwCqawn|EvHsXVbO6B3^vveeeiB3x!Ua@fCy7X~ImaUl<(-R!QApT$MHWs}IVVKVU z_$9J_6QqWh%!yIR{iuklcgjw6Zh{oo1KYk}ll97rw&!{8k|DRq<;BIt;t@^<`95R@JPdAS8szd}0yei_|`C zVx-VC>h<`pJr;RFXOf!wE1(@j;^N|aGv(>$F4DfRwbofm(!JeXKv9{c zK8u%WRj+GyMzaIYS2ZqQKRJEr`mM`dG&0Y_L18JeEGcP+~Dri zpcOu3=XH0r!m02VmB8(ZWK~pDw6qd`&`esb?ax(lC)?a^KqmnVe2b3ev0V%boBy43 zF$3~HI8E~k&G)__&#RCG8?SSD0OSPj0DuBy3?qH@Qtht7=l@<8L}#!#s0=`m4?(E@ z^C9qYTC6s%b|~|s`y>Ny|F=-N=QQ`^@^3eOd#& z28NP(T-^)<@bw+Q&+pR)`2DEttSp512e%}btl8Pwf8Snq{?84-r?RpkMP21pRVN@- zyM?s^mT=aIZe3VbChT!$t*%a>PbuQHxN+R;4PeTT)umC7Tub{5i;VX(VPRom7Q}k* zS0E(F%R<3A@~0QukY$CQLM7eR`sxnpHu7-d@d^QZdy901s? 
zvWCXC0eow&BKrTZQZ&N~D7`#iM1nvop4r*i*=+?haJCY-%>PUYUR7j5<_mQRm{vfX zO?uZPN&RzNYo6iv7h^AnFfuakwgUFK+nNPQ=C)D3YqnoQ0LK6CRU$Q&g+XehYijnq zIs;B{bOr$T?e3(s$o2V8AH=ut?@GeOZyo_h$OZ@c&uOuz!szJf?H22C`Rgnuo;R^F zGph(JSaUlF?nii@?ayV$VE6K4f>k@d0;l&s8w5Ck8|fmC+vj_->NdJy>I|ogluxyS zGB8!hgW$ew-F2$Y%*>pgo<2J}8yg$j*|FGn0t^sNDC|Eo;$c{e&qVpp;(@;f5wj80 zj{Uo)`-A&ub^kfSPON$Pw13$2fBfz5;r_?p{&l*4UGlFt{`J-Wc;o-;4R8VKe}CiN z9shbVFyXz|-+%kZ>i0MP*MaU0{NJ1ZpKtuV$KQAV&sF^!C`WnX|CeixsljcRo8Wm` zpP;cC^6A0{$8K3^74A^w>)aqHt-cf27{@eia9Bf(@;}aO^cueT%q*{b3$o=PhA`_JE(>_;X1bsRLZ~H9$4xTP@(S{8OE%|9Erbs-k zWMX3C$B!S)7?`%Mm$c;NVk$=s`eLWK?-nw=IdO-6dpj2OOllBEE9u15oR3|n`xd}-_|-a7FvuJGajZI7 z39~`DlTgPU*naST>;#>lU+=V%I5D`poVmMdy(6XW+T5hA?W~5aag`h<^x}@)j4<|2^X!VuvrlHg|Is#ytaUtz>KU z6I9TUtkbRvSeGatyG{3-flC?|F{SF3-uzc`d;g6Z=Wt%SB=3v0Km)fmf9dYjZbpOZ z61J9Dw&{W3L{4s|X~IvqU*k?C>2lXJ|Lf%W-kpyC#!|Zf%1po6H^&VTz*K7~&@@>gYQaG3W=&~kx*=z zWu@O5nMKnvsFq-WO%ry-1ekgL8=>h~nZX@vZpQ4xDSS?VRUWRi-qp3$QU;_Ib4tij8(>{GfV&1A#{AGNlcM-UTkx+N z7C`v@o(}+yX5?(;+tN>Xy4jfv>r&wh&I}DpUMG<@@;>sVeEGk#Nz(;_NgtbnFq~HY z7L@RSl7(kFB}G^B+w_aFG{42C5(#U9R?r`y=r9PIA?a64W<-J5xb7YIk1Lt{38 z97{&i(j)W}N+*iC8wOC{T?QH5uk`-zu%C$7P3OxVF!z(KA-mQam(Qh@W-XVY>Mc1r zv?7ml-t7nD(}UZ>_N+nz_5Y7Mo}QiQH#kZ*zh;Q|p3%^twN6V+oV8qpWhS4Gzp%z9 zyf%qS3Hh9QTwb^Aj$Uryd8&3=E^-GSfiYfp?fvu}j*E-a(EpTMUT){?{7Tzl7DQEO zOh4Qgla!lGz(p=BQpt{vK07((r^+iXuG7oCa<*&on*P9pi}GvBizUzg!AAWzZ{9%8 z+Pa&XnpkyerElMR!>L+uS3+1X^yl!?C4G*ggzl+8%XXT_=Ej9|Nen*Y$u1KS8QJ7k zX44e6-{oe+$SHD-y2fLUhs0m3qaq4sc1bp@TF@WuP$OEtY%XP3T@h!a3VB#Ha29T6lERyx!bnlABv z6eE?+^6^7?K5F?L;Iq8%0quue@8qY4$2xwwTi!Npjep9lPGwOUiu7|TQx__*0>`SA zMTjEpMVbSp$ww2$+V@u-*dn2Veou`BfGug@7?W1lmfDgcfGD>>SKE9q_)}0|y0tS? 
zQ+--84qdj-8!izDsS}ERj7GBdvO_xxJA%x$IduJbbRJ;+&y+A%T zmaz%0?v2gLA$%m!7P;=D8UnKj2@L)`ZraA zo~5hlThIN_vH~Cw*;E_aU`k1(w zpsv6l@-=XGwvZJM0Qfy$VOsgtA6ReDsMke|w`i4(L#FaW7Wye_*bC?$(-g!~_|WJF z8Uh-;?Ux|5I^$q)hbAI5VVgRg>%5DwPJ-$_9|~t@=iYLZ)M)txZRkY6{Ji6OHiUs< z6vKv?fr$z4?%~Tm;KJt_@uS_aL@9Wr?0g;0(^G<7x?r)g=X7+e{LMo`=Zo3oQOJ}5 z;wS}>dOX6^6O?$;O6_DgDAS7oYG$tUb8vit6&|byjTcvvXyw?IY@%MJnT zaEI#YZ*YvclPkabhpPo+K7an))~yZmW`&V(bv~6 zpcWAkIf)iLn>EH3xwc>J&rc0-V1$ZSe`O$8Yjnno9#+MZD{#~6Y6NcO^P2lS`r@$E zTny|{F<+1!qXFjl572Zc@7ud|DLyhNKf`rzoI&R!w-S zviYc#Tj@y11nrd>2xIBSw-;iPZ3^FC<{7BM`KQ*sh1IH_YEZ^cn?2WG1mP_E zZUrx-e_I6>^*Z+kJ^G&G0UpSn($K(HQM0yY*x7@Kh`4S6FnirXO-;?d3KaPj9LYn^ zIpU(CqAuAjEG*uRXV|=5iJKR)n@YzeYk3Sram|^Wy~}g8JT&*G7gczYB)1IL+J3wl!kG<+a^5aq*O3IRYi-qNK82PuVx|6yTLi$7p-fCl3+aB^~v z=0ru|`VrpT+~6?nj8Bh_Qti?`k4sI(jr1Z@vbKtilcep>|S9lVG48TH6rLr^S!%MSHEKAQu}gnQXiM298r`h3Kk_ zjs}I+D8vs;+*fWK&M*TU*ZjHSa9ft*ff4qU3^PVh%xi zklgozZORbi(oQfHNuwJ|fri|z|0vR(^6`sGtn@F&s|ILt*c5`lCf+gE#$Z=GAAezr zcfB3WElC0OQ*G{3h$!?+>p~%OG~mr_&&KeX)xq(Qvx5FnTQs zX0g&^f@Kt$A`^>55!1zped|^w!Pn=IR{%X^il4uY(#7K7$xe#LYxwhLLSe%It>#$! 
z9b#h%ELd>k18-hvj#@L%vED_H+cAk44I`o;*F(*+5Vt#Jyc7c~lgOEpllta-}s}8QR{3tN60As%m0f+%;eJaePgi4GCN~-{pZWb4<^DN47~NsKB(TDZyE#* z9)%Pr>bL855?fB+n#ON&sxd_HDlbTLaB=y+lpg;=g%ONtk84u(aX&_t3Tg88&-|ve zTiD8+kE8G(TeliDjqgLAa-DNaOl55RR{e(&*x~`f}ePZYuJ3Z z!#`9Afo)2EMd~kA$fo0Xs~ngjio)bS&CS&_sww=$E0!$%OjGNdsFzfZc4_~riWGA4 zIPg6Pu5BIzcYSpKd%qqE4~%og*;UZkv*BLA_Buk;U7AJpNV)?`c_u7s_JL~*XznbI zgCLw=SRf@OmE6wJ!{VW~h#43j?oKVNu2$8>;-QTn@K;z%Ly8K?$6%L+J9nCExgePf zPw;jB)@(Uqzx%_={x&5xT4F$r)1;&?-!9nbEz~dl(^lgA{QQp}KfJH%yo(D9fj(G& zuufoIp$UV*>`#Ccod5Vqx$E`fqVD2et_%*(C9w4JG|UG*v8L)KVfBTo>gxT7 ztq1xL6n~Z7bxk-?KWBg$pJACYeEnH2|L`oIkdTm|U`Z0v$;nBnI&-^kN($NW>->pN zaF{6y3dl4;S_ugWfbIG<1kd+LFmlL#73^^0B?IZExw*MuMaxqKTvG+EDW6bmHau%^ zy-uhqoE>YY1Omif&h5RElar*R|BzXgCd)X5rndI{?CdYc1{`U72o=!njHtl-APWIF zCnqPbsj2De>YAIID@ykI^y!nhxbMWyW{?dNrTC}rQ5O2aDWaiSfOn|^u4P(PD8acf zNliFG*TyjhCZ_0F1_Vds*UH(!0C#@BM9j-t5Q={eZ|9b{^w7|kNkP1|JtTorTg}ml*na|_jONobv5PfQJ`u|0rN^$ zXhg*M`FUMU4al?9EHdX`2;D-$E@ozDiF}cOSha~iI6VAT(?KzIXE;F78?5J>@2H{jg__QhJ}mksrT^@ zAb=GYhalz^LNK6(kQn^Cnu3J`uk~Phpx5fXAavr=mqaT9DIRE(za!oZ(#t?OV}RC{ znMu7tUnHzeRXa!Bjg0CL{;n^pA{xzf?m74$1bIUsR@=qA+eZaJH` z3eEIyy@s*{YsmvYR;(d-`ji?6bhw-wm!9k(8N}NGy}wSQUQ|H=JC+SAW&|~&=h@I_ zj(y3r1PwK{k6@C_0SzFJ(kd877;U(`bnm1AG>jkwnJ05=%S?i{`OH8;i3%uHVIitT2d0ZBm%!IGBWmXzQ$_AJ9NpGje+5N7l{a1vVr?fZdqBG>+&aa zXXnYFP|r(Hph94M{tbU>$~|qskO6yqN~HI>FDf>e^1<}(P}ln4WtV&XHh(5S)r*I2;$?J}s^O)4>n#ZVxMB@3*qMYdPEHG$NEQ%YjWPe7#>alJq?VEGwfWws4`bu~FQ%EO>D0 zPd4QZT9hf7foFi({_#c+pp0SCjO$)x#KQ#gDiN8IXcXTP6N5Q1?ln-6pLeF6m`wPK zEQ+)s`zaQGXC?qJdsqGm02C%BCI|fSt=I_)D(YN}6oy($2qD{g+S1h2sR28(JW+mF zv{W@aA77zeoY(`L8>J}C3$O$zh7$aFyJ_P9cmZ5ZQq+3}x*T%x`(4#4S!C{y>GTf?ml7A(QmFa zH#B_e2`5Lx{3vJU>3LJva>~jXgKsTrnw*xV4`6O(rF;enBdQ?@d|^=t62)s6m~hl) zWMouUj@QS$6&V>E9K^Z&^d_(cBTjiy;rJhQWBDN zq?dmEgmvIIe(r%|$T1fBjR}A|?3R|PpNEex*yH-wuV078k5gXClhQ~0+63z?4u@;$ z=!6IygLQ)m0g}315WOE-1;nO6qaGx-on*O55Bn{KIQ$Diu@}UXNDg8jDEf4#LJWXk zDF~ga>gsz&_00|s58qs!lcp-yErjj1+t#6ebc&F}WnyJzWnsbK16DptOiJ3_*@-$9 
zSg5n>85vt#l)HJR+QR#AP13`o(fP1@1$aLwUd_;h%V4;yzP`SZKDj7E1cZcF)wf-A zfD-f~{Yv~T_2yM{QW8mQf|-d4K+9f7bbjtVNn{I#)hQAzhBaKA*tt2a&SQd*-0bWQ z)xP)^4EcP{lw_<}1vbF>P*G4ayn$a8WK(Tgqfn`~B$xo)j`i~L5|%5^%E~G)kLWl- ziyiR;7Bn$4Ye$Xy5sCYw>!wS9o!ziTJB)K-f8TmmgI0VYEsa98RazDFTOkrkV0RWr zc!`H3q@-1I*8cwfc}!ls%&H$U=SX5lcma*H_j#r+5liRmiSx=a*HjG3$%SSGKAhXv z($tiaCshD>-m_+Z5uCHv-04~Ukj~1^j^{V?*N_nLI{@Ov#KEM0bN#D$}NDb zAHHNES_I9Z0pl5BR^Wd}!Yi)@7s?6wZK25-omSwB=zkR~@Uz2TbVn z{JiR^k0LzFMYR3*zKg)S2t~Nj@#Ro|KWyTXR>Ku?r4K9)8T`fO$e)T*qc}7)bo%E{ z?<5Q0kPd=}4$*}AwG|(R!QMLjBZ|r!8MhKH0L3|F|Z)Ii0uXmjJKc;_4P=xp1Wj()olj8Hi z_x`Nd$>H$eW}p$GMaxC4Q1+odu4;}RgT znZJ@U8?b95laoo^q(qt03jxL1+1o2FD0n^fHRA~`;}WpYpNE--o}NN4SXHCG3%le$ z2i|$#XuJDW36Oy|{5!QQ91f2v>A~3wxC^y*bF0tEK{|cuMe@N!ELM{#T_1qhGsW=r zN3Y{>q!YmHG*niXbPFL{CbqW22QC6+SgN56s%VOQ?LmrgvGPPh`R`S)F_0Dhc913G?effNYG8D)v%R5)r+A6&)4j<0izOUatiIgD}HY z_WKw2>c+yc)%vS1i+qb#6FbH;(I#W_oOvUj#TR?pSFbRX95EAe$ zIoYbHP5#-lXZ5d-n5A-~>0doK^}YKIY^Z5&ZGAtm1xiToxrJ!j0$65%jkR^B>1R?V zbAB3NLQ_l2$MNvWj#&;)&cMLH4aT*gZJ?hlEG%T}41rP0sSzwt)AGvnQs|t;DYr1x zB6LxL&<6f*hAw%;EPkEg9d+k|_I=9oR7D7sn_I#`nlnZfAbCj?PoQSPBm+TV5gLjy zSryC;<;jH6y&rAbx97O%t_)MEKwj^?zP`@r9wSDwoR_3zeKtLK+RpBAD{JR$?i$M3 z-QBH7_0G)*$HQ+!30?|xg^;gb<94AE&=C4+-`wokD_;PRoqztkvdsc63Ml~cyhB9j za+}(rcO3u_z!&OeYFHY8%i9J-1K|J^p!2K%-ht)N1c;oFvLI%ftpaJU+tAI|a5g@_ zTU!JPmPRm(Xi5^K4<{$iST=`WdwT;X?F1rtb^Y{V9$10CLVS;6*+gl!OasL-xus=g zL-JAX=I6CL?Vl5;gkGwDd~xa~81$i3-RcWlGtQ(!Uj^h+84OR&&VCqn64z7J*G~h8 zvBs`}kB=|=4hHrDBx^#kb+BGHy|6fNMAS{MH`}^_8_1HGJXxvaj9se*1n~(^VI18> zoXu<~(NQzVmVE4fu?2xcPPSWtd$1iWqt8uEf}H7wQc|c)WoT?f6Eib`4=mPdWS}8- zX6ELm>LH*cd9nQ(8iI;*LY)Je*ce=BV0#6pmHW1!Hgez;M`_nuY|5@gT2aQpzz~FQ zMsu?{aEoHavJoUECwFie!OSpq8F5|)cHs7P+e4Px>C_4!!9T&Md;h?I(&oCOsrI+m z{X9UqcLZhLPcI2j!14mxuERpG*mr%%&B^I|!6xkP{;Xm^zJUU6u(sJ3ty62gRvoP> zeDe9%^ffZxjJ$(`1F(Fjx@vN2DsZ!p{HdxfouI_WL4F_^ej`{2VV5y5NT0I&*r0o@ z1kNGzgOwFPTum1q39pA`Pi-CFmx7QT)BDyI2;!IF1K{w_A_UvlChqSM`GET=e!e!i zKx!&^MW!c+7=jSVMqMOuodgg>wzhAYhu9TCd}i0x@dybmr8qf(krB=qz#Lwe-2g7Z 
z>qhH$^-4^$H};eKBUtdP62be=A(gTq>j;7&v`&-?4#&dmY(#3eP8#C;31GHjj==u- z&%mA;XIr{$0Ms64qU<^ki^u}ged>8^k}4$O1!_hIAQ}Nc*3|s)tz{40oScP~6^erk za!T*W2#*@!FVhX;;wh4frz{R_{A5^61~k6&r01icupZt4>l1j{oNGq*9I(W5Dw9W> zk4!Z*;$;ZR(F1?~{tcEMgn4m1(%UQT;!>-S@!(l*o!=WB9UUqaoT&E}wzlf(`1n9t zF*Pxf$XhdYcPCk)8?aD@H!aT+}b{$dzaLyVA^#sgX@M%cq) z=txnT2eZixU$jBYzZUhZZPYE}Rowdk7rah{zwh*q28Pzz78Di^cPaxsY_KMj_i=@A zEL)p}m6h*ZvL(R>G3b?dZ;VO`c{s=%aDu1OF)Ype#y@5rBXFF)6y62WNBcQ}=XAgk z$rOj|^Vkv91n zAc{2K6WZ2!%qnyBne<5oAC`K}Zt%45&3g$pYhZgO8cg zwTOs_dV70U-S>d&f2X1mrNik>J8IU%xOjpU*g=aw)NvlUeY?F`t;xcq`}rOD8A4CT z0$|QyoVYeCs~zf?XU_hOf^y}=zYxANNi?S!lMJIeEq*E!V&HTn2HLAxg`JsAd|o2 zfXbl#`SS-5;iR==FnF&)^-Rc`JK54*l6|?};n{6Jj41wv-dlP3IPXL)xP97ez3bt! zv^#=CQolLQ5gqWt#%;>PE>b3DW-yhntE(#qeOBMRwYB9KrN)%b*uP$hZ@<*y*$Bq$ zKwS=V0aX$^uF$jonHC#6yY!S46Lv6%KnXsNK;}5QvXY*jPMO^o1N}CHoC%Epi zIz2ww9vMj0C$%y7;k*OzXx8m)U+chijg=dirdXbkSGYy~Od|@3Ns(_0Jh@O!8NW2y zQ-PQTu}Lwkngi30HDKER7c*@fG`3$Vrgv=@%|w9+*u7&GR7ROtr9 z)K|aroVHV4e=j#Tf~oC(sjX(r*6+g6EQqprN^r|&mc{U^>W;P9YKsYQLIdEDoU7%m z{=fxqM@QKG;3C~ph%#^eI#35HJQr$hDt|s%f8aI)W(_NFDcjmi&G!S!JKju2Tquum zce3*$V7)NJY{9~ffN-7lw8p$z_a{>iuPT7~^!3SfGLLFN2$+i3sX-K)An1l;Q%Q5U z6NQS##K$KaK9<_#RHZ#~AP4-ya?|j0zw6duBA7fTJOA?aH<&O~KF-U@32JZKQ!ISz zt)xqMu?hyla^kvOuKn+1O~Z?n!ynCcxTwy|hC6-NWT_64C*a1` z4jSEG`_0LK^j+`M?al<)M2UK_b2Rs_Jg5Cxc3qaByaEVPta2ftHf1c^W0t8-*VUg7 zf8pCM%lh{UlM8?;+aughfZ5s*#aFkA{)`!u3H{Ovdis;K^q^a15QII659MIXf`IO) zTjIDhNVq&FF6z_ZFCv$fV336l)CMGA5Y{XB4T8mh*W{{sK>973w zh<;jHeJf*%WgNpq5?i`U>_b>!mpk8SgV2&MFB609ilka9-29TJ1VWY9lbk5xb|eyS zjiE*PLPj?MWy!hay!j9eiHUgp8Iv;bIN%~66$9gEl3}??7mpu5&VU4Q#_c}X$%fPq z4U9sl`S`TfgF%FB@Xo@)fE5#x=w#4K1oprsTaAfN&!}`-y_)$EkQ~7SX?Q*F6%I6> zF({D>I3NW(;*^$w;jE|?V+*KL;}V!CjN%-tH3S?wYYhOHZbmL>W^xP=?}e~9Q=E3o zO!85)+=lm3adD}w`@W6=Gd@q|^LCZhWQu-}tI-t++t3wWZX@oOcovuLX4+!~ierT= zZQ>@6tO_;xW47}Vsux9R2%o-=BocLTae-9pwp}W!I@bvwDDM|=D7*pRWo7TL4-{%IrAP zVkdxXnX8d6){bT)$9d;~Ph)Am!%~z*OlIHp^{Ed7WqS*Fnm|&Z1nke~%Ywq^38Kxd 
zEflVra^i%fB#w@vy-|6fwoYDp40KP^e0|77$)sbWr>&upzdQ1Ttw_ggwSQ)E(vM%| z()%k+=4rDcT#-*4J{kW;2@W`U_Hr3%4`}b8%#sQ|PkRj4qTqx;ufPQNhCMH@`Li+Y zl`Pg5e*NZXXfTAtPj+KikwJ(ikRW?7_dQkNw7h^rFY4=(lY+6Fd_MdzLmLa1K$>1* zKcak8{wY=Zsgf{+ps}!SRPywJ&O(KX(6E$$ajLA@99A*NU>{bEd>$Z;_esPdN0k0% z;9UR&C?Fudj~?mP?$34vBUblI1nD@^^Y&zQ1gD|HaK>k1qKgbBu5_>q{} zcZNq^G1zuWN^I3F1q>^IExLt~N`Sm0$m&8lES~r}=J34rPKcIxhQs4e6g(myAKk=| z7EE0{T=eu!azGEy7xQ^06;6dPD!=lj4&YG-LBSHx=Oob_1{CDw1K5q8>B|0aSeMN- z=cswmj=&X~^Wz5zW#^my2yvO2XOz$mLh2-+hP9B@uWqGTWS}2V{xknbkHZsPT1FIUmSVfJqFLjnQfuXa~QMWWd^n6l>mw=&Gy7 z{jrn440czesLLdg7-Fe${@jl$DRC3pPptNe=?GAWE2WlChgD+xc{MG)%m#*rhKe1} zWxiu`tXqE}+i@?AKV?-F{)Es);VA(; zT#9jtLQze#j6yjW6snZ>J!@BrZJIn%P8^u$t5njdvszi-`|e#$V1WY-Y4GZ;Kt^I5 z?#>5OeJJ(s&CvRJ4s8~|(-*;GA8mthrvr6fmIBJi^6+dIG_eSKFtD*vg%Rn`LV){sio8yvoB>inNr}mO8wPOXYa(MW zsrB^q*6L2d^RXwC<)}okReikpX~I*!Zs|oP16$j&mS`1F5`?g3(D(#$p`{vsC_?E} z2F2$XdHtSagFTi{i3<@LVD#?fOb!L0Wt_}NH`E83E=Ixia@t_j#Kq;TT`lAza1cbZ z5b6pq#@zgTr+xO=kw`^5@I*=q3y`Mpi0c-}-ReQ!t0~@DtiDD_S+_WUT#h5c=aX*`g1?b6D&q&_c+WtN|vTw>O%glT@O{k4{ zRoTyTcWljGp62dok?-Nw$ugjU_mIIwfxx|U&? 
z<9G@b*gpi!0WF@?APnWZyt)FEC=ojV@4VUE%F0Sy=NBCgG9??E{*zygJN_Df;B)ob?PqS5}Jrq8CnM4<7Rzei`~RROqs| zsZIy;_cMn@gQR{!B^>_mFL6Or0bq1ASnq{6G@*bUG)A)*uu!&N+0wnf)&$hvd<-*g zR$Ghq4RbVj@+0f0r+E%#!IuimUG9Ui1p7`*ToVUrm-AfM&gUsh@W>X5H!Co!98d(> zLo!S{HY3u(0D@wvq$@FEV25FWlVegKPVeAgs0V2<|05XA+>SF%u8#;K`dfHTUd^2K zSgQNi1k@P6QIse=rWJbWckiGIlAta^hp-)GBG}fE`UE9L`z9ON+S)qPS4+GARUZ+5 ztBaBnuD~WLy3uptl~+kXh|tt(qq~k#S63&=0b$@OxwY6N!SLcm_?1tks<3flLINOM z*M-2V*dNtun1i!LQz4Z226U!Hl597GmqHh5W5V4Qbz&Jn2`|I;s&+)=Rr=EyCh50nR^^?UxHc_79bm{B??8a zaLI$+AJN}yPGD059e+%ZKu+wnc%yUC>(L-6<$7{$EjM=*E+H?=z{G|36({uA&7U+DzG5qMb zJ1l+1@%tFx3|IJr0yVrMwW7Q{v$+<$HHLJSzM(?hpwa2+?pV@72sIO>Mm`--HfU${ zQ^A0R=mDh+m|(>D5FQpLc}xc41;q8UtrXxu+j=4lpdma+xBu+(keO^ji+TMfD9XV= z3WhRBGO91si(`c@gA|pNl=St7AsH~G`-eJ^CB8TFCj*s;gk&%=ZhhS-MgNJydlwgO zKs>)yec0OEL?Y#V&I_LHdS4!grl_VC`ypCR8XHX{8qvQH?4ADYy`I)DzSS(`tQgD1 z!^6q^pQm&NRyaZ3{pB$gP}7I_jNJg5(6WFXloBp}8=0aUs~ofg64V$|6}J0Si#mqR8wLzfTQ-$K@RKXy4EG^z?v%Gy?Tf zcgB?_SMB#@TwP-90&VE_$qj|amn10jvV|+fvZiLhxEPFvTOFd_174&Y*YaHYC^Q$$;lnX z12z5vB@S<2f(86hj66$<#MD&DrKhLxesI?;Uc|T|Gf)ULWg+$qTFG|f+kcbz4VfJq zTbnq_D!-q#Y-4&RXz+xc6^woYIpxt+qA1t!`~{U|=@1M#3e!3jsY zDg?3rYxwJqxRX;AahlcE_O_Hmu3u3R6H+|515ms%DsWGz5#r5h>hfqt#n>n0nRClw zLmnD3Vb?KAoz<(qt!uRB%*3HrNE8vMT>fWk(b`;3KaPTFFkbv8_xAx@pnli)Min&O zKD|L-?KTStT;zQi(uLQ>&&f{s8XZ5zWO*wz4|HuoH#y4bJ@^8-yNJ{OcRyjQzhO|K z#fJ~lgI!7mkX!pk&~iOgccK8cotc>djo*yR)oZ(Zt?mf{!P)64GO2!orKN#HwAlyo z50;i+%N*$T5k9Q_{76Fcb9I$!HxX&X58UOc%1#*sZ(YjMlf;PPUT$%60=G{D#f-uz zIQyie%ThZ}E6_m7&(E)}{sSI%Y9k1@q6f!f2Era}Xcl@3Dqt|bCie|#r_CtfxzwhN zbG#(?)FB@8eVAPJ`rbc@&KrMCPE0)a<74pvltUVBB0))SPxCm2`N^ZifzhMf8xz8lJ-dH zB`etEr0M~}2eN?*mj8#Tw+@S{`~E;J8tIY-kq$vbLWd3kC8S#fq)XbN8>AT!q&r1G zq?ATvOr%T75dmq5LE^6YzQ6n2`?rsrIs5Eb>r)$Gvv}c*dtR4psowM=LDe&eM)!C- zM!;7h_yraY(frt=+S-K6fAkQRz-akw%*48Lz~mVHh(mKx7I5@*z!(#9C|&>h0g8It zsK_f^JmKcQ73c?cCsQGAq>az>v{nvAp9t|uP|O`K#eee;5on+)WH0L?%>KdB6*YXv zqcMBcS;Ca_Yo-RqGzV1JzR+M)vQh8xjDaTZ8GZ*%PY=G>;;mJlq!0L`dL#696z<$9 
z2i1|gZk2f7B+m@y{XgierBphGnG6ClQ3}aE^>HiC{(wbCqxf1 zhvt=$VM0G{GHLDj-^zjYUU+W{lzEK(qstM%5bHhQUTK*a8#92sm0CA9M@NOylRj!K zsMTe1zd#8<&2!&MgGP(*mvclQSwdMuLuwiUF&tGgz8h~r$@5Gbq2y7)AVnm!UHIx% z^3v^lwMYmG2)?lhwV?}GUO@yd^6u~V7oubXoWNF}u?xO_FAeizZzKJe!{SBBG+ycJ zVQ&G&1vd_u-Ip}Q#Ke5ory`|u68O*Sf+%Tj?kn=Tbehz?L~97la2l%-Rlt=FajgiR z?QX8>aa}Ck^6#NyU!~Zm;B^A$-cBy)eN>;Ny%MN}L!TW4Ns;^4C1KLE?IK=O#gfk( zFwVA$f&i2?@b3^z#p5zDhNrM>k-uo!XJ==DZTLi>=&;j+qzKwc%P%P@x&7}?qY5t@ z|Ks@np1Wo-=z z-dwnX+B{ReK`3{yK79bHuM^azyDlz5p1AJd@Z%MGLX;aHY^v~y6bw3k|7g}6?OXPe zhI9S1J2zSMl7Jg>;{}K%DlgA9d55DJ*yG)- z!+FnEN%{KNbn?!DI9dVs-n{JWqE0*w7=Lr$**#FDk@jv_O@mrZsusIt53apcrj4q(ht9-g!avje-Ku2I-Gwx z0@%pW>zW$U!U>tpw>PAv(L2D`o4`q}_@ZGE$XFd`suXU-1EDQE({BRS?f$!V&z~5; z@^PJPxE6tJwg49_==6DN_R}*mJn`ujuP`$!<@O@K{qTZ4^bBKp+E0kN2%3W&k!Zo6 zJD86x;ZG_ar=vC@7YZi|J~@~>1Ixp=xiHLPe|$cSN9SR`2q$a5sSYB%tQSfbqsOkU z!a_p#+9Q0m(e(X=P9So%BVh)dSsL)0M>?cwfNDnEJ{#*iUF+nwyY_8nW(EpD^fyVU zYOEwb5v-#ubalf54nJkxP>JA;`2)pY)~F2!)KodI__NWzV7B|W(N2!W{hT5A9lo9a zSc-Bw!Q!g=j?WapfAlhe86}76MIAkaasPx~@z)e4>HF!N%G)5UO34pi8|2CFGYnpR zkgsqqI|L$h(+*G3P(QnGVeRBTGi z&#!aSK`tfS;S4jzd;+>J7Q<689BKxWWF#Su2$lC= zwr_yIFq7`GC2s7Sc|q8~;d>aHu>*pa-xGOd^m}`I(qB@n@G-q>kMjff^PX;aw3-pw z2J((VJ5fVkiKH~>fH}!D-s9XnEp_LraXQ#hlj$8fI;pxB4;D77Q)B{Ha z8z9m9?^%cl2rAi>;8mB;9ekB180VFWRcd@@p{_iGlY%7bEAPqQFK6A4Y%76yb^4gTFoLEHMU(=??;IMe^ZCK1E-C zUI?_|n|}%goV%=D4(kVnFI81nr#ywJf_nv$I4I%Wb3U*8sCJ<0 z-n<#h5xNdYg|sl0JwN6mD9}a}$?tgWt&U!*N5JkIs)c*b99W2T(ir}lLk*mhk}rt2 z@e6PZtX0d7^7aNRLDy08(?bmmRH6m;4i2I6k;mH~eE*~qj6yqb9mJM)cB_8Yb`KEl zKR={^VD(ac2gqO>Ka(qAG%jHjV^UL6L=gzrK40?=WLD!)YN^~L)Xq7raX9ZjXxd7E zU(YEIAY|u!TjJ^K>-)1k6`$}z)1d*P9ue`ZLV4-Ok-IrNo6$90EN2}O^bzly`aMDs zi_yT+l07QJ^<^L;4t*8*wmcmDMP}9-Nx)P4^b%;}7 zWFWog!T{)0EjoklScoKp$wk=Q&n&V#SEu*pxP3aOh9sw4)l9+0~^zvl?*%8h&np^CT#6i;nqZ}6O8$X2usvN1ri zygAv(!^gJ*(IynKldjFvl*bErT-OBzNZdTKmRw--0GRu9W48TZYoQm8Tf=CD5%0gh zb({w*y_vDZlV`y1IKF3WsxK&z^XKlMw2XLUef2bwg<@r~Q&Kel2?R?)uOtKc8`7}) zfEXGeSPm7i1c^+MA|dXLEMRY5*}{HPBnMK|u{{%77Kgu2v6xJtj}wHnWKaH%jL`VJ 
zSKZo&Gl?zmug)uUCgDw7z3T?49;#_Y*Glp5j3kQGil1Ts6kE)9@-Fa3!_PKZQrPaI z4GkWoElfUvBDf$B;khv*<=l5VwZ`(#>M39fWI?c^oyK22yy}xCa6fdj-*>_Jfv&Fm zuhmr2=+d#>zW(^SUhmgXU4mz_DG(Uxso4=zt_8@OYWc3?eQ35hAv|8E)d8PL{p6hD z?qp0j`>pgh9HSpz!X|xnZcBOR>BHjv@SPiz&Tn8@{)rMGQ+=N&-@2`f#>p>tmsUS? zb;(&+ux=jO@M-T$+KS==Y(R?yDlpJu7(OQ${Q8x=eES~fjR zhK)#bn^MWa?*yqz;^nZ;LN%6}Bk%s(6dhkPw9fpEv>>Y@g-Hsjh> zf+YI|Gac(o1r4%^&l>fY$EK#@w(eV8?NQ7*h=jPN-TbrLoU-OqiWl@cz;Nk2*?<9? zSzlg^YYH(h8jSYf46zDqsJVb74^F9o72N`|+7&O?{jla_xkHY`b3`ROLYHjYp`-p3 zE~lJ6SRi;;bolfeZ~dxJTt06?dauj$o4Zm<+Yjg>#GW<3qGa}g@SDe9a#Gg)LYpUI zPPXM~+Y0qQsI1Ea01}<+!JBg~oqRbe9P#sKz_RteryM_Pdivr(0_ zL%N*SfW>g?{Z^#*54ym3u6agP9aX9=`5e^u>s-dE`0WOh@zn1JdreA%l93P~rgqiy z98;kW1lFXwsj3WOtz@X;#}AdYzgj)kuRiyufQzCnh7E6yA)QI26#1WE4WIT8;6F>; zc-K5a(kbvi{of9F5#%Eabso_nl0W-rC{gM*Z}ff^rjxUJgEy*nhtDPL4DEBQpYqZ^ z&sjG3^tOT`t!kVR7+)`^WX1ge{DflvZbd?)?hessMVc7;owt@O%-f}JymfYVp>$L| zj_xvH;$^BrGD$dVU-mlZvFh8gn;Xd@qm0ERnxLn_Mac4!@J4zw&e)u&h%x;Db@TJ$ zFWSz@qr6gJA2wZ$wou2u#!$1-m&%L2*Wyd?Eb_1VR9sNyrh~_z>g3{r8b2I?+6nV~ z>zYe38zO#3@U%Ei_`bzc*VXd`9e8{SZLbsw{lCx7)~@R&+Hg8ZW|h)C+b%}(zmpVT zR&}JZf9@!$9G7gK%4xO)fi0_&JKil;qm4`u5a|he2#6)Bm`ZH^eiWCh-w~_mcv%~B zql08_Z5VMSedmgFILURdZeJ<_I8*JJrtgZQ$QSzelO`cSpm*UA=Az)40%7TCQ}Eeo zd47;dz(4!V$!XcU&xboWye2xtVKw$9bT40(ECn-$dUKJoKU8%P91FxfNc^`du9Jrq z6oR+n_1B^)9U=U9vT#Za2m#X`dV>3$EK=7^;H`bJxc?fpezlP?Q!*`-#AYe{W#t5?QB0g{YM`N zD8QkOjh~<7&!pmt0<&%vcBUzVMY(h>Q-r=$ABow`sdZjAbR?aG$kKzU?Tl3+3wr_S zJwH6_i{?74Ew3uor^PmRN3O~tRVX!z?;KaXdly)Uz#4WarfuOl(Q694 z2^yog0EdURMM2zL#(C#s6@{P0>L|#tLZr}NqNN5H; zjC~&Pvbio-+$)(Q+vqlZ`(eq#?ENc2?0CFLM-2Z;VKnO$98I7iN7C)l?E5SBf)Rgt zxjiHI`0|^|JJ}`5p_}iZu?l$lbo6w>6HNLYC;QXj>CKqB%u}g*Y2+gKjSF|KgbMd= z!71F6wP|WXO2#4$TF!{)?bd1T*I3!Yo+ z`a_~7Oh^pj^1JmoN5PEWoT*#4f_HCKWMmSX7QaMJ0n1sy>K#4D5T+YZ)bb-~BLU zrhcbdfeNzUI2W zc3Tn^aQVX7ODmpWQwH&Q!dHkD)rbd?!NSf5yLTyNd;s0RqTKxaj51aG37@-Hs(&hH zq_Z-BvnwVa%&1CjK+SnDEb7DtY}413&e31!28OtL!%^%3iS^B!L2f^$FR-WN15r)s z&WyBKdY|A(5D^IbT2Jlv0A$PneBPm)f~NP@?$NohC(WpNPBomiB14=;hlXcVI=bhN 
zCH5Tgn}Ef0LOLf2l-nZkgXuNz>7?wy!@=0eb7E2|ItHqm^<*iv zFfY9PGrzy=Nb)n{{n#WnjDVmZGc&XJOJ}CGd70RYErRWLQRIK7o5>9~VG2X8i9A;A zCyJasr$>JN=^XZpFI}!t4(@DBxnyYeJePb~cH>{+k}-$<1J@|F#V7gyW(+%aXfL_P zEK}4T86t?>DMhSFP939va3m^S_x=5?5P+M}Y2t0s3pXcYUTOWB{!8unaeLc9CgSdA zi(yCwo1Jx`!mO>Y2SSWeC9*A~Ky^;}lNFx&AUhKTok5Ao`kOS=LiVqa5cq>|i7 zTU$RR?SQLKkq50h+uxDfCu^25`wV?cQZi=h820a=^i5pO&|8hBLyV)*#ILgUo!E3h zG4L#FqgZgAwus5^CNIgNyGw2VmW~3!?g?gVO1%ZfOa_wrY~nX`9sOUU=R6jiuMnEn z&z5H?NN|!FPzpsnm=++tUu~z9{(ZnaKAHy}m)Z(Ei%3ez)l3 zqg7fZ{Pw^7ppF}v3R%UHr4|I*mj((nhFELmV|A9_eXg2#St~lCvTpTJ!(znrPhG9_ ztMb&x<}d#$i}1G87vj0Ox+Vt7#0JCMF-~^_DJG}0-#br?`jC_?x}5P~>(3IF4}i%UVf-n zPTGf4Nn0UC1hA~|fa(O=3x*xIoG6HPl7k3_KORGq);P?7{w_$-ga`OUL~hv+TCLm! zpop3pDNt0WwrJ{FJH;)j#{P9FDeryhP`}XZU$m+TdLVYs&XkU<KvX{0Mfo}s$bGN$$%~QPJ-5pKiZ+#SJ}5Vcdi_< zsc9Aj#ye_Aryns>FPGgBmE|jf7B=4jMLyvy!ID`*4KytC6Y7}d<>j@TUpe3{UpFc3Y|LABnyT}c0>Q`_9qkvf*1im zYE$ineo&O2vFG4#_FxqHu>uT!D;W5H!dXA-`b&`=Z3wd{Yj7LPgPh-T)T|lPHHE){ zKDd1c{^LR?bn^wcH=}zuhPjBt$H|v_XRrNwX^`{E;=wartR{3b=klLK$BsH;l0Te7 zeYI;;E)!rOmWbX>X?e@{086EY1^9WZZh$TLZD}cl3|fHiiHU{977Onhh;p;oTtGH( zT7I%!fz$$EpHLze-J2gjNHT*oL(pP;NVfy}Y|!Z_%`*ckv(@e{$+k`Y&qo*gsT#TX z6yp6rmhSrh4<|D|B}8fYVupPsErWxDp9xdsfU;i031s)L<;nkDi+W%ClE)@b4w)awNI`zaA11HQOTq$3QaHa18I>M-4Dz0L#6(1>}37O;>Ti zF@VqoKMV#CWxyF~_!a5w?vKMKQEpMD=_Fxt|Ih&1;|AQ{G((r~-RVoj(Hl;6?|ilw z`yq!CB88H1$oQ{!!_B`ZoYXRghSx&C4Fr@){qt%7EZQ_NBFaPH1%D_8QN$VZ)Yc29 zJwx^!kIijLS&wm>gP4C4r9{&H2{>B9Y(n;@qIzecNY7zj!tx~=EY(bb3^ zp(3c@+q~3yw(qH^;Nu=tRQKP5a&~q+-;G^1{Q2`|glwxa4gt_$QNpw7fymQD0wwm} zUeY)VH~rrwh^bSKpr3l|!7a&_GYURkswWD95c?oQCh)1CYn{QHY?nh0qoJjhpPe1| zGYcU82E8=fhjaJ&E&rlKK$_D5dtfMlX26XVZi<4fsdsbC$kH-NL(=C4UCM*t;Lb#X zw|?84*7ADxXQgb{o55jR0{--fuZx=Q2uXoTbLv+`46Llby2`C8tE#@+K6m2eor4-% z_zrhi*n_P!Ijgz*f$5uPZ+!Q=UG#lVzmQF4ygBaHl)0EIrr7??H1lExyf) zQDUdywWqA0RA$$vh>6pw5^~fWrL0!b3pdIk9to&oxBm9N(7H*|+llkN@@3cCd42;m z4xj!0yyPj%^INl>5#5`=5|qD(^*AtSU@;#1*lL)+ob;?l%@BvJ)}^ zt80oBZl-v9F80O#f~do5RZlhR(KFKjT961!Z&c-%uHq^tsX|t`=|GVG^#lF>6|bV9 
z%K3OTqVM5)xZDi~`o2ZpmL}`w>8wYk$3J;w<%_9_BS+BV$*e=5b@J&=^_-vh!q3ZQ zOb%((LN;2Y(nXsoXup%aah(ht;bJE_3I2OEF5SIe{P1pPiT1p!Mzv$0`dznk2GK)J zQyhWP9sE{CHa50eetX{y2%IABh)HL^qehFYa}Y3^K`gjG_qNN58I`YU5qRe62QcK& z@4h9ErXA(tp`(KsGTz5G61bKL1dyi@+*Pj&YR z{@4@LO>^zFyKZhGw;o#3)#HaYJbk5&fWU+^)*+bJ0UMXpUPGEf^6-icLazepScQ08 zeAf?XfjGP8-wxF9kU+Ogbp{9D$l-IPhLy9|D=aLWtyvb=zB11N?ao-h4^HdhzLrci zuGIVG{^N!!oGCE_OP``W2_qHk3$n;dkr*KJW|k4<1|1-#Z=9S-?qf($LG-g@UhhDv zbj1e0?Y$~w)APn5QUDp{_kSRFKLb2<#9GOa`O$muS|%>4u}yY(>dx@`@85*@;iy1g zh{8Tyqtjz~9>T5X2n4zhy1ipjhOkgd@uz3rQbEvf=blKbX7>5!>CZ*Fc$n(`oCLd&VGT8Xzr9hzA$-}rkq4jM8*T=a#S?zD z5d%!xkefrAK~#5d1=-Z=*u?qOf5}jVJME$B9G<*=%()b-HX1OJq+i^O35c`f-AU(t zh_NPo#K@o--MirZrH}hw*ckm-hBBl-C-hrgH-kL^m-4ys6>?ukd1t<2JXw>+oNZLYnS1|Mm4-aQJk|8NkI&y)vH^0Dk6T+?%Jk;1ApzVrYa8;_-vXTl zrENl$v}66V8$;Jj<`9{XKZdpZ_S+Rq$shx0W2>@=pPfKyPe6jqt8oR17o!6TF&1^c z?(W*BUXV7zM2auT?{*xg$OaEd9JI*>a|@yZVg!6oSL;q!sq+u}p#Mx{y^}p?gJZZl zvAFKW*04a&5_R_O_kM zRtbuF8;iUtcE$}FpjX9i*MKy1tk+C0lL|gPZW_1vml=%B1dny4p8AzhDFosaQgk62 zS%Uvz1wjR}MDvr~4K@r!$qiiu#RfyoriSH_uI-cW;_|fzKfK##oxEe!xQdbnxxkKy zy6jRbi$wyym%~q3uDfAaLv?0Q`P1?bccQQM;+ksdWC(uF?NqIQ~hHEU6OP zq*Id#6T>^ySLuUMb~eF;6g$;9b>C2K3Bk&XAS!u>$uO00o^S^UIT+H)qc`~)50&UG zA)EZqF)u$GUp~~wMuPAEdn#~dvaoS&z<5CUHK&T$p?p0&ZuaCel$L+W z5OFy(!7-+XXed){1HuI>hq>uD~~#lH*8W|Y@2v3lC3I%Ra zT4D!SJir;1hX_0Z3lt=Xo^2-uGbgB=tw1<1Y^q)>VYnH9WAmJjsGDazHQ>v`j&1rS z8aVENadY35t8SDFg~}u9ju5TlRBvQfqPx-Y&o$!DCj#%Nd7t;@-(E{?L2NB9;x4h@ z+Fla3Dvpx9q)(tQ`{^mwfAAyl!2F8AN5B$B(4;n;7lGKDZiAk8=*3^EYZ^XThK8|9dH$4ocWxG>obVBsl4OH`dPb;@`QGB_ zdyF8&MXW;Kn29Y2;l6m8z-F^F(?Z&R=jY0BIqjUH;-ltirjA+?R8mc>k>J?mShZNAqdHzwxh5!AEI+!qkL)bs~)=&^A zF90rY>v|)`9}_y*57{u$7SyaD0WhGk1J;otWhd-gT_@k1^S>i-Avcs+qYKeEsY=(; z)|2YsKQKK%*oc+?Rp3W8Ogfv~b&+KDX*DbVARW_h4b|!O-35>V90Lvh1*skWzcB!# zjo!-4g8&ViiT5C41FIPNF=?jxG&;-Voz->w#O)yW6cq?gWHbJYgiyO4G<%?nqKtI^{|M$>r z@}@t5uU?Zz4cX^pXFK{`9G{(KA*mbN25l$)1lhrq?3Gj^@*(Gf=;9cO%9*5IP5 zMs_W2iz5{@yg_3%{4J;nw=wA>bHP_Djvd?opA^&E!y_fKka-9DmMhSyQqOVkC@Y!s 
z?`xDWq@ET%p}-WE)_ryR{N)QA!Qe>$1v$KTBtc#(!9WU~z*~{A@UP1sF`oCH9>HFk zr5V*%wn|;iC$I3=*VLi6(^56W_cDmZ*T4p!eF6Y)}ymZ(5A-(eZw|lEuYQ)YS7X{&n z%SZ^B8W&Z)k!EV2WbPNRI$O}_BEa5mQJ0C^X(H?|zW5v^*O>?8ooFaqf=V$72}zq_ zhbDJ%tup0p)}*LkY-pk!DF#wbhV0y2m%K(+1_lsyE8n_lOjPgw5n`#iRve~YI_Qc* zt8S0Z!ktnv5+U}Z!Ndcw>04W0x&pTips!yy=VvRb02$fk{%#l(*ys-Wk7`926N)07 zDw2G2m*GA$DX~|`hRK?_&CKN;mmhDl=p$=lL4o$#702OnC^2<40I*lr~+@6Nj(Vg>6>4?3Ro|R%7)NWP;I8<$87G+E=5!SeCg_RA?hvK#K7AQkdCJ{ zDa5!fZFvLs42~%)YwM!=ggel|5Nx+EL@{l2o&`n7OEWQLe8jxI`6i7eP~QeZAM?Kw zTnL5V4o+<~F32(RmN2CfW!spOP9h%{ufP-_cS;`H+gA+UqUS(CQVgZSb=vs1q~m>gM23Kk2My11=IEb5rBKU0G;|dY|U^&cdZOwkNS8TMWK+59ziBcbduO zc5myvQnq7ZcGb`4dRh&O+mgPIIcN($UOB{lN!LLeF|pev(JQ#)Q2fGs8TiUrBm$-0 zZ(R?H_@seH^+rB|q4WmTqm4?uGzugUTct~iu&p%5b<@gGNa*)``lQ)(UnujVym6C0 z|HP1?6I>H!E^K(6VdV9m|GOVh-&lO!k-wIq##N%8I=o`$(5|Oo5ZYc3Dme}lYmg_W zmS%9cy=vc)mA!R~;1-|C{8a!*Qy50UaTsUIN8* z!+w4P=?I&hE9(`gMIVcSWk_hIPMa5QOmlQak<^V(kB;^;m5g=eYMY3s&jSn!IvSzY zF_X@_6Zuj6nYq{!F#un_b&39OP77gUyRGc~#BeSI)m0I<$bT-NeZt-D(V5Q;``FDa z&1Zks(!QFk{03WrLe8z*w?C0Ib88c+@fucOOS4VYUiX7cNs1D8v%?UT#EaTR23LUP zppagrryYiTjC!lvO%bo9kc)jp)uUB^|2g!Zz|}Ris?)+x7n02h$XRO^>yS{O)haKh zsBzJX`~XD=1l$vL>Kb$W4S1Jr#7P@Dw&LY^@2m-=qSg{G#=Slk3)k-bN>~d%{?c0zHyZNJ+nb=r5a+{cX?Gadshi(1=7G0-RtbqR>5 z7rS)hrPcrOTsYL(_NFuykoTc1yf&6k>qKja`D5UQrK6)`e;QLC_a6BJ4&l|t>MpKG z#)ZfuYkiWTi)O>4ff?`IRg+lXL19-tdE{Dg_g}mcXC>zuzNw;#{Ak3sR;oew--o?pEIhRL!hhy5I}p0qz{ zikBc=FQpQ}Jm~E**NJM3uyK0(&}q?g7-ntoeA$Q-c-dlw9P5afDw_|2Hb}A?u6Fu| zs$C1Ik&B9@?Y%#}8nJ@p|w-K}()40=)Te{3|EfBSIV1L#5xk<9gJ(Z@{@ zlg_{1uQ}n}j2*Jqa}=Uc)^QLtW#4|NC-0<>(7SL5!7#L?^_nAlLLD)60!@K<=<=5f z)kR2DE=?s5bZ^X<@OK|?$CzIyuTS z`#28eRyu5U;uXisgLv`$Ij{K($}d|#)3g-u?s2Dm#Go;rYm?n|pETYWR4Yx5NpBp~ zHvb62d$)rY7}f8H)6(V(8eQLbkgny=K^1(swa}F48+NZSLd{eeOo_z|?q4_1td14F zHNyO%0<<4)@>#;WP`~(G_J{X@ac96iR2_T~G2S_7XmA&9m7WeLkM!~4iwoOZkCes9 zG%pTv zC4mnaDJlDTT)P6i-B!o};SYCic<5$Y24SJ5 zs(EHlmHYJY06ymgWH zIrA1h?xD)t{Ne-MOy{aF`%GiSX#YGOe?Ej_f98D2{ZfR#7kA4Bjg`)wB7~ln_j`Sn 
zcX5lM3Kbhh5BQ%P*Xcej&gvS_{^dc>>%vKlvj2R3)v;7>X?)v1{f-Wej+xH)tt*r~ z6`M-wjyg))#fxK#OJUsjmNa)EHcwthX>q$9OH-IV1pCX!OXa`1ig!`MF~>ZQ~dt#A{j+x+bMT=3)xo;Jj7J|%I z*c5g8NEXf{xx+4sj~?zOUK{DxBE4=vUad&foy57_GGl*=vi+N4mHzNJCngZp^nuc* zSc&7$`)x|O`SBFJh;m6M(PN`42QeB^;Y-h33M2&+I|CqDWnp8(RZ1XQiF08up!Mz} zy!4H(!k0v&cn$Wq%l$$<6V+(0X&ZdK;TvC5F)`)lE@T!ZyCy}4ux8;Y*%0<{Nn$l;~y`m(^>q_;E#kV8NB>hygou#%^ss=qIzd6;* zc9`j_lFo$qgOo88F1I;Jw;5vxjAQDS43$U=ODz4ZYy0ZXm1|4CY9v&mBJ`z*VK*2n z+Tq|*_}o=`EMNsSx!1phQ34-Ge)s0fQ8)jQ-XT)@m!IX-sp)ETZ{ew{!@Z)8Mt6}I zG@UQO1|>#9)wcb6*ZtBp{ALc5OQm9l+4vsqv5E_?w3SNEqr$&GQp60$?Zh6PfjCkb z^B|CN^rDP%UG9;8k^Pq~1aXKbh0d{+QwvGD$IjwC8U}BQu&?tL+IfW)FYLTiznxn! zRoBrwFpux0yZlG+Lai(p^?=9|CVkv#Wa-?kLJ0k&e8O&GRVN<>y;iJZib;DCZ6HV*MR^%$-#qt`` z%etx)r5{@PBwT&&FPLh{nN@9L1lTr6_C8$DDbnubFrz~;ZAf}gm~^nvCak`-x>s~; z91veyf?N$^omgu4n7pWkDeKajH!YRv(}hLS0mK1~p3cVw@0D{OI5E9WW&z$zLfJQH z-1J@n7f7?@)i->L`LfF$MnWX%y(n$T3Hl#8i6Iio4ox28koOPrF#dd{E7Yn+Zw4*y zV*5h$kK+P*a2M4TJzU&w326JGUW-_j@ve}ar5K-f7NH|`wva}7^&t~WkAB(~>DBYsTXTodI;_A+Ey;b9RSUA501BBd_NA4%yFfqVxJ)}`%zRaWl>%uv*=;8)_ zQ>Wu9UM$E7L_aN}6K^W?~LfzH# z$*TcPYS=B&8OBnV*Sb~|vIRNS)4u5xq6U`fx+C9tqgSuSc&_y2>dUf&RMtz+D&tn-6MQ#RC3iL!2B*oH8QLV1&t^u5xI>_~NHw-St zSz1=I;$S>CR1t$ROy9?6qqu86nAk_wQ#!?-Xq+ywSJI76-wc?_s7y7Mlishu-n zq@-H#+G3PFZ+JW&d@_zR7XoT_uY5SUtBo&TEh?ZDi;{Z~XF%6lPD7OwNr?B&hEuGI z4utStkKA66dX$RMuM$oiL{Sw3xv)0v1&T^{D4vslu&29bUjlShSr8me;coPyt7C3!t>1zR+PgT{ZVdL5u7t$Zke)XaKOOKNAZS`9aZ z#Ew6zfBxMUClh&1zxbvnwtz*)^@^yw-pJ}|x1S!OmN&ocbRd%M2BuGjmo%D;MHdnB zOD?Ok+_l(akM^=+7CjzSbI8tfxtfnfx1$tYYK)5fOw6m0_I0R#B>9HvTIMv{#YGNPso?*u$gYke&x2CPo#jG*9F%+wbO7i{uO;!4JnEbtiInc;ez1 z2C?1WCACU11}Z<}&fo4ig2{WF2^|%L`)l;oKrp_7nRUs%8nLQAI%^t=5t`0OQo~>W zI8u1YtyMTr_1la`#_Pil&lzf;ye?gkL%@|3{3QFyqaZ>sT?wNbjZhDz33W)>i9KXs zqZo^>A|1Ve=89Vz)zB+IRxEfN(0a>e7$jLc&>holT;S3&{&wMjaDy=ATp5V^#N_kYd^OyD`c4@s7>)ZGW=XGuhh@|uQuM!M8O;r*tl!*maz>1AujbrEx zvng=U5$Ubq!sksP?kHuw6IRDdPSf8SBRl$-L*;pe7BoSq4dxaD%ma}{-72Ito?1z{ zSI@a0-Tzi|nSua-f4}4r-@oW3{iK!IreO>n-8a3y4<^{qQC@m7|I!@gp-9E3CGmJ+ 
zx>K`#>q-%82co`wG%pJ#a^g>+eGfh{e~{tqQ$Cf$nD6>k?vnJ|{-xq{v>%&v+bmok z1%6%kseU>&Xp)~>h;QofF`+11n=iSjOt(tVd-fXtrJ<`F7R7ywS~Nl4l>)@vpE7`v*b=-~?Y&Z&eKM$=GuVa4Sq&cAWPE)sx zjr;JL8nramG@4&nC}c7 zZ+r`m$^=}(2)pH7&&3_)ZPqQvu05O~`QkI&8CW#BT!}=5GF{HpO#SpsiIZ@t1#d1O zXVS6hrRRaZHk(Cqk)sVQq@6&UfTbnk#kV)q=uBiRu~y z8YAgNF2#)T44g)7UE|WbtzPF3yc5Z3*z+JNO<@L-m3OhogdSg`(!_z&1rtMCXDulp zcWlZ?ALpuZY1@^UD62fN5z9jDi=}Y+mljKB29oN`u0rr5ZV_)Gxc1sy+Yv+Bl z25lHwSWvC6YQ+bW;Z7DfrM@f0?9DJDK53@fjL<~)>JZpMBnI)N3m-n&h#Aj@uqNEO zFk_;{GIFbhQ!IT~j~{PN;7R<$2C5Y=lTEZO9x+%g9VmO|1q8qV~+k zWa|m0z}#z<`qpC%y|s1yr2=?s0q(bd#It^Mg^>}i4ij6p5zb6okY^pDri+gxr^Nk1 z7wY(a8^T3OwL+bt?bK};Y?b1A4j-X0Ux@z!RIk1L>7A_G5asIE>~&tZLAp?z7wGn% z<^4fx+K7z-ZImg|trXJm1;g{cZCp=fsb;@@|K3wsh{HwRK#qz$g@~d$&<`EvoE=IR zsYB!_vH;Z|zZzs+F8@B=Y3C;>c%zOlb?UmS0h_43t^Be2>y9Q$Q#lYjQv6myrhj;ldiwLT`Q1g*r|S?AnTd z>z3}&t_7cQZlRF%!*cB303MB3t|F`oi0=I#b*&&yt`K2$omWxaOgQsrc-h$9iv)+X zq7Y~MTRX-Qk1qS`@@KmnkA2N3L<^cv^FlNpHJM!Fb3QtEpdpr(E)@wa5wCMj{oN`O z$EU(G5Dgmfn~VP@11Xq!FKfj<_bgkg$#8MeEg^7`>gfZ!XH%S|p&|XPwRZLo6Gxk$ zuV23&qS5;!kN^AJoLjvSSN<28qybwC^>q7mP43=1vbc|v3;8kJgJD_P9eZ^`d^arW z4Bzm@1Gn^zDrU~)z+E+K0pV8tEm;_0MsvtolZ3StvcjR%JJ@Qt_9wxFJ?{4lzuRB7 zFW|n$eYorg2xzeqr302gLgs6OtGe5OT;hipf@1BMRn}yVf21{@+%7C%am9MZ(mZvr zmdiL9)E%qsNeaL6G$+D-Zwmxh@X(};!Cje` zPR*KyJHTjHJ{&v$>_KZ4$;$%nfUnvvz!q)?q*N_=wi><0z1vB$dTG&61u5Uk#>T$i zZRszL*r#Hs!aUnJOU{&nJ=#!Gr8gtX&3&iyEs68u=n+v4x>w4drhyVaKVUptp0qx&7J|$1lq)lW zml^!{;?CF6S1LaN^Mu`h6pX-eP$!o_JUjBCYX0p-dQyCj>+~}NT1NRlnKDNSGC!g| z+pSR~D05Q&l=A&pfd|8D*JynDSgYkoxAvmu-#+lVZCv=jKLsHD^m+!zwHRB<535gC z?z`092+hBVt4K&SZ@D2$$4vd6=dCtg~`V&&s2(2-55Y- zG(TU!rE?y(GleQX6W|Y0{oZwU&cTo$4mHt@bL+>?sTa?}a{$7hS~-Oo*AknobMu7O zMl2{Y?Z9yW)<3$Hv>o;G3kWiuoCFB;(in#z-w<5rIG$VKugX8FRIc}%2WwAd*b`h8 z2=liC-cjtJqR!J3<3)TtY$n<)u)1zEEMp%OgrJK3(Ae=(Sf2Z8^up*4PtzBs&}6%- zDD^>VDjaoN;uu3ac+KBVP~X}ywColKQ2JidxdO&(5Q0{tHK(DT?t1OQ-B**&O>5bZ zcp2&tt-wL{tA-C4KWatk{%ljTdnqG{_Tp2pK>zpSyws3Hcd63UxWp-+>OWJFIT{%w 
z9t2FkwAu`=p6Sxc$h94rn-e+XkD)KAumS$9$J1#98}&!Md>Dp`+FUEUOq=)l=U+!P zZ`$s!wo(#0+}Cctwd{8p*Pow__>-)R%FpY!UpHa3{r>uleH#qyF{u|r<%XCg(ywKH z#JkBaEq_=zmd{*lvnZj{89rP$LQ_ zK5P3xD`tq|p3tmzQ}&XyPV^o*Q%vtdvW?rby;qQ!F=+El<*It(k$*SPsb++2oCZB1 zmpGqMIOW(&r$UbK#9RMHu~j(47p2Fn)gFTkZHGVi{#t>hUQk%Kl_1@)lJ2bQt%Jp@ zO+htQjg$YA5yiO53krYzp35eBFed{ zPGiway3LK)9eJSTaG(hu-9n`G#lN7?OB;*`uFlIU)F#A6b;YA}Styw5;O*v-2)E{G zEKSG=4ct?z7V$rp`FEC=Gz;M+uDj2SuA2|^YQWB(mqXV;fC(RwKTAI+o%wNrMUv_- z`5B0sA+&$}9q+GvtGksul0p%nE@>mN;Cn9@^nX_O#0@p7j986^sBGle-@xD75&uxlSS^D zbop>)>}~p8t1B^SCgE+2*Ao64Pqs^PI9joPO}8?AZgbqpHGc2luXG&9M96K4OyGur zxF@&W=g%Q4OG|Ab4}`Gkrmr-xh~ObS`Gx7a8>vu8+%~XH3$yl)ow()W-t_T)2s%6U2z7~63)4)S0iAySiUI10yp{Q1>yF+T($&ScR(vhqrw?C4~ zeGYTOfs>-Hcr}xUlRUatnst2S#}DDJi7}(L5X|*Hk?09uW;wB7fZ&f}(4E}WLwVW}m^AcNkl&El0b_WNYbn>Iu zV1q_UENo$sOg7daj62&RKik4-w{S8+ezx#EzHVwx?}}XM9ChwA{P6P^J{44j$_u>& zWpfg@F3B+D^<$BF^ob+O*s$sK|3osMBb`kNX^8QR@-3QIKH7*$n})LBGm%B3LUZ97 z?X~shxemB+@WVQoaQk0Cw`$}QmAdr~1Y+fijBq#Z4qV4s_qP+|TjnV{kXO*{z~(o9 ze)}4u-~hb?i1mHRxe#a=NjFklj_eMHUTw_Q8nglep315Uw7;t~ zmMHaQ-jiwJL4Yv0>-5Xn5a64S(|I^4gr%fl-LQdKBUxBl+EqBCh%T8E4}V#>4n!S` zmQ9N{QkMc_w&1PtWtV=_T&8bq}Kh!N!F^(0lZ5E2=feA8tXvAf*-ir4I0(wtL zLF@~{Y~-KcRQd=gul?6FWC1C;@Int0jsISbP*Qw3n*&%Tu`FG_>%5hORT53>-*%2W zE**J*;=5v=!K;8b@4$o}EDRgVH%im*=5K4Z&&?GKFX@yF(`mi#Lq2h_X86J`!9a3& zhg=1+^h6@UsE+FM8zOY^jXx&nLPMOcO@}qL!rpZM%nZsHeSeM&(ROTf@pw3=&vVeI z_N?q~wJp5@nMV;|@(a}2 z7~~K%gy8I1u;49y-_ercEc^R( z7pf<;CxI!jwUhnDOZ+&5AKFahKGM?DceE7dYd@qLaj04HxnRpG2}+RCq1Y6RW-QWA zDYoCCM$jz%v09;2u8}^Ht#-KNhG0p)NTJ&K2qoA|;L7qy0gc_C1Uzc(vpKdpDM!9d zYNqqeJB^P43Tcwl)uq}tU0H$85utQiloz$he4Yj5LXtJHnXOV2{5MlUGx z`;}IljjvbXN(zTDZbEklHtE;kP{4Y2~ zPD+LWfs>DTCTe#V>_?%DQ^+?E%Fr?k4Pe+ z2bH?(Wmodwc*N+jl7jif%M46?PcS4y@z{QOV?!LCcI{F<6NGj^llqPn2qP(v@MboP z{DgxE_QKQexoj#y=TcG>msD z^Jeme>zKxh^1ebmG$AEQFi15PQCP4Z8=;aYRL1=r_NkmUq*AVUThEIOs z2|!YoUzRc5Ur6#^{8ZYejU2W;TTmhJEn?&2TV)<+ul8A?IF4TDtT7EWVCbhN@+rG3876u=5;SkVyBV~^3tc-*>L-e 
zXA*&W6-@MpU|&u&@&wS`KRf2#aD%MP!JrjS30cPVLJ)qjT;8{|jfP40eWs*%NbR%K zvoWtJiLNKrEo+zMxyqBBDvRl_UsK>3A4PCMFhIsU>aqnOAyE8AgT-ly@z$xZkmG%e zya9*gfqjv0$*(CuU1{91#n`bIv;?=>FhuZzJ5LH?H7g{p{?rd0E70($EF1$k^VB^^;SGiOWNuvoN>?Q0Yo?0>y$$! zaw+;-ve}c@^kautA=&w6RfVS@9zdw0eMF^hR#%k23h-eO9;5 zo8HjpOg3qi;?5gK8I^uLce&YPd*LHI3Rpw80n)E|U@0(Mgy?B*mu<-I6_7>_CsSHE zxC$K@w=7MQcd%R8K}FyTZhZfQl_``#Sk<|R9!=p46>@uQ zL{Uk5swuXTd}n@-2>MbGPM+WwW}3?FTU|Y10zO#v$W0IiZ28vU0{1eR5_{|)zzCZR zG1>Yt?(DMM23lh&qSioeKesjS;076?IyYIthsEE{u340rYga_IpKIc35TSUoIUh8K z$i6{B%L_!nLEEa&sLWAR;!h6=ooO~Qaj;NpOr8J zP3bUeZtDlo(eSP%sdXOa?3scWf&%IXe0w*_fRy=tzCm;jmx=>L46YW>$?@J+3$azj zEv3@l+11t4a}hBEDPVA&>{n@j(*ne8^>qLL!}l}0`V6rm-vg4BV0MrT4S?H_@5=}M z1v<-4OK-GV;rMn7bZow!d7}TA1G0s7ClQzZD;(Iz4OL-f$+CUFFVd2EE*n?z;lgMD zB%}UITbZYkWuy640ZSb07YOCNe-b~a#1X0cL--0{&}y>pT-Dj}{VtxvjKY=PoVMi0 zH3vJfZFWMI*5`ii3TaX~dKvgdIkJHmN?VBwUP`596sW_8t+v1}1PaiO?Bu}|^!47` z^ep~v_Eo`XkF!_8um-l$5izbgwB4cTe94}n^BjDV;~h$|M2Ih(8xU%RUJRW)T%uml z#cI@g)5aIn&RSso%530x%Zlc2A#w4ji!QJMvB!)|AVlcgC^&6*>{apt@mizRQCT-+ zdn?z4+49GP!lPeI^z`#cwrxu2b0iU*tp50ISR^+w?b^Im&>!*b(%hD%e}RztBRfzr z7;_4OJ7_j^qMZDf4HfqMTU9>9BM6#>ByBaLC;7tFmhA-WuhUm)JVZD_{&0qE{Sq`w!O6X zbENkPg_k>-Ru3;!WDy;=P9l&AK^O2^uCrG7ZqRFr5Cvwscq6pN8bV)rpWQcN z$hJh**%BnFL0JQFg@n{P%A;wLV; zxQJz`tEu^~MMn}`KAA^EyJJ#3ofrjOiarWgiU1j=JGw6v9-X`I{zUVENoymTX{WTT zOq;S4h#0$qSFT>|a8(yk@$(Ze*~E^lj+s@$Yymy0dC@2e@xk^Aq(2qkp}i;au}F^B znvn)(Kp8rg79V}VBHOkKj7GhyE*1z#L}0_AE-sE}xGTZ<0&P;NUWw>83h|Tw3x4Lo zwLd#MGp0yW2NmyV9)xcF5DE4m(N(R7%B)#KrxIrxxC|9`Hpx26h1(8dBg~T7GT9hNz=`+-eg5U%qxg6j|7ozZemtc*G?|}I67KNyt2(lb5fUpm zF|eTT%QLZ8sF#sOP#DQ!lh;(^JxtvFQuK@vZX&%BZx$en0aerTPU3LBu3!39o~ zHA83IO^nJPoh$`|oF=1O`#b^*N_ek8$7umiKqUfaU%01rIWa_V;us9-V`2?}%zRtL ztz5f?=$9l!zE~Pnrg`;?rY8E?YSZU*6h@ux?KpHPZeo4qnC>|N8uY&va@T9o==+u5 zR!|3`S4h;SG2egf@`PNTKBYaq3thb09Wi<{3HI@`_D@fqTo;GLuYrTcBinu+jE*1y zOCOA?1qMg2!|)m}Op!8+C!%B4Mo0t6Rtz(=i~$EU7uCSvpn9Pp^uuBn`NPY@%?ZoF z%I_q!RI|dh24lgB3f)M>T&L!wr@(YUd(=ZAP%;ccpqfZF7nPEXze6P^ezGhVpp<~A 
zS%V$+66=Y8Zd#N*dz9-)I@8L8!C(GoVKpF=M4n-)oF^iT{q!Khmp#-t3*V zSj`?Sf%J~qnb&rhXp<-k2671xFRXvttXKroTet-UF2*ga;KwoycfP!8(rO8Hb%kl! z!pi=$@_-gAU`TxYnH^qUlCcQ`rYX@V%)MV^3brAT_+<@ncF6cwX+K@Ky#PC?5Ba6L zb;P7KR(a4vV)dRB$28({-r#(sQVddTMFk%>-R2f}OF$)&2(UIyCFLrTtc7`Bm>S9D ziWsALPh|oU=uF7Ne!hCvJ4qUywpBBca~y0^%uEUMt@CNb*ri{w4G0*Y-vYr^kR^(Y z`U%Rj*$^d^iHNpY&;G2#EMiN`~ms@bAVd4kOm!)mJiH9|7xJNj-!Qir_EY zI4-GxIZ8NL!4)8W%vc7swavz*-1GGzUy!k0fuii#qA zn`D_1+|#I8?Vw0^647ea0KZ&vrD#qI_y*2o0uTBrvxfxq9~EpDva2s8bhlVl`leHw zCv4})2U`tIqNam`5eUQ|lfale!caw74RS%c1@d zg5H#^n!#=9YUr>vYimfn#-A0X7@eAW<%|sb_~`N$r~;)PPhu3KK@Qyiba&IwOxIWS zbFn440laTIrHnanDSDon!-%fF6At6}eNULwYUFmzDj@nT<9>mSB4N!~KMR0f;WX?< z`xOqo=(2TQ{iRj>%@e5JZ}~$vo&0#H^AUN6tBx#gd#HHvsTT_9%?+S{vP_l7Zol72 zVHXt(PNV?F-F1aeudWtp>9iunh*cahYNDgAd*xo(e47)e6@LZvuU*~b>7Sfl(JmRT zMh-&1wB23cRb(ai^xoc{Wm_3{p5^bC1qdAW(yCrAybI7olB{IZ$^mL((`5Efm3tom zCIZbY&g7FDH?grAhDjRiowm8>SM|XLK#6+>bzpy8q}YJl$y2uhbgnS6=)e)_{MGxV zl6i6Kv?>}n+8fuE`jN&dcY&facB^0+wS{ACc6>R=grRx17Z{m$glHCSND+wN24XWY z-wK71Pp1=FgT4JE-Z>K&+&(Sh;f;ffTV)OoEj?o1goYAcc(e7RsTc{O&kgzlA;$sG zW=}nqLhNkgn-d|8YB4v~+VlQOr z6_=Lg8zp)Ixb7eBRpyTZ*U9&pFz0}v)Y$IP%#@oLYIj$Z_dM|G0XX_G(UC{B2v?KB zwh*@M5>K!dEQYN`I*z>wosrv>ocnjk0A-8iF3@Bn*ttK4Qbhq@+2;hEv>{%7U{ zp?ActpTW<(2*sDZa7NBBSt&}4$=7}DAW^Y|Z09-OUibp-7mCqX93X;eWH26|d*&&B0v~&$$+Khq_t+|b{ z>ZN7<{dN+X2esoj0UD|#hLD>EI`*sP1Q~#k!bEbLw)i&LH>cs}y1fVf#!JY+<NXlf?jLEi)gu$=%^~h*3UougaT6dLSf!Om&2h7fPXq7TNR=Z;&1hL4g zq1mdE-PO{r#sVVvF->!73h72RWZl3|51W4gO;f6k`5a(T{Doud59ArFxSzQx8e~@e zfr^IsiD^dde9`XuA46Y^^? za){)&V-+KHt9OSY0d9BA519aSSJn8^XU4{!-5j+kPDK>c>N!fUf`SC)*&A1;2dqn)Yv-qBk*?PDl+2>9W$4%gG;Uh$Ddssrh9X zKsL3O59FphJ8@kJzYs6{zW?>Z_K&b|Y)a6}{7Us+91V3vP)=}whsS5zs_>3kJa>0! 
zp#sIVgF4k&582^EiWde=h9?6QNyMI)HKga#6PHfOIQ7H|hrZ=_f$+g&;auPaB*n}`;-CuaEfKomr8jOz`Wo?xz9=wO!&F3kkkWK% z?c^K0_#|BdIE< zfV_L92q=YmAYe{x8VQU#%?9igl`1giY85|HgQSMqI}Gzu!vE45T!so&X`wO3T3?@3 zDH$+SQiE4R(iA-D~4F*9t6+5i&m|Ab34ZorCMkRq}z89tM>{KbK4MU)i_Rt;twd( z!6cM=fn7JE`XcqRzT6VFVqkGCG~kv<18@1VjzJb9*h-uIjcb$mV}PIp<)dD+D{ut! z4SF1EL|x(>Z>YR@{{4aClZq+AQc^f01)GHwWxpRwW4D@$**nxSPte|lnlXkLmF3^{ zbRR;Ef!t)$N<>3TOFmI9ZinEvtAyf(a5JDKolOovPO{0ar-y3pNOYu@Q)Fg0W=^tid)yn3EU zSuCq~HHp*C%S)Ipo{l&Y5Rl>s-bLe+hq^a2iSOpS+DFgF^t5%8w`Q@C&qTpZzTR&mBN1_N!St+wH{z2; z2*FSljhI^e9iyw&K*fd4CXz_FX;VnuPaT#`GVAO?$FX#&S`gB%HQ)44*E%kn)^QmJ zAv<{3PWedX_|R`?jj`zx#7Z&fKeAe1TeGh_NlB72p%{Zvc(nCeTB@=PKnmm4ZUb>$ z;3E0^D`DDHj0}1c!^1VXY}B;3leR}3`P#QXF8&5;-^i{-sl9^(UK@uNiG6~|8OUb1 ze~)r(7UWXKVB-WqX=o~swmTJ31f8n3RpmGLx^K1_AW*#K+daRRDom5f20T|fh;97wAk5yd!6jOB%IkZWW3R2?5|^^ zx=ssRJl9_NlEK5nW3zGPRajB&n>TD~TO&JOF7lRpUr1vJpn-w>}FO0qvDrm(%+^ zeJ&Q9feID`v}BmN<3SN2Hjpbv@0%kv<;e=(JA0!HEE8>nK$q%e|6b^jqCDJx{1>l} zvgb$eRqP2%N}@q*$Odvr+lUJhX=!ObT1()gp{hO&sjYdG4`U*sUJ0e=GH~84xv7{( zdS)%%>_}_Xk6F!2rV>(o>|Qq6Jrw&rP03CRsQhK zx42g2!Gp34EEF*cf3;-sk0VfK8M*jPP_XVj8grWh{L|BiDA1g`mou~go_X%AXosif zjN6oscc8r32G~hcn!yHvb?Qt#be&)XqL?`}#LH@f?1jzY7iUMZh*4 zjqmUM6mKK+0lY}%$kX}mgpQpQ!Cnl81`bkB$V<0G?u8$=Rqt*l$*&$VXF^9CY#*g5 zVjwO1!Tf?qDc?XW>!!Vk1h&n_|TyR83qMfz+GB?RrbS|5yKMCJ0+n3IAJ>VhH9vJ?@z*8GsDU6L;=A^a|j)1ni{xY}W~+_7H?S%)n^g%9h`DxA!N(}^X@oai zCH9X?zyXSw1B6>ueL=~khJixT!rUBkkdH{OL@99eq-Tg2UGtsSZuako-{}qQU*Wv_picnUzqbGgd-oV)2UiHXI?L}+iI#x5)?nYndFi|SwG_Yvxzc`m zl7+%G+K*mF*udc2cZHl#F@v9qh`Si{o50Bm93103%ih-2E&fI?zcNudS!w4}fVoy+ zX2wIX?(r@19KquUAA=3U#W)Y5X-bf&gay2KN9dXAG+0ddPXZs~8&L6pQdOd#$8_lu zwPQHt>e7;hY~0HxUlNv)#%VAbFbsf#)LrvCqx-F>4sxOhn+NrWV8WQ8eewf;QC^uD zMtWuFaE6CPpG!ad-qWKYLN^JDsy|cTlc#JIjPJ^-qA{Lyu!Zxae6$um!aiT2cdx${ z6cjY}7h1rj>m;<{<%`dmn3y7GI4ELlga~R~ot+gw664}}tlN{nF#Pxw)@Ve}Q@$j-EMV+{f}k)@2+8TF+1g;SMBD73f?yH-~}a1y>pf zwC+WE%fwpDy?BkiC4lRW0XJ?`NDY+W zUr6|#4uY%&TT)N;ZN=L4z~WvM2-(Fl(tVxaeYc)S%9;+08MEa>wxU=->;6HZpsFzE 
zkE9=P6Du@t-?9m)ztlyhtJlGHvScZcTPn%rk>cV&V;OpqB_--rnV`7|Wj&ag*n{+} zs`0~nGl8nT)Ct|QffCc0=3iKG>WXn`iVtD)3^BJ6o535#vNaz;Y47J6`W3=FI5>uROI zr+|#er8ai=>PYB*%PHerF=4+VYqq~vwb(FfFi{dxLS4I@=VBX zbxW`kI$N(K;F{5kRWNtxku)sAF>r&(fRBggfv-60aRAEEDcmV#05gY8M}P|{CTB7a z6%mX!7`Xn06=!ysa2;=7KE~NHe&htP#GwYwb_v}BfW@K0vtT?(TnfZ3OHB-BP`Gv@ zg#Kuo>e7&Kv_y%~dxFs=ITseW>- zk^~D>`Qv&nu(&0W&kjPEWa%I2zBM zML<0OH$Y`Jq#>UsT2LBNoexkEw|wY=J0jEG{hE zepCG4Gr_S~9{BNt;RcD8n1sali3z^9BA^vp;P+|PLtzoTpu<1KjNJ=SP|$e+INEu+ zIYZH~^Fk4gB~Ro04|j2XJvU3{fQ3!2DXQE2U7eCw`i=T?lt0bC{y`D;KaW&tWCZ8nwh@bDVti)w0&HO!zx z)czBuAljhoKI>*nl&#RrmL_KSyn`p_u0*Cp`5w#A+3wS?{=GUS2@cS|>7PRAEV^SHQl-V=?uA^2@;k=yk7x(+3z7kyFDqFizNqYs}hnijce z$!F0B@Q}6Kv6ZAuvUHbG__vltF)15?(+5+aS=!X#24Kx%+$)`i@igl%IFmG9R9wy) zcv)+3=aH>UkxfugE;63%Bu|X~+KU34h;?edO)x#BI0kotVE+AHun^m;OnqT{c^ac6 zin%=i!>_dL4!(^T@u8`%bGig4nNE1G->sv=>Ej38QHa@d*7eZ*OqFl3!p;dmuC-J? zc>PBMUu83e@N>lKxNBwC<#hwY-3G*<{4qw=v>6-;>hs>)JlloSf-Q9Ow<~B_yq5hS zyTvGv&py~%1Rz*9xWul52*~o_UyzLIx;%QT>(lh#pMWF#2}+d!2Dy4v{C__}8~qNB zZ?Ki`n0?#PZsiu;A0J2tEDfs3n}{j zzYf}|TQ@<}3WKcv_cu3rz`n?TCIlbgM}8yM%wYH<50-XH|6ZPyVJk=@VbIaGey!Gr z%Vq#}bOgXMwiv?x5&bh&Ut($_2qlfo!I(gnSs%U(2=>KxT(Ex>oJIz2by0)ulkYB7 zE0la`Ov|kY>rgTG=0O-Yv$~H3sQintmgr?t?C<&aqc9`>liTtn!eBoV_Gjpd|MQ=`sX)YT%-+DrjbC{QwIfF9R;Kdue{05@7n!QR7FcSs96@a*0phBF-U1jH zY^s}9*8i;>NFDH6CvC*Ave|wP;BWul zot-KMTv@0svj^OgS-Rl(mqp7N=@=D1j^#1{CPO|<0 literal 0 HcmV?d00001 diff --git a/modules/microsite/docs/dev/adr/process-files.puml b/modules/microsite/docs/dev/adr/process-files.puml new file mode 100644 index 00000000..2c5330cd --- /dev/null +++ b/modules/microsite/docs/dev/adr/process-files.puml @@ -0,0 +1,43 @@ +@startuml +scale 1200 width +title: Processing Files +skinparam monochrome true +skinparam backgroundColor white +skinparam rectangle { + roundCorner<> 25 + roundCorner<> 5 +} +rectangle Input <> { + file "html" + file "plaintext" + file "image" + file "msoffice" + file "rtf" + file "odf" + file "pdf" +} + +node toBoth [ + PDF + TXT +] +node toPdf [ 
+ PDF +] +node toTxt [ + TXT +] + +image --> toBoth: +html --> toPdf: +toPdf --> toTxt:[pdfbox] +plaintext --> html:[flexmark] +msoffice --> toPdf: +msoffice --> toTxt:[poi] +rtf --> toTxt:[jdk] +rtf --> toPdf: +odf --> toTxt:[tika] +odf --> toPdf: +pdf --> toTxt: +pdf --> toTxt:[pdfbox] +plaintext -> toTxt:[identity] +@enduml diff --git a/project/Dependencies.scala b/project/Dependencies.scala index c94f29d5..438060d2 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -36,15 +36,20 @@ object Dependencies { val ViewerJSVersion = "0.5.8" + val jclOverSlf4j = Seq( + "org.slf4j" % "jcl-over-slf4j" % Slf4jVersion + ) + val julOverSlf4j = Seq( + "org.slf4j" % "jul-to-slf4j" % Slf4jVersion + ) + val poi = Seq( "org.apache.poi" % "poi" % PoiVersion, "org.apache.poi" % "poi-ooxml" % PoiVersion, - "org.slf4j" % "slf4j-log4j12" % Slf4jVersion, - "org.slf4j" % "slf4j-jcl" % Slf4jVersion + "org.apache.poi" % "poi-scratchpad" % PoiVersion, ).map(_.excludeAll( - ExclusionRule("commons-logging"), - ExclusionRule("log4j") - )) + ExclusionRule("commons-logging") + )) ++ jclOverSlf4j // https://github.com/vsch/flexmark-java // BSD 2-Clause @@ -57,18 +62,17 @@ object Dependencies { ExclusionRule("hamcrest-core") )) - val twelvemonkeys = Seq( - "com.twelvemonkeys.imageio" % "imageio-jpeg" % "3.5", - "com.twelvemonkeys.imageio" % "imageio-tiff" % "3.5" - ) + // val twelvemonkeys = Seq( + // "com.twelvemonkeys.imageio" % "imageio-jpeg" % "3.5", + // "com.twelvemonkeys.imageio" % "imageio-tiff" % "3.5" + // ) val pdfbox = Seq( - "org.apache.pdfbox" % "pdfbox" % PdfboxVersion excludeAll( - ExclusionRule("commons-logging"), - ExclusionRule("org.bouncycastle") - ), - "org.slf4j" % "slf4j-jcl" % Slf4jVersion - ) + "org.apache.pdfbox" % "pdfbox" % PdfboxVersion excludeAll ( + ExclusionRule("org.bouncycastle"), + ExclusionRule("commons-logging") + ) + ) ++ jclOverSlf4j val emil = Seq( "com.github.eikek" %% "emil-common" % EmilVersion, @@ -100,6 +104,12 @@ object 
Dependencies { val tika = Seq( "org.apache.tika" % "tika-core" % TikaVersion ) + val commonsIO = Seq( + "commons-io" % "commons-io" % "2.6" + ) + val tikaParser = Seq( + "org.apache.tika" % "tika-parsers" % TikaVersion + ) val bcrypt = Seq( "org.mindrot" % "jbcrypt" % BcryptVersion