From bd20165d1a4db86e08f68e3d2849536a864e767b Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sat, 18 Jul 2020 23:04:01 +0200 Subject: [PATCH 1/4] Use given folder-id when adding initial fts docs --- .../main/scala/docspell/joex/process/TextExtraction.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index 384741e2..bc048467 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -36,7 +36,7 @@ object TextExtraction { idxItem = TextData.item( item.item.id, ctx.args.meta.collective, - None, //folder + ctx.args.meta.folderId, item.item.name.some, None ) @@ -47,7 +47,7 @@ object TextExtraction { } def extractTextIfEmpty[F[_]: Sync: ContextShift]( - ctx: Context[F, _], + ctx: Context[F, ProcessItemArgs], cfg: ExtractConfig, lang: Language, collective: Ident, @@ -60,7 +60,7 @@ object TextExtraction { item.item.id, ra.id, collective, - None, //folder + ctx.args.meta.folderId, lang, ra.name, rm.content From da68405f9b89055679d0857e99760fd711533b5f Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sat, 18 Jul 2020 23:04:46 +0200 Subject: [PATCH 2/4] Extract meta data from pdfs using pdfbox --- .../scala/docspell/extract/PdfExtract.scala | 2 +- .../docspell/extract/pdfbox/PdfMetaData.scala | 31 +++++++++++ .../extract/pdfbox/PdfboxExtract.scala | 48 ++++++++++++++++-- .../extract/pdfbox/PdfMetaDataTest.scala | 22 ++++++++ .../extract/pdfbox/PdfboxExtractTest.scala | 19 +++++-- modules/files/src/test/resources/keywords.pdf | Bin 0 -> 47863 bytes 6 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala create mode 100644 modules/extract/src/test/scala/docspell/extract/pdfbox/PdfMetaDataTest.scala create mode 100644 modules/files/src/test/resources/keywords.pdf diff --git a/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala b/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala index 6d2d4a7b..839b0261 100644 --- a/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala +++ b/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala @@ -37,7 +37,7 @@ object PdfExtract { for { pdfboxRes <- logger.debug("Trying to strip text from pdf using pdfbox.") *> PdfboxExtract - .get[F](in) + .getText[F](in) res <- pdfboxRes.fold( ex => logger.info( diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala new file mode 100644 index 00000000..7cff3b6c --- /dev/null +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala @@ -0,0 +1,31 @@ +package docspell.extract.pdfbox + +import docspell.common.Timestamp + +final case class PdfMetaData( + title: Option[String], + author: Option[String], + subject: Option[String], + keywords: Option[String], + creator: Option[String], + creationDate: Option[Timestamp] +) { + + def isEmpty: Boolean = + title.isEmpty && + author.isEmpty && + subject.isEmpty && + keywords.isEmpty && + creator.isEmpty && + creationDate.isEmpty + + def nonEmpty: Boolean = + !isEmpty + + def keywordList: List[String] = + keywords.map(kws => kws.split("[,;]\\s*").toList).getOrElse(Nil) +} + +object PdfMetaData { + val empty = PdfMetaData(None, None, None, None, None, None) +} diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala index d44e2af7..233d7c31 100644 --- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala @@ -13,18 +13,33 @@ import docspell.extract.internal.Text import org.apache.pdfbox.pdmodel.PDDocument import org.apache.pdfbox.text.PDFTextStripper +import docspell.common.Timestamp object PdfboxExtract { - def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = + def getTextAndMetaData[F[_]: Sync]( + data: Stream[F, Byte] + ): F[Either[Throwable, (Text, PdfMetaData)]] = + data.compile + .to(Array) + .map(bytes => + Using(PDDocument.load(bytes)) { doc => + for { + txt <- readText(doc) + md <- readMetaData(doc) + } yield (txt, md) + }.toEither.flatten + ) + + def getText[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile .to(Array) .map(bytes => Using(PDDocument.load(bytes))(readText).toEither.flatten) - def get(is: InputStream): Either[Throwable, Text] = + def getText(is: InputStream): Either[Throwable, Text] = Using(PDDocument.load(is))(readText).toEither.flatten - def get(inFile: Path): Either[Throwable, Text] = + def getText(inFile: Path): Either[Throwable, Text] = Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten private def readText(doc: PDDocument): Either[Throwable, Text] = @@ -34,4 +49,31 @@ object PdfboxExtract { stripper.setLineSeparator("\n") Text(Option(stripper.getText(doc))) }.toEither + + def getMetaData[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, PdfMetaData]] = + data.compile + .to(Array) + .map(bytes => Using(PDDocument.load(bytes))(readMetaData).toEither.flatten) + + def getMetaData(is: InputStream): Either[Throwable, PdfMetaData] = + Using(PDDocument.load(is))(readMetaData).toEither.flatten + + def getMetaData(inFile: Path): Either[Throwable, PdfMetaData] = + Using(PDDocument.load(inFile.toFile))(readMetaData).toEither.flatten + + private def readMetaData(doc: PDDocument): Either[Throwable, PdfMetaData] = + Try { + def mkValue(s: String) = + Option(s).map(_.trim).filter(_.nonEmpty) + + val info = doc.getDocumentInformation + PdfMetaData( + mkValue(info.getTitle), + mkValue(info.getAuthor), + mkValue(info.getSubject), + mkValue(info.getKeywords), + mkValue(info.getCreator), + Option(info.getCreationDate).map(c => Timestamp(c.toInstant)) + ) + }.toEither } diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfMetaDataTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfMetaDataTest.scala new file mode 100644 index 00000000..b3cfb12d --- /dev/null +++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfMetaDataTest.scala @@ -0,0 +1,22 @@ +package docspell.extract.pdfbox + +import minitest.SimpleTestSuite + +object PdfMetaDataTest extends SimpleTestSuite { + + test("split keywords on comma") { + val md = PdfMetaData.empty.copy(keywords = Some("a,b, c")) + assertEquals(md.keywordList, List("a", "b", "c")) + } + + test("split keywords on semicolon") { + val md = PdfMetaData.empty.copy(keywords = Some("a; b;c")) + assertEquals(md.keywordList, List("a", "b", "c")) + } + + test("split keywords on comma and semicolon") { + val md = PdfMetaData.empty.copy(keywords = Some("a, b; c")) + assertEquals(md.keywordList, List("a", "b", "c")) + } + +} diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala index 1f436b25..b72b182a 100644 --- a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala +++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala @@ -17,7 +17,7 @@ object PdfboxExtractTest extends SimpleTestSuite { textPDFs.foreach { case (file, txt) => val url = file.toJavaUrl.fold(sys.error, identity) - val str = PdfboxExtract.get(url.openStream()).fold(throw _, identity) + val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity) val received = removeFormatting(str.value) val expect = removeFormatting(txt) assertEquals(received, expect) @@ -28,7 +28,7 @@ object PdfboxExtractTest extends SimpleTestSuite { textPDFs.foreach { case (file, txt) => val data = file.readURL[IO](8192, blocker) - val str = PdfboxExtract.get(data).unsafeRunSync().fold(throw _, identity) + val str = PdfboxExtract.getText(data).unsafeRunSync().fold(throw _, identity) val received = removeFormatting(str.value) val expect = removeFormatting(txt) assertEquals(received, expect) @@ -38,11 +38,24 @@ object PdfboxExtractTest extends SimpleTestSuite { test("extract text from image PDFs") { val url = ExampleFiles.scanner_pdf13_pdf.toJavaUrl.fold(sys.error, identity) - val str = PdfboxExtract.get(url.openStream()).fold(throw _, identity) + val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity) assertEquals(str.value, "") } + test("extract metadata from pdf") { + val url = ExampleFiles.keywords_pdf.toJavaUrl.fold(sys.error, identity) + val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity) + assert(str.value.startsWith("Keywords in PDF")) + val md = PdfboxExtract.getMetaData(url.openStream()).fold(throw _, identity) + assertEquals(md.author, Some("E.K.")) + assertEquals(md.title, Some("Keywords in PDF")) + assertEquals(md.subject, Some("This is a subject")) + assertEquals(md.keywordList, List("Test", "Keywords in PDF", "Todo")) + assertEquals(md.creator, Some("Emacs 26.3 (Org mode 9.3)")) + assert(md.creationDate.isDefined) + } + private def removeFormatting(str: String): String = str.replaceAll("[\\s;:.,\\-]+", "").toLowerCase } diff --git a/modules/files/src/test/resources/keywords.pdf b/modules/files/src/test/resources/keywords.pdf new file mode 100644 index 0000000000000000000000000000000000000000..963fe42cb1437cc479b390049be7da7ddcf62cea GIT binary patch literal 47863 zcma&tLy#`O7NFa*af*N0wr$(CZR?b6+qTV9wr$&X)w>V(MRasWbmwU2BnRJU4_1;Y zh=|cL(y_vj&MghE!LSlA5ZD`8!SL|F(94+GnY&mJFtM@`{J#f=Ud+Z#Hs)-arfL&jBsUi+i@w*SoWGm#;Pp?{l82L;$*oyvQF8o)_%SP z9(vJc7X8x30%1+sv4T*}nhrAG^VWp9-`~Cs|f^S`YS|xVc&v7$+M@&dm*h1j|1;s;aK5rv?{8z<&Hr{!czEh}JsydKajR zt^l3hOR5`?Aw504BR!(Ln!CK>N}s6K$_h|@4(q=WJQMhMC1rL6g%r@a$%+f0qw1WR z?EnJ*&NA?^9pro)d&}!9!)c&;M@LBipKBl*-RzpH-{J9~XMB(uy_K82z9E|}-E1H# zrVZpqtI7=^OiDIeN}#!Qn0%Vwwk^yn!Z(1et>yk>-FZ6j-mezHo*!-0uh#E$=jYd^ zk?6r5kjA=pATat$>&uLU&F_M2|N02{zt7wHl(I`dB|m7-uVcvk-zq}Te-j%g^cyH( zO5j|Ypl2mI**d)aD>p!B0BUP@bN^!4F&KhF&{ntB#?}u+tTPkDhwtBmU&sO=h@WNQ zf%TWVz^h+`b?>o_PYvyg&7fZ?J5aFil#9If+~>a6$2zg!+Mi9BJNG&@x*3AqU&}Z5 zbia*;3TkO9ic041SGXU^Ne=B!jdku#kmi4WXyBdN-|TntTDTU!x7Z(uDZg8&{)c{l z(FoSIcOVUhWI|L1a;cRJtMhGpS1rqbTt}d9wgQ{HMBAj0WH@6v9xG$<>oK$?sUKfwoZ!e@G&n=? zDOS8?^cl>q2Bd3ghH~B23y7kG+!bmqp{B2-D*3zcM6l4Ww?Vn@W>z={)3XUeb9ViW z!U%dX*aIQHb>@X7ZF{^Biq(BXB?$aH3w|nH)8nlXIybHW*Ahjs*GOklxLM6d%Zq=a zY!2Y;z?Q*EK~K%s$OhKm{tnh;zs0s{(Sua&tRxB$OCrxyfNsNGsJ>v#*f-nv#mpA9 zmw*SSO`<%aQ*!gT^;2fG*m95A#6k*wl-k~2`nB6>Bbc%hZ-Y1lF-H5A91-ei#Os*; z)1{ypL>4qC&BR+w>z;g;+sZH2vm`I_{>6yP|KbZxp|_^q?AkQQ$B^sY$Nqa5jgQp3 zi|H)}cWfGF_CCbG@`jy0~jnT>%*X%1};_JO|@;q1B_G()BI>vN>TNdry!;y_Bgu+&+WZk__ zvoeo&Wq@iqQKtCBmn|}X^C8Aq2)!HsMHz2JFpyx~+Jxkb#Db0amV}i}S`2)I=oRVb zhg^jf8eZPyf&Za&XZ+({i>wjdIDS)86}~4D<(<%(@fBL$2-~0bl$g7rkdVvqQ_(oI zm9A)}D~rG{N!IQ30Ud#H2zS2htn>2_PA$~@>xyhyp|S%EC#3TOgpmtQsw=72jSlaU z(An9TYu!4U>qMX8kD1M^L%`#3S>HFmep{}4m@HjHHMvM8g*(1m0(;(;*{1In3qgx_ zz5GQH8UUdRQc;_aw!aX^bDQH<#_S-CWs45e>f{#_IggnM!Lax%cvguTvz;I)E08dR zgEyOue)CZOQ6-=fh>Nj6D&^iMe%ewN?<4YWLqPViZep-3Rco-7wkVCb^^>pzABD&W zYr}Zq8YQ@`b+Fv(%C{c>OX->TV#``u^_Mu}+B@UNo(f(pN(q4GChC zz|nX{H3_yrBjT_jb(=LFm$z9`IL@montkg8M&?^pk3$ok4G2|0YxU*vH+ynm{kd+j zBUl~nU^O}2Zb<|pX81f)=Xj&cXO{qNInogkyW4e}n#r_$q-&rt;LCmZG1|OCwW2Mr zSG~oDud@vRk!1YIeXVBqb_rbO%GNT}M{FX_EeqW&I)COHz4FvEin#*deYJ!q?sB;A zhI6>tVb6C+kYGUp6ZCxu7(HPQ|9KE|?8R6C*UH6pSxWH@$3+I;AxSOTS3JY}!ii|) zBz41+i1;|WZ_HnAJ6Y*hUsLC;#CZ4w*np!PjjXkPesCJQi2$JMjH%xmlKCymN2*0@ zf6TWd{EK6+R<=O93VbP!*AkPL)TrJbk%px`dT)FE*anX7CW|Y~PGnQ$q~#4NGwCK3 zmX)D4OeG{@TSoBBM{`Q}O2MQSquytDF$Tg@AXsM>kIryiDSXhg_z&qWyiRW=ru#s$khiaR9Ohd6M zF4{GdDBgGY#-LscT8>@j4ylxS){x=tRq`HOHxUua4xo&x5h2gamGpl%87mnZs1V5l z2}(?$Qp;F3Q=_V`JseX+bt4rP`K)8768WNN-$(JAm%O6x#587=`NTv?(D@FG9Ipo9 zaLqb#2XRGU-0(iyj|Vvrw|`#ZViOH4*=ssLa9GPRscX-mKm^uo=F{$w@~5J+*<=C9 z#uVOj$p@uW;9J)&4KBbkh!>qQ?$s!SwJtDPaD}Ip&O*HQQ*yUMv`H4@2d4H|xtP)x zKt@EcHA3_-Z$*MMV243GZqZnpO>SjRvgk@(1n!m|@%WW8m7~WsN1Wg8d4pO_m4Zmk#qv+qY zyAj-5aszLCDJ`;?AP)h!_xi{Am!AqKKY&YsdRY~KTp@OS^n~G4csQvZXAsR8#-KYA z>Ss!?=dU~eJ>})N2%$5Q=4jQ}lcD9zv3YFNrUm=h0_B+4;&0)+N0VMX%Oy?mE1HT9 z*AxdcTzrjEKU-sZI>axi1`}}z29O(VA@x7W-&alvG=Lvfq4~X0Y62=kXvnt4M=w!{VxTey!PXcQtTsW8q<}>|zGb>65w0gA zi)l%p$(wcJ4cQVV^8tEEnL=&^zFFMg3z9?kv1ACEv+JK{NJk4?M&2 zHcvRQskeu)f66=o0shq%{?Vmv3SZeHZm?_sE zD?~+0G!?yj_OigSH$>-INVIjh?{0lIF4+bF6H0SnrSXU?0pdGt@@h3tsBBufkTZrg z7gEs@l(1=tXVLZ7ku2M{h>Jz;>B#+oCZKD^Qb)J-?}+sfgG%28)cV6uH)n_9BRXoW zGSk9|yfn$nCU;9tolEj9jkg!zuE}xKLzMTeJ<-wXV*Px*u@BZi+ZUBj%b>cATup zYrb$4XJDQoglf-w^#xt-Ja~J#%F8%j2)aBIBe=zcSr%hE>iX1u(Xc@GO^FEy-UpS(=SBjn8P6+WA47u0!G?+rJk4_1Y#>kCs z=Nkn@3>grfZ(BOS2bOW8ST*CB7d~?ay9SmM$<_n~ z+`p^bdx3VJ5It+UZ;`)V)YnT8bPtrVz`W^pO08+0oZ0(gbuZjF0K8d$**7Bb<}W&C zUjKf~oL!?qAE5!Ct*mm*kvu}%Kb$&$JmFBh^_?Z#nnDRR#aUX(GuhLXNE@3^bo`mJ zG45;yO)Y2Bhy^)V$MrKWhdm~&*X`{hT>+tQ_*}@_08^(KT+jZ*mQK6fTV(E#S=NG$ zzOu#&!Sh69RhD~6GdfT5BG^e~>TAf!?tcYWuXuDmk-GA(QI$rbR#R&{*9*~P262xkODBu9;%uW;%VrZGI5+{VsE8M91b zAj!`u>IM`uPh{s^P;pET4AJbJz;J|M&{=2o7UK!dzn%vMTMrO*^7p!8zeP`EICnlu zy;KFXVhZ(%QG`~@S5KBd3>gw^NT#67XJK?8epJX-w-@N>c3rm(?a<~>X$z`~Wo0fN7Vky@|-Uo{7hB=K_v`qPy{ zPm5rUt?qb5JI}~?jW+mtMP+bfc_rq|6yD!$SW?I{^MFFM72t(KSaPSGk#GJUxGuqd zux>kKY1^T{zs~@s79*G3eutd+*E!ICZ)@3uI^^AyJes?l>$}4r6Fbd1HQn=me`ouk zaX?q#ItmZ5$;tZgxQ#)hP-O=p(upHRzb+}(J_4O5b2RjD)3_5P00 z3YB=loVP$v_s86)(rZvy=fl2YbW!q}oO{LEP;h3vu9*c)tSIof&wai<8SeIpf`^d4 zId8lZ#lLWYjyCQ0Eeu~ZjDtWbJeZ<`U=EtpM_IMDis<4TVY?AUe9^g9tb6ptowAJ+ zZ|G*z_p0&wXrGrw0q|~e(AgY2<#|?l&8|*b#sJ$!m&z3%`hKO}1Ba56Oro7PyKC~j z3~5p&wqPafWGjh13Fu^xqY@UN{-qx*msQa*Y zwQO?gDw$8j_&!z}UU7_Sxw$8zjD~coM8-^Iml_X))eh|r|81VwgcA!AQ7zYFTTKWf z!j7MS%xt%a_PQ4sBgN;NQtAl*Z(dU!b^F~Nrg3aVTPvJ0`iFIGez{Q$ZA_GH5o47P z*)@-=Mm79aYCP_g#*pO6DTcWx=4@A1@BM&VCrp>!#|>{#ZP(j~(es5w)2JE{93 zyO={P{&W`o5Ho%=KcoeMpzvc)g^G)hOsxdS5%fXyRZI~e`dM5bBxZ%0Z1VfUuqLwV zn4+VswokenA9V#~PR@|f&d8mrARu3}UV;ogf}d4O_gDc;ODTTXJD3gA!hHEOz}-0W z5bR1EJ<$44KGxOI0KM;eJh!42Afp@);ZgcFSY_k>ioswc2zPYIM1+-cG_8#?pa|Ae z(B-u64+kxwKmuj{?W$;Ykp{r%h07Iw=nT?^3A>?cs;e(v8y7J#+Cyn4LH^3nXo4!x zumph7EYYoDW>xkv`k`GI19e$=2d$PZ=>;Z2*En5uO@4Y_N|ZvXNbl_T98$QlgD*K) zr5C2(Ht~Ix=Z?EEE78^T;Th4V9`OA+z{fyJ^Q|6#+?nNPhk(B8*>JX z`O`y@?v8G4dR*4o=x|JcC@9Cx*cNW$%M1 z!UoFi`20yK&uWCNqSe(wukB$RU(97WeNv;g05uq)HXU`3(gjsi@upUWg3!1;3d;Eo zq?^ZUxt{yoIl@)k12wl>QDm92D&e(8EHswG?E*ne2g)!Z@?SSiv8 z{?pm0wa1UTZPYjx#Q~D_-ADGA7K#Uw{a_W^Dz}XQ0bkYmhuKtVdN2K>VtLnwf;lX; zOUny}myZ|~kUi96U6LJcdnzs7c?V3@f?&N{2&Gx+SkgoJ@aHbe*;%NrCsvaT&B6`) zQ3o%|nCe2Bo7=R}Jh&E2geM!aJ*`1@-JVlfxqJP=68DIIDF*ssFM2^Ir|d*qWPGQu3?s$hX1duIG^tuMTh z>1u6oO*Jsr|Z&M+nrzdh!!7j8YZrG$Ju%ShKP?E%)qgc=vy|)7C(3031t^Q; z-RX}qg&Um)U(1%Vg%YDy<|6%9DHtm zwkwjLq_bnlLs?bkM9V6`$_yWlz@v&8iWd*t4%}RIZJVtX(~2JDXVEacv;k}n@$h7T zmcQ(^zzM(Fj2Dp_C@7jexBI!=;BjRb<(eYDF{IQ&IeRQk;oM?9_ zGHS9wd2H)4Q|r42AY-8U)2hHsfxNM4qK~_KH^?#Qi$LXHe=Ir_q0?szatxA~zd-%_ zq(w<-g}mRIE^3~wOfM>YFjnf> zex;C5DsDDPW*%ooJZWL~+=UKd7|u|#(NpdndB9MCBP0tU%WqH>Ox=vw1=}HdO1Sws z!{>vcId+$@kZdY*v=?yHQ2WFxmx!=YVID&CL}5_`aqnYxWVQ#UZIg5np`SsD)ptiP zZKy}Ji_;EzrjoL7UTP8_&5P7{VNj0dg<*C!<{i;4Ssh(b zLw0>6*8+<6`C9*jn}@Z`h(K4-=x>D^mzuof8-6>(oz^vsoiSW|==rR+G4`$4G9*6lWRq6_JVU9wUghY;U4Be z@_OJ*br*jfM8g+HTOlzIZnW!B7&*5##m^Yb7&_arz*1b~?_JQxYs~PxK9n+l*363B894v@nNi;`Nr#uact-xg%0M-1+R`%=nVJTF)zR$6Oc3ptG|T4mh}`=u zX9J0>2r*;jub-W{h;`pLVv5JWmJXX*D}Gyx^nL~xR_(x95$2&+Of>VUUr2-FwuXVR zNCCXx8&cnG(q^FA-^|#K_u8D~!kfgnY#~FKwaJbSzc17L&{bLvMGLn$p%HVP0}kbG zR`W6X_KRnIqt%FLAbGlX$ib4`chn7u$JBpC5Y4YxVsnEp>hmpin9Id|Av+Jb`Q5S? z?t{{UTa4Og(Q@v*xZH@!dE#z;zPOkhN`zv(&oM49!EqIIGOg~&-g(fVLrXeP zuuB(xkq)#QdcNE5#D{Hbq$_(av*;8MU4?)T;rzm9QvlPOAgpC45yUoOTUajEqRW)0 zveIiuJq4T)TbgYvd!sN@UG8CI7s`x}ztDfGUh>r`yA55PzSWwjkyq-H%Zl0oI1p5f zG;9P}9M3S2Ik0z8g1lS6$>;x3JY*oetueudZ|vv%S_%~9tG8X24p%nU1wGpcqnrv& z^Sv4ddC6pH3)0PRVtP6lM86W7_zPr;XO0A&AijE_Hq+{VyxN5I{PW@?7KQzhB0*Ha zq|&3YJndQR&^>buM8-BSC_%Skyfs%A^Z5ki|0 zkzr8WnUiX+u@w8w2YhM{sry5bOSOuSR{XM9cp6_c{PZvFxMHa`Xt^^~LL*3jq(+$_ zzqHUb>2&%!`(vc(fO8d$Fl2GPwPv6wYZ=~MSoc7}E{TjJh{-NeE(j!4uR~F9Q+tN8 z;IqfZcpEI+=-(rZZ>ll`0e8p~cBi~U@!Ed9eCBoB`JXrWB2bRTlXVmTxBWi^q9c5h zGOJO`cLQM6}r6B;ic1A{zbvW z3q*IqyVhbp2RrMA$n)f*HvAjPs*~NcJ6SNqJEzmyJ=u<`$6~W<)Eg#`%2{09=vM0`4@vmcY>s0Jrsj4a+vMO45Y~LXw6SrdCO~}gk4);@=@46u~ zfe*8u4A3-?77CBjpIN$crQ7qAWWJ5H6U=HsS9oo46$P|ex`$i6hyU;qnr?n@o|=Wr zk6pAZWz#>ix4=X;rMiVahCC3tUTVIMx@J`llP+JoCZIc#qqsegx3~3$rsG>viOpJw zv7$Jbe%y^W>4(G@WsI}P;qnc@n9L5^c)c26KtQyl#|5*&1B0+$5r@=7<^ z!W9uB?pq;KbK|wzE+oR2SrB@S&uQqX^M6%fvO7Ulajd2PfiBQ_j$^kc)V--fG2r#Q z7~jv>vWzS{592uZA6V^Q-?21K)M0hgR* zRJZuNOMz`th)Ci;pE;Oco7rGN0p2d;da#_0^i|uS9FGami#eEMqL{UET+PPsqq+~8 zj$*}c_G&vO8j^;|CMU>pGNR$!hGvSVe0XSf($GpYj2WbsjDeed|Az2Bhowk8i;6?Q z_?3jhgc8&hcE{`~H$=uph7_ew(~(T=?k*5n)h?Em#^>GNxi~->cgsjJ0%8uft&6@v z{r!a{@twXD%C#jX3w`Z20h{(|=}1;FWb?406FrdXp(#>kMsbS=ms-b}i0hmr3MemJ zS%p+1N-8bAzA|`FG>V9-R9C4Zj(hte$paoDUYEH^e?nhoCnb=2*aY*29aVtXl*7M10HsR8T zrKy?-cpK^Y!}u3YMuxJ8I41X#BBv*rRMzXv&gIaKrY48%GuY zDv-nrwzMSBQAc1KE2R4@v&~=#h)^d5rI@$BOm_mORjNua@}_FMrwWrZ=!l^+ou2kD zuS#o1smV;%_8(Z}2R$rj`zfy-COFwSZNSaqws(;fgoY0}MeE9t1Lzkx9(Z@TmOFD_ zs+jcC!WPJanO52`igpTIzC|0E$4%o~> z2=qR8+i@?hj4UXgSvI{;3htkj5+8Z)LC(<6ePl}+l3AYnZ-bq$llrEqt9reE-YP61 zSw@*=22(GcE2NQvJ z<2P!YVRLNDkfM4vBfO6zrTHD~ z`Ckr=8F?P2ekaA}ucuDe9uagH6(KjaZmLoT%f?rhzR%9T=-8(N`cRZA#bi<1^tS`v z>O%C&QW6kZ8G=x4VlvsY^Dt9Wt+Jnm<}y}Wa&^bQAsnYTGS6A|FU*R*n$McJtI+P$ z#zDWhBl1TiGCb4!X*CJ_xJU!3*BH*1vW6tFC1sOrVcrX+_i(x3J_@iu-!hb0cJHz5gS zjp>y*HsK_8jdcQ9Lc_G_%!$z#;sSeuWa@zfkx+5f?Jl7}&Bh~5d-674&y0S9izkVo zYMN(Ax;{M|ZHQuy2)yMyY@H@npXBIcx}((HbCOR7#SULn zw`u;wIaLYC>9&LcOuKT{*z(mY^QQskfQWn;{&aL{>m7m(A#5vUPVx9uSX;Gqy} z_@$|YbI={82$4~(`qecT-=VZ`9j-b~IpxJ@&@aVW&OJ$-zG&|DAS&%)WpGo;rVH&3 zNG^(|$lJ6d1@m~;l;)Ezv^}OVvO`s^>5nDAvcap3;$3|OK&Xv(_bKW~J13$J+cMb6 z>}l6XAy!$ObVE|YNmI`bTYV>*p?GEo?t4lI_Xzs1{h&anyeNUKc&ZF~I|_b193jee zjJlxpK)^7UMrqy6vK!srcH>-5@Te&ra0?k3z4(hbKMOu4#{Z?@`~S@z69FSL zBMaMq-QNE|_?X!l8UIfSU#qi2sXjU@)~4G|tnDTn_X+mq`Pvl*?&if>>wiq&t-V=q zM%_(~_AEcP^{S?Kj_xPt5kJY*wayX`uK(226r2nXkSa8qrx}@@8xRU8H4bM-Im+DF z@WPJV)EM{}=p0xkh)MtgkQq7)s!LquzKUp7P@sIppKq|i$67US*ncqQf zEe(ncjo={VBGYu+S1swzxrFhf)ZUA-$g<7Iq&=v zjNjxY=QM`rz$~hbF7M)2%*0uKBp>cNC~tY*36Pmuof|)8cyDodzgy)MJvlL_znl+w zY(IYqib~t7tD~W=erw#;E}KY2)+gP7!t7tf#ClLzBBxNFKG{Y(cmA*5#17(ypk=> zH}s+Y@orEhV@Ab)Q5*NCZL8MTZ{_0#TRZ81MtwB`g#|Ixc)L}i-&}{NePB~I4*l_B zk-(D=x8sIeFTPVTK?b|9`E@{HX69mHXCWb%+yE5(Td6XGxSRILC z1NR4fk7`xR>r~fFsdIvL&9s7}6uKgpDMgp(^3|a?=$(yYvLr!fcR*WuDm+m3QX5pT zP{1L|&q_O80#cb0V&zIBAQN~hx0419(pJm)B@7CUPO=J2#B=k|S#xs7iqM;86&EmA zm1!e^es#;O=+FVYT2W(HEXy&dXu(qFC6*U&gw>$zK^U_f+G&qxi6~t8w}&Qc3}FS z-9(|AyK%zlUE|uy?04YTKTaC_2_4VtCfx9(Zzz;o3oo}$H@K#-V0=Hl#LlKew|#tA#-PZs4qA$;S13Un1b>%w6&s+^902{A|JGoX0aG9VWexc(U~wI^}aO6k!>#c@q*AI(+t= zIN0Eg;U6Nu#U|X-xM;$k^@%Q#+O4ZX&N{5rX)r<_DMYSDX*k2x??aTecU2E_`xLM% z#{F)yQcDoHLP2PBfx_Su_#h+navd-J=7Q^6*y|^`91|!GQP}$XyDhb*(vR$>J>aIS z$_(>ucTPv4m3pXNFiYLYz=e*=#tmgUy7Aa+{Je{_5dJ?Yt*?q83VzRQJUN9BzM?f_ zn1g<7%wWjKUK#tb786REpWdB#ru--GQ}LpCSpM!1tU)`x9FME+vlam?Q1M#j#iVFu z@r@{l{0ID}#WYNq^}N=lY0-DIM1S7K?xZl-@EaBbPwK=<(%T*N9**It#Vf+orw)GU zyq9%{u&;)PQhq;UJ=(XVM?QHY&(#gYADJ;wUnVC073Zk%P z+6GjK`nh#h0`wLi0v-?|QmUbh;tX_uaE=2J)bf$HB*Yc`kdHW?fti%DiTi?wJa}gD z4J|a@yw@9GPxUdXge_B~<13i%vj24Y^Q6-hwjI}!3`U)EmAfSZK%|6-YhUSnsYa@9 zKY67opGn9!=`6$9-`c50pB?H3uXssI5{^!_SuxpI_6f50v9tdj-x}fXP9D36i*ryo zu+Dr+tD~>S@Nd;zwaDwwtW)PMP_W|6?oHHLr=r~D!jT>CMqP6aUX`S<2``GF4ecX; z?>bK*z;Wt*HbLc)Cyp&yMvsH}^Bn>b`7CdfH zad{ww#CE~a;JwU)7g#6eSrIg-9ki|dW5?%;>SP%$5)9zImXWA!X@$omqSx8Q2~yj~ zuXro{Q-r~v#1)}IxH_(L5SY8ACCGFdo~Z{ z7Ie~5!LF*xto|lD z7;h85_%;P5$!0&-()V}e)2VhhBunxu0Ci#oafjz~Zak>H>JmQ7RA#3KvECzIeMj&h z&b90VWQinz=FhsthWy9|dx>}E+f_Pks(!z&!=(z*VX>fP!9qPGvY%|I3=2%edDMtJYp3aah8FZql@w|G+HyjRI75WD=|gr0zR3O=YWFCJ%txuR1bCOGusYY1ZWAggqRq!$}H+`mgOY6aA!e zDi+8V??>e?=`=W2Ifym$^+ZSanB0D2*F?zH!&r!$B}_-h-jQ#<)#J>UB*iTvF*By9 z7l&gecp^W|cc{ip7&!#QGm56ZZawd`q--^&n1b0BNL;%Ab&kLKU$E*SObEf48s^jq z3|iCT*B|54Z0w}lifje{8+}kY4kMu6(mM}+fgZ}vTQ#0lGj@WN-2CQa9#Gi+eu2I# zjgp9vHTldnQ}1-FVB>XZ@l=_Hp0q`cU0k5)DmP+-#aS;(&UC=A<$}P#ycBXmYu_fG zxQU3NB)`MP(33#J$Vyu?XBjSHW5YidD&jIt65Ix>t1y>r!O11} z8O?xc0?=eTriBz-1h#G+XE$rUr=-&T@pmbj#1bR!jxB!3o6z=MoWUyhd^Ziy<+2sULFMa zu1g3%vX(L+&0u}fdZ3XYDbMPZ-{h7wjV)l>_H}w+Bx7N%CSR-m-Dk=vPkXPQ3d1ci z$$QU&(DSs9p2^88E%Xa`<5_qwZt2RK3{uES!rP2`=WRJt9Lf8+1pU^AdRvL-&X+p2 zI5AH`l#dic0!9UVYm>Q>>`*OQJUaklyWnexuSsw>N7Y4H+6;^q;X?}= zcRGi$&{D8Li$Jq!cD+LRgYQ&gsFLnOJU-$Ma;3_smP}IegM4p2sw~U97}x;>vqry& z|9$umQF^DL*Jn3f*47lQ-$$dx?guoPR@BC^8?bA%#V+yBplZ{@V8n3(BO~|DvkFZO zEXzlus-uMmjf2FC`lf*C6TYo5n%CukkY9m zUC_F;L%L4j0?U40Avq6i3QV)cpK>K=Y>zmL?j9(7R}_%Nhg18YRdCdQfr!jpVD2O6 zh!eubu#V`n>sT{B6EQINhM?|tbHak6z_9;F7t=Rse=pHD{8RH4+sOsLV`|O-IS=!k z`|iQQF9w>m-94xtnmMCXXyOU*98Xa9X^0NlA`AI+4m+Do;(|9$?h#CYU9dbm1Ioc~ zgl}$r%=!g*5rfpfJi)bT4dE0Nqa^h|!);o&6?j7ywY1Bd(f z&xMtS`W!z{SC|jPLP$)3rD9YV9mbjen2mo;h1gG=Ux~3)$sPIblTsa!tmL_nIJNa@ z%hJX=RN_PG8Qk9}$Yj?|6lNLW8#q>>ajr_Xh%hzT6gD#}8S2>YYkR9N(}EOIMv!Gbc?KnypBZjcI^pYF5EPX0Rl7}1s{`_u z%xEGcG`Y7^4@cEuJaw!4Wr@MX}3X|>ZW6_*N=X4{f3QIHmsZ@F3kBySI zH=6>s7lvH6Ed9gmf0(RxjR&S3Ov_gDblwJZ?s=~GPr=bq`>d%8#TJn=h2CUhoB=~n zwPK(ATm*NSEjdQuPxrnR^%wau#_&oRglTC=9)AjZoJKD1N(OY}pPMZ_ zO(5Nv^!yxC#HSSO+~;R0pLm>sE{nVQbnaCN)UZb$MbD#QMPArA*Cj4DtG!q|H$>H` zn6KJA;$^iGI?!Dh1Ut2I@~rfvC%XmR!iS|aagmb%R~#dIP0@yGg)WdIo6EJN71m@>0{~jrBh@FjK?Fr%C{KDN>$Z9(05%0Afgdqd| z(`}+E&+GYjs|c|de|&+{33nE9CC}ykI@5d{#kS~>ObmNLG^`R*jW-KW?#{UGP4`e1 ze)Net3fs7|V(7)n7(pk1%?peP^bTJ6c^8yUGGP@Y#ClCqB~eOKNY1;1dFlGg@wN|b zb-q~6wB6F7a>B58*nTD4bTyP{VbEsr6Pqu1qLyK`B6T6$FOW5wcadQGH8YqMPrG;; z4NWXYuv$zBu)EkCj|-A?u)=4q`*?3g6&G`!BQ&o3KS{LI_6OtQCxB`smS7kT9{|UB zUpmZ*SfiL$e$Lsq39(ZT^Hj?}(vYbfsj`&`Qh{eY0@&F%YDE9#W@3%}4xu_35 zL*zm}SC3?Q9H{c`Y@boY<23ndFlo3uIhV)aL7SSQ)nFQv;GiN2Y4tx_BH|Tz{H*zI zaawXbxTI~mj?mjjGD!RHm@h>ukh$g2UsO2thQRQycMKnz>STXcY@73g>YDO6mNtE~ zY>Xcd8Q*Q6yF|K#);FU^3i5i)@G9r0*&xG`FW+C`3lvOWXD!foM*x%E+z?J{M==1z zy&!2OO5w1kV94ku8fC;l&)|hXtwR5Pg|Svg{#d59HZIwT%~Qb`q#f|MFc%xg#!yq4zpsMR&kNZ(2AX z)y=u4KJ1c5mf(2CN$VZ_m@bD8PBMVPqo!tFR{lcti5mBBDske)GTwu^{<>f}1}}*z zoNqbDwt>1nuq2nxp$l!z0I|)(m7Nl%0isc7W!{|v9LR_i9WXSiGS|Eb(WZx0L+=l|E7y&jBef`BhX8JCx9p#P4o`7L?!6OnBmTbp19p zo@}*HY);B|MMQS%*fNj0+oYc*AaAUSsQpP2vOX^Lk-TjZjn;xic3L~PXUx5z!YxLF zVoA3wWm{xZMC{pNydQ$7EMXygPxxK{7DB-GsD_?RtOl6gKt(<~SH1h$syFvk9RM zhG72vAMwKL7j0>;D2#b2HV8Q*shA>a&Va!{)90W}`grj}Z}YFTFeeY8?+;~xg|O3I zvnqQ~(?POR2jp#F3yH_%IVP;QzFuyylnlrnf8Y!aLbQGd@DSvA4>>KkM3*LmIXg~Jsc4_d6zQZ zy&u-0TD0d)jsN^BJu~L`Rtx353W35WhTev0Sqy*si} z>hRvX=+qdK*@K!!A)jLWrI9qSJmvBGc9lOe2!l*~(9);mg?1c7?jX-McxHEZnMfm6 zKP<`K4-@7qKzcy>alCNpzYwjFr}@TDNPWV9k9hJ5Qt0cN`@=k>=gTt5Uuq-rn)q@dfL}?j;zO;inWjrf0t?*^JZz zE(}uN0@R&9rIxf6J!U-1&|wwFn27MAgejK+E}n!Z_SN(A|ah) z;!C){l6Qv^($hM7OKCpYgx41is_3SHhPKAp^*qGTv%s3$sqP@|WS;+Ml2@w$cWhy5 z2bwvoe(Fv-$-{<^_aQ({XiPZ{^gd+A<#Ltp1(6&1R{f3O47IYb3?r2rhZ{pd>d7Bm zvJzYH389W>C``$dS{B|f=C^}nvP!+6NYZh>X~pxo4HBQxO~rcs%0(pu45+(eP6r2P zOUobQt8l3Nh6k80W3j9lPA#980k~erO8ew*@ac7h>en$Ub}EG6lD&S0Hgv z#M8_51E}7{nHnh#QpNQlSQ*Xa-UIb8VT9HvhMa->I(gX?fh5PraJ&BHug6E9{D3@gE)yTw?=0dq|s>Ya1SOe zZo(8AM2jAF6t_MWD4I%FyU37IYGI3LH=RDycP9+@F0FFMqAdyqPTk4;Ny~ISd}fuW zQo*qz4`=nH?#za#pVci==0SwVeu2@`N3+|}@CuuB)a^KrbXM8ezndF)mOAjIEUOYd zrqdl7ORGL{Npa)`LxsxaxfMY$^Y=GrX;@j+yi!ige9n}m!7ar8fK0S{fC>fgXdhnT z28P#UHJVVmoo32B`P`Hx(|=Zuu76iGW&O1WHWjW0YjHU-&Dzv#PdUi8{_A4*Ht!JP zfdeSL7l0EX9!))+u~hTuLDDoe&d%;ZE#ENK-IwjQ4@#1k0O8BC&Qt;VCb|ihnZL+yp5CU zLVaaNiEQq+Kp*EFrN4%5&}3+cW3_ux?ABwzWQ~!_f>AS$Q@E$WN-e2Utw`Rd8pxyFJr}UP&_y0Tm2=3;f+;}&W`4G*T%Vj~PvmwfcgAYLN{2f}Pk@E*^%t8eDNKyGInj8o z_Xco6t0rE`2dzoGI)YyLvzA4^L67GUICF2=z+rG`p7Tk+bSz6OWUJ(A*>0(_u3^a` zSrDlYpb;&8PNm%$|60A2Kf#cKfofiI79LM>3=mR!%wULeT0rr@&B8Zv@bl>@KYO&4 zM{jGWoam&mBpDT6&ek&|Gh@Z1Wezje%~_?SeO;HF1e(@QOs=idv)lq1r4J5Cl|7XG zhoNvp#Th#+hA7@9d;}%xDq&4Bo)>5i^a@D=J8xp=jF2_v1QQA-g-uq01xfFzZfk{A z`33Ggxlau_UyR1|HoeESocxJbS5CH7GcJHewFtjskC1oUyR(ZOnKYg{9LKgY^pC-G zs|;2Bi2Z3&NqK81@vFV`W9i02CyjxdcH#*gR67*I)tX>AmlS>5H+DtTP#9Jgxm)Fv{&4H+U;kC;B8w~MT(zUjVz(bTQ z3c=(rZ1~-LkGLk)Tgzqc6*0cHpfcfAj(C0GU*u*XkRM)~FAX?d9i9 zy>-$OdL)Y7tboZsiz;IPK0-+tpX*SD{shS_g9@Qd(Z{%~(iQ{JipNL=YL?J!?z1tz z3Qy-2aC5No3Q^25umz+^BXZG`A1q0AEllfz%6f$Q&2x>LOnn5!#(93?#AIxdOTrjF zJ>TH-a0Kte)OY0NNavqzE#cbipPxy84CMP`MSc4*w+=_c+PkOIVVVn^1`B%;8C7o~ zt#BQY=rMKAd2hb-7H;T|PzlQ-M|e%ryQB!KrS`4NbNwp_kT!2vIJiwc5PUFn@1Fb0 zZ%r}*yvj}L_{tV|{hu{?88L;ycPi$jgV%)5_~W%#^)AtIcDX~--}*m%nLpdTYnW{7 zMbC6333GJrNsalTC_tT53wi1S0SG?vW&8Nv#W7Fk21w_O1S3Y1`q|pr z6Xo}Ld;9Stdx*tw@z`WK{B>vW6V^Pd5Gg)9vCDvLfquiW&S?<^Ik4uwdRvD6X|2c_ z>F#Fp0jq+-Y!vgFzI@{8_E22k%AIo+2Tibomc_G*+YV3?%!2@uC#J;Mf{wHSn0E3^ z(ZX<_|E7@XHrrKBkR=2OryUDWmw@BD;!l7HGWyKEphJq$$!gXEAmHUT9k)c#hld1J z^}=v=$-Lu%VNBK%H7FIb{S`{a?&&SD?VgK9-;v%}D9a8RP#yp%tJyG_^j=AY9Xquh z4YJ?FQPIJLgji!m^`}NmfXfTThwklZpI~zyAjqn<)6x9(v0!I}0LmgO$evmAa+R#< z>W0Xn$eqTy{mJUx&)e*ksHz%52I}B6rditDZ|PDuO78rM|C%ogvP(1YtvG8b%u$;i zxpZMRGw^Lc@#7l@tX3yFkw6{r>vuwuY;_d5-FbIcCwkU&NOshm%`nUQO*CZ~x$6@q ze$QFZ79Nv9V`}Qs7%;7RN#fQM7UPkh7EWcSV(}?jr6kCq(f{>IXA?bvX^$_ngZg$z z&(<*-K6TEOh*-?=7gMrevv#M>fa_Ku`_@Ppo6N#B$4EHvvln91GH=x%|^~ zLzHFO#+(qzhH8H85a|?!r#{xX?Hmo?e-40?i$C{1k5Ihm#u!|_;j;0cYUA|YUQMv_PUXRC&qhHY8{)5@2M)+$$WQzS~1{D%Z)YZm*0oZ-F1fHI(dPC&Pl$dXUc=ua)#mw zK?}l1@I}us1Y?56C4e6I;yl})!_iYd87}w_KPRNEf|jS>8^PVF#clLCh|Hwn``P*K^(5pnxlOG1}!blN{{%DUsB<bEt~vDSnyapKNmD6@%Yqem=#HTWhTbb$K%j+mSXp2^j`~2Ow-c;Wz z6lXh^n1_LLE4`x!Jq2GbQr6{ALAL6)(v}Az3a;z>hFu{O6GN7XpP-7p|A>`XTKdi1 zJCoxJF-OWUag4{a7F1tb>{YO1EN_Z_$$TYLe$zL)4vX-Tza&{l;6nbL1HW_n3u%XN;gMOMBx_L-xPALq>l&d3q4xA{l;!<}oNo&R8YVAveQSe}W-Kd126D zQ~V=OXh{mRR)x&r6v`i7YAe5*I;mnU0A!Sb-ib=(FfVpK=_%Lqy2>|Kap;2NRLON= z8BT2T6BY-YNF*-k#Zi4)d=qqB$V?!X9|=c1>4<$@n@|cs z2`Am_W>%I)K11tSnW3A|OyISs%QKb$0gPL0Zp=_jG}JNA&MK5wky4kEHe)V`APh%4r!@`~01j>}H9M=Cq5TeL4S zTtn1-v9Ans87d7umQZBwr0i1cBUd$R+q2yZRwm}%i_^Qb2o{0Kd-Dh0|C?0eSJWQJ z&e|5wwRk*-Mzlb>`L)2GR5H1Llrj0N7q}2d4{)bIEf4vPhy4i2nE^O?U2$$A$I6A@6!yNgWuP%VvOC;2ij&w}w9i5; zJNFu7$owie81W|LG(1GW;5Fayi75>cshDu&VMnc7 zuDRigM?Y8y4@u6*uM}K=J;I`PGJ7Ezl|NtHnJi3<_iA|8Hz26W>{xhy&DXjk;Ho`}lCF!&9R;V>l7aSqGsf!yGbUpr^XDwSA@=-@@`5 zidM}v{BA>3(hjOwp^E!qNoM69N3i?aTkpDPEV51&R@$amFKEvX^qQOf>9V@xIH<$JrGVG5Snedgfm>auXUH$~6w#O=KUZ-$R2raT~~LZXl~jMp#( z-L4CtiVper*?^n0kL7z-pI63zU|ug2R;p>73Tq?7Bf_D{>*-IsY$#JiScd^A_&({C ze6$@dA$^iL5FwwJX6&D}$l~KzBYUt}_(B5CcU%*^MK{%!nCc#F6EP{)?WmLUdJ{KR zy(7@dEZD1RT=Nf?^;;6mn5NJk<~2U^ByGKv`mBQ0DIfAeVSMl;`vMJ`8RQ1q%T?>O zdEclm|9+-FjRbuqsk%w!4X-qommr$6+Yu!KKT}-~TwhZsPWN z-lAgtfyXNuiP#CfYt@>q5s<*ZS78$MG6nkT#1GYxRCnJfGf{VO}4d zUh(9wOL;Iup=t7-KFH^qM%gms3p6Im9LVesfOla356w1V^CY>OdA@dxd<{v2#ml@o zO9E!3Twyq#d{(}}X^r`G5-z;xj3dHjeaZ_SQzd7N^l3Q#sFu!m*K;TGKC`rR{!2%) zrNySi=m&dJ>`uhYY269(JsM#0?7lC3Bh{X*Hl;)E4GDz)Cob1+w3`h5edkt$(p)Tg zi4lu$S3aTOurPD2a&4%J(FR3=%udZF^~J$CO|&=hi0QRs$k>~W4_fks3S*76>S+TZ z^PbROY{0N28a`C0_0{h#wbV`^A|(up9~T32DWPu#vQu`)W3Q(Cx-XWj4bLCF)o!KT zOtrGhgjw=nt2esN&pJZBem4cKd$NhWofc6HT~O@^AC_bsw^Ka|oU7tq{di0GNkVT^ zGlut1CB{BQP{2c3!CPB{>=$>z)J{xDUHfpwn8ISx+>jwhdY#15VtHxj6mKwH*u=?a zPwHmib+X6btz7p2V4#uvB~c_kkwupxoppJ$lom!gkJ6&`2ebGiNpTIX?}(%v!JK)j z2*h90c}?Il3SRg6^?;~<>?T}-q>CDAq4aW8DyGpe$5;iz33&MTSQaHYO8xy=ve{jp z`B+zpONR9(3$b03JH;O@NZ&k2(dkU&qKNSBcTk^dnHu952hRkgW}=I36Qzy(47}Koy5ruU9rmY-71r z4W^N`dJn66hfD!jC)Q}rq!veda;C1^JY5{7(U~>2Gi_X#EZyErr!w|+^`Ft^GMzx5 zFlAOpkL1FYe*lD_KM?l3ZXE}SlRtOC4tS<*^w9hX9LOy=p;R-yP@xU0#Z8mFc9c*@ zAQp=JbdQ>!GE$JD*DJ3AbBl|&A>F8pCyDIqTj|;+Gnaj4@aNx==GYk~Qx*w*LT+ao z3+fbD7U%I`%K@brq_LFp@CE(4{{4y?K&Y;-o6rm($|cX<_a&FH&+GTh9PHaZSqzc^ zzUv-9+WVyEPLyHUp>}rY$9)xVKR!@3CzBUeGPJ*-UBCP%bUrFj3$4ek6xoYy=(g&3 zy|B}@FMJ=Yg8u3@dc^HYxKfoJAHa&!AS#ZCzEZkg^n|l@9o*mu4w}rG;q*D|m#z|+ zOW2W~U%OC-4kB$1K`3kwDtbV^@sn!|fhJ&rDyRmc8l8cX>YGp}>iczwO+fwqZgSu0 zvYHbK)B4;CfR(;<&P5R@d~=2F1>uqiS6t(`O&0L=jYcbrWb;3Qgcy6p1E*S8PpEh< zAPF#n;XKj~3&T;RjH`$gXT5hs;$DZhc?=LoS1=FTo6==y(?@P1v_Dy68?Wq9o1)Oj z-?>>A3>TO)9+nqeY2>h$&+_E>4>0`bh*VdU%a9>#@)$ApwrJelzxWb{ljZoXW?>Mmd3V_1}62Y7qxiWA7s%E5rmv!IU`!?D9_#>qFua+mjOO)ZSfu2 zWAdfcl_*J`;SP_;gyIS}w;J9{i_0)F6*>ESzHZsfAzHNfO?CPbm!ag)=EP_zBkU4@ zfXMn_c(<@C+!&dvrr!$yJqcGk`jlWVEeOgGi50z3eO^+{PtN~6ovD=i&eMobvG<_D zbGA66{Ko9Kl}L{2KiA|4pDqmo+FHNKbGBd(FI~)Qo%#2e=2Q}MBYq}i6|Zd6QMZry zfKwhLCAAvHk`bB&1fMkNVs&u={%p8(dK2iMKBfitW^tiD3j>8 z1kK%JiV#61*-BHIY!L!K)OKa@>u?zSALFQl+u)ROJv{HvNS)l z)?m3fEWqS1JUPKD6cY`^(m044eV3*X*YBO4SWJEhJ{ivY2U2D%!{RJ*~ znzF=m*45=$wNM;)f~0W$k@9f(>X!1%q=k~!8^}LOZY;$vkrM}n`ap? z%AX2a*;?cf@wYU+X118i9^X22Fadi>kG#*`ud&$-cjVSQj9q1{D}ey2Uv zdZ8|@UO%8m!^S_6V+B>GOfO7tp*&_ZFP;Tb{i$Xb6fQbSRPw@+y?1Kn+qBSaZq^EH z-h(PKbxNvR1?-ZPuE#8dI3rG9p&txUE?pU<{8jpyI2tv(C>hU}C4kS@&F@zfEK}%; z7p`8q5aq*=mp%a+USXqTOy_6ZuRjl`I1##YIRELyVLD#)ed%W{R$_6#5e)kEM}a}yDR@f$Fgzw^W{hGf z;D#U+w0jytP)#9M*F8bOCASr+8+(&&kh`fhGknt|a$9q@#ecvTNa?%)Qfd2}j-h0n z)MTTC8F$An%km5>t#t1~zo6>lVcypR#lcqVUnutMe#~B(>e+$=VZf<(t9}L1_#GJk z9~9VsT-X2Q#s43V`~OLSF|#nS{XZ!%W=^jEEztk}5V?DSI+^dR*7 zu(G{@Yfxw&=;~%>fGi|9z+hmMC5w_-C+9R~Cb1`B|2GTv0)(M4WC#Td0xq;Nw>ye# z=CBV}#8LcC3BAz7!2-S+ga?e{vF6c0gbE8ZSX*10RGSK0S{e&<2%u0WLfW$f2FnVr z3kFgaC%3l-mj-Gt3*iw$v4VADYk{EZ#?lRy19BlkEviRNf-tf;g=+eF1u-(Qw1xS= zq@)}VgxcraWu1k0!+RQE(X>n-25hBA!}&?RS=AQL%S!nn18S8Z%!ZT>)-n}N2x zy!{^hKoVcuJ!wK3>pl)nG5_*{yQBbhgUY81%>|Zou(tfrE(&_f?+wn26Ij8xG{1Wa z-Zc^gDz2=|t^GR;?D*mx7sO@xTNOh)6}|Yq4)mM$$6o$QPPpR)Nd0v1KME}8hx1-h z`!Cd!G$$OvFvq_3K^N#lMw>L0RadUmzFgs z{QEXOGJo~-x5W0p%o(c9{YUa{u=UIGJ25#($g>D~>+OWy|qH%%6@IEBl#i}Qp zTGYLV9OLfzZv(Bq+Corx_3$wGZat1c11`$GThM)H{-gc*7KKWw7SYn}eY91IQ$Km% zDM^cqE8t>Q^SCdw$LU*^3SJKv6}+l83iq_|e;BYf)GO@G%2gHwa)1)Kb_u-fmQS&A z$AQMltv$K*Q`N$fyFgNqubVm*D+cj<=6J|_Z!MB>w=l1iCR!H+#=AN23#9QsPrMa+ zzAe~#xb>w@4ts~jAj2s@l{@0-Vaa<9-MwS7G+5i$0{#x#4b<#KcMTjq796iZDBCea zA!$9C9yc8?7RA3ZQ*k_UVY3_Rf2R9+oww!>+iGL)rpgY}chw6Ka+V`b5EgT@$A%E2 zlC%^&hs^NNlmH5T)3;wSWbQF5oLYW8Ts0*-r8`dSd{+@}$|w!S9*MCyi#k)>wS;at zb=aSuzGg`df2eaArp7C_AvPmo_Y#?ncFO)^(u_B+P-*Tf+n;9AY|vj>?4d;-Bm{BG zyb6+5LwnO5qTHZeX)B?-x*-Q&8Mhu3-E8SVH%}PnxqDS;UCJ5qWHcF8Gk?;Y_3+!V zMVuFH)v3LC62-}e+{jyK#;eD}#Bv|tU+s}P}R9I_1P-A%U_eZgCZ)($kEzYX;o z#xQEH?^NEz%zzz=VXHd;rJ}H(&5wP033}K17zicOmtRSiPwccco%>-uF*KfhQGs*I zgf|apNe@pDZ@Bu~EEsG~p>NrkM2ty7VGt~sp^4dRoT;`dzsK!eWYsZIFxZJ=MN7>Z zkmBhLqC++!w!`3o*)+~7XD1`&-&jjH+ri|*Bpypk96~2HXJ(Q>XFB(BNUy3|zttycVWJMPoZ$?`*xcYP-C9 zh4I}qFRZW>Hc2cf8+-0Yupxm!xUiRm_Mlr8PO4@ zSScovP`2*S-Fq-UJKWrsYW}n}lbk-FU`Bomm`TbOqN>~G0_@Y83TlIHPJ<;qTRtc< zoT^QDR=QXBTkd2e^jy0t1f9l0{MQO|1)NlKdDYD_raV&4>0lELFF#}q(>UAhT;wwI zmOQi7&a}b)ud?%=`4;aox1n$0#g!ddCnvUCu&Kv8?uq1q*9{(bdi7e8k-zSRSA9~m zHAT8>a^!A*cGbh-niSN6Z`KILz`H8I8P{YoUfN=EIa;55C^QI>3#G02N&*s8G5RQE z6r=Y!w3S`gPn?p0B( z7F9PgUiWb3Z{o|pvMBTf7I?Vu_&=om*koL+hCSG9md#&ApPF;nzoJpgKHOB+x1W=R zp>-C!<1(GuF`}}jBn{a|+z^YN6$B#d!jTu{4G`IJRpjhaz>@7csRmaZ+hAcbbn(RM zHcPmYh+4QoH+NBT;yZ9+EgP4d3J%5?9dP7m z?LXUl$@L~VhFmJ9gY77M{Y>XirJ6C_E5wv&H>Iq`3*r#9Wn7JAZ>&Uj+HOE(vsDPj z!O&})?HSN{5`x+rmu#lw!POmQd!AXl`|)+kSX8*6gNk@WHCnrZO|GG${F|aUsFX0a z@F*5sx?UYOy-Vc6=AutMm#sDj~}#VSebze)lN=e~@svlbcz zvqq-kd(Z@-II+}`qJ!fxGSYnIFUi)+K`c84vOA<|br&I=Vy*o?M^4hG5az9{EZ&FM zXZ@ZbK4s65x<&2i$qyTlRK_f>XlQxw)kzqImXYM)bD~aFsf=I}&sP;uNT+_{IR%q5JU?Z9l0tzIgr@pBui zwpiB4D(LoYx;`qe)A*eb<=IcbYsjENd1LDPucaW7uT*MI%v8p&WbP2IiBmR9G(4uW zHyMZ2!l6i05bhC>Vj2cIp0!^41CuxU0P{_EE|w=8hAqBFR@tDSupC6-mU<>lVt?^F(29-l63|b!5Y&bElS53}T znw=eOifDa)dU*&IAB~E~vHkbF_=qYhV8BbjErrA}zr`wzc4~xJ0{*ttqF_-9T;SgL z6a78t(Shm?hCqyZX(d z4B0O+Bn9$l82L=@RWp@<9)EU7PtgsxdZaQ&MS!io8*U8Vw`vX}x&`kvoP>87VnHw~ zG$@(1eonMc@1q?rjJ`?ty}~m|56)$^ZD`x+-?$6YxHEq#)A4aNyR8XNZjdq&!Mk$i zW@)p2#?)DF>rHO$(HER*AY;X3qJH_{h6m<72SW15-{k!F= z=pI!w^t80#?-Pyr^!A?>2Oae=;e$$P=zbf=VG7Kr5?yK13L?2$E7NfJ-}c~UT3sZQ2!`nh%Rzz~!{*6C@zqD2W~cS)2eq^|pdR;w8-uJKnHP->u~UyS%N9b(3i^1fq)M>KbJniq1C1REr5^o@ zVjFg66k8uoYsYd7BbuAUJ6}K@#HrOgZAI0^=~SUdlcX?d=f7Q(+j&eXAdmD9Kpmaou0H6QVK6pzMxl#La$zShHzRybIDX1}W#3wk@1VWZ- zJF-8H2EF0YQZTYnr6}`YX{>i8+w?v=mj=!){m7DQp}z4;^IMj1I#kG&k$SuYp2{v` z*^I`BFl=m3%rskGtqptqVMf3`-(?N z86RyXBmUf%l)d*+7=T7~0RN=xUw99RW1=#$sLHf%+=OHHPK>RCXH%|YK;p2e*vx4s zN$#}o_GZRN*8{4&V)fPj6vs~UT;H82K!*6`Whd3?=rtJAnbI&irX#KJU5ujjS@AYT z(ide<)TvKk^|)R>%#qQaj#nq$n-xu3qkj2SWKyeuGra|mIiV@}#X|dqS6;SoPP*13 z`0DYg1oF^i`%N1vjgd8I)h?F}>`)CRGVc!m~T%jTRZcRank@?1Qq}xv;gzK*@|njxmoj zy0*5WyU!S9oLtXST|!$mL!Y47p_S^M(ffMN+uB|pQZ0%gqLB7+L6Y4)I-X<-!A6#T zbEGo%o*#hFPMHhq?gC}P@ZK$}s&k5=Z{Qq~ z5Ph|xMf}p!qMj{-TfwZ!&FTIX1~goN5O1>Y@9Y@w#zlxURqsu@V+crKZl!c8R0A;DVjjx&)K^)^8)$MYUrrKaLg2KqR4%0 zcc$=<=^ug#r21wpmj%BD$<&)();b82UdFvtTgQX_j|E=_=C zFO5{!@F`RjOnV?WaS_8j9)Z?0dodwV>)~yb*81*1)V-XK6K_^52EIziNX_s+AV9lE z0hwAI#^A<04pJNCRh_sg%kFwf4mp*X{NM%zQ6)o*7qsHM3ze;yjHb0T(w_Y#FSXtP z(mV8U;AE22?pY<1CR;86;;?Tu2LubX+JC1_WbNm_tK~W)!fy<*S5?@yf`~5_c&K8~ z4fhG(U%XbMfvCXSPWVL{VmJANRYOh1_=2N#uTBRj%b&2W+JXwFLURL@A357RUO46F z=s$1}*UYn8!*OiJ*S?H6=EGQ8k**%d5gsKs!4ogTJSVv3ao?F$7>u;Szjt(0v2aIk z7A)nXFlqzf<#2aBZ-!XY4``aKYGrZPFhZIhUtTvEoG zFftVCQLZs2hDCc@`d-nY@1C$A{2N~;??kB5T*UF5Ve7Bc43clo_AcP+xS{uwVTVOS zO!azNTMoY3g-dsTj?*sDmc8lg2KWPpz{rA+^7a{3sr&)>XVqlF zE=;|`%nHku{cm%2kb94n(gCsSN&Q4c8k&v_!>T0j3 z*&9=SRK2aVHb%9bcQszkadu`Gpve!@)2f(3H-k2R^Ck5<7wBVc87apj=OAG41|X>( z$sB#D#m76r>{Shm6+HvsxSKa4blzkBqM^ z272n^W5RM6W|KDAyst)_%$#D=8DQhOt@t{aXKUkWw&W$%7(zHXMyP1M@QZX9fa3fg zT!1CA!tGUH`Qb^z(wcdDdcGQX)uc=JK3Pb_u|~n-57e#62E}C8*ISPp*$DeI*f7U= ziX3B2V&}yBCI=c&F9-^sk}+(ZY96aT)Qx6+QQ+}V@2#t?3>-^?#-(+{Z^(Cyv^NJZ zia5C|kV5YB(B!J*hc8K9tfoc7vFb64CfhATE*GcYsxkuHaWY_=|Llqmp(Mo0&lNnDHK=ZF%1SZA(AbgHQb$%T*B-0h-s zb%tLNB2(%Xq5n(*E7UN~SnYRi&_ckcW{fG65`Os^Bp&Iu_J)$YdfyzWd zbvJC;T!^y0y=LLQN}*LQ{|_>YZo<|5&`Q07v0^7X>%6=u6e6B+MQYux{xgKkP(VBN z*CgV!1CAU{v?Ry=x~VGr&ITuqG3Nmy&`-UADFP+iU}VL_pp` zOZg$A0s@=j%sF60Qp15ngy3OiH zeb+5!;7z3N3qN(;CpJTv{z2IG0=7)_*DyTw@=Df)sFoQIW8;xP`UCnZi=r2?P32#O zcVMt=2f~hslZ8k^&JBM7C1PEms(fM}mm=$7DCHg;@_#2(`HosR+CC1H2NPFg`F1nt z$H=S@qiJc?-1-U0L&ZF1`+!^9;hKoRK2{LdB4YGR_}e%KJExRmYgPLgMowgXYLW70Bf% z+D!>uyH#jPxJs~SSNCYLzn!0qN}?IUwc_X!jMQkKmBirf_r+-YdLk~@X?`VIWW~z> z8fvFeYXb|Md@UwAF81Q@*h^!Tk?tI++?(l+;8OMf%iN zptdPpBAb(HRS0*wm#h5)i-;ftQ*Y{1xWr}C#t3>ny2G|sZoGF0jzZRt8pY>=#w4TA zCNlFlIO{j#_8w6CiThaye>oSTgDWWx*ByFF`x|jZ3i{lmapB9MjjVy%zt^T4zUUdUqBAa11LWS>e`E*ob& zz#?%E+Ilhvzo)frH{m0WFgr3!+Sc|=x-QcOzpC?^EocJS zjHw_wH!?2^0&A(Mk?sD1i<`4ifZ@XeJhjYNec5*PRg$4>_3)$^vJHh>b}m^Jzjlh1 z89_%$DQDyphpDo^#zpmyXm*qF2{f^Utw;SZ`30&NL+LYh8(mlTTeJm9G=Ymw1i)+@p8wN$_Z^@F9@G_R5}apDmiO_ z*M=HwJHlbgPQflLCN%bch6!evti7h#B9z_AliX%1vjO-3mIiawNk7=^aeBpIu&DZP zuv}iXO-z3&w6$KS;#%kV%31M|cRTMSc@e(-67>J1n%j z#xEaboI`VL@|w$#{>fkVVI2LtD|Ksu8CKEf+N$hbx0A%}ycty{Xtts8mc~%o&O8L{ z-bMfM!T+G(jmGZH_MMbmuH$MW?Cz_qUweU=yl)wGtgvc#EzvQx9Zy^ln6(*hq2|vt z>UMT{ct2vDh54{=p81&3vwM#kMx(5JpBa0i5^|^xWkNGzL*gJ)p(U>N2i1ZQV&s_c ztw3Uw@3@ND78;oBdNW|X`F!~w-y~40OPn<*z3%RlEni(ohf03jKqsyv58JH2D^_|^ zcEizzv&)CY53$_MO)12bwK*ezU#yoc)OENGT6yErjYAznoGVkji?~0%x{nZN6rVX+ z=Blnm_fkd6hCw+oW}`H%b8ddv0u8o)(-XL-EHd|zpR7Gt1^BBHuS2L8Jkk1G|@mG#)ir3aH&m|GG zQcK=yY_mSbn5pK~qolsulOXH-c@W1|uX?3FyF$*102OB<&1^jUkcJ;~ zy@KXkN8l{B+^l?^8x%1>Gj{}GiyPKn-Z#qqvx44fOB!(eL!ZTd8BO9A`1Nfp zMOI*^cIffa8Lai(+p<8NNnO5X8LfuZ^eam)N^G8{1zVkx7(%S!#ze#_Kf9GeS*R9M zq_iC;5csC4MfJCj@>imU#V6uwRxgT2++3XyGCJA|c5a~(My7-pW3Picsz zB;0MXe}WKOq)?S;8sr~{8R(3Sy^-ydcDjtea1cwJIOYI6MlcvU7mPD~_b+r}mp68u zGB>ii3G<_q6TZp0nnBndkf`8qLb?D=73S4QMiE>6yGHN1R6E3O>@wL|(EEtdJR1PTjHO$;i%Y-3nh(9YD*N{P5Gl!7xwLeM|-HooUvqIHPGV~aRtt(%94ElfaU1^dBKUpBr_=*sKAh)-}6x#R?ec*f z(60nux_$)K6*_flX+!$RMM;CZSgaM)&*xWCh8h^2pGA(oIzV$?Qjll79qI zsk|+V>&fJPlu%Da+W0zb`dl2+EwiM~4El?)`%Zpmj`f_p^}Y5UlgXGQ--)*o|-Z->HhMtW#M(vQC{VGqnVY#6<`7(9EF961{tV<9i!R z1r5WwKlF2M&30MT+n)r%F$R)0;gIHh^&_&*V$iQ*Ev!~3A|0<+;A%;p3R$NADN?dL zAXRyJa*mM_8Y^hzLE40V<)U4M$a9R*6r}K$eW7Sn|1ePDW>LH;S|`n`o3RdTc1;T> zpfvyBCRQ=xdhr*LWVg`UO<}sixecdpNOvx~OeG4#8d-s9M8&MQvk{%*yzQGh`^AUy zQWe}IrH$thtrjYc;HZ8?AqZ>1$YQhVwd<64{klM(I*QfIXn8eY=g@n;uxx+$b?$^M zl285AcUa$D=dB1WDP0wQWQ`Q>?7IVL!WMf8z3c_YEd|X20Q!p87}a?1i!=I70)AKS+{|AP3>+EQ3g^APB$KTKHRfy~8f<+?b&d=qzM`XRj^3kY_% zVieK%h{5aMBzi;0@_Lhhh}Bn9{#5Pd`)XU8ZPK-h^U@^z>2T~eo)i>=9f>|TpU}}y z*veEi3d6=8V{7n;YehX6}&Q2xutl75(`{ZwZ zl&Bg84iWQxL}jX1CyLiK*HSNq?|i^M?9dlMJuVlc;0)|XS_8MgMHj(a(PrL!XjGD* zh1Zc!b$+#8@(3eN8&usf34Z2?p0H_0_&$MJylm{Zzd0&_zzsx{{czsekPjXJJ=get z0bG-zHD>o#Ztv3-!L<3W0+}I{5P^$PB{3vmVk#JRH(*Eji@2M$>{Y%)KL?5h;b_^7 z3qlHd{Z@UDC*8ZeZgsh@GQxlMHyUnawL)wDRYFX{O?H5~Rs}WLeQPrq^KN*X`MgEFpsP)r^r)ky^k0{Ti|s%_yGl73WPA`a0(l!0=2iKx=AIWiySwxo}ze=F8 zAa2WwUWe*Xqsoa!q_WVF1x3F%8;yw6D=U05(S#|hU*MT){w_cJZP;esi|39AU5-4O z1?>N9MGJ-9pFC5rUOGeJsiTB#wDTmOs6i;%@SO=`>D1Eim7Ofg|C=xc93E+wWSg`Z zyQT8G7cVFDa%T^(H&XPqzF)?&5#4T}u)E8Slt zU__HwqNN}4`YG)c@DSHC@p4!4{lf)uolCin;?rMg+P~twiLCJhHD|F)$LWc@8$73t z!YG01l}@UHr~JG9JU*&+beWZI=aUnDJNp)Si)42YYy<&EomN~!T8MC$-58P}znr-t z`)9lnM!N51L0%T(UL3sE-u`^m@05~aMXftJJye(01o$SulXpVj zlvE}rt%}lW&oe*Jv&KRIZ?D1rMFkC&2eTa6JbI{JKq69s(R8j6KX0Am;1F%vWkxUs z#$dCnT@&hwNlVBUI?YONYz55Od2v_iKSytBI#AiKD)llR<&M2VcI9|g9ZX}Q)Fcg% zAk&V^c&0aRy`0}7;WHF$K@TuR-S8k=orx>4YleEM2Vc@@^hU)&(D3a|Ut%9M(&(wX zze1HW78E;)DAQd!C?@dt@U*l8LwSQh!$c&>furqI>bIn6@1}CL<$`vot6q;q{z9EM zlfmMthErViB}%{{=n5FIV7m+)9zY}S5TfI6;eSsz(HgkQ&_B*KDv2GUMv~f`SZ3Om z@UueD5#-P^)Q_IW(KZe;dGY&o*Sjq1jxUWg??PNgr)G}^2J9?oO}eN}xVu6N?lGhaUn=?Z_9Y{Jnrz!@ zU!9M7bq634B;d~GrCWWWgzUIOOUxZYyZlV;?`86!V27Ui^Fm)|9XVbG?_j$=0s(^g=RDp33Fi4)(o7dZyZm zAEs;LVq81cq{wO%GL31_;b9_O{9uI_Zc#W&SX4~vwwK{p7Wzl$=@uNpscK6gEfe?W zC;$Bt*kjXU(^1(k@Q$rf|J2{IhvzLye{*jL^e}k|nl77OIxA*He6UnF(uo(wOV9?l z74RN3*AQlLu24AnmR#<38ItyWNzho=S)W;n<}zV-=&YG8IJ5WSnAx&Xh~h(+CT)?OSn5=f;TC*y884q!`MdDk>LqAVRSYlBQumDBr%hNDLm zGR`uLjCxj8Or+k<^=Id-c8JDzFnBoQq#sMJ<-jBJV^yzXAC)?jD}!KzI21*b2Ih2} zenl3skYHT8rc05@LPm`%eC|oOOzb|xWK{QvEb4`b6hf5STA0I3K-;=kOB4%Ud$+ec z4xtXq!@J@@whN`araE|%Q-M>Vg$)83){}~Mp&l_&Z5EOew7NGExZ-GO24kZGDlusEuKesVhXow%2G? z)?u4Wq7gx~$Fr06gd@gLq%tYo&zEc8V6=x1c!m_ntz>il50yFv@3C3e!>SN?CLESL zq=!&AKZjgeoaY0-#Hq%~-K?%$a{s6fhU<|j7fYiG5HL0^>-gzRH5@F9g#C8PwHWxc zA2k6>L8>O1+9Sj$gMJQ8j>pnvCc0Y!VPb}zk4WWeB}B4aMq`CJJTU>Ep=QWv(Opzk za;BkHM8Tzwn^WK0a@vv!5*GqsX=k%arq>=nSK8ITLZdK_A@p~gF=M5Y_{XHodrNS> ziEdJT;}+0Urr4tJ?K#hCC6lzNPRN; z7y7rQT`bROJf`DMI4X67hOZ*|5M9$(zl(hZ=)!k<`V-P8msvKz5IlXHdqYgnY*WW4 zEPL2F(f7JsA2qWh{mOj41l1BtpZ*U3I>-MFKhDNMxH{J2FV(i#k@4N$^x8Fy&M!22?ZKgR^*IcH1 zI5|3+i@?Pe7h!+TOw9~Zg3QIMYZ#yW*#oa{a3obxk@+cHiee)PEv*701(FXA2F5NV z58{pl0*#N)%gXUDsq{Fy?LUefbS`7@8yu|pO7_>v(~9BG)}oxEtddf%=Q`OS1kto2!QPPSccAjzaZfUfoXi1bZ===;cS5hJtZf&%F!hOFbj>N z!hrFDED>JG5q=G^wzRe0e|-P_N+W@0`Md~95Bg(pvi60j)g`6l^YdrrN_?NXPWr}Y z%PFxg^uvvZ5*TQW-qrUu^4rDeE;b|&&zjfQO-jJ%%ZZ3O3nq&i5Od_4v0u=P2HHh3 zza@x52>kD=j0*E@-@iUJm5Ts}*?`|47Fi&d-)VFiBK<2wAmE&-0X5K@eKP^1L*FJ5 zQM}&1Cgvx9z?fYc??Kmq9iE^cx*t2=ak?%V%s59NucQB=&#AsG5`+NA@xZ?e-9;US z`Etr8juCOHJE}B%hfBc`SciPP4}+d39iXXhd6;va&&j7hlBhQcQ(PBzWj*grMXxJ= z4YK0}cM9@g2@_d!D?BI)X<47CbUYQO-fN;;yF!)8Svu4rczhkAB{p_H=%pA%(=pta zEAMnj;mCyY4;Ht#;1jv!tk2#4;(WAUneBVsmBYyJY7VJ7>SQ~s@Qfk00rtdH=P zm$S4@(K)zj#xKn4v(7>6uQP0LNhSAo9AqrY>S+&}wUUrlJUa>iq3eRVC<3m&OZcawWEJTEX!ymt52^-+e?qvIuL;T`I%yqSjj}xCp?tP^a%RP)S zD>;6&S*vU>HC=`g67FgjL!HJ3NdbrJW6!$oS&N!+@F}CNH~_ofhdDjo@uO9-SD&>x zGyNc6lyAwFa01`KB7W6Ww>Rlar^j1j#P3bLnj}V|BYJeQ)YfElG`(v1oa3@A;sHc2 zc(*iC=zXBDcrlt!Pvc-2Yx83#f7WDN%DHSy$en3m)eJ9))w-|z!Pnk$L%U_VpGRT( z`rg9OFd#nid=D+BU1G-UX(Y6iEJdOy)Yn4jrJ4iV_E(7Hx|SyhK69>)M3LkeK`YKCStHA=LDkKIzzH0ko(t;?9kElz)k$r413tcN9Ai|Dpl1x&S**GAL}$v zK!&i&oZ`RN$4AR`!zXQZD`+t92Afij1=RMaM8dXEFaJ(VWrTfUj#`mS6AsR$(@UCV zau|$L^3i7F!;wEY9vUNaK>;OGH+9y=bIvX6NsmC>dMr_+?-)N?mJI4x`z%M`rZ z=Ja&fGpOX5;&Men1`<2RyV#i3aW>|6Bs8nan@`>MzvMlN@1V$Z1} zaLVS@>zPhzGP~^WcZ`mM$m-o@4zpjp&*E>loSunaIw*0f#9O0{6A^zGYt>bE#81Nu zv+-s~De5wVN*pN>i9DQR&Wl+Z=^J>ti0P=e2G_!QGW)(V2ZW-4AOmYx^MlQJ@|ybKd{JAtE83~fZ*hpSy$Qsj^0 zg$*7M!OjV|J)!;_(Lh~|b@QCW<^Z31^^RkqTKa?dFjnue+Oj;<4n(GM={?%$hPTv^ zK(Ciw9&p1jEs|~DW&I2sWBm3^(|IU|DV3fzpW&k@p2BH)SS6d`Z}S3K>dVe&&lqFk^ zU7gbHZW>Bf8{^4F{J??J4#vL7XDnnl|1zKMU|+6@dMXd&RIlH2eIPw^(v#;l$;1(* zgGB}$y3SsVlySI3;lD=%b!oHbO4FEdE|Plt+SCHD~ox$u-+ z1G?mPYBUGvz6{O$qV%BH6X;z(w$mS-D!rymtYD%BFx zVn&r+&S*IpB_kX(K(TpYS9V03lv2wp@guYXq0$I`ZyNCq^!}6;z^>TG#(Yt*!!=k| zxpYJGa@H1lldY*AO>G;QOX7^7BE;;*i-XYQqDO0<#HrC`DaP}7=cx6bf65E8_py5K zkP()pzCK6l462_E2-XbJ9Bhaf3s=+=KxDG!9sFqSs*Vk6(wyIhPxK3Wthd9~@Q)8G zR_r;rd37((42InM8=P?C`=`fS#!Py>K1b**Af0WszgeSXZeK&G>3C6c{_}lcG=1vX zVVTL`Wnj=P3(S?svU{8!w3Xa1#69TQE67&+{g-H444Rx$yJ_+h`^eXb66X_%aDy49 zWQ(>`dzFTMs~~jlP2(u;{2B5$^MTFR&|vrshYJEQF?p z`v-jhZJ4xrc^GivjC30(E5#!Jk`l-qb@GjLUjUa;5;pG3)RC{8F`z@K5(KEiFuSCV zlxm-%oUYcH5hP3rtG>86U`26g8=o)g!@_kd~$Ia?q*+? z7Ah5z_(V1Ba&-SfNv4I#nlv%t9zyI*#JrHWM#x zdxtqr{LP4d5k>P?!J$t3;(Ame{q z5L5c4YW02?1og^DL5;rG`W@f?WKZUTs$2I>7-4mU`?*U+Je+D9TYn--$;tG&SJY+7 zJ76G09aJ1U0GqErO2ZWj%YS&VY2!ft{d4ao1xV?!e8H8;_yH>||2sUuTdb{Se7l@Q z%fAUI%Y?CA&byUKa8y1peTaO zY(04m5wS-awPIwb-ZUCs;F6whv9giS&?K4(2a=cTA0L-xEYWG?rKgd>WSa4t(R8;B zD&7Gr(yi(K@?2DRjO0JU&pCL8*=O?}1S>VqEeU_f0(e$%K9+6K&8?_DU7w-g%pgn; zyO|;{Qki8q zNg6#ghwvJTH&+bhv2qJW{v9%? z6A?#>Y)9y8#yp@6p(a#Y^Q}qaMX*}j664(Hkb{B+OVYZBOi|p)YHyTv&$?LCMtL%$ zGx~hT9lP`spQ-e^ew8^XDF@*#Ei{+P1v$*V#}ZAfmzXuUj@KZH^`n}JVq1jF?LE8% ziZz8>pc=?>%518|X>@3Kt6an8*({xZXl7SDRF8PxEi3xQ)9IS;xrr=D&TeKb47iKW z2iKlw$8n9p3d(s%2fUI5Mv~#rSqc2UDDv3EI&nH;IXX(w6xYBE=+sF#RxNH}NIxnX zm}c4XVumqPr({G>MSiA@jKNJwTg+d|O(79dWmT3|SIXZsPw(tDueExDPA;u=P%|yMAsOZVAfz7@xn{z3 zCDH5^0Z)1W7v3DT**XGhJ5y!B)n)wDBU%$Z_40V${wXh z;V?vb*9WQ2f3V5-V@K}J74*~5e(&>Y!512P#=DsqH}VWP9!-KP{eNFj#RBdQGX zdgwW39e#BihTXr;Q7$n#T87jIIMIbFFY+pmIDH8e>Hgqo z_wl6M>%D(QFzvF1W{Z04d502MzzsI&f@0w}=GFWAYs4OWEoig{j(e&$-M|Xu=)1hX zB+zTK_5G5nRS*aL*=zRK8}1r*jMyy!B&e|SWL1k@^gb=+PNBn$F2B{-ReJTdoj>z( zy7qOZ{^2%*zi&VEJ3jZQgewL=c*C@`Q$|?byi&1=85D|Huk0ETLAPc?X&VCOFMpcU zoTaV5AP{Ua4YwxUO<{2KUGdyN7dhicAvzar*Bb@(!cG z1%4Y{{V0*1DEucQf%m1^Gs@!b?6FoFe77@1auRB9xs_x$1Y&0AX8E~Kj@AqWF3H}i z=Zw#e`_7;v8_tryraVrTz?~yUfu?gv3|qm#y0spwGy<eM%qcwTT$8CJ_q@=!1>Ao!6{^p8j_D#d&CERMgDf2}S zWnZ^)Yv)dKOc7lkfb$uX(U_CO>xQ@}mk@BrwV84S%9;C-7W7Cr>RWb11oE<@fB)Ex zVuVPE;$kmzKONBJHhvYIPzrBe=yJUyDHw=uR_0v**&g~Wj`y~N4_;<6ca&DZd~e9a z^@#up%qV}Jb~-8S05(F=eaCkyhnVN&L9E}6_@Dci-i8+ar6i>q9;hVK_^pvc*)>0% zAB|)&$aavu%0?rK3Rl!c38h|mjkpqx$a!B`8_Hluivw+c=qIGydN@eA&7y?0dH>t5 zc!U1x!7eRp*Hv14D$FF89ZIL~;N#$t;L0}?X;e;u84fplj~((7 zTeQELtMG%${aP^O_BjdMQ?n1d;bwy3DcBrFI#=h)zO+z9gETmUST^*LN}#vqNQd6GcB}u8d9bptdE`?eM7+%dT+`L=F17*IAJ_b;Xhi7uR%Ci4Ov4 z9@Ip&Te2!}exs7Er`Cx~_Ml@2DcQS8Z^>VXgxJh12#`d#)wxUU_(R~**FErUBbN7w zz&F@kiM~L!rNCnkzdiM4X+E_UKG9kFJywXfkxNb!hWT*B#EV2Y#Gzm5E=q)^z0cZS ztSEPnsbucylei_cQ_R*7=^BeZaZ0KF3$cH5J;xk(HFkZ&U>G5yav93|{T#tDY)x=% z@#r5CLDXZK)8wO`dd)tNUkiHM{ndzGNB`4cC1xTHgcOwt88I&Ww z?^5n3L47?co%6x1y%jl5X)bW2xLe359P1qd*DKCbUZJ9S(lnuOHXP$i^m`y;qow7v-q#P;TN)bEcK7HMJr<;ul*@>e&EqF#49CBU3PhTnNnos zln!)0uAXgYqI{Lj2D%yb2ec;drc+$>iOB|lp@?XMInAlQgWk^WXO$xo+b1S8j1}2h z3lcY+Z-)3)^^K%W;*!>5ykk@aRb{5dI05|Gbo9)oippnLrt0H+ZAd7Pht4(Gdt5p(uxhmsW}NjrGqstF67H;K*x($BoY;W)9Y3w+ zs>N4v#vKqY-0?-!SDlbNmhOU1(CN?a@0VWGrvqCOp%7EP#B@=9zr=GP==@l(J8rea zDVfF!iBSrhd++%nq3Hf-i5hpO&w`)BUOphEzQN)TPzk-hy{h7lqX<1+!$t@qErj}B zM71v+e!peiHO6|r-~Y`s$vOL3T`fApIBx6^oj1=dFBy?1%wmO9b>?~5mj@$0C?`*y4%N|@WJ)P~!S##(EX0V938pbW|KVUUHD zMaIGTwy{-hzk{@T;+U69OTc|k#V|I&Jc%TW%H;cVqSyUvXOLxva|?lKDz*?q&j=4f zULI@tjKB_K z`y4K@fyU)yOAWav&MHGtq*YB>&Gv~fr-Gbg)75Na7jx}2p--YXVD|bd4xhKK!C-}h z1u#{EWV&R(cx=XyT$hU)Q948s;qJmOl?2xs9TIOdyp}y9;Tb@(sZ@r>v}JfkzirnV1G*70t{1`dXGw- zz0!(`u*czK!NMQl18k4mC#9#U9POKxm$>Ql;0%sue&FOzi4?dpFNCHZA4K6r8Kg|8 zPy=s$Fx-~@9%`P3PS%SBB@u~B5&Ipm}kBj4T_fyNlc>=>F~X~@Us zK@gDC@;)|x>~wWZU{rW8NtbLb+vD0DKK@P$4~KZyh97yg;#H_;J^U-rBU$WKVUx@$ zT~{~w=PfoH`m01@K+drQijxDzZ1g&B)6!rreSxEhPukA3nK#iY(Zj?U5RaDB__sjF z$X;8TBQXqT{U(7D9CV-HVY|vXaXzVm7*{h zO43tKgk_<>2&45`hor)wQ;J@tLral=zn*S7Y4_9b_2RPsOA zeINr(w|BpQ5yZP*^sEUWX{DBHZ*qP>>~iAc+Kn)oCm59{m)8CS%ARr?aaaih!6_1O zy0o`m>30aZAz}1x=t#i6s1I}{!BUuA_ zcZ5mib@vp8R&>^>uSix)?S@N|DPpb=EuL!cVpp4AA60=M$$o1^vE8dctN4%20GDe% z1c~ghHD5w<*60pL-E{Vv&fFI-Q4(>{%?#T7t3bAYuyW=f7jW_-t3w6C&y=DXljH^s z?H|8DVRHgq&vo-T=aW#nWn`-pxO``k&jIq%pkJQRn{v@bJ!p{G5LqvTG4eY?<3dr$ zefe9x$}N_{pUm>Q0WNFn4_}_dfD$_^n4oGQWIJDJizKD${o_+QmeiN_wo1u`YOhnP zy2tI_zXCu<)64+g3M=bioEOfoQ{ndl)r9?Qy}eb}6%aQB|45t~cjKYf&Hh1QFR=3? zwH^e-Ip3(Ff`gaBY5e&Pse6|}SAQPgqEk1$#R(v#X;-8d?yQk(!bLk;!kEAtY|s>x zw56muD`i%&FHy=|F=PI~LRzD59jiKnPP1A=45n$=`DuuX4W3xCjpg&u;}E6B*btG; zXed?F3elp(r&u$v^Jd0<0T&PL1AsA$d+4JP%BL|{f-@RCRc7Ge5qKzgYfOjYk8R zGMTqhW;^%+UGTI;7O2EC^FifKV@#UGO$~Ic?yu4p(}|Iv3J2-C4LOA#m@Zs#R@^NI zTAZk5{v@Gdq%638l!+dB@n~zbQ6VvRE)u=VsgW~AVjNyJU$rB<<8$mf9)?hncZ>Eo zrCt0?W7fKtH7@IjpcBgy;Y6GK{Hqz**(QAl?-0 zORDaC!6q^Ml_rv8clay#_ zP7vV>6+SB_7X#kJjr_KQY7PW7A3^s!y*4{qPgb5eST24DOac&myPPnbg?D+*DnG$T zx+pGoZ0m=r6tqVM@Oa-A(rwV`=X^ata=_Rg+5@Yn+PFw1pO?u23u)CngB#&Yr7H~o z5(i--44LP^_Bl0(n>?Mo2p7^>GAtd@+z?@hPuRugwo${1?-3BNGTZ1zo*ReMoj|o^ za#)f!v3lR5Qa+n4Gk;5PnU|@^IrhrZUBx?kuz7wRt{ys|QdqKWo;uxQUV~J|J=Bg@ zvy1Deg0i7|DYf7$m%cPB8~(MQmv_e+F1CsbKWa+J()rvVi=U}pIXEkC4tnGIv* z>b_;gOV?jJx3V}O-=7PdrrwIdD(ys^X4NXOikq*c3BG0;CZTQ$Q2#Vv76O)2uR|y9X zGpYN9wH){Nhp9!<53|a}CIG716MT#LjQa^-*r)#;FT^wR#PPaP+)fZih6n`gCt)rRJMsh`K-+{temNa@NsLW|`Ap z>`8Z6L0k6f2whJgqFHF2Ndr-J9et@%47sg3-vkuYxSH((#_#Y-2y7kBmT8%qE;}0T z=o3jN?_cJyROBk&xl?8mbgPqGaWhEXo`|QJjno33b%`bR$SU6ZUWwbm>Xuz{x$7 zBG9+$C&c+9fu_op%>l07qR=fIv@2I;na|>xU9OZ|y09N6U71sB&8>aMiTP6)>dt&s z*FI6AysB#EM#7!Q9(LY~T<0lWuN2Zao)U@yX6(iUwz z-;(OnZ}-qh6*=$S9Jy2+XL!_RCrhFxgm6}LtB-Y#_rR{@88uKzBa;jZH=WuZ_C)iE zFYXTLdX)MK#z;i;(bw^}0{z-NcI+}S#SqYoj{p!Oz#K>>NzZ z06<1IT^bsu|58Y;YUbif5BRU<|I|+pP<1eM`1dORU6+-Ey_k`!8Gu@hhn1O?nVE}) zo0Wxwla-B*nU#u}nd+aMoWs8o_5c4Os?J7^j%KC+CJ7^37c)2}MOAT4MhQ1tTVo@8 z`~TSCzn1bp>_HDuH*<`V4gV`UvieSYHvNY+ZAn@@6EFpqb97*XuW$DSX;xPup z&9|}QrI@zhzKek{1Ch*%M15FQK{xc>BGX>{o?qE_@FM|LyDQPejWRLiyUYUB8>l{a zU0 zEbrq*&v@Bimb&y?ktVhU2_mNj@!c_d9i}I;)cvr9i}Rcq30gGa7nqi18Ew?&YvLals38eX5mV8>l<-({Uf0bGG70e)%bM4c-C?29p4eUQ^T9@DD$8=* zT>llG5${LeP6HdWbUZ8i#f~FRAkM6pOyB-noD`gGP;nv~nvj5E#;6&@UY!>7kAzoR z$mJ=3p8_o8Xn(>4bTSYNGDp`gB7xG&bqxmA7E8Y-LX%-JgSc95jJWt{1!6<9uo~rG zny1FO|9xV3>cG`sRXO5Aw<^j}nKL zc|7uw!^5h+?L0jN6HZ9qJebY!W3np?sm;}j>va2t9}>IcO_GgQzI21Oa)}Ww>Ubq6 z?)>9LF(fG$qHzD;vTYl;^3|e4w&uxf3ZMR8>%!|ZG3<}>p6pGuBa0oI#Es<-@BEArizt1AMQ6UEAYg3__ak{Um$d^7yx z%^))556f4-%QXm6r%kmOHoP-@MsE)B&!fxU0|@fIy;-&bm|{>-rbkrRe^)oUC7jpO-Ybj%q*r?lBkPpBG;vh{k4`wD#VXv!Y16nt&0k+qkR#_uHbZDN)#SK9wt= zU<&%f&%y+u8L4(u=3JWh<7b-hjLpvNlUA&;=78#m|NW<+=1r`;eC#vm#k37g;U^zG zE)14mC62Dmd^u4y3YX#hgN~8aeAr`R+-q@f2kt!`$VY#j*RaJ0l+cCOIJU16W7S)v z;(eEZJw+en^WE_L`kB#4O|U0io8JD&ZITo5or(Vfi2|HZ4vIzhmEN?&EC07D;|1c{ zn~vd4&Q(qy&t!cMoI?LoXeCZt5hqdo;cT$qSlCa$T+Xf^kKMm_cJAI0@AjpM%hN`V2oB^=mBTg}%^1wkt;B3j^j?U0$h5 z3QtU|kvrfjUR5X#xQeZdg|JVrC_^^?>dYnV&XfJzjcaWK&uIT_lT$IC0No|#GJ9kH zoLX*d2=K%?s=GF7vDKa+03*I#bmB@_Kz!*suk5iuO-WGI7%zApvY0%SxwGAcjXO|^ zJ1mH*z*g`LAN~62**be(W`I*{6=N&w+KB=BeK8tw(1qx zeedy5Rh4ai*@t^Pv8#??o)@o(FSrcHPC=}D-F3Z)9VN7L@;kgJ${P{DP*knK>Mjw7sdBCqRc8z{aWv$E0HAZT9a8 zj!6Tc!vbIhu>9*&c5rY7umb;$SN@NNK62sbyo0O0>F@^2PdGkXhHO8}7df4CyR{{v@?ImwF_sE8sqe+>;r z3icHqQEjZ!PF$%Qj0%*fMY@5ou%{xkQuy)LW1sTS(5(TTRo6*47Kz{9QYl|7q= Date: Sun, 19 Jul 2020 00:28:04 +0200 Subject: [PATCH 3/4] Use keywords in pdfs to search for existing tags During processing, keywords stored in PDF metadata are used to look them up in the tag database and associate any existing tags to the item. See #175 --- .../scala/docspell/backend/ops/OItem.scala | 25 ++++++++++ .../docspell/extract/ExtractResult.scala | 20 ++++---- .../scala/docspell/extract/Extraction.scala | 5 +- .../scala/docspell/extract/PdfExtract.scala | 34 +++++++------ .../extract/pdfbox/PdfboxExtract.scala | 6 +-- .../extract/pdfbox/PdfboxExtractTest.scala | 2 +- .../docspell/joex/process/CreateItem.scala | 13 ++++- .../docspell/joex/process/ItemData.scala | 3 +- .../docspell/joex/process/SetGivenData.scala | 50 +++++++++++++------ .../joex/process/TextExtraction.scala | 41 ++++++++------- .../scala/docspell/store/impl/Column.scala | 3 ++ .../scala/docspell/store/records/RItem.scala | 3 ++ .../scala/docspell/store/records/RTag.scala | 18 +++++++ .../docspell/store/records/RTagItem.scala | 25 ++++++++++ 14 files changed, 184 insertions(+), 64 deletions(-) diff --git a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala index d17b453b..133991ae 100644 --- a/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala +++ b/modules/backend/src/main/scala/docspell/backend/ops/OItem.scala @@ -6,6 +6,7 @@ import cats.implicits._ import docspell.common._ import docspell.ftsclient.FtsClient +import docspell.store.UpdateResult import docspell.store.queries.{QAttachment, QItem} import docspell.store.records._ import docspell.store.{AddResult, Store} @@ -22,6 +23,9 @@ trait OItem[F[_]] { /** Create a new tag and add it to the item. */ def addNewTag(item: Ident, tag: RTag): F[AddResult] + /** Apply all tags to the given item. Tags must exist, but can be IDs or names. */ + def linkTags(item: Ident, tags: List[String], collective: Ident): F[UpdateResult] + def setDirection(item: Ident, direction: Direction, collective: Ident): F[AddResult] def setFolder(item: Ident, folder: Option[Ident], collective: Ident): F[AddResult] @@ -90,6 +94,27 @@ object OItem { .attempt .map(AddResult.fromUpdate) + def linkTags( + item: Ident, + tags: List[String], + collective: Ident + ): F[UpdateResult] = + tags.distinct match { + case Nil => UpdateResult.success.pure[F] + case kws => + val db = + (for { + _ <- OptionT(RItem.checkByIdAndCollective(item, collective)) + given <- OptionT.liftF(RTag.findAllByNameOrId(kws, collective)) + exist <- OptionT.liftF(RTagItem.findAllIn(item, given.map(_.tagId))) + _ <- OptionT.liftF( + RTagItem.setAllTags(item, given.map(_.tagId).diff(exist.map(_.tagId))) + ) + } yield UpdateResult.success).getOrElse(UpdateResult.notFound) + + store.transact(db) + } + def setTags(item: Ident, tagIds: List[Ident], collective: Ident): F[AddResult] = { val db = for { cid <- RItem.getCollective(item) diff --git a/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala b/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala index 3a0f3a1b..d48b63c8 100644 --- a/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala +++ b/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala @@ -1,39 +1,41 @@ package docspell.extract -import scala.util.Try - import docspell.common.MimeType +import docspell.extract.pdfbox.PdfMetaData sealed trait ExtractResult { def textOption: Option[String] + def pdfMeta: Option[PdfMetaData] } object ExtractResult { case class UnsupportedFormat(mime: MimeType) extends ExtractResult { val textOption = None + val pdfMeta = None } def unsupportedFormat(mt: MimeType): ExtractResult = UnsupportedFormat(mt) case class Failure(ex: Throwable) extends ExtractResult { val textOption = None + val pdfMeta = None } def failure(ex: Throwable): ExtractResult = Failure(ex) - case class Success(text: String) extends ExtractResult { + case class Success(text: String, pdfMeta: Option[PdfMetaData]) extends ExtractResult { val textOption = Some(text) } - def success(text: String): ExtractResult = - Success(text) - - def fromTry(r: Try[String]): ExtractResult = - r.fold(Failure.apply, Success.apply) + def success(text: String, pdfMeta: Option[PdfMetaData]): ExtractResult = + Success(text, pdfMeta) def fromEither(e: Either[Throwable, String]): ExtractResult = - e.fold(failure, success) + e.fold(failure, str => success(str, None)) + + def fromEitherResult(e: Either[Throwable, PdfExtract.Result]): ExtractResult = + e.fold(failure, r => success(r.txt.value, r.meta)) } diff --git a/modules/extract/src/main/scala/docspell/extract/Extraction.scala b/modules/extract/src/main/scala/docspell/extract/Extraction.scala index cc333b71..2507c119 100644 --- a/modules/extract/src/main/scala/docspell/extract/Extraction.scala +++ b/modules/extract/src/main/scala/docspell/extract/Extraction.scala @@ -40,8 +40,7 @@ object Extraction { case MimeType.PdfMatch(_) => PdfExtract .get(data, blocker, lang, cfg.pdf.minTextLen, cfg.ocr, logger) - .map(_.map(_.value)) - .map(ExtractResult.fromEither) + .map(ExtractResult.fromEitherResult) case PoiType(mt) => PoiExtract @@ -103,7 +102,7 @@ object Extraction { val cs = mt.charsetOrUtf8 logger.info(s"File detected as ${mt.asString}. Returning itself as text.") *> data.through(Binary.decode(cs)).foldMonoid.compile.last.map { txt => - ExtractResult.success(Text(txt).value) + ExtractResult.success(Text(txt).value, None) } case mt => diff --git a/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala b/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala index 839b0261..4189c510 100644 --- a/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala +++ b/modules/extract/src/main/scala/docspell/extract/PdfExtract.scala @@ -7,9 +7,15 @@ import fs2.Stream import docspell.common.{Language, Logger} import docspell.extract.internal.Text import docspell.extract.ocr.{OcrConfig, TextExtract} +import docspell.extract.pdfbox.PdfMetaData import docspell.extract.pdfbox.PdfboxExtract object PdfExtract { + final case class Result(txt: Text, meta: Option[PdfMetaData]) + object Result { + def apply(t: (Text, Option[PdfMetaData])): Result = + Result(t._1, t._2) + } def get[F[_]: Sync: ContextShift]( in: Stream[F, Byte], @@ -18,39 +24,39 @@ object PdfExtract { stripMinLen: Int, ocrCfg: OcrConfig, logger: Logger[F] - ): F[Either[Throwable, Text]] = { + ): F[Either[Throwable, Result]] = { val runOcr = TextExtract.extractOCR(in, blocker, logger, lang.iso3, ocrCfg).compile.lastOrError - def chooseResult(ocrStr: Text, strippedStr: Text) = - if (ocrStr.length > strippedStr.length) + def chooseResult(ocrStr: Text, strippedRes: (Text, Option[PdfMetaData])) = + if (ocrStr.length > strippedRes._1.length) logger.info( - s"Using OCR text, as it is longer (${ocrStr.length} > ${strippedStr.length})" - ) *> ocrStr.pure[F] + s"Using OCR text, as it is longer (${ocrStr.length} > ${strippedRes._1.length})" + ) *> Result(ocrStr, strippedRes._2).pure[F] else logger.info( - s"Using stripped text (not OCR), as it is longer (${strippedStr.length} > ${ocrStr.length})" - ) *> strippedStr.pure[F] + s"Using stripped text (not OCR), as it is longer (${strippedRes._1.length} > ${ocrStr.length})" + ) *> Result(strippedRes).pure[F] //maybe better: inspect the pdf and decide whether ocr or not for { pdfboxRes <- - logger.debug("Trying to strip text from pdf using pdfbox.") *> PdfboxExtract - .getText[F](in) + logger.debug("Trying to strip text from pdf using pdfbox.") *> + PdfboxExtract.getTextAndMetaData[F](in) res <- pdfboxRes.fold( ex => logger.info( s"Stripping text from PDF resulted in an error: ${ex.getMessage}. Trying with OCR. " - ) >> runOcr.attempt, - str => - if (str.length >= stripMinLen) str.pure[F].attempt + ) >> runOcr.map(txt => Result(txt, None)).attempt, + pair => + if (pair._1.length >= stripMinLen) Result(pair).pure[F].attempt else logger .info( - s"Stripped text from PDF is small (${str.length}). Trying with OCR." + s"Stripped text from PDF is small (${pair._1.length}). Trying with OCR." ) *> - runOcr.flatMap(ocrStr => chooseResult(ocrStr, str)).attempt + runOcr.flatMap(ocrStr => chooseResult(ocrStr, pair)).attempt ) } yield res } diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala index 233d7c31..def9c8ee 100644 --- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfboxExtract.scala @@ -9,17 +9,17 @@ import cats.effect.Sync import cats.implicits._ import fs2.Stream +import docspell.common.Timestamp import docspell.extract.internal.Text import org.apache.pdfbox.pdmodel.PDDocument import org.apache.pdfbox.text.PDFTextStripper -import docspell.common.Timestamp object PdfboxExtract { def getTextAndMetaData[F[_]: Sync]( data: Stream[F, Byte] - ): F[Either[Throwable, (Text, PdfMetaData)]] = + ): F[Either[Throwable, (Text, Option[PdfMetaData])]] = data.compile .to(Array) .map(bytes => @@ -27,7 +27,7 @@ object PdfboxExtract { for { txt <- readText(doc) md <- readMetaData(doc) - } yield (txt, md) + } yield (txt, Some(md).filter(_.nonEmpty)) }.toEither.flatten ) diff --git a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala index b72b182a..3659cf4b 100644 --- a/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala +++ b/modules/extract/src/test/scala/docspell/extract/pdfbox/PdfboxExtractTest.scala @@ -47,7 +47,7 @@ object PdfboxExtractTest extends SimpleTestSuite { val url = ExampleFiles.keywords_pdf.toJavaUrl.fold(sys.error, identity) val str = PdfboxExtract.getText(url.openStream()).fold(throw _, identity) assert(str.value.startsWith("Keywords in PDF")) - val md = PdfboxExtract.getMetaData(url.openStream()).fold(throw _, identity) + val md = PdfboxExtract.getMetaData(url.openStream()).fold(throw _, identity) assertEquals(md.author, Some("E.K.")) assertEquals(md.title, Some("Keywords in PDF")) assertEquals(md.subject, Some("This is a subject")) diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index bf48f49e..08de8d83 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -107,7 +107,8 @@ object CreateItem { Vector.empty, Vector.empty, fm.map(a => a.id -> a.fileId).toMap, - MetaProposalList.empty + MetaProposalList.empty, + Nil ) } @@ -148,7 +149,15 @@ object CreateItem { .map(originFileTuple) .toMap } yield cand.headOption.map(ri => - ItemData(ri, rms, Vector.empty, Vector.empty, origMap, MetaProposalList.empty) + ItemData( + ri, + rms, + Vector.empty, + Vector.empty, + origMap, + MetaProposalList.empty, + Nil + ) ) } diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala index 46ef9f8c..d4f83fc2 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemData.scala @@ -22,7 +22,8 @@ case class ItemData( metas: Vector[RAttachmentMeta], dateLabels: Vector[AttachmentDates], originFile: Map[Ident, Ident], // maps RAttachment.id -> FileMeta.id - givenMeta: MetaProposalList // given meta data not associated to a specific attachment + givenMeta: MetaProposalList, // given meta data not associated to a specific attachment + tags: List[String] // a list of tags (names or ids) attached to the item if they exist ) { def findMeta(attachId: Ident): Option[RAttachmentMeta] = diff --git a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala index ba51af23..b0c279e7 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/SetGivenData.scala @@ -17,19 +17,41 @@ object SetGivenData { .log[F, ProcessItemArgs](_.debug(s"Not setting data on existing item")) .map(_ => data) else - Task { ctx => - val itemId = data.item.id - val folderId = ctx.args.meta.folderId - val collective = ctx.args.meta.collective - for { - _ <- ctx.logger.info("Starting setting given data") - _ <- ctx.logger.debug(s"Set item folder: '${folderId.map(_.id)}'") - e <- ops.setFolder(itemId, folderId, collective).attempt - _ <- e.fold( - ex => ctx.logger.warn(s"Error setting folder: ${ex.getMessage}"), - _ => ().pure[F] - ) - } yield data - } + setFolder(data, ops).flatMap(d => setTags[F](d, ops)) + private def setFolder[F[_]: Sync]( + data: ItemData, + ops: OItem[F] + ): Task[F, ProcessItemArgs, ItemData] = + Task { ctx => + val itemId = data.item.id + val folderId = ctx.args.meta.folderId + val collective = ctx.args.meta.collective + for { + _ <- ctx.logger.info("Starting setting given data") + _ <- ctx.logger.debug(s"Set item folder: '${folderId.map(_.id)}'") + e <- ops.setFolder(itemId, folderId, collective).attempt + _ <- e.fold( + ex => ctx.logger.warn(s"Error setting folder: ${ex.getMessage}"), + _ => ().pure[F] + ) + } yield data + } + + private def setTags[F[_]: Sync]( + data: ItemData, + ops: OItem[F] + ): Task[F, ProcessItemArgs, ItemData] = + Task { ctx => + val itemId = data.item.id + val collective = ctx.args.meta.collective + for { + _ <- ctx.logger.info(s"Set tags from given data: ${data.tags}") + e <- ops.linkTags(itemId, data.tags, collective).attempt + _ <- e.fold( + ex => ctx.logger.warn(s"Error setting tags: ${ex.getMessage}"), + _ => ().pure[F] + ) + } yield data + } } diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index bc048467..9bc41683 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -32,7 +32,8 @@ object TextExtraction { ) ) _ <- ctx.logger.debug("Storing extracted texts") - _ <- txt.toList.traverse(rm => ctx.store.transact(RAttachmentMeta.upsert(rm._1))) + _ <- + txt.toList.traverse(res => ctx.store.transact(RAttachmentMeta.upsert(res.am))) idxItem = TextData.item( item.item.id, ctx.args.meta.collective, @@ -40,22 +41,26 @@ object TextExtraction { item.item.name.some, None ) - _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_._2)).toSeq: _*) + _ <- fts.indexData(ctx.logger, (idxItem +: txt.map(_.td)).toSeq: _*) dur <- start _ <- ctx.logger.info(s"Text extraction finished in ${dur.formatExact}") - } yield item.copy(metas = txt.map(_._1)) + } yield item.copy(metas = txt.map(_.am), tags = txt.flatMap(_.tags).distinct.toList) } + // -- helpers + + case class Result(am: RAttachmentMeta, td: TextData, tags: List[String] = Nil) + def extractTextIfEmpty[F[_]: Sync: ContextShift]( ctx: Context[F, ProcessItemArgs], cfg: ExtractConfig, lang: Language, collective: Ident, item: ItemData - )(ra: RAttachment): F[(RAttachmentMeta, TextData)] = { - def makeTextData(rm: RAttachmentMeta): (RAttachmentMeta, TextData) = - ( - rm, + )(ra: RAttachment): F[Result] = { + def makeTextData(pair: (RAttachmentMeta, List[String])): Result = + Result( + pair._1, TextData.attachment( item.item.id, ra.id, @@ -63,15 +68,16 @@ object TextExtraction { ctx.args.meta.folderId, lang, ra.name, - rm.content - ) + pair._1.content + ), + pair._2 ) val rm = item.findOrCreate(ra.id) rm.content match { case Some(_) => ctx.logger.info("TextExtraction skipped, since text is already available.") *> - makeTextData(rm).pure[F] + makeTextData((rm, Nil)).pure[F] case None => extractTextToMeta[F](ctx, cfg, lang, item)(ra) .map(makeTextData) @@ -83,21 +89,22 @@ object TextExtraction { cfg: ExtractConfig, lang: Language, item: ItemData - )(ra: RAttachment): F[RAttachmentMeta] = + )(ra: RAttachment): F[(RAttachmentMeta, List[String])] = for { _ <- ctx.logger.debug(s"Extracting text for attachment ${stripAttachmentName(ra)}") dst <- Duration.stopTime[F] fids <- filesToExtract(ctx)(item, ra) - txt <- extractTextFallback(ctx, cfg, ra, lang)(fids) + res <- extractTextFallback(ctx, cfg, ra, lang)(fids) meta = item.changeMeta( ra.id, - rm => rm.setContentIfEmpty(txt.map(_.trim).filter(_.nonEmpty)) + rm => rm.setContentIfEmpty(res.map(_.text.trim).filter(_.nonEmpty)) ) + tags = res.flatMap(_.pdfMeta).map(_.keywordList).getOrElse(Nil) est <- dst _ <- ctx.logger.info( s"Extracting text for attachment ${stripAttachmentName(ra)} finished in ${est.formatExact}" ) - } yield meta + } yield (meta, tags) def extractText[F[_]: Sync: ContextShift]( ctx: Context[F, _], @@ -123,7 +130,7 @@ object TextExtraction { cfg: ExtractConfig, ra: RAttachment, lang: Language - )(fileIds: List[Ident]): F[Option[String]] = + )(fileIds: List[Ident]): F[Option[ExtractResult.Success]] = fileIds match { case Nil => ctx.logger.error(s"Cannot extract text").map(_ => None) @@ -133,8 +140,8 @@ object TextExtraction { extractText[F](ctx, extr, lang)(id) .flatMap({ - case ExtractResult.Success(txt) => - txt.some.pure[F] + case res @ ExtractResult.Success(_, _) => + res.some.pure[F] case ExtractResult.UnsupportedFormat(mt) => ctx.logger diff --git a/modules/store/src/main/scala/docspell/store/impl/Column.scala b/modules/store/src/main/scala/docspell/store/impl/Column.scala index 67c1097e..134e0afb 100644 --- a/modules/store/src/main/scala/docspell/store/impl/Column.scala +++ b/modules/store/src/main/scala/docspell/store/impl/Column.scala @@ -53,6 +53,9 @@ case class Column(name: String, ns: String = "", alias: String = "") { def isIn[A: Put](values: NonEmptyList[A]): Fragment = isIn(values.map(a => sql"$a").toList) + def isLowerIn[A: Put](values: NonEmptyList[A]): Fragment = + fr"lower(" ++ f ++ fr") IN (" ++ commas(values.map(a => sql"$a").toList) ++ fr")" + def isIn(frag: Fragment): Fragment = f ++ fr"IN (" ++ frag ++ fr")" diff --git a/modules/store/src/main/scala/docspell/store/records/RItem.scala b/modules/store/src/main/scala/docspell/store/records/RItem.scala index 97b87d84..e961e8b2 100644 --- a/modules/store/src/main/scala/docspell/store/records/RItem.scala +++ b/modules/store/src/main/scala/docspell/store/records/RItem.scala @@ -314,6 +314,9 @@ object RItem { def findByIdAndCollective(itemId: Ident, coll: Ident): ConnectionIO[Option[RItem]] = selectSimple(all, table, and(id.is(itemId), cid.is(coll))).query[RItem].option + def checkByIdAndCollective(itemId: Ident, coll: Ident): ConnectionIO[Option[Ident]] = + selectSimple(Seq(id), table, and(id.is(itemId), cid.is(coll))).query[Ident].option + def removeFolder(folderId: Ident): ConnectionIO[Int] = { val empty: Option[Ident] = None updateRow(table, folder.is(folderId), folder.setTo(empty)).update.run diff --git a/modules/store/src/main/scala/docspell/store/records/RTag.scala b/modules/store/src/main/scala/docspell/store/records/RTag.scala index 27a04bf2..71b7b1f0 100644 --- a/modules/store/src/main/scala/docspell/store/records/RTag.scala +++ b/modules/store/src/main/scala/docspell/store/records/RTag.scala @@ -1,5 +1,8 @@ package docspell.store.records +import cats.data.NonEmptyList +import cats.implicits._ + import docspell.common._ import docspell.store.impl.Implicits._ import docspell.store.impl._ @@ -101,6 +104,21 @@ object RTag { ) ++ orderBy(name.prefix("t").asc)).query[RTag].to[Vector] } + def findAllByNameOrId( + nameOrIds: List[String], + coll: Ident + ): ConnectionIO[Vector[RTag]] = { + val idList = + NonEmptyList.fromList(nameOrIds.flatMap(s => Ident.fromString(s).toOption)).toSeq + val nameList = NonEmptyList.fromList(nameOrIds.map(_.toLowerCase)).toSeq + + val cond = idList.flatMap(ids => Seq(tid.isIn(ids))) ++ + nameList.flatMap(ns => Seq(name.isLowerIn(ns))) + + if (cond.isEmpty) Vector.empty.pure[ConnectionIO] + else selectSimple(all, table, and(cid.is(coll), or(cond))).query[RTag].to[Vector] + } + def delete(tagId: Ident, coll: Ident): ConnectionIO[Int] = deleteFrom(table, and(tid.is(tagId), cid.is(coll))).update.run } diff --git a/modules/store/src/main/scala/docspell/store/records/RTagItem.scala b/modules/store/src/main/scala/docspell/store/records/RTagItem.scala index 2782731d..35050225 100644 --- a/modules/store/src/main/scala/docspell/store/records/RTagItem.scala +++ b/modules/store/src/main/scala/docspell/store/records/RTagItem.scala @@ -1,5 +1,6 @@ package docspell.store.records +import cats.data.NonEmptyList import cats.implicits._ import docspell.common._ @@ -43,4 +44,28 @@ object RTagItem { def findByItem(item: Ident): ConnectionIO[Vector[RTagItem]] = selectSimple(all, table, itemId.is(item)).query[RTagItem].to[Vector] + + def findAllIn(item: Ident, tags: Seq[Ident]): ConnectionIO[Vector[RTagItem]] = + NonEmptyList.fromList(tags.toList) match { + case Some(nel) => + selectSimple(all, table, and(itemId.is(item), tagId.isIn(nel))) + .query[RTagItem] + .to[Vector] + case None => + Vector.empty.pure[ConnectionIO] + } + + def setAllTags(item: Ident, tags: Seq[Ident]): ConnectionIO[Int] = + if (tags.isEmpty) 0.pure[ConnectionIO] + else + for { + entities <- tags.toList.traverse(tagId => + Ident.randomId[ConnectionIO].map(id => RTagItem(id, item, tagId)) + ) + n <- insertRows( + table, + all, + entities.map(v => fr"${v.tagItemId},${v.itemId},${v.tagId}") + ).update.run + } yield n } From cec4948710e150b3431552e5984991020ceed9d8 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Sun, 19 Jul 2020 01:07:49 +0200 Subject: [PATCH 4/4] Add pdf meta data to extracted text to add it to full-text index --- .../src/main/scala/docspell/extract/ExtractResult.scala | 6 ++++++ .../main/scala/docspell/extract/pdfbox/PdfMetaData.scala | 9 +++++++++ .../scala/docspell/joex/process/TextExtraction.scala | 5 ++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala b/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala index d48b63c8..ac9716b3 100644 --- a/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala +++ b/modules/extract/src/main/scala/docspell/extract/ExtractResult.scala @@ -28,6 +28,12 @@ object ExtractResult { case class Success(text: String, pdfMeta: Option[PdfMetaData]) extends ExtractResult { val textOption = Some(text) + def appendPdfMetaToText: Success = + pdfMeta.flatMap(_.asText) match { + case Some(m) => + copy(text = text + "\n\n" + m) + case None => this + } } def success(text: String, pdfMeta: Option[PdfMetaData]): ExtractResult = Success(text, pdfMeta) diff --git a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala index 7cff3b6c..4663d1c8 100644 --- a/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala +++ b/modules/extract/src/main/scala/docspell/extract/pdfbox/PdfMetaData.scala @@ -24,6 +24,15 @@ final case class PdfMetaData( def keywordList: List[String] = keywords.map(kws => kws.split("[,;]\\s*").toList).getOrElse(Nil) + + /** Return all data in lines, except keywords. Keywords are handled separately. */ + def asText: Option[String] = + (title.toList ++ author.toList ++ subject.toList ++ creationDate.toList.map( + _.toUtcDate.toString + )) match { + case Nil => None + case list => Some(list.mkString("\n")) + } } object PdfMetaData { diff --git a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala index 9bc41683..89bb1f61 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/TextExtraction.scala @@ -97,7 +97,10 @@ object TextExtraction { res <- extractTextFallback(ctx, cfg, ra, lang)(fids) meta = item.changeMeta( ra.id, - rm => rm.setContentIfEmpty(res.map(_.text.trim).filter(_.nonEmpty)) + rm => + rm.setContentIfEmpty( + res.map(_.appendPdfMetaToText.text.trim).filter(_.nonEmpty) + ) ) tags = res.flatMap(_.pdfMeta).map(_.keywordList).getOrElse(Nil) est <- dst