From 4ed7a137f742dd54270d4984b11cd9b0732658bc Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 19 Mar 2020 22:42:27 +0100 Subject: [PATCH 1/6] Add support for archive files Each attachment is now first extracted into potentially multiple ones, if it is recognized as an archive. This is the first step in processing. The original archive file is also stored and the resulting attachments are associated to their original archive. First support is implemented for zip files. --- .../src/main/scala/docspell/files/Zip.scala | 48 +++++ modules/files/src/test/resources/letters.zip | Bin 0 -> 35496 bytes .../test/scala/docspell/files/ZipTest.scala | 30 ++++ .../docspell/joex/process/CreateItem.scala | 2 +- .../joex/process/ExtractArchive.scala | 169 ++++++++++++++++++ .../docspell/joex/process/ItemHandler.scala | 6 +- .../docspell/joex/process/ProcessItem.scala | 13 +- .../mariadb/V1.4.0__attachment_archive.sql | 8 + .../postgresql/V1.4.0__attachment_archive.sql | 8 + .../docspell/store/queries/QAttachment.scala | 69 +++++-- .../docspell/store/records/RAttachment.scala | 3 + .../store/records/RAttachmentArchive.scala | 90 ++++++++++ 12 files changed, 419 insertions(+), 27 deletions(-) create mode 100644 modules/files/src/main/scala/docspell/files/Zip.scala create mode 100644 modules/files/src/test/resources/letters.zip create mode 100644 modules/files/src/test/scala/docspell/files/ZipTest.scala create mode 100644 modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala create mode 100644 modules/store/src/main/resources/db/migration/mariadb/V1.4.0__attachment_archive.sql create mode 100644 modules/store/src/main/resources/db/migration/postgresql/V1.4.0__attachment_archive.sql create mode 100644 modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala diff --git a/modules/files/src/main/scala/docspell/files/Zip.scala b/modules/files/src/main/scala/docspell/files/Zip.scala new file mode 100644 index 00000000..55d4cef9 --- /dev/null +++ 
b/modules/files/src/main/scala/docspell/files/Zip.scala @@ -0,0 +1,48 @@ +package docspell.files + +import cats.effect._ +import cats.implicits._ +import fs2.{Pipe, Stream} +import java.io.InputStream +import java.util.zip.ZipInputStream +import java.nio.file.Paths + +object Zip { + + case class Entry[F[_]](name: String, data: Stream[F, Byte]) + + def unzipP[F[_]: ConcurrentEffect: ContextShift]( + chunkSize: Int, + blocker: Blocker + ): Pipe[F, Byte, Entry[F]] = + s => unzip[F](chunkSize, blocker)(s) + + def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)( + data: Stream[F, Byte] + ): Stream[F, Entry[F]] = + data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker)) + + def unzipJava[F[_]: Sync: ContextShift]( + in: InputStream, + chunkSize: Int, + blocker: Blocker + ): Stream[F, Entry[F]] = { + val zin = new ZipInputStream(in) + + val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) { + case Some(_) => Sync[F].delay(zin.closeEntry()) + case None => ().pure[F] + } + + Stream + .resource(nextEntry) + .repeat + .unNoneTerminate + .map { ze => + val name = Paths.get(ze.getName()).getFileName.toString + val data = + fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false) + Entry(name, data) + } + } +} diff --git a/modules/files/src/test/resources/letters.zip b/modules/files/src/test/resources/letters.zip new file mode 100644 index 0000000000000000000000000000000000000000..bd2eafb102493a7dc75ce98e41764571e531d2a9 GIT binary patch literal 35496 zcmV(+K;6GkO9KQH0000805`T=6mIapo`TWMZ(cnbgl1n2_*00d+J004~q1FSec6eS2h+qU2PJ=?Zz+qP}nwr$(C zZQI=cWOlyYWGDHiX^Q0Lw!P^|Z%Oz=Mo6rHGrwX=yM0iCF|fwPIQiIJVL2^23cl#{cgiGdB2`(}*tfa4N9 zLeCl1@J#MNrHNGINJMHYeKI{gWDhe>d&8kQ7S*!r=3<>$i3&kL^U?&#uKONuk5AX@ zPIv+eZ!~!&qo{kT;s|I)TT#_j@^bvAwBlqk5j@^H(x`bZ6@;2G<0o(DSD1jkEK@?y zFhP0p9I~;t;>7zEq-<$XkrP=CJ?Z}OMIc1JJO_o6h{oi2Gnv{#yJvI~chZ9U5`3o> 
zmDSM|g?v)5!(@l>I(if1=BNfDPdomOzQQP6fJUlQfPdX5nXGm|&Dzc(!aFsln&}JN zfz6A!mYYBNQ?CV~$l28@DqhZ6B8oDq?C0Qu_&Gajmk&hm(wJ6vz#{%3x5$Q||8@Kp zWe7qFWgT-URV8kYjGHJTOfnSQuOF_v!mm!4;9_8pDV8F?oCV%=`KKO!rEC%Q*la{x zm^4=}X0wKkHO9~cnXEc2g8nq8R@`DP3oOJs}CLFL)&8@zf?LA#9 z=+)l2F1(yqncyckt;TYOFaLaxRIN~~L${}iN)ruas-Sa23*P445FSa`<{kLj-+sXQ z&5~)KR7k&6k(!o}><-4aXFA%QHfO^X28i2AsoxRg_R*NXusor=yVLs$z<1#ieWy>+ z!N*m$^*fruhlW513dsQnwcp2~)^ZPzFR&Lx@&c5Jt?~bTss9`O4?r;df04mJz{be% zzb1?XjPxu_tp97uL_q)l0}vwv13kz8A%LtsfTQhp9!<7dzt&o9w*E(T`LDfdz3JU) zv)Ow3*~@w(W6H=rE_Pk1ylzojk#IeUM~vWF@2d3V$YfYxSYUDlRvu0{Nl(Yb1dM*B zrZ+Ywnl%-zRfT&k6sZHF}Q%rXD;|mL(aFg zGPX1^JAsjRt9P+)V{8Dq@9gS=?(Aw`;Ocr<`b#P{GzGJ7VFpgu+QS3orQA+;9yW8bO7~G@lS^cs)Xnt-$fyy`jG4Fse*pvGiO3J>-yusrRjg^ORZV( zqdEH1{EOlA`rRrFGcW{7*UV@Kik_jazBa)46=S3DasmhV>f0%`js1iC5gI?i_XU{r zV=e|t&&>D>a&2l!VQXXpmXBm#=jiOn3f>_T{y~% z%;a3(^8C4@`QsSvV>OmXK~F(fDE+Vl{-&ZfwL8?eG_eDeclZ&LA~sw4}>}{rt0lAN}+AHM;o0Pp+>IDuc&G{)g=!8rJ zqJsA8;r&BVqQh+0vm(eyNXRh%>V5l@Y zvdk-K9v0G!*R=b{gU<*e`;aACCC?VNa-wy@T`PumV?aAeY>%w%6G}r%vGx*#Ad6rdn=!|(hir~5 z!Cp&oEbm(TkQ)#8k30vVUf!`9)(7Z+E?10LdW;~{vyr-xZgR|j6*69%!-mr$HV-Zp zNiuVvcrl@yJ@wV%&ti~8jgs*$`G<@FXH5lA4U!7~8f9N+iX5(|yfJ2W67WenS~mR2 zRXO$v&%i|f9@7B}2Y)g-+71<9uPYHNMtez*`G5~j5Czt>G`>cb*;g=Ar&ldm=tUm8 zg^l><*CFmnnj*ThOa&qDqtW(c)yCoX-GC1yB5w2CYOQSsad7_78t_Jt#paC5t8ZW) zU2pzQlYdiBVaTPLYSljO{rRM{I`X%;MZE*AVm7_LvHeSKAf;;J#Eo>K(eX(Euc{Op zxfTL!louyV79gP6h((NB+5AsiZ|oQ$O}v@0a5yb`er#sQfFHh*1{!tJhXis426K-M zuccyxLg@rWKa+k!k8olyYY&xBGWFy9EM`+V+uzixw#!yo_p+&$LF%3Vg z1fL;Hn;1F()OqXq`2GS7&2>N@WbHuU!@<9ym`A;>Q=1m+d~ZfW3T3dd0XLR*vGpo* zqA%p99P`!+{(;(VeB^#NL&IL1*qI(QY8d2MI{dl*2hV_f9 zerzjcMatRhVrjtg0E|@JA37>iPGTi>L$s*o*l4ZOLw7FfUTQ22Vq-jEP?e@bYfr3YQpU@m+&-n0Kd>D{y?19v|Pg z-D=nm7L2kK6M87B5?_6R^z|@XzZ}ILoCwPnC<9Fo=+T7XaoLNX_BL`Km&T1({ai}y z5G`zp=Cj24q*#q~mZvH~us;&xp)$2Wr!}-xu?BJ>FlJrdGLr_)CUJW6&zl4=c@01V z*jC+^^!0#CGOKNmI=)?^PB4Tz*sg;9$(EB6ve}O;rgLxnakdF?w!yJ(AzxF;dNV1r z?nIF^G4LRh8pc;`^H%&c-7XMZl0Jlpy_6DqC7;3X_zm_It&2NX#0L!>t^R(YfRs6{ 
zyQep%*$(bUdOo2bDeUG@KxY9a*@6kpKfQ7H!g>&AD(;^mHFk44b1{ZxTB8J9LZCAFSoP<`v1lVT9ir(Iw8R_o&G8N1F_m*UL@!O$cY4lepgk zYNSCW=}J{%yG?N93YF1Y4+|Y4IqXjQ0pEMP#}xbg_-JEPCU)cKHZJBbMI(^dXtUSD z)Ct)WZ!LXQkN4APpPLAu3BN=02w6yYrN))7#60h7iLt>=C$F>Kw z>cM_L%>ZJ;r;gtwSt>EXN6<`JfMkJgu&CzL+FP59x*r&QaT(h=_i+8ap5B#O0>N{- zAT=f|A$e8dwLho5e3-^TT~WjUyn#K9oOJ~|&C}4rsWGVVUBAGdBP_<$M;dG)DOVWr znsI@?WSQl?!+mJA#+->f($eX81i|GL_77jhF9*1GxzSQjN4O^VcZ9oEQ8qDyE}P)1 z2i%|xEb1c~{gwK#SW{vnb`*o?lMtZvC+q6o5^Zcz*R=5sIa)z}W1!DX4VL=^=7Y-) zR`B&DNiDR0{EVbg<-YQm83B=;C|%J7@F}O{26BG(I6L%8=OhuZ!zTRGw^VqL5JBU7 zAq1r>kDCZ;ll^WG)%=zQGbWu`ACyjJ(71@Yu4=pqi{*6T5SZuclUYh^gerU0e6aOE zc=rA;f*7c;E#{rSCH`}7HYkdpYcIMoFCbAA!)6N?h{7o@rNwh(WAJ#1kXHgJE{ZYu zTyvX#cVPlh=ICu~suX}b=AfZ1Qk4js77}C~uRFAZkBGMoj$yUBaOna+x1>`8zJo>> zRZjK$d?ijx_PxRsTYYml!tuBrC!A*k-ySNT^j8@_-{al=j)ID$EkG{+Uk#F~ zPRWy))r`0t)Ii8cl(`E^q{A&Hz>t6VSw*hnc_z8o1VN){q9RmbJqn-1y4&{lbmj70 zYQy;`^0falCr4HEptUD#4AIX8*KB5!x|0=0LL>rKC)Q3FO)_uxynE__@;#Odex!nE0wIeK ztVXQDMM*ow_ijxZ!d~C(7sk@1#Em0Q}*7S3?3u-q&Y#2f-|Va6>y9FtH|#x z_vO7Tv~{8L9d=3=n^n$y{0<3-7I-M7&WUYj944pM8hfk39S=>D#s%%s+o^wTj39_( z!D?X%K;lFbtPi=6{Tx~G3>xf_S|5OV z0ie2Gl?Ky8b{rKW3HqCBUWr0u3af^~x_ZcFFj=szjZ3=JSA?#VMio7J<}+h+e!c01 zROBkjilQ&%s7`44d8;Hzp^H#A&0uVZ?l}0bqMz@F6nriA*ERL3@(S5Nb8&c8;oUBV z4Xt>CdeKmk$MpDU&-46CKwgtq(K=OrD4Oxpr;G1`A2dHeVSS%(Qjq1BVYSsoCl zv`O+X2r4PUN?dEv?#66_8P&MYC$_H+AqZ@EYT zh0$x1zzeVLW^&C>|7KAHWxsL+PH~O;(!EN~j4M7&C(}O<{)S$e%yto< zCAkqhm9zz~xS~QjI)g16d<$hqw91jKG<;BZbq`)sAEDL!7jzj$D*fx$(FH7FVZPVN z*vI;tG18Sry=;r{bwr0p9uwS$w1(_5F-2=%+X)Np4GKnl%MNB8YC7BiH08M`s%zvu z6H!(u3aC_2 z2lq@|26MTZXgeo*pw>%t zD}*UOxH2T-`XVMX&)+otdRIJicth;G3}fw!pvT=tNO$BXv0>j$Ahx3uPAX_`8F&az zAL%S@fh&81&Dam2FOd8gZ~byzxq29tZaDKpN#)W3F&k-QDbJU}iMg{?2)l1eJ`-w^ zQ=Le(EB63771c`!9@g;6C6JR8-C5n@8p=Fbvqkp%r^%_glXhgp(&%3@o6!n&RxE!2 zgM$!;`blRMTC<32ldg^0XP=-I#`Qyujh-#*Fu~Dx9vybISeLEx9+iX(>BiPfyYnTn zEe}{bNQXc0E{)=0HA8ZChkq)CiLM?U8Gfy?N&4^R+7SlAFw`LYLL{Dso~8A}XG|9? 
zR8w7tWle(Jd^vE?q`z$gFWqKrFo42`(v>E2uoyOU^DDMMbGc6;74(ov8fNsRh5zPG2tWfOkXp;!yzYRba2X( zgR?b-Tl)r^l68~*sB;{D9pSwu9;}s<`nzk(oP@SH-yz&~r%%AZpdS0YJz3IKAA_P; zD^AfMJqAT!kfk!VXkXfF zJ>PggN1J`~`-HmJtE|O(B45nzBTuE{$ZZ?Bw#uzB3Hv4=Gx%~E`BkUYMsQ5w+$|%& zRVvf!Pi_L?YWb@@qFyd*06sbb6VI#PRO0u=5zrXRq=FPpSnnE!FS4!l0&B?POS8$L z$GskiZ*hBpCj2*VnpST@cz}lps^(4io;c@|Ve&z=I2>q=Kus$Ya(eJ^rP7M@H>F`M zh6aZ|hbn<}#rkj$+nz^4UDN^XwMmV=q2qz!bNZOVyPKu*wdXpd;x2WXbvo^(f*xYks3w(AM4vi9;zax^B zTT2yR)ws3ROSThqQQcA0v9zkvH1^_Zf|)iDe>Rr^P4<5lFL zES1ZV#B2oeAn|gvdzZV5$@V)LDo4(u;EuebQp#thp(-gzvV{_(5ZQ!y2?`CRY80oT zb*j~Bn7O4cg@P|8BKh`ws_Oc@!Q!^gFLlpp6wGa}#61%He#i+w7AGZz&G^s=Zz9c; zb)MqSuddv@r3iK|#6rb^K|zZUb#FUvkN(TOSnBm2yu+Z^s;>v@Wrb5&!o9EL^G3Ga zVOH?N<~&F2+e#MoLa9?iZCmWi1{4rxSvi-ySNq6$vx-QCFY$OYO+C_n7ouf1tnxT z;!Me5D1|fp4Uj8>PT2RPO0@ZqG}I?MTY#IEZB0~b(lOgJL(!$6l>c_;V@hmS-lcZ7 zOk=f)-B4>63mAs&8$EN7%L9nFqt2{1ErttMQv#kK0{qCX$ESYZzG!Y@ zAJ+L4Tgve^pJgYg_9C1XV<(*hMxPctvymXMMrWME`2j8P7P53>Rp$@RsO0Z5fnv zbsj1riNl2Xbxf;Ed<4$($F)`?bJ-rlzQeG52=&E&LzGc=hC3;R+RqzM?G}WG=;u1E znbS|xdc@Gg&H6&m)FiGSQk}@q#CVu}M^D<9kFy*)+oolMR2*JhYS}Df+pT1k>{^c5 zNW@P0ak0dCvky;{ONWS@&a25eJ)Y9V@~D=cSnMr5pjJC0*b+hf8tEaE(QH>4<)lMF zKGZOIZrzpGt;$oAJ04^kLs;32EkrCt!ymHZ= zr1&Q`-{6qH72Qu5go+UHA3N?ntQ zsHT%G@DbLFG+vam$#>=HWkyIJMA|fO=d`!T==GMQwbHw{rAC@{puPBcg|gNrBkp@p z?9Qlt^wF+lYRhf%wL@iyQ2LwvlqmfaN{?lv?|5;@!^2hR!@x1Y(%sN`wnPxa#4tde z%{vOHB#djp0Nu#cpOXmHd`-mltGl(_#@}(nlUp=r77?o`Yy4(l0*+f?*ap^ zTtfk&Sa=+Hc4KA!tO1Xdvt`UvE~74VWxdy zPX764*CE#|3uf~M_`;ovD?ocvP&#wvZeuNY{25i#)sblSy!GWkg|D`sG>|4EXzvVJC)`!E2H!caZekNP;}y&D3_G-E!TXga`#>CnC;?=jDG-lN_? zD+Q5sB9jyc`6e8YdO6%>+ozNW?JIJubPd?ylMI>~dA0vQNp5n#>AvvOT3WVZ3pn;I zR8u=S2gsFs7+c}JR@`(q1Uc-@TVuN}-O{Y*w@Bi9OQ?Qa57v!5nq>w? 
z5Uf^HsSvR>4B_})r_3m}mqO2z0&cF-(wEf6IZKLBpLmj8j0Jp~I!{ds7mqO8u^GqA zguE3BBJmTG+oe2MO>9PO-Pl+#PNYj;OK!YG+|myOp@9%qA8L?N67az%iRrbVZB1ea zqgxLoELl+Ihjl}HzINo6xKD&aEn|;}W0!GEANw@c*N=qTfmQGc*_2GO4?(?v`%5}w zc``D*kZ6jTg(%Oih_;3&js|mZu9z1cO24;;YBc&%B_QHzJ_Hg3K{*+#%qg_!9`Xm9D!HMU2-RiIA{kQzigY=wnR$o&H6iowKI-}dwm<~iw@K z()8Y4I?r&IHyJP=EQ_IG(pfTK&NjU*8knLoQT)JKBc|w@h=eZ z9bAS#%^U%Fi>%}r!f=iXvCTdeP0exhpsWvh<#QjDbDZuq86>L&3h;E4D!A+Ri$GDo!14D-$E)UJ2n^OUf3A~IXYws}E1O*QP9;Fe@ z@C;<_S_#oD?N~!PKG6Moh-$z*PSEQZ5(tmbBh+>q-wSvj1V24i(&qHI{FFrzB!_#X zQ~=*EqZ|pzdhlkT0AfDqFsYfBFy`c_PO0Wapv>*MC1gQ&ME= zhMBbR7=<1FXeks&eDOxjGLtgWUv~miU1oRTXXx5%0^{!e54o+-*Yd z=)j~opOKOtn@HucH z=hvjaBjS6*fU)7FEE}S*Za6V@LCh|sCMVs+akXoA(Ul{}=45hRx8g6Kr7b+LI>l@| z5Iato*8olzY&xB zQue(Kr0fI8@u-`RGoT6=|E&I4aBq~-?OrNXL7{-bMx9V%_*h8xh88h$RMeUK)%z2AI;~pPL*y{ zOFNWe#yEB-`$U0;C?HINdG`q|kmzRO9Uq-vnNZ{_u{CtO4>G?dPD#Xxx07FTL6>c3H3)N zPt&20WkCwyCH;lFaD=27eNV-Sgwa`n=}v8+XXp0b&b5hJj*mUmp!~M%rl2j^kLuyABIUCddFxWZ+R5N*~L%0H(Du9#&cD+$Q z=?gLp5$0MygozlXj*s3U&3*l;JMy13Aayrd>7_J-1ip{mAbrGZ{aVLLvH|d>KKjqm z!XPIayN^a1W1$pxrq(MKuDg?Jt}J@rpsm>a?|v^RrI1ll0|0!OV%wDcYn&tf2*IJ( zoMsj0zzLqYB0QfFqb>typAIbd3k86Xr`7T;pu97;!LUjv`HKBpTegWIp%f7jMJr+A znoYBKnSQtLuGe8doLI=|+a1(tl84z<0(%%uv|j6(ENfLbU^hmqb0A@A2qhSP*~_zQ z^kuO5yizyhe9~=rcZxf)|eJhig4%|M?3pAyH>;*bkF4h zBZ>CG!3u)qDMGy!T-+s#>PMymSpnt8uVn_avEvUKdE|U!t5Dhfk^e-Xa0JI4e)0ND zFTdc+apUNPjXC zqhZV<#a#E^KFS#sm{1_Qsb_X(ia^}&DQL(h7jQJqT%h~%;R(*t*jG6RgGo8Faly!& zSyCqSrrk?Mo-%)Esss@xKu*b4=(L>aOZ0>mkv6XTL5d5;!H=_wMv6Vh*h0|W5OGx) z-sxp$=gn^sWOp|g1>#oiHaG3YeICDcp~o6DL2J!!0iHecX~)3Y}Mrvx7$`>V;@rDpHE`RPB|n8ZsYto zS7=w;8T2z_^AlPtTtn~+z8k~t`0Uzv_VXbavqS%2{v6*+2NjiNm{rb_)W~?)#$`xz-s*xI5;NN3%hpOq4V&}$YfT!Ih`aDI$#gKh>f&H;kwm!F?d3J`5 znqgy81*o#yJCUv_fvCL**Td!k{jAk;Fvd@)Sl47>wd9$Bd@|SM zIP@B+Ee2}ce<5d77*itzzG0mGTUkuG`O&%u_Gq)~Rn^Ry9)l8{>Ty z#T;-M@44x7D56nGvL8A^kK-eOlXBRk+tU|xl}zngr8;NN-$0TNy@ZJ}9k(kXrqHXh zd3W1A{%Nt1>_)csZaG>Xe^McSpRM5<$r z5xLyaji}b`cBUS9UI%n;u)J1mVG;F7TI|LFnYT+2kzv0@<4i_;?qtD(B$&B!YOqOt 
zA>0Y>QKaG$A9(gs2;Jl@yj|Gdtg}1Y*CTCHgp?E%U{bHOSNfz(vDnBf&z-HD08GT> zB|jx?yd^6WRE*dWznjs}8WMoWOa9&#ggmigrm4{ss=}uI-RiUNu&N(Lu_{J={-j%F z%Xcu-B2s~i_^99Jw|5T|VH+B>WiBa^(j)E8WoWz9tne6)niXBt8?L5n&WL#Yc`&YC zaK2E*H9{h4sbwx@*{z=?1L}mY^2a%umuLaoYS^uCMpSpgEH%MJPU#!6MKP-!=0u+W zUadE1kyQ8ID8C_W>Vy#kbi8)pichTAQ*6c1&md53r4r--nUcKD93@D)QtGxuJs_fc zvj8Q`#ZkOx=*TcSX|a$UTA^KHKGtJ6%w0!>-LG?OR7s_gTOY-{wc+m?cDwRg+8%^V z)br*L0^`apQ`{JXLCrJKp;I>>0};2g!ty%H9Lh*hhs=?d7j-y&yK>Y2uwvL7Z(4U^ zi?-FhGC!VF5@jSBqFT+Nj34*~W|x0;9x~N;)vbjpL&rN=wv= zc))=cxAe!jLk*%EG6X+z9zLEn=9hW%1w&|``W>k5wAP=f9LB?b>d(8D+CV1j5^bmW zzUv%o?-z8@a?G;kX0L_?|Dmi%jF+(aofK6EKqNPsFfu3AK)%*rL5i`N1lt;UB^%^E z$N{Olov^TI>!;QOpe^VJSFCNK))-GWaO0qgg!O}#MrTV}Me@{XDj@{oF@h4pQ-0XS z!`09In-b6ckalW#`!4V}su{7>1_{WjbYQPbdcem0*Yac9#1*R#KJ8LV$Fe@Dgo%E65|@eaWfzvTI1avQ-0rXeM+j z2Ba3bBw?5|JeDjaltvOG_YW(h``+BzV*5od-zu8ZE;*T_ir!c!P_2rQMqP4;R46(9 zADda?9!l}1BaBL1TKrqFZ@S!HxL31^s`K}p{iV*qSh+CLemUW3PgIJ&N%zZNFAY8tY@cKI^vg%JvNtBsAnKAddvh0?yPyA*U z$%bfKrJ*L4q3QJO2_;8#MOVtLEPfiiE%SeP3BEEPiEbl+uzkUY=`JZ7RgD?bKLH0 zn}tSt+f8e_ioSr--ppv1%@tPa-X`Lj~`2kAZqvymzm4{D~pd)hth>qa`z1k;tEV?{E-LrSggYFV4`a`F0HXp&pXkvI+E zhn}RX$GuIcFWy3}NUFTZ0WG@0z}!^dN;U07obgt=3w3H3^)QSbv=i9OMVtSW?)5nf zuM(MIpRP#AN&4W_?pY+`aH)L^8K`YLqjPREfJx_gl~wlTe7h#`Q8!8*@UZ)mw(UkA99Rp(b)Dm3R;C1TlzA<> z3o}`OpI+A1Sau^&dETV&!`}gHcIf0mjrD#b{3&HVk|oVx#5X&AoOCPpR@qZK0jRW| zraxbD*FP{<@qicpz&w=WnGmVH`ZooUoU{%e-%nTz@Hl zR#oPP`+M8qPAV}^B~c(27{lCE-V|@$Z)U?$FS-kEwaR@!*xX|E3dP2Z615?AB<&|y zf`;$hzbp>~ue7xSHMA?8XHocaiLik@KnZP;u9)n*o8N$dG4FlUnS+PvM6WRdvd@61 zO51~8EWUf5+>uz{5rIo`?4?bBlC6Dcnrz?b5wFRZhR}vB5iUHx6nvJ%h><7XAicYw zl1M_$C%<{-L68$7X~4kYm~FYuX{SL=aXz*%JELS3yo2hdH!4VHeR|T58!ygqs_*v0XR5QUbUequ=oPBED<}*4A-cH|Z{35Y8w_=&Ddlb_> z@jSgWAar^cP4)H9WV({~(s0TeYBmI>YSU4^E zUOLnhHpBZP>6-)=yo6R)VTInf4mr4|<4Jv(Qf*7^Rh<8HGxk-h>22kdl>>+l#~|EX z5fQWL_>K*urSV0Whp(Ju)mCDBKsg_c3xm%%Gp!$imKz~F4sxyW4^!;U`$E%djE$sS_eq=v{YynCU7?RV;k?)z5QK`G)CV4&cC?q zbv!4)FNioH62kH792ce%R1cO%+YH4!m2K476G^A6td+XKoJgR11M<)<^8ZUabfGY2 
z=O*meD;rDQiHn}2$x~nxVXFPaRm{#LvuMzBuddJmQ76%Cng`1Bv0~!TkB*wJ9BvGO z$R&n{MkI$oN9tx-k@x?{pc5`}nI~{y6{)dc$M7)7aZv}^@uggA*UE&t(=6RNWG9A) zYQ-2m##?bF%+b3I({v@xbb2|GDYi62?kvyaJqf|Ndq{hvKSMz^QI!coAW*`!1iI%s zWZWcMv?l;CxU!)KkK6PX$peCMr0kNaj**vR2={&CfZX4mc9^I#$*V{XT=PEE)5#)2 zby9?4>{t5^#SFumgnY2(qXSWEjHFkj=9OGx$xXM8I|zqm3=|#9MT@Z!Ms;UWv?>fb zEMp~=z2*fJ^E(_3xvIs(ITKdzOQF7SleJYxbX2y7_>9N+;ZFH3^9uy)P#8vZ<%o`Dz^Lt?M$WVpZ+-9aCVwp*`OBrwYL#&+R@zE_H6!=iHcyz0D z15YYX@MK-;k6kU6EBQcet|v~~hg0Pzt-n=}1;9v;<~cMaUYHd87#$|Th4ELS2SV07 z`uhwmf2~QpAT>2UWHw5~mtcbq9r0AtP=sTsYCKjKL7@C$&RQq+l}c@Y<-PjBDwvXd z=9{y~`kbuI)>Er9*%{PNmwrR6AYDMgx<&Gm+*r^e+|aq!@M|7AkS%QCJ+4*rjc$tt zqUY1i*a@%oTpCr;sfSraL*Pq*{i4`vbG`oH1}eGT5)VY4SJhG^Jg))^2Y*? ztDlFBeyibsKP}uBoF-kT{&mna8U{qgI^>hoRqX8AdKm>e6@n#|EcRoxBvxBW=1pKvhQ+(Gqzc(n0{|@GiJPq-A8mB%KSPp=np!TqD_;x|Lhn zDY7#n305n+YZwA;RzKR>3+WtCHb_3mQTrZQ1CmgX>J&GZhf9&+KU-0tvH|)c7W5Jt zbYjX$O))~F`vo-kb<{?X9;qvD^fKf}J{tS2sTuT|O8QJRa~hyo&?_k&5UcXU#?o9o zC%{i4n#{>cNcvH~c`Vjz!#{I;K! zQ*s7Y4%eXAjT(J$C@FVZ{~wbwBZX<{-HE6QZ$tz}4~G?EenXTzE%xY!i>O~;iL7%6 zQ@4&K;4t?oWW7TNS6g9ocF8^I-~9{lGfIdD5dk=f^0kIzzM_x-9q81zwrZd zD_?{pvmMVOa25V?-RX#o0hQ(eRFyi)ohi00{`*co5uFSGTs!8oaf%zV#FFc@MG@)D zA#}@kkGxYn7N_u@Bz%d&nIf@bo#|2Ej$u@+@>Gs4;5)w&%_O<{XJ{X|6b$Z>qKw>;@r2<%Uvd#!8!?a=XN=gFIa?7JJ z1g#Qg54-zFh`d6cL&-4skrV-I4dO89u_;i+qh+gB4~^C$yggJ^%cZ_iOwhR>6sf0TO@0d-rX0zaKPSGx6J$$sXT<|t44bLBsU}@Y&oiqmD~?I z-!0aMrd**VHN3lAaiP;Dlk549x^-!ZLwgv$5^#|+e{9f5cawZxEGO-&+tBU{9{Q2QKaDeZiXG{Jk(#1~`<~ zv{ zeqcGd-mgSy4dZV2nfO*jW?QKK<#;k(8Zbi2hj8z#_UW8s6|m*prXGXI{qXh_c?Vz1 zRRVv(l+X43!}ahlYqzy{VV2>e+x4OShUBPFs|~{CI1fBDs)4r zp^@^r-c{_C+G4i`QUg>6tCXb)4|8}AI~>@YMJ(X~OQ}%{OhX(c1ikvTQcgx5{ymLT z+AoXaWj5{Izgk^r>;J&C&HBGr{Z!Ykl2#1&SyQFB$NC!k)$sKlfEGfIAS`hbyA`FGe8XyT>#*wFI&KJ0;%&YBCWY0{@@bAL6RDauGk1*8z-*fcHj z!hlA(5%Di-l~a}4XWsGO0HOie(e#F5gn*;sDrH+?lUh-=yJ_Wb+TT})3EB-t@`LJ8Nsz}zeGQvI zRer%MLnl|;!fr{gJ3lR=9JEu#po(^FDp+wGssEUStznyUx1oEM&!6#W+vH25dX zi@fOB5I*h`yngf1w?jgVf?c!a@l70}0R66tb{t4p{oJZJ9C!m1u%~}>CAwQbQi9SL 
z59kb*pdC?F+t607rkSx;I=R3IuN|^bL7N74P4(SUjV-(Rg|7IP#<;%G&YtA*_(1qO z507Hhnv~Y7kg>aNr*pu!`?wAdphE$z#U6j}E{7PU_WV96UamPWu?8%a0I2Cf=lkgxAX@ zzuy;66&pL@g|jN*&4$5&&0SkHgM1{1CW{TP$gqwHBDld}i zEtA(RfTeMsAeIWEe{~jD=?%09)?FQkyKkA?)S{73%F&hn67U;C6^7SKsYVxoU=xXq zr4x)=?b~h2o}&yO$k4Bj(RI4%L~J0js+)YeC_)@UW`!vLN?uB{mxk8k6;?*AvMF8G z9xq&d=sUF<`u!l}>`wBvA~Q6%>l%0X`h?EB&b~jSsXoHbvt!bKwV+gx0g_S*0k4Z+ISh`8VN)BwI0QOZ| z&B`QQdpB6LZz1RIJ+RgL##l#$o145i#ghAQmOiM!vg|a*frTuLJSxi=xs>6^gH(Gb zF2#CX5M*fE*6>+A#umXvc3tftxx&}vx_1kVl?ijC`dsx3^Ek+h7?s8b*@8$TWtQiNR~ zK!6mWI5Q&LZbLEga*HG?t|#cBpuQY_-HR5r6lBo4h9p#pT1A=5O2ltbj-3RDrrxK! zw7gYoG}2+fwgJ9k=lyji;0uJb+aL@<9Jzf46H6PC{NlR90TE?p)LvxfKW<1hFc$y~ z`i$XDdF3JhWX`|^p74&XXimd{sQ(>dcmK%2n%WbfHq==107VSngt=ii*T3%r8bRc| z^*YQfxsg41_4BShkN;(i{G?b47de569=X~y4+uM)K3E(P=)-;5ji9U3P*`qo_$j1qo2Kq(%rvZr?;7Wb)_MPM9xsyTUoyf62!Aet9bK8K&$ ziIP`EzxO{~KRZaC`K~73F_ZeC=q+j^L%b2#uEAiuR_$nMW)w*yyA=^Wo@vbn_czX; zOKIu^Tq2cLAtuX6ki#mv+w*6ddfCwu&;REj^_d7pVqmXzW2f60OhCmcukht4x>3;V z!3Xrta1+a`i6EWtN3`u4;qrSZ9l6h0X-4SY*UID_Y(RhIT$Q+7P^T`RgbE3|YUza^ zt8k>#L!Q^a0!15H*Yfes!SBjOcx31!oX^yd<1DmN?;~ZFePxf4s-`oOyadQCT z#7&)Dglr$qLD7Uu??8t{VMv_bm`a3nth~aD!*^TWi(v!>d=(Ra-P|?D_+T-#;8l+! 
zpKxkOPZSk%a*n22%HsUBPr_>N8Po|0U?JavjDio;|K=uscOr0ypIw152)y}HLTDdT zRwq(~3Ln<49DTk!#r`ilU1wXDALv8Yx`@H8W%7^sN^hD1MJA(*#V>AQQ8=Ch)W4|_ zeIPj!xDYFH`GTAQu@a$+Vq)dmnk_ZAc#`ezedwOOz0U$;+%qUMe>x=fGisCd*j~uMeH1|ry=HSs7@izIYtc&N z(!LShzbVwJ`VVJ@P{Va9LTC-iY3=(sq-im$JO0kXM(yJMvXVQ2|FU5| z;~?wg_#1XB^e||I0K$t*@fNsr`Z0mLjiMp+OZ!q_&_M=$t!U+zjgHO6ZA>e zQ3aJMVK>PFbkNr{*9FXNeMv4aFa#fLQiD-b3ke>Q6ccSc(9Hik;$D+b=AY07C>DZR z8sL&QRu3gQ89men*ghaE;!=6N-UCZC-+}f5y zplBaBKHlXoyGJ*%@C_-mYFH64fh(Yjao$PSVrujn5rny$8TnY#;X=)kKHrp|UvO_K ztbap=B4m(5*w!CC7XH`0wG4As>Pqv(mKagl&kVB4#u!@agObLcwh#O&*~0FwpjXT-RfJ5Dz$C?c`DDiz3AD`u>#pgGuCyhcQ%_b zEazU8Q6|-aIj+JIQEJvT)k6d(;(?}yWu5e!Uu zb4VGpG%d5(K%gwB-}&VJYP9CA0VXuwv763t+je7SMl5j)7Y5USchB{vdJ2zqjWi?; zsL)G=mN~f_AgH0%LRxkb+-WtZ^aBNe;QNsbyu5qs2_4GBUub5ofD(B3N{n4o)#h}lhxXsTi|G?bY$LcsE@T9Pqm!e0W9$wYcRWNBb=pT5_LP%#b*Vwy z$)jKAprC`zIdq-%x_0~2m-0vmMblXwhO;!2*j$uLY}w|9yd`Pg9}392+R7M_rPoh7 zCAo~t29s|{j4Cik!S6JYF3V9PX>miXa);TdZ#>N9AYcW+R34fu$yCZ+@4Et?9m!J4 zWLD=;Pnh^j(~7ok7>xmn$&t7oMSP8*KuRFTR)-H`q)$r-&VA z8C3js;(4B~{oGRWGRa#unFJ-ifXz0$C||dLX}PhczX#Ia#240AsDT_0u#oW*uf!-1 zGLHp*MJA0JBW3SCXET!zQw1<$_;NhQ-u*qSu|HRu6d&1*No@sMGJb5tq^GhUIbuH> z-Bwws(8e~G+~C=z1V168lJ>p4^2kns?i^+T5=Xo~c_2woYQ@}tu-!mQbwpLkKt?aS zR$PuvRe|P7fa=WyiG)E*r1wX6i{cnw@jT*1meiCjp3#fjd37Z}FDg1XFS_vx5jkTj zg|4yuVQkniN$O~-`3j`LawsYuU5cK%VcN8yl@cil3y@V3em+QwbxKc{iY6H6O+e`S z4Q+=K6+Lq(o$jtC+X7}MzkYAH@@{owt84~`X$?C5`b*8Npj*9e7Q96m(rQ2~*?#ZLO)U1pG*{)WlA4WbELFZvW;4ua>XPFma?*{2+gP($iMKYv3B)-MBEvJ>U%`;)xu+$fC0!^eg7)EidC zI76r!q^X3Fw($p($HG9rsfKG+O)o6O6(dYQ$s*8*lTBL~#SwOwZrFanrTftDs%`I zyl3TFtbbjZ>z=^Ts~H2rpf|uK!qZ~1#0s*|vNAzl^Ks?p^DWF|5#yti8)$Gzce-bS z+U12dxK-;L?oA+_>AX4E@V$yGQa;s10t+V5HI`qe+>wWZ?Map;*|>DmUt$fea8I5Z zlj~pl73NE)!cAuM5Ot`4%RtICvZ1k7^#X(o1wJ4DIy>}ID!^A0Y=Ien&A|yn^1Y_FGh7qSx=efZi`a|*jKC+>%uN~d(6oWGMK^IZM_ck6GG7R z0ASzOe5+?1SpK5Y+%E^hmo)ufVM%ar)T_rLEB=HYGl?7S0AEw&Sv$NCj=si-3)c3^ zoo}XWU2fj@sP#6sBm!g(XW4BW-qYSM<+$-)iTUPLjCLF>9U4n)0DHZQe(LnA*lR9c z-#{ZMl=`I>0YO&e&{c)}U2K|#hvT9YwkX8aZpQ2{ 
z;kE95+JYp$sYlxH$Dz^S)hV4Cp9sKWnFvmhxLZXJsp<=6Xp?V6C(>`jutEn7uCbiX zLr2oZ3A0P`1#iw2!l`i=#V(%d2_k_H2}8ODVrp6TO`wNi)`%Dg?qO*z!4GK7&&L;Z z;1@K*<1B7vCsXq4L>?+~Tpen>R}{pQK*^`+1R$3(YFL7cvYn7|*zQR3{zXAP&?0@a zYRdqbyd{;VqpzxAAX;RFngQIdl`q5p3lz|Ac^spYduWwAy!0X6KNIf_b>plO_pmky zey*zdPS0v#kBUxQyH8tQ4H116&L)0{Qdh%+$A|fcEA|mGD8~2owDv>SCe&ztNdRTL zEn`6X@%o$i_UV4zrPw0(B>PUyRT4MCMsT*a^--+TLn(KQErQfBXj#bSOhR8q;*tuE za>%MdXT033X~dk1X##h!Yb=C5g!Nt!Y`xuPz+R656eTR2QW(YPKjxxPJ`s5{?N0F! z2BB7C23nkb1rw57O$aDbh52gMb&i;s>%4gw4ev{vCZ4w|%@mQaUdRdLA*gqaYx~f% z7hZ&v+ee}c5YCrO7J~LQrUvLF9?LzWu_Eq5xDeK2wlhxjW)TOML!!r}AYjfHIv{H6 z4%DJ(|9+=oNR;#(l88U-;C2?P^bcMNU4UvqPgoFmG|@xtPay7RIY~4yYA$mL4*|(& z{ty%xRnZdj$8xC$@;UEu(my-^7oNE{3o85K+JO|+dliR z85`O%lx=;4gwB8uz@0M1@!F^pQRl6;&@pTe`qLhQKcCF|lqPNAHlL+`nWoB;;xYs2 zMAh~LY6$5l1BEPs!nVE8+*DNU8U2{&B8nIsq61=S)&=9p0sabmbDq;Lw#O{G2$PhV zY_JrWQP5LQb3#fVTSfboLG`y}`NLFQ7;||T=EPjveKBeDL!rh35Z1hH%)XLLLxl#o zNZqQ7g^!mG=ha%+D=HsrAZ}gqlCUwL1(`<_S{n2|7eMd95 z_Rjh?dE&b?-+XNR1)BT>{R?i2iLl>63JgsY0xw7XE7)|KSUGGmD;12c#q^?6Wn;*AkWaFAdaa6XBhMzTx|zC-Wo8^By)#}fY)nd zF}dvaj3Y;1E>ub_nF`~!yd|L-NAa33RFkfUTgN9&G*>T_$%CwYXul_I_%6nG$@myX z!h?6^MoyNyzF;%_6CIx0mm^BEXctg2)5O#7dZoUdBT8LJV_@3AfQl+$OMYywC{Spn z8>R~}S#EYRW|Gz5DEbt8h#YjJhbQgbdnV5Qq;6R81O1vIe->@@OUb zGEH2ub?WB}kCWRhDMhA%fZ<{{#-A)?CP9X*>%_+yM>g`KEmen1&4c!Dw2S-vJu7BS z?Xj4NfqRHwR{33HDO3}}4E-sMRHfj`+$whT)n_gz6Iw(A2Q}&Z!bxz7#jh&bCp2O; zI=?OBg0Xiv&gaH|C0}Q)ywk5uP>G8!!e^W;2`fe$lQ*9;{OoOV&ITFExEvb}(`)5u zoY$-;wkdwNw-~2}czJA`PLP%$2jwp*L@2#AS82v%D3|48==uBSfE~E%fnv z*?nGU5@IxRDU_80ppno{BJk-Cd`F4+KdB1=>1ExZ%TM?dHxz~)yndp}lU@5Of30>U zkVK%ysj+9LbwXVl2KbBZTB4az7e8HHV@SVxUX$cTIUvVlu80n&BhXA zlE{0ocrC06&2;I#5$-&Of_`j37 zWT0neWBfmqy0mdKk=NO2q`AGF9o->p_p*1}!(RWdgT9rW-@m;bAZ!PRzrFR~*y}jk z_N>#b;iDxP*Q%bt z&JMs|AA;SFV+`ubZ}^pF`v{jB>o4~Q2fUN>oAfC){zAtG@S_Kd3BVtJqSxG&lhc^N z1Hn&sVSaaf3+4z&&H*Gq3kLwp1isc6+3rvC2h7q0zUA@fH}Dq}@5b~+71|!OqW7`> zhYQp->EFZ8uTsCI*F!UN{d@T=rw#smcR(KCukZHdw-)pp`RG>7F9yza_s8*QNB4&_ 
zURq66R9Q9kyhro?kLE`I00M+d9gunZ7s~P!=-X~TuelQQeUIuHpYczF0$}X#Q_|Fy zR-euvkn5_?HTXww&yTw3PeekT-SeZ--titly`6mn5Qn?F8^9N?mhKN-2igrCJw9lQ zuk!EagzuMc3k1j`fI40^4TlCq$YxP%aO$Td%9K(+!?aYUB|Vj$Ag#=k7M6ZMXq7+d z$EgrorqEjsWie0Sba2Z|>yd3H+4O-RIVfTRBT~GGw6N?>8CCU{rB8PmD))*whEBg3 z@`fhafVNM!%cQmjtcp?A(aevl;^SJIGMZuv(MWT30Y0iBqn^?4j9^M%I4q^)F92xM zq;fR90gkGh9^GeCBP4PhenYgQn2@=FmKxr1Gj6`?q`>Y-&w+1#sZh&?DU;QPY`oLs znv#N)mp(VyT=ZoFJ2fUowDe=~bSj;4o9K(cYIoZ!j`k$ekk^CU9YH=Uxv>C%buaBe zDd#;hNpcy{rNz6vqDHQb3j24!hVOpAb@}n>+RDa*Il*iysY1LN@d6_LSEtuuuS!Au z0lq+cSiar0KeQA495SJaHBLBbx;tFhurGiB3A9ydd`V!1hacM($6NF{mVYMEKyk&7 zn$Nx^^ffx4^`cp68aoTQhm_94jbC@Hh!`_OEuBxD`(aTxq1wI&0QUhq!(8}Gi=)e| zlNJu4y>MFDd^#@C+bG1&XZ=@mk3|TWN|a;!>&EfUkN$m~O% zWXXCNvS-ieC!q<)_b9ezbbRzf%_8YQRu-(;7 zu>Z?SwTXjP&_i!fqhkUpbPA0Tp18OVq9#M}0|1AxTN%#7YG~&>2L1GHI|l9SX-t81 z(w}-5HD=hij3vfYLi>To*eQ6{l?ijjTG7F3@2lM5%F`NB%P@Jtel~4jY;SgN!K+#v z7VVfMBqb)7!pp4^v;yx9$9Z>Wpv4j4##&UHJb`8Dq#I9Y!8Mdcg@yhscE=^pp6s(HP*3VfE3wzU!sWqs zF%vPBzbMfuIDnfbz)#QJB*3m`ecu24JqKm*K5LLy#%;;wV=)s0cUqyouNGnEJOx8j zs^M=oYBhZ)2;)-JRB+o0!4Zkjoi0~iHZwo{njtlXR5oH>gMuHNKP-TyI5c9c5r`d!1)>lMXKBzh@|nmWltJg^!Nz>`?Q^nL$HNOIU;V zRk_t6DhtPMJ&AUlRb`H*Gt~6oMOH<^?@eO}b@dI{+`c-R<~+WYVL299{JjH?TbJa$9k=CAY+K#IV(}nezQSj`+zv|BuhWTp zL^VC=ou&uL1!d68ucp;n-^)2pOQWH2v^4PYsK}0J%r$FZbEbJ$6P1} zm6-2NqVei+%pn5qG#BcO7^~QtT54(_9;7%6QY14+!5gmy@44j%(q(?Na>x2ep~QVS zO@JN9-sJD>P$E(QgfwIl75B++L4S1~MH>JmyYW;sA(cb~@OqrTtmhM~Te$un`!&aR zY&xko#2I-QG5(#DV^g}NormcTVXq41^*WKyc}hw-{2g@jm++jOrj{gO`p($CoEM(? 
z5qF&FiJQR{8`zI;1t(L3uvas0s`Zd`>>-q;#a_l7rW~mQM@uSri2=K$Q>`zEiOGod zwu}fE-6ecMp2%mdHWU)ZfO?5ySII*5tU01-d$3(}TU^1O_6q7$gFx{EIXNe;kt)4> zo+<)9c$7yyw~@roHqXhG>*(Nxh^mCocFB7DFlv;H#=&1Tm0*+v7NMH)v`CLl2aSjs zaD=o^FQo)U5J4f*V!T!Vi=Wn0r z4;Eduum=Q7iZX}w?A!Cg^*3aNk()=sxEnZ|=mEIU6@3=DUn?$M!R2;q$V6SS_tYZD zzI4xc21Q=j!=#YYrIALqHiVMi-9ThN!qiW}c47e}XN zLSSi^4br9ho` z!!yR8SPnFz^+j(ePEE?58ZQc!BVi}mE}nwS8>%o^L3--=aE9H)%NW-469f(jKVPKE zp*S3*+47L;<6~U`?{+=us^4NE3zEHJIfR%j&?n69MFT_rPetr&@Y>-^pp@&2v`IES zG@4TG-q|03I<{Ci6Y3DH0_l;sy$Qjp3Ya%E|7i(8snjEyE1HE3%|HdFciC6?5ooZi z4F`Nj^)*{~^Zi^cU9bV#;t6tTznzYN z%!uYHFg?^3R&?=-E|la?%HxeG*!4$xGPES)FloW1!N9CM-iydLUik*5T~L2HDQq>a?DV?E^i;t8TUSJO1%(W`(dE1{Pv)j?`g(mY*D0 zgV|g|yJwY_F+xt-W++LhFC3xE15bn7{zztIe(Id8TxB7dEf_Cse*}}v&^`-!LE%}M_Nvq?Pxlg9Z-T()@bMJ>7ex$!|>dnvviJ`uB zv|Nl>IYc<4Le>54Sq0*JHW%A-=oRVf3k_aOoa7><&`8bK4a(91V9;BisCnfrvXr&D zB{UX5v>5gSU0>4+4bsM!R2F!?`a_+Af6i&m3}hJa%{W|4Q7}$ZDMzzpJz8d@2OYy- z8yuBtNAG&5!*vWN#rOY8he8?}YwoGEp-~m`Jh|_L&=-TlW*Y9)kCRk%#7=HUE?0B5 z20ucy*<}iVFOu#^5V|zMS|0;xLCGv#`jphYPuT&YP2acQY(8WR1P+P#Xnpe7Hpva` z3pI&U@0Nq{pcwcvG}0T`bZQ8VX9F4S*QpQ?bdH*6SbWr=S?esrkJ5Tr!Gn9d^VLH} zzXzk`c};6;3uIWF$Jh_XckccinQqm;`Jp|YZoH=Vx1c-soQ1emDdETG-?cb-OGGYY zRkiRJ4EJpREMrX9cuug)Z(9?e>SgAHWcmQuAY+0S^8QmI{(Sz~Ccj2DCc zy`*l2P?%whARpauDQg;D63@pgq~;=8%Bm5ica{OM?6J_Vb@jdE#4he-Rrv#>$Ns6% zE2;`Cf=V>o<2%7Xe$Ej|M1pEF2F9b{*Rd>*2ZY0~{1Z*u7)#3Dr_)aARUt#vF1H~} zSOOehx;s@a3zs+^w$bJffAgQsU}~!G&L~8g>Tdz}og~@%_r@5uQ0MPG$(;EO!mi>? 
z!jZpv;{3MU{T^%n9w?sD_>E$AV5Hi>Eb6l{9y}NmcJaxzeYlYGwD!A!Aa-S?AR>?Z zGgbGo`b~V@b1$lX^6qLAltF@TwafLjoloXbXV(ntH~elr`eXx*$g4@To(lIP#n`@9 z)ME>R;mJhunTj~m1M#=3jfKm50Bb}ylAH+3-*@zMW`~?SHC8FGTYagSqkrg5zaEt zT<8F&1IvEQ;Bs`ug}bR_`4PZmQEc`^(jo zh%XFX8bTLX*LK^Fb>Mc)d=J&ll4$4bm46Z5ZHgDP&OX=X*lU?)&qdQh)+-0{j3(2M zkc+|g@MI3Ne#* zh!wFQG`gm(3<-X6kv+I78Xhg&FnD6%T0mT4EeAF4IcKGGb#e`msBvRz0NaV&lER~l z1A`c&<0_IEO(21+4`M1Pqw%I{qFyJ#K|(y$c9E5mnIqYPN6tL@K3i1Wa_{z5;)^*i zKQF*<3eONa)zfeU&7zw9KJZl#QMmHSy=Q<7YtMf=Y+XmV^xI9F3Q8x=KvuwMW_B$& z_MNOr48zsXHSq9z&!YK@jKKx_nF`m}Z^?Y}czBE^+&A&S{t=&uzIpTa1g1eBx_#b5|w3&9(=R?uMmAg(`JL0s3FRX^^6A$ia@#0PmMdz*jq<#h1S#ZNM zYFUV3YCLb=FZVwpUm=Q`0sxctU-rS3j#N%rJ73vg%^q4>`lL}V^d~rjz%m-Gflwx& z#Ndhy*saH?PR(E8l&;;$=lk;qPf?q3vQ~iW4tU>&beH1dOC6~|Ih8h(25$^-71MM| zq}}J;S-Z(tw1r=4K8lSYWUW02-;pzC#f0?iat)6J3=eX^=N?IvPE^NQLW(ci%p(_l zhx08n8H=hkD6E8N3*%Jd90%uI_E^)HbBSSI&!4r~*y|u3q@0zR3|F0$Ak%cmH4M-3 zxI}QK!{qc_$RxwB_~{O9U2C;z4%<5-okP?BMfw=AgEXQoA}0R9qScr6w~OIC$L4g* z`TJ^-hxfs0yv~`MhoBtQjffjhIO_32j-eObX^&*(LY>5EQ&J)S#O13==Bi$$)bRh_ zh}37%i=3Bh*G<HTQ8LItlFYE8G}{OHCR2$W)MGrR4Trd z#IC|@2%dZSe--$KFjFQ!_gh@CE0XF#Qzd-?Lrs!+wW(;K^<6wbWi(U0o~(UKkh64M zt3CKtk6}mVEn5zSQG&*=)<%0$T-abK&NiQU<@iRTIpZ&`lvRh)s@-e0AreAc4z>wR zB4M<=X&{BJ|DFnNauBw*#A+ts5dQjIeob5(du!EL=w7nxk%ZTnm1)%4_Y4A^YDyOE zq7W26Fh}4SVtZ_Q*{iuyH*G|GYU( zocjVDbaV`GOOx3#U(XgpKt}Q4treL9KZphL&^)0#RhM_tr`mj}z{+_k$jI)X&$xt% z%pUNfYqp|H&cqryIur#fdM~5VF3BhYBcu#@vCPge22ll&rYvXNx^OK}gO`aVWR%Va zf%8Tw|4g@Z?tI#7JBN=D$=H6hR#U4ewJj61@F}6+EsiqDjqO2vhm|w8$K*Oxe`~A@ zNTQP>IH9ld#}9w~TY1#ZkaB`r(LyF;>Zh@s%IcA+Ni(s=A`%9Je5P;VxiNw@K)>FJ z!93`_3YLCP#C4y~w+>7uN%hl39oHTKKGDk#3IT(wc+xVkYp&G38^x6@A0I10vN+|E z`cppgED@!|JN%GhLUF9vlu0y=Dwcy=e(c4{5dKjma|QfEGHm>C4eP_+T=SK)dKjTJ zPwLP?AXO;X%$cZ>sCOK)z!X-C!2gg}!k=GuY_w|*K_$TAId^|7#y5aoYq}w*HOV)u)Z%}OZ)))j0%M-ZKF;F63Gd(9k*_Tx?eEVu{KSwvd z(r;M$;+j0Vh~xo7GPzRIuYsQ-$Jcny8R;xte=(Yj8orWDd^j+2^+PG@3S%*((qB>u zrm&8z+5TMl1$jv&n}gP5`A8P8p)BBO_kG{Flt4f(yhNoZbKOmZLPbT-#I%ckb1`Qi 
zL8tu7dO7!B6SJ73{(NxOsKL#-^pN(q^jENMEeFVJZcde-pIPiLZNl8|;3M-TmUivI zeT`9eK1rA@X)iT9PrAn%@|>TfEh!>45Z)!$$Z0Q&vb8`jxqAK+3c-*R!aBR$9>Rz< zhNTUYP8KQym_J(?gFa7Vu?bkDCyI3EL5F-90%z_3tQE_mjI}cdHKwJO?`Vr6V+G#p zoURy|sqRaT?u*G)Qqx@IT-blMGGOFoMEP0?N3rvrybjz@9IUMVjOkGi_L2+lv*Xvk zp3H1rmt`HAf{Ln`nXAn81ik}0WJTj=U!VjQ9va2#UfrDOXe%B+k9bM)5fUJ1G0>fl zl6fLwITwY`1$#TNfAe*B0D%NfBTqZ0&lJGsL36<>&OY#<$%g-0(=qgl#)DCc|d1x)xIhQVgxN|yB{&1=tp zoNs7?EwiV~L=VimAk$lw!#5a0nGkm88f?|9DE`Yw@;=@8Vm|LW3z!PJ*DP(laFsa8 z_juUGaQ1KEasZnRKe;9z#sX{&?z1H9rhg&c9xboRDRlHbxeYz#cbT9L9!4MhzD8vL`oYkADym0sTUbnyVZ5pzY(j&N3W(fJVGQJppB{ zsMCh@tW^RNJ%+WvLm5Srk``&&OFa<0;m@?Kg3%zF#0PW#ID zK2lJtRHg1CoUgVdLoK656@wW9On;bI(zr>Pu!stmP%O?NO$+7!&3qE9#4|QVVo4i% zB%8UOyr41HJ*PvAzbI&!$wV9quZDjhky?(8)hg^-_f%)-Kz7`kYcD2)5t3MrjOYC& zIp=7`*kNjHuEq;mi~U+$1p9hm)bi=7;-?oS(#uKyIZg)X788!d5%$Y|#}QZWF6mQL>4* zVwb*BmS>FMiHe{UkbKVp$^H5S$6sLE+8zVU-FqIiZkH^<^_T15+ zqe_YV?Fxw>9VSNZM5VEkrR7qMY#J!yek!BDyGZs5fAG(*SA9hdi}H=}s`RY4`esbU zw?W=a^>%K`f>)~9Wij~b*Tp(%GPNG{-L-?xTIvJLVvn@gzn9~^qwoIp2kc~YVi#NN z7W%i$kVB*utIqW*givD!DuMKS*Gz?<6k)gEr&-Fm=)%bVf?_Zw#IT%AZ0G^`&J4A? zsZ=ddQtRyj8zvrz!iRoTz*Wr-9TG5T%@WGq;yGj5d6$O7joK+%mg>EMzNxpXqngUJ zy?Ujx$WT->fBUOtCn=~tQ4@)N5uoVocEbxwE@3oQN-K0GLtmF`d?cDelx%Z^BEn$~N&x94qFPruP$Fz< z?-t&S`x2x67GR80fl|Y$H%*`7?Mb}ivy-mU)a{bS^{<8KpXKvr&`zW$4Joa5J2r7P zG(n7Ns$)559EGb4N-d}loa_L}n_|BdtcALYoN>EIl#a8WJ3Kc^(om~pObN)5r)~Hn=n?hdDuO!BNRPaBN zWMf=>HVZlSg><|7j6=%yD=jD54OsLty`nonHWJ8!Q8^Og)thBvEAcRG`GO$|;z3jp z?sFq`f%r1Iccz?$&Y86o?v?y2_+?)tJv#7Htk?Uv`R@XaTJei14h4;T2(GK2?sR;? 
zjal`mdy6pnVnzz71}aR=0o`$_IA3&ukpy~UxMJeFfjW@)9N*EC)8f&j5B%~;3-sDk z!%1|bPWa^s*W%bVan${`ZQIMYI-nZ$tBU42eLGmyWXa8 z%pU4Z&}l7?%bxU+C7pADktUxdiZBLd@a{7f@hPtjhShvU;+Powq-F|c zK$jw0+itq%*j=!^G=FKdMOhGaQlYlr-3?*o#4y}DdJGf*wNb&bLGKeDEM5LZ5iXV0 zol;zF%Hu(W{gYp7^o*wq7iOZ{9b-hsW`tO}q1AS@7}1Idb$m6w=L$9|cq$6D3t_G3 z2x3iZIU$tkpq!f3?hrnxJcgT%!%@Llem&mxB-Ib3t=5%QJfjW;6;k=^0B# z3#@jWdUd$@1m)_W(MKSw|7p@5nLL-fzlVLv?zwUzu5rHx*{pjXHy&L1thrw88j@Zz zCxTk{ZCtqXOm^|6)&HIbsB2;UG?FOswW4cDpT{>C{oZQhb68E}NnEu(3oC)~5Xb$- z(vz-rb^VG`WGu)kvo=HMD*}h3ot+T*(iFgu9*F@nNh7p-{DsmJd1nnxuZ2~dNw~l6 zgfbhxIOSKAR+&B5CWQ=c7$SRKbaJFD3cIA=nZ^vwTn`!dqB0PgQ-~NaSNIsT5EO3;j1K@ z2H)jq^J+Ey#_K4pqMDkVgQG$IINHX72|$$CokH{!sg`)sAZfAv1~z{H5NBYJZLn3_ zTHbf@6-GbdyHjs_OGQEpvmRaTlI*bIwBE=wNFwX!NARY6^CWDv zmiX`xv6Y=1rN@skB5}rkYiYsF!1z0THF&UixvidBRex8YuJt8&rXaKk{i_gyM{yKa zE2nP==_#Lf8em^MxySls1~Vw~kXbToqUhecptvI}TX}GILlhiw%KL8mB4&4+LbD8WrjM0=*q`dGk(;R?s3I@UitT75}w%L5?_g6;f8S9$RlCxAnTK$_mHi8e!FW z)WTi4K9n@P#ldGZsN5JOeR$S+j6L($IN5ArjF$0~8qFx%rX%E)R9>zWVHcNd!(YW$ zwj6N?QZOBs=9m@d)NUDX?XdAsZE00Qo9{rEZ-cn5z`u9waT0gr{8Bcy7kPN+88%n{ zHv8k1hpNcfan<8fG0f8AJwHrL)j*z_TXd-)=Ah5dxO29%MG^}ewKU$)MW6L-s`lyT z2dTRscEjf%x7VPqAi5$Z*gGEyQExV|{GQdY8?h=#Lwjv)wE}{SI++5*G*lu7`r9>|^ zOkk+fXyrv>8iCmBsHQX<;OmjiFVxCsqRb8p&Qp|OT5(c${OQ^}&NK+cZPPnQYO4X%= z8r_0poccSkRFw&~(LgE0k=eIyV$ECnYXGdwY~kQJB*Md2E9KER{#djqM9;NMuzQ=7 zF2+ly-j?ILjwB9xLE{ie)BKv6S}YS^q3v%dh9^Aq`AkMZyWALfPfq;B-pu=osmH0R z+H&{mgbhs+SseEf&5MsjyK8byTvcH4+;w(bf;W>~LLWx4!?}=3orC>T?)@>(m2_#m z+#!SiC$PR6`PjQ_^%Mt#sd~d2V$q|0M4_VX!QkA}qz+%{QtzkL zKeFgQ0yFjzkPb~(8<|%Z3A-x_puSSnQ)2e|j)~fQTV8m}qKDKvl4DXlefw?%3s^4R z&z>HcWEq;)&+=QU%}`jIT~qMeO&x*-hLq?m>MM;%gDOrWM}D=mzykpxt&c znK;}3?Bgf}(9C#U(#miUSn3!r=VrBC@?M`X$)fvD7miY2%5V#P|Ky zaub)qOK`V8@@T|$FeW7QB%kVVkzCqRO5N?*(|tzymZ%pST%gXMo-r9ujqF{p}J8bC^0 z4=346lyuCMml5PAO*qCe^0~*{^+`|c)7m@sj9haKCjWCnVq(vopfq-+uP^nS#X$G& zICOy@$eu*eUUd{<3|*K%peye^YTS*43k92MkOitl_W?qktc82d9*ifJx665nOjjV? 
zaE%XC27%xF)a#ps|B04`6Gdv@g;2Z3z;p@=(ef^D7?5&5o z5f^^d_$C-GPZP`RBldv#4#uQ(^A?!H>sBZLSg3z`W_&<{EiU(O`ksW8M3((z0+aPl z=K|C6vHl_jc-t-E)dFSEna`PQO(~Izd=eqOH?ehO#MaTAg6!b}z#(TJEfn8^=N9~w zDsMHPPD?UsDr5H~CNvxyWUe-?@q;Uin5P389q-#9%D||T1mpDe*PLdE1?h9zId;%z z5fu#8@uIRBi+d>^Tc57pL#TM(*nSLbIetWQX@hw44+jzz2v2c~g)Q}wHT}%aiQUqY zCsdFCr$EE(pC=I^b(#ONu zA+5U2-zZOSI*E9N?329F;LZf$>``cQ?z4Y-@#7q8>>e1GU=}PGnyy{ZTZLh0`WQHb z;J?=1IY(C*G0Q{8M6uiLw=T`_rEIUPU0^OS!MvcD*JwFP)unKf)rB|{&{$XLdgs7W zH`ywbr7p>^YU4V|I4k`p{Ij{Z{xfpMA~yml$<@`BHs*LhflafgoODFxA3P9>CiddQ z8WH^OSlwEo6i$w!XS;yl!hqnW%t}zu9_IX@dma)1?+$B4iY8mEwjYg@koy5T3=c$;3dJiNY;ZcOm96su~ zQWh%PVvGin##U(FfWESbJdsIn+H&mbq&N>IAPPtV=)9yMrD5N^`ae%N34`}9+2)f< zLfvR$oln4(F5d;19&T%|OJDc6hhJi;yk30bb;FJu*6yS?ee@1EZ*N2G2!8csA;_+1 zWGS(R&F4!pwg9&R`x?+xUZcy18i-yM5H!}wI9xQ8QEEuKI~(3RdyG(#MVz1C*3i*D zk)NWwa=C0)xoIr5`_u6RA@B-U*6Zl&TUS@WzNJYxtvZh8t*fQcohzJe`s9n=(9RvZ zTr8l~c3&Qrl6hlY2*AX#cFVcWo^{NEKVXy*NU8rXyONRd|HZBpaB()bb0n8Ba3_#) zadI|sv@x)?rJz%`aJDufr=U}EF|;%>a{g~5^q&jo|13pp42+xz7+Gl<2(-!N9L)%9 z?2JtaIA|HQDJbaV9qo)=j7%KK?Tt;9P1I=^XqoA088|5Dq)a^A>>Q1q$SI)yrzZ;>SAqe_}?i^90|yk%`Kb={;U6UKhy-OCXP=3Elt2g%fQUQ!okSOLc>VQ z`oH%1pS_7XfwYCI3Bi9WveU_0xXUTg+Buq05Lnq8IGa0}7!bJr-yT?LnP}-L{s*D- ze}(mRomWs3+7^aG6$BzxLcao`9TGu$GayAE2=Peh2Q&mxx=QE-B1j-q0Tn`#jvPV> zn9xO(t{^BSlqdosU7GZAalA8k?wPrJKde~~`(H2L|FmZQeIkH#I~U5wsI6rP2fK^N zY1=chQcn!7V*KyR+`M`7B%=H8_na2YCwTNQF9k9I31EJ%M!8#P%HtzRFy#$SU*~n) zszBkqMrD>=apx@{7>G$KFtbV!%KMQ-BTeuIkwQ(f$GnoJ5g=n`#)O&U042mi>LEZz z)8c?rzf?vEqhxmG=%jpbKiOTIVFuuHb+9)%-Hf`e=l%)RV=+O}B2{>U5{Y-rHTLtn zyOXcQnj0q^Y8Df($Sx%1U+DkgBxio7U9x8g%Wq#g*$_%!I&KJUSv?NR9o;~|dt$8u?h&g?4zZ`R$Pp(#fN_8~&TPDPeDJ}=;63?*un565Nf<$E;_J#;p z=ktkFCVz3d7@OIIsF^!1Z6q269I#!wt#RNHELJpMmSygsVw-xgFEMLaJ#n0?L2akc zRQY^88`lRjtg2B{jo&_y>LDFa<6!{v2=c$~O&MIh06(Y+k^R{Ay?6G&xHfk+<=TpM z%wpZ8D{<9j!5pb`48F_JVEWIs_~4O~twSCAeXOoisQ)~!?dXQloJUvUw*8}dHxB>W zI_+Yz{P}3lES83c?OAx! 
z*H;SkS;ZK*_7r;KdqL)cFVsm(bXJO!Z>Mc;06TW$jRtMcl5g>76@^MAvR2p{%f>O+ ze7^KpUYAAnYdKqP-Q7)k+xSal(eAg!PtWucJ?A(M1l`(~YX^`A#Lkxf5{kR5(rMti zz++N(L#PkzG|HkZRyDE?gt^PvFW>5W=Sa!GLyIb$YoZSK4UyJcaFyO$!K{!S#VKdC zHa3A1L!zE(NoCDZQLv^!PGSlDN^sr{_NFbTlKrv9bCdKnaHsq1?app?2|I%)8Y0`~ zi^%59{*3g|-IB{gX9ejki+iOWOOrig1@EYLz6$OesE0YUA%>TmUS4so-nxJ->p?68 z{cWpfqf=D=Q)Hscx*8xPIaO?#VXpIPrF=*a--mxN$ZZ$KLD1rB@0oN2Lgk7Sr?j}2 z>PymhrnmW{(?*}DwSkT(0+QH0o3j-N7GGt}qF z-r!l)<^r{sA2fa%e-6o>-7AR(I`%b$du#@gy97A^twI=nty_w%{nH)1B1^)ATKePS z^(-Ei{TCLUWv`inJ6DK+PUo2pZj)bIg$G!9Q3`F&3o8m4U{`)GD}j@8isDMNoX7Nw z-clt?1*zRrj%-ppP%lr`4o|zdZZr(!J^+q8lz3UMW=m9Ft$QU7R;b`!An@MGlSO}G zzq3qvzEzta(wMX|(sIVA8YA;MYh=kYRS5Iq*hv=ORE$l=BEQB~I@~xs6S_NI6}zBg zMKN~#T>sQ$M%JIG%<2+qHH<&Yql`r|m58}U`{3N7Ki+vRQ&>JrcRp5a(zl72;)1x8 zWH-;a6laKjd2+w>nI;AB*ZV{D4n3@{lyk1~ItqZj;HO~d+9(&dojo>%zARyGOP}Ei z+DZTK$xPg-J)9f=_340yS@xcc0k(5fl>rJzas4^*X{%Z=(SH-*IABu zW~Bi50SVR_^)?b_e_Tid?)p9er>ygzP15gZK(RZrS86I|TI73uN$TQ#5umBi!M^*0 zr~k{q)G=!AMBjBI1z_&4F1d3LoE)Enfr7v|?3y;y#$9odt9gngwz+YhQitS^F1ll7UIDA40sdIbj_iJgS;omycj?39+2B#+_?Vr^#I?c90UI!Ftid zh%*Q{6Ei;m0AK@XP#qDRnH@Dzi~xWqGXTH^5CwR-`TL)Kso>@V^$+q#TC)R~g0<|N zIUMggV}dvU49xRP001-k^tpF>pQVU0ch?UrAvh8&nXj)Uc?XF!(=XEYcv28+xsc-ZKy&(5v{*e*+{99R zb&OILH%n?%f#@1}P^Bh4C*~n<5=EM+h5LS3hCf!g-u&rky;$qYNnUOVJ7lx9Dm4#p zYr9%kC!m{`%=gCLpq^KVc)=ANV_pU}0dadwraTIXrg=7jn{b2RdC~7ytW}25onzhB zr3s*aX?M3O%6D=`g`}YqfOej))<$~or{tfrIVb4=^@0b6P#iV#+iaKbeR}uE%fN`^ zuS=|{;4y9H0qUbj9klmrK6TJ+wl)+HW;uK2C0U>NUW4%R3oL@Ap(Lu7cU9QMDCg!5 z66Y<$hhq#XtmxCGu#n%?^YFafN#I%_AylpUM-xIOzOl5_%J$xS)d7k#?p^ng0~HW} zx7{)<6qT9_S>wes#V>8>)TM{zUlV<&r5s0$yV86%cN(3VF(N&uTTonCC?g~!(@>8i zW?R9*qK}Vbs{_qK(59-Uc9Qjo$Qhgc>Qb83nE16Gx%dZb&@pO?1Q%!?e4{KWfK5ah z)^)MmEOMS5FGn&n{Fpx@=r-H!mIp}KV8NAR7u z?54zmpgspu>ywX9>Qr1Te%J=Ry0u1YcN!+s1pjV579Ns7z!@0Dnf}Xib@6!ML<|6M zg8#)b=oHf5rwI3}{)cR|8`KYd8)?n*o9us3{ZGmNwa0HshEuU3oaE_{f0*U}NwTp1 Pn$LK8OHS)@+^_E6D-qe2 literal 0 HcmV?d00001 diff --git a/modules/files/src/test/scala/docspell/files/ZipTest.scala 
b/modules/files/src/test/scala/docspell/files/ZipTest.scala new file mode 100644 index 00000000..a0e0505c --- /dev/null +++ b/modules/files/src/test/scala/docspell/files/ZipTest.scala @@ -0,0 +1,30 @@ +package docspell.files + +import minitest._ +import cats.effect._ +import cats.implicits._ +import scala.concurrent.ExecutionContext + +object ZipTest extends SimpleTestSuite { + + val blocker = Blocker.liftExecutionContext(ExecutionContext.global) + implicit val CS = IO.contextShift(ExecutionContext.global) + + test("unzip") { + val zipFile = ExampleFiles.letters_zip.readURL[IO](8192, blocker) + val uncomp = zipFile.through(Zip.unzip(8192, blocker)) + + uncomp.evalMap(entry => { + val x = entry.data.map(_ => 1).foldMonoid.compile.lastOrError + x.map(size => { + if (entry.name.endsWith(".pdf")) { + assertEquals(entry.name, "letter-de.pdf") + assertEquals(size, 34815) + } else { + assertEquals(entry.name, "letter-en.txt") + assertEquals(size, 1131) + } + }) + }).compile.drain.unsafeRunSync + } +} diff --git a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala index 56927b82..2dfe799a 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/CreateItem.scala @@ -64,7 +64,7 @@ object CreateItem { } yield ItemData(it, fm, Vector.empty, Vector.empty, fm.map(a => a.id -> a.fileId).toMap) } - def insertAttachment[F[_]: Sync](ctx: Context[F, ProcessItemArgs])(ra: RAttachment): F[Int] = { + def insertAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = { val rs = RAttachmentSource.of(ra) ctx.store.transact(for { n <- RAttachment.insert(ra) diff --git a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala new file mode 100644 index 00000000..22b39f34 --- /dev/null +++ 
b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -0,0 +1,169 @@ +package docspell.joex.process + +import bitpeace.{Mimetype, MimetypeHint, RangeDef} +import cats.Functor +import cats.data.OptionT +import cats.effect._ +import cats.implicits._ +import fs2.Stream +import docspell.common._ +import docspell.joex.scheduler._ +import docspell.store.records._ +import docspell.files.Zip +import cats.kernel.Monoid + +/** Goes through all attachments and extracts archive files, like zip + * files. The process is recursive, until all archives have been + * extracted. + * + * The archive file is stored as a `attachment_archive` record that + * references all its elements. If there are inner archive, only the + * outer archive file is preserved. + * + * This step assumes an existing premature item, it traverses its + * attachments. + */ +object ExtractArchive { + + def apply[F[_]: ConcurrentEffect: ContextShift]( + item: ItemData + ): Task[F, ProcessItemArgs, ItemData] = + multiPass(item, None).map(_._2) + + def multiPass[F[_]: ConcurrentEffect: ContextShift]( + item: ItemData, + archive: Option[RAttachmentArchive] + ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = + singlePass(item, archive).flatMap { t => + if (t._1 == None) Task.pure(t) + else multiPass(t._2, t._1) + } + + def singlePass[F[_]: ConcurrentEffect: ContextShift]( + item: ItemData, + archive: Option[RAttachmentArchive] + ): Task[F, ProcessItemArgs, (Option[RAttachmentArchive], ItemData)] = + Task { ctx => + def extract(ra: RAttachment) = + findMime(ctx)(ra).flatMap(m => extractSafe(ctx, archive)(ra, m)) + + for { + ras <- item.attachments.traverse(extract) + nra = ras.flatMap(_.files).zipWithIndex.map(t => t._1.copy(position = t._2)) + _ <- nra.traverse(storeAttachment(ctx)) + naa = ras.flatMap(_.archives) + _ <- naa.traverse(storeArchive(ctx)) + } yield naa.headOption -> item.copy( + attachments = nra, + originFile = item.originFile ++ nra.map(a => a.id -> 
a.fileId).toMap + ) + } + + def findMime[F[_]: Functor](ctx: Context[F, _])(ra: RAttachment): F[Mimetype] = + OptionT(ctx.store.transact(RFileMeta.findById(ra.fileId))) + .map(_.mimetype) + .getOrElse(Mimetype.`application/octet-stream`) + + def extractSafe[F[_]: ConcurrentEffect: ContextShift]( + ctx: Context[F, ProcessItemArgs], + archive: Option[RAttachmentArchive] + )(ra: RAttachment, mime: Mimetype): F[Extracted] = + mime match { + case Mimetype.`application/zip` if ra.name.exists(_.endsWith(".zip")) => + ctx.logger.info(s"Extracting zip archive ${ra.name.getOrElse("")}.") *> + extractZip(ctx, archive)(ra) + .flatTap(_ => cleanupParents(ctx, ra, archive)) + + case _ => + ctx.logger.debug(s"Not an archive: ${mime.asString}") *> + Extracted.noArchive(ra).pure[F] + } + + def cleanupParents[F[_]: Sync]( + ctx: Context[F, _], + ra: RAttachment, + archive: Option[RAttachmentArchive] + ): F[Unit] = + archive match { + case Some(_) => + for { + _ <- ctx.logger.debug( + s"Extracted inner attachment ${ra.name}. Remove it completely." + ) + _ <- ctx.store.transact(RAttachmentArchive.delete(ra.id)) + _ <- ctx.store.transact(RAttachment.delete(ra.id)) + _ <- ctx.store.bitpeace.delete(ra.fileId.id).compile.drain + } yield () + case None => + for { + _ <- ctx.logger.debug( + s"Extracted attachment ${ra.name}. Remove it from the item." + ) + _ <- ctx.store.transact(RAttachment.delete(ra.id)) + } yield () + } + + def extractZip[F[_]: ConcurrentEffect: ContextShift]( + ctx: Context[F, _], + archive: Option[RAttachmentArchive] + )(ra: RAttachment): F[Extracted] = { + val zipData = ctx.store.bitpeace + .get(ra.fileId.id) + .unNoneTerminate + .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) + + zipData + .through(Zip.unzipP[F](8192, ctx.blocker)) + .flatMap { entry => + val mimeHint = MimetypeHint.filename(entry.name) + val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint) + Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. 
Storing as attachment.")) >> + fileMeta.evalMap { fm => + Ident.randomId.map { id => + val nra = RAttachment( + id, + ra.itemId, + Ident.unsafe(fm.id), + 0, //position is updated afterwards + ra.created, + Option(entry.name).map(_.trim).filter(_.nonEmpty) + ) + val aa = archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = id) + Extracted.of(nra, aa) + } + } + } + .foldMonoid + .compile + .lastOrError + } + + def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = { + val insert = CreateItem.insertAttachment(ctx)(ra) + for { + n1 <- ctx.store.transact(RAttachment.updatePosition(ra.id, ra.position)) + n2 <- if (n1 > 0) 0.pure[F] else insert + } yield n1 + n2 + } + + def storeArchive[F[_]: Sync](ctx: Context[F, _])(aa: RAttachmentArchive): F[Int] = + ctx.store.transact(RAttachmentArchive.insert(aa)) + + case class Extracted(files: Vector[RAttachment], archives: Vector[RAttachmentArchive]) { + def ++(e: Extracted) = + Extracted(files ++ e.files, archives ++ e.archives) + } + object Extracted { + val empty = Extracted(Vector.empty, Vector.empty) + + def noArchive(ra: RAttachment): Extracted = + Extracted(Vector(ra), Vector.empty) + + def of(ra: RAttachment, aa: RAttachmentArchive): Extracted = + Extracted(Vector(ra), Vector(aa)) + + implicit val extractedMonoid: Monoid[Extracted] = + Monoid.instance(empty, _ ++ _) + } + +} diff --git a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala index 40a91fa0..7eb789e8 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ItemHandler.scala @@ -1,7 +1,7 @@ package docspell.joex.process import cats.implicits._ -import cats.effect.{ContextShift, Sync} +import cats.effect._ import docspell.common.{ItemState, ProcessItemArgs} import docspell.joex.Config import docspell.joex.scheduler.{Context, Task} @@ -12,7 +12,7 @@ object ItemHandler { def 
onCancel[F[_]: Sync: ContextShift]: Task[F, ProcessItemArgs, Unit] = logWarn("Now cancelling. Deleting potentially created data.").flatMap(_ => deleteByFileIds) - def apply[F[_]: Sync: ContextShift](cfg: Config): Task[F, ProcessItemArgs, Unit] = + def apply[F[_]: ConcurrentEffect: ContextShift](cfg: Config): Task[F, ProcessItemArgs, Unit] = CreateItem[F] .flatMap(itemStateTask(ItemState.Processing)) .flatMap(safeProcess[F](cfg)) @@ -27,7 +27,7 @@ object ItemHandler { last = ctx.config.retries == current.getOrElse(0) } yield last - def safeProcess[F[_]: Sync: ContextShift]( + def safeProcess[F[_]: ConcurrentEffect: ContextShift]( cfg: Config )(data: ItemData): Task[F, ProcessItemArgs, ItemData] = Task(isLastRetry[F, ProcessItemArgs] _).flatMap { diff --git a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala index b79aa40a..048d4ac2 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ProcessItem.scala @@ -1,23 +1,20 @@ package docspell.joex.process -import cats.effect.{ContextShift, Sync} +import cats.effect._ import docspell.common.ProcessItemArgs import docspell.joex.scheduler.Task import docspell.joex.Config object ProcessItem { - def apply[F[_]: Sync: ContextShift]( + def apply[F[_]: ConcurrentEffect: ContextShift]( cfg: Config )(item: ItemData): Task[F, ProcessItemArgs, ItemData] = - ConvertPdf(cfg.convert, item) + ExtractArchive(item) + .flatMap(ConvertPdf(cfg.convert, _)) .flatMap(TextExtraction(cfg.extraction, _)) - .flatMap(Task.setProgress(25)) - .flatMap(TextAnalysis[F]) .flatMap(Task.setProgress(50)) - .flatMap(FindProposal[F]) - .flatMap(EvalProposals[F]) - .flatMap(SaveProposals[F]) + .flatMap(analysisOnly[F]) .flatMap(Task.setProgress(75)) .flatMap(LinkProposal[F]) .flatMap(Task.setProgress(99)) diff --git 
a/modules/store/src/main/resources/db/migration/mariadb/V1.4.0__attachment_archive.sql b/modules/store/src/main/resources/db/migration/mariadb/V1.4.0__attachment_archive.sql new file mode 100644 index 00000000..4218c815 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/mariadb/V1.4.0__attachment_archive.sql @@ -0,0 +1,8 @@ +CREATE TABLE `attachment_archive` ( + `id` varchar(254) not null primary key, + `file_id` varchar(254) not null, + `filename` varchar(254), + `created` timestamp not null, + foreign key (`file_id`) references `filemeta`(`id`), + foreign key (`id`) references `attachment`(`attachid`) +); diff --git a/modules/store/src/main/resources/db/migration/postgresql/V1.4.0__attachment_archive.sql b/modules/store/src/main/resources/db/migration/postgresql/V1.4.0__attachment_archive.sql new file mode 100644 index 00000000..26dc7a56 --- /dev/null +++ b/modules/store/src/main/resources/db/migration/postgresql/V1.4.0__attachment_archive.sql @@ -0,0 +1,8 @@ +CREATE TABLE "attachment_archive" ( + "id" varchar(254) not null primary key, + "file_id" varchar(254) not null, + "filename" varchar(254), + "created" timestamp not null, + foreign key ("file_id") references "filemeta"("id"), + foreign key ("id") references "attachment"("attachid") +); diff --git a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala index c8268dfe..f384b04a 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QAttachment.scala @@ -3,14 +3,17 @@ package docspell.store.queries import fs2.Stream import cats.implicits._ import cats.effect.Sync +import cats.data.OptionT import doobie._ import doobie.implicits._ import docspell.common.{Ident, MetaProposalList} import docspell.store.Store import docspell.store.impl.Implicits._ -import docspell.store.records.{RAttachment, RAttachmentMeta, RAttachmentSource, 
RItem} +import docspell.store.records._ +import docspell.common.syntax.all._ object QAttachment { + private[this] val logger = org.log4s.getLogger def deleteById[F[_]: Sync](store: Store[F])(attachId: Ident, coll: Ident): F[Int] = for { @@ -20,9 +23,12 @@ object QAttachment { rsFile <- store .transact(RAttachmentSource.findByIdAndCollective(attachId, coll)) .map(_.map(_.fileId)) + aaFile <- store + .transact(RAttachmentArchive.findByIdAndCollective(attachId, coll)) + .map(_.map(_.fileId)) n <- store.transact(RAttachment.delete(attachId)) f <- Stream - .emits(raFile.toSeq ++ rsFile.toSeq) + .emits(raFile.toSeq ++ rsFile.toSeq ++ aaFile.toSeq) .map(_.id) .flatMap(store.bitpeace.delete) .map(flag => if (flag) 1 else 0) @@ -32,20 +38,45 @@ object QAttachment { def deleteAttachment[F[_]: Sync](store: Store[F])(ra: RAttachment): F[Int] = for { + _ <- logger.fdebug[F](s"Deleting attachment: ${ra.id.id}") s <- store.transact(RAttachmentSource.findById(ra.id)) n <- store.transact(RAttachment.delete(ra.id)) + _ <- logger.fdebug[F]( + s"Deleted $n meta records (source, meta, archive). Deleting binaries now." 
+ ) f <- Stream - .emits(ra.fileId.id +: s.map(_.fileId.id).toSeq) + .emits(ra.fileId.id +: (s.map(_.fileId.id).toSeq)) .flatMap(store.bitpeace.delete) .map(flag => if (flag) 1 else 0) .compile .foldMonoid } yield n + f - def deleteItemAttachments[F[_]: Sync](store: Store[F])(itemId: Ident, coll: Ident): F[Int] = + def deleteArchive[F[_]: Sync](store: Store[F])(attachId: Ident): F[Int] = { + (for { + aa <- OptionT(store.transact(RAttachmentArchive.findById(attachId))) + n <- OptionT.liftF(store.transact(RAttachmentArchive.deleteAll(aa.fileId))) + _ <- OptionT.liftF( + Stream + .emit(aa.fileId.id) + .flatMap(store.bitpeace.delete) + .compile + .drain + ) + } yield n).getOrElse(0) + } + + def deleteItemAttachments[F[_]: Sync]( + store: Store[F] + )(itemId: Ident, coll: Ident): F[Int] = for { ras <- store.transact(RAttachment.findByItemAndCollective(itemId, coll)) - ns <- ras.traverse(deleteAttachment[F](store)) + _ <- logger.finfo[F]( + s"Have ${ras.size} attachments to delete. Must first delete archive entries" + ) + a <- ras.traverse(a => deleteArchive(store)(a.id)) + _ <- logger.fdebug[F](s"Deleted ${a.sum} archive entries") + ns <- ras.traverse(deleteAttachment[F](store)) } yield ns.sum def getMetaProposals(itemId: Ident, coll: Ident): ConnectionIO[MetaProposalList] = { @@ -56,8 +87,12 @@ object QAttachment { val q = fr"SELECT" ++ MC.proposals .prefix("m") .f ++ fr"FROM" ++ RAttachmentMeta.table ++ fr"m" ++ - fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.id.prefix("a").is(MC.id.prefix("m")) ++ - fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ AC.itemId.prefix("a").is(IC.id.prefix("i")) ++ + fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ AC.id + .prefix("a") + .is(MC.id.prefix("m")) ++ + fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ AC.itemId + .prefix("a") + .is(IC.id.prefix("i")) ++ fr"WHERE" ++ and(AC.itemId.prefix("a").is(itemId), IC.cid.prefix("i").is(coll)) for { @@ -73,14 +108,18 @@ object QAttachment { val MC = RAttachmentMeta.Columns val IC = 
RItem.Columns - val q = fr"SELECT" ++ commas(MC.all.map(_.prefix("m").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++ - fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ IC.id - .prefix("i") - .is(AC.itemId.prefix("a")) ++ - fr"INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ AC.id - .prefix("a") - .is(MC.id.prefix("m")) ++ - fr"WHERE" ++ and(AC.id.prefix("a").is(attachId), IC.cid.prefix("i").is(collective)) + val q = + fr"SELECT" ++ commas(MC.all.map(_.prefix("m").f)) ++ fr"FROM" ++ RItem.table ++ fr"i" ++ + fr"INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ IC.id + .prefix("i") + .is(AC.itemId.prefix("a")) ++ + fr"INNER JOIN" ++ RAttachmentMeta.table ++ fr"m ON" ++ AC.id + .prefix("a") + .is(MC.id.prefix("m")) ++ + fr"WHERE" ++ and( + AC.id.prefix("a").is(attachId), + IC.cid.prefix("i").is(collective) + ) q.query[RAttachmentMeta].option } diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala index 085e0799..d6fc65c1 100644 --- a/modules/store/src/main/scala/docspell/store/records/RAttachment.scala +++ b/modules/store/src/main/scala/docspell/store/records/RAttachment.scala @@ -41,6 +41,9 @@ object RAttachment { def updateFileIdAndName(attachId: Ident, fId: Ident, fname: Option[String]): ConnectionIO[Int] = updateRow(table, id.is(attachId), commas(fileId.setTo(fId), name.setTo(fname))).update.run + def updatePosition(attachId: Ident, pos: Int): ConnectionIO[Int] = + updateRow(table, id.is(attachId), position.setTo(pos)).update.run + def findById(attachId: Ident): ConnectionIO[Option[RAttachment]] = selectSimple(all, table, id.is(attachId)).query[RAttachment].option diff --git a/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala new file mode 100644 index 00000000..ee4d891f --- /dev/null +++ 
b/modules/store/src/main/scala/docspell/store/records/RAttachmentArchive.scala @@ -0,0 +1,90 @@ +package docspell.store.records + +import bitpeace.FileMeta +import doobie._ +import doobie.implicits._ +import docspell.common._ +import docspell.store.impl._ +import docspell.store.impl.Implicits._ + +/** The archive file of some attachment. The `id` is shared with the + * attachment, to create a 0..1-1 relationship. + */ +case class RAttachmentArchive( + id: Ident, //same as RAttachment.id + fileId: Ident, + name: Option[String], + created: Timestamp +) + +object RAttachmentArchive { + + val table = fr"attachment_archive" + + object Columns { + val id = Column("id") + val fileId = Column("file_id") + val name = Column("filename") + val created = Column("created") + + val all = List(id, fileId, name, created) + } + + import Columns._ + + def of(ra: RAttachment): RAttachmentArchive = + RAttachmentArchive(ra.id, ra.fileId, ra.name, ra.created) + + def insert(v: RAttachmentArchive): ConnectionIO[Int] = + insertRow(table, all, fr"${v.id},${v.fileId},${v.name},${v.created}").update.run + + def findById(attachId: Ident): ConnectionIO[Option[RAttachmentArchive]] = + selectSimple(all, table, id.is(attachId)).query[RAttachmentArchive].option + + def delete(attachId: Ident): ConnectionIO[Int] = + deleteFrom(table, id.is(attachId)).update.run + + def deleteAll(fId: Ident): ConnectionIO[Int] = + deleteFrom(table, fileId.is(fId)).update.run + + def findByIdAndCollective( + attachId: Ident, + collective: Ident + ): ConnectionIO[Option[RAttachmentArchive]] = { + val bId = RAttachment.Columns.id.prefix("b") + val aId = Columns.id.prefix("a") + val bItem = RAttachment.Columns.itemId.prefix("b") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + + val from = table ++ fr"a INNER JOIN" ++ + RAttachment.table ++ fr"b ON" ++ aId.is(bId) ++ + fr"INNER JOIN" ++ RItem.table ++ fr"i ON" ++ bItem.is(iId) + + val where = and(aId.is(attachId), bId.is(attachId), 
iColl.is(collective)) + + selectSimple(all.map(_.prefix("a")), from, where).query[RAttachmentArchive].option + } + + def findByItemWithMeta(id: Ident): ConnectionIO[Vector[(RAttachmentArchive, FileMeta)]] = { + import bitpeace.sql._ + + val aId = Columns.id.prefix("a") + val afileMeta = fileId.prefix("a") + val bPos = RAttachment.Columns.position.prefix("b") + val bId = RAttachment.Columns.id.prefix("b") + val bItem = RAttachment.Columns.itemId.prefix("b") + val mId = RFileMeta.Columns.id.prefix("m") + + val cols = all.map(_.prefix("a")) ++ RFileMeta.Columns.all.map(_.prefix("m")) + val from = table ++ fr"a INNER JOIN" ++ + RFileMeta.table ++ fr"m ON" ++ afileMeta.is(mId) ++ fr"INNER JOIN" ++ + RAttachment.table ++ fr"b ON" ++ aId.is(bId) + val where = bItem.is(id) + + (selectSimple(cols, from, where) ++ orderBy(bPos.asc)) + .query[(RAttachmentArchive, FileMeta)] + .to[Vector] + } + +} From 6b1156182c5f949c5ae06f9a01b312045002b26f Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 19 Mar 2020 22:42:40 +0100 Subject: [PATCH 2/6] Add support for eml (rfc822 email) files --- .../main/scala/docspell/common/Binary.scala | 24 +++++++ .../src/main/scala/docspell/files/Zip.scala | 11 ++- .../scala/docspell/joex/mail/ReadMail.scala | 62 ++++++++++++++++ .../joex/process/ExtractArchive.scala | 70 ++++++++++++++----- 4 files changed, 142 insertions(+), 25 deletions(-) create mode 100644 modules/common/src/main/scala/docspell/common/Binary.scala create mode 100644 modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala diff --git a/modules/common/src/main/scala/docspell/common/Binary.scala b/modules/common/src/main/scala/docspell/common/Binary.scala new file mode 100644 index 00000000..34f2059c --- /dev/null +++ b/modules/common/src/main/scala/docspell/common/Binary.scala @@ -0,0 +1,24 @@ +package docspell.common + +import fs2.Stream + +final case class Binary[F[_]](name: String, mime: MimeType, data: Stream[F, Byte]) { + + def withMime(mime: MimeType): Binary[F] = + 
copy(mime = mime) +} + +object Binary { + + def apply[F[_]](name: String, data: Stream[F, Byte]): Binary[F] = + Binary[F](name, MimeType.octetStream, data) + + def utf8[F[_]](name: String, content: String): Binary[F] = + Binary[F](name, MimeType.octetStream, Stream.emit(content).through(fs2.text.utf8Encode)) + + def text[F[_]](name: String, content: String): Binary[F] = + utf8(name, content).withMime(MimeType.plain) + + def html[F[_]](name: String, content: String): Binary[F] = + utf8(name, content).withMime(MimeType.html) +} diff --git a/modules/files/src/main/scala/docspell/files/Zip.scala b/modules/files/src/main/scala/docspell/files/Zip.scala index 55d4cef9..fd786fa9 100644 --- a/modules/files/src/main/scala/docspell/files/Zip.scala +++ b/modules/files/src/main/scala/docspell/files/Zip.scala @@ -6,27 +6,26 @@ import fs2.{Pipe, Stream} import java.io.InputStream import java.util.zip.ZipInputStream import java.nio.file.Paths +import docspell.common.Binary object Zip { - case class Entry[F[_]](name: String, data: Stream[F, Byte]) - def unzipP[F[_]: ConcurrentEffect: ContextShift]( chunkSize: Int, blocker: Blocker - ): Pipe[F, Byte, Entry[F]] = + ): Pipe[F, Byte, Binary[F]] = s => unzip[F](chunkSize, blocker)(s) def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)( data: Stream[F, Byte] - ): Stream[F, Entry[F]] = + ): Stream[F, Binary[F]] = data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker)) def unzipJava[F[_]: Sync: ContextShift]( in: InputStream, chunkSize: Int, blocker: Blocker - ): Stream[F, Entry[F]] = { + ): Stream[F, Binary[F]] = { val zin = new ZipInputStream(in) val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) { @@ -42,7 +41,7 @@ object Zip { val name = Paths.get(ze.getName()).getFileName.toString val data = fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false) - Entry(name, data) + Binary(name, data) } } } diff --git 
a/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala new file mode 100644 index 00000000..2f4f8b54 --- /dev/null +++ b/modules/joex/src/main/scala/docspell/joex/mail/ReadMail.scala @@ -0,0 +1,62 @@ +package docspell.joex.mail + +import cats.effect._ +import cats.implicits._ +import fs2.{Pipe, Stream} +import emil.{MimeType => _, _} +import emil.javamail.syntax._ +import cats.Applicative + +import docspell.common._ + +object ReadMail { + + def read[F[_]: Sync](str: String): F[Mail[F]] = + Mail.deserialize(str) + + def readBytesP[F[_]: Sync](logger: Logger[F]): Pipe[F, Byte, Binary[F]] = + s => + Stream.eval(logger.debug(s"Converting e-mail into its parts")) >> + bytesToMail(s).flatMap(mailToEntries[F](logger)) + + def bytesToMail[F[_]: Sync](data: Stream[F, Byte]): Stream[F, Mail[F]] = + data.through(fs2.text.utf8Decode).foldMonoid.evalMap(read[F]) + + def mailToEntries[F[_]: Applicative]( + logger: Logger[F] + )(mail: Mail[F]): Stream[F, Binary[F]] = { + val bodyEntry: F[Option[Binary[F]]] = mail.body.fold( + _ => (None: Option[Binary[F]]).pure[F], + txt => txt.text.map(c => Binary.text[F]("mail.txt", c).some), + html => html.html.map(c => Binary.html[F]("mail.html", c).some), + both => both.html.map(c => Binary.html[F]("mail.html", c).some) + ) + + Stream.eval( + logger.debug( + s"E-mail has ${mail.attachments.size} attachments and ${bodyType(mail.body)}" + ) + ) >> + (Stream + .eval(bodyEntry) + .flatMap(e => Stream.emits(e.toSeq)) ++ + Stream + .emits(mail.attachments.all) + .map(a => + Binary(a.filename.getOrElse("noname"), a.mimeType.toDocspell, a.content) + )) + } + + implicit class MimeTypeConv(m: emil.MimeType) { + def toDocspell: MimeType = + MimeType(m.primary, m.sub) + } + + private def bodyType[F[_]](body: MailBody[F]): String = + body.fold( + _ => "empty-body", + _ => "text-body", + _ => "html-body", + _ => "text-and-html-body" + ) +} diff --git 
a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala index 22b39f34..62ea43cf 100644 --- a/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala +++ b/modules/joex/src/main/scala/docspell/joex/process/ExtractArchive.scala @@ -7,6 +7,7 @@ import cats.effect._ import cats.implicits._ import fs2.Stream import docspell.common._ +import docspell.joex.mail._ import docspell.joex.scheduler._ import docspell.store.records._ import docspell.files.Zip @@ -74,6 +75,11 @@ object ExtractArchive { extractZip(ctx, archive)(ra) .flatTap(_ => cleanupParents(ctx, ra, archive)) + case Mimetype("message", "rfc822", _) => + ctx.logger.info(s"Reading e-mail ${ra.name.getOrElse("")}") *> + extractMail(ctx, archive)(ra) + .flatTap(_ => cleanupParents(ctx, ra, archive)) + case _ => ctx.logger.debug(s"Not an archive: ${mime.asString}") *> Extracted.noArchive(ra).pure[F] @@ -114,30 +120,56 @@ object ExtractArchive { zipData .through(Zip.unzipP[F](8192, ctx.blocker)) - .flatMap { entry => - val mimeHint = MimetypeHint.filename(entry.name) - val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint) - Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. 
Storing as attachment.")) >> - fileMeta.evalMap { fm => - Ident.randomId.map { id => - val nra = RAttachment( - id, - ra.itemId, - Ident.unsafe(fm.id), - 0, //position is updated afterwards - ra.created, - Option(entry.name).map(_.trim).filter(_.nonEmpty) - ) - val aa = archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = id) - Extracted.of(nra, aa) - } - } - } + .flatMap(handleEntry(ctx, ra, archive)) .foldMonoid .compile .lastOrError } + def extractMail[F[_]: Sync]( + ctx: Context[F, _], + archive: Option[RAttachmentArchive] + )(ra: RAttachment): F[Extracted] = { + val email = ctx.store.bitpeace + .get(ra.fileId.id) + .unNoneTerminate + .through(ctx.store.bitpeace.fetchData2(RangeDef.all)) + + email + .through(ReadMail.readBytesP[F](ctx.logger)) + .flatMap(handleEntry(ctx, ra, archive)) + .foldMonoid + .compile + .lastOrError + } + + def handleEntry[F[_]: Sync]( + ctx: Context[F, _], + ra: RAttachment, + archive: Option[RAttachmentArchive] + )( + entry: Binary[F] + ): Stream[F, Extracted] = { + val mimeHint = MimetypeHint.filename(entry.name).withAdvertised(entry.mime.asString) + val fileMeta = ctx.store.bitpeace.saveNew(entry.data, 8192, mimeHint) + Stream.eval(ctx.logger.debug(s"Extracted ${entry.name}. 
Storing as attachment.")) >> + fileMeta.evalMap { fm => + Ident.randomId.map { id => + val nra = RAttachment( + id, + ra.itemId, + Ident.unsafe(fm.id), + 0, //position is updated afterwards + ra.created, + Option(entry.name).map(_.trim).filter(_.nonEmpty) + ) + val aa = archive.getOrElse(RAttachmentArchive.of(ra)).copy(id = id) + Extracted.of(nra, aa) + } + } + + } + def storeAttachment[F[_]: Sync](ctx: Context[F, _])(ra: RAttachment): F[Int] = { val insert = CreateItem.insertAttachment(ctx)(ra) for { From 439aaee27bf5d05b289217a7004955042ec1f7f6 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 19 Mar 2020 22:42:48 +0100 Subject: [PATCH 3/6] Search archives when looking for files via checksum --- Changelog.md | 17 +++++ .../scala/docspell/store/queries/QItem.scala | 75 +++++++++++++------ 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/Changelog.md b/Changelog.md index a05a9c67..5ef31ddb 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,22 @@ # Changelog +## v0.4.0 + +*unknown* + +- Support for archive files. Archives are files that contain other + files, like zip files. Docspell now extracts archives and adds the + content to an item. The extraction process is recursive, so there + may be zip files in zip files. File types supported: + - `zip` every file inside is added to one item as attachment + - `eml` (RFC822 E-Mail files) E-mails are considered archives, since + they may contain multiple files (body and attachments). +- Periodic Tasks framework: Docspell can now run tasks periodically + based on a schedule. This is not yet exposed to the user, but there + are some system cleanup jobs to start with. +- Improvement of the text analysis. For my test files there was an + increase in accuracy by about 10%. + ## v0.3.0 *Mar. 
1, 2020* diff --git a/modules/store/src/main/scala/docspell/store/queries/QItem.scala b/modules/store/src/main/scala/docspell/store/queries/QItem.scala index a5c055cd..315b7cf7 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QItem.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QItem.scala @@ -40,7 +40,11 @@ object QItem { val ICC = List(RItem.Columns.id, RItem.Columns.name).map(_.prefix("ref")) val cq = - selectSimple(IC ++ OC ++ P0C ++ P1C ++ EC ++ ICC, RItem.table ++ fr"i", Fragment.empty) ++ + selectSimple( + IC ++ OC ++ P0C ++ P1C ++ EC ++ ICC, + RItem.table ++ fr"i", + Fragment.empty + ) ++ fr"LEFT JOIN" ++ ROrganization.table ++ fr"o ON" ++ RItem.Columns.corrOrg .prefix("i") .is(ROrganization.Columns.oid.prefix("o")) ++ @@ -179,7 +183,11 @@ object QItem { // inclusive tags are AND-ed val tagSelectsIncl = q.tagsInclude .map(tid => - selectSimple(List(RTagItem.Columns.itemId), RTagItem.table, RTagItem.Columns.tagId.is(tid)) + selectSimple( + List(RTagItem.Columns.itemId), + RTagItem.table, + RTagItem.Columns.tagId.is(tid) + ) ) .map(f => sql"(" ++ f ++ sql") ") @@ -207,21 +215,28 @@ object QItem { REquipment.Columns.eid.prefix("e1").isOrDiscard(q.concEquip), if (q.tagsInclude.isEmpty) Fragment.empty else - IC.id.prefix("i") ++ sql" IN (" ++ tagSelectsIncl.reduce(_ ++ fr"INTERSECT" ++ _) ++ sql")", + IC.id.prefix("i") ++ sql" IN (" ++ tagSelectsIncl + .reduce(_ ++ fr"INTERSECT" ++ _) ++ sql")", if (q.tagsExclude.isEmpty) Fragment.empty else IC.id.prefix("i").f ++ sql" NOT IN (" ++ tagSelectsExcl ++ sql")", q.dateFrom - .map(d => coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f) ++ fr">= $d") + .map(d => + coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f) ++ fr">= $d" + ) .getOrElse(Fragment.empty), q.dateTo - .map(d => coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f) ++ fr"<= $d") + .map(d => + coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f) ++ fr"<= $d" + ) 
.getOrElse(Fragment.empty), q.dueDateFrom.map(d => IC.dueDate.prefix("i").isGt(d)).getOrElse(Fragment.empty), q.dueDateTo.map(d => IC.dueDate.prefix("i").isLt(d)).getOrElse(Fragment.empty) ) - val order = orderBy(coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f) ++ fr"DESC") - val frag = query ++ fr"WHERE" ++ cond ++ order + val order = orderBy( + coalesce(IC.itemDate.prefix("i").f, IC.created.prefix("i").f) ++ fr"DESC" + ) + val frag = query ++ fr"WHERE" ++ cond ++ order logger.trace(s"List items: $frag") frag.query[ListItem].stream } @@ -247,25 +262,39 @@ object QItem { } def findByChecksum(checksum: String, collective: Ident): ConnectionIO[Vector[RItem]] = { - val IC = RItem.Columns.all.map(_.prefix("i")) - val aItem = RAttachment.Columns.itemId.prefix("a") - val aId = RAttachment.Columns.id.prefix("a") - val aFileId = RAttachment.Columns.fileId.prefix("a") - val iId = RItem.Columns.id.prefix("i") - val iColl = RItem.Columns.cid.prefix("i") - val sId = RAttachmentSource.Columns.id.prefix("s") - val sFileId = RAttachmentSource.Columns.fileId.prefix("s") - val m1Id = RFileMeta.Columns.id.prefix("m1") - val m2Id = RFileMeta.Columns.id.prefix("m2") + val IC = RItem.Columns.all.map(_.prefix("i")) + val aItem = RAttachment.Columns.itemId.prefix("a") + val aId = RAttachment.Columns.id.prefix("a") + val aFileId = RAttachment.Columns.fileId.prefix("a") + val iId = RItem.Columns.id.prefix("i") + val iColl = RItem.Columns.cid.prefix("i") + val sId = RAttachmentSource.Columns.id.prefix("s") + val sFileId = RAttachmentSource.Columns.fileId.prefix("s") + val rId = RAttachmentArchive.Columns.id.prefix("r") + val rFileId = RAttachmentArchive.Columns.fileId.prefix("r") + val m1Id = RFileMeta.Columns.id.prefix("m1") + val m2Id = RFileMeta.Columns.id.prefix("m2") + val m3Id = RFileMeta.Columns.id.prefix("m3") val m1Checksum = RFileMeta.Columns.checksum.prefix("m1") val m2Checksum = RFileMeta.Columns.checksum.prefix("m2") + val m3Checksum = 
RFileMeta.Columns.checksum.prefix("m3") - val from = RItem.table ++ fr"i INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ aItem.is(iId) ++ - fr"INNER JOIN" ++ RAttachmentSource.table ++ fr"s ON" ++ aId.is(sId) ++ - fr"INNER JOIN" ++ RFileMeta.table ++ fr"m1 ON" ++ m1Id.is(aFileId) ++ - fr"INNER JOIN" ++ RFileMeta.table ++ fr"m2 ON" ++ m2Id.is(sFileId) - selectSimple(IC, from, and(or(m1Checksum.is(checksum), m2Checksum.is(checksum)), iColl.is(collective))) - .query[RItem] + val from = + RItem.table ++ fr"i INNER JOIN" ++ RAttachment.table ++ fr"a ON" ++ aItem.is(iId) ++ + fr"INNER JOIN" ++ RAttachmentSource.table ++ fr"s ON" ++ aId.is(sId) ++ + fr"INNER JOIN" ++ RFileMeta.table ++ fr"m1 ON" ++ m1Id.is(aFileId) ++ + fr"INNER JOIN" ++ RFileMeta.table ++ fr"m2 ON" ++ m2Id.is(sFileId) ++ + fr"LEFT OUTER JOIN" ++ RAttachmentArchive.table ++ fr"r ON" ++ aId.is(rId) ++ + fr"LEFT OUTER JOIN" ++ RFileMeta.table ++ fr"m3 ON" ++ m3Id.is(rFileId) // keep rows without archive + + selectSimple( + IC, + from, + and( + or(m1Checksum.is(checksum), m2Checksum.is(checksum), m3Checksum.is(checksum)), + iColl.is(collective) + ) + ).query[RItem] .to[Vector] } From d78bd4142c8b84a05d22e065cd3d41feef3bf983 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 19 Mar 2020 22:42:58 +0100 Subject: [PATCH 4/6] Update documentation --- .../docs/dev/adr/0012_periodic_tasks.md | 25 ++++++----- .../docs/dev/adr/0013_archive_files.md | 44 +++++++++++++++++++ modules/microsite/docs/doc/joex.md | 19 ++++++++ modules/microsite/docs/features.md | 3 ++ 4 files changed, 80 insertions(+), 11 deletions(-) create mode 100644 modules/microsite/docs/dev/adr/0013_archive_files.md diff --git a/modules/microsite/docs/dev/adr/0012_periodic_tasks.md b/modules/microsite/docs/dev/adr/0012_periodic_tasks.md index 5edd559c..ccb7ec32 100644 --- a/modules/microsite/docs/dev/adr/0012_periodic_tasks.md +++ b/modules/microsite/docs/dev/adr/0012_periodic_tasks.md @@ -15,7 +15,8 @@ immediately – as long as there are enough resource. 
What is missing, is a component that maintains periodic tasks. The reason for this is to have house keeping tasks that run regularily and clean up stale or unused data. Later, users should be able to create -periodic tasks, for example to read e-mails from an inbox. +periodic tasks, for example to read e-mails from an inbox or to be +notified of due items. The problem is again, that it must work with multiple job executor instances running at the same time. This is the same pattern as with @@ -38,14 +39,16 @@ For internal housekeeping tasks, it may suffice to reuse the existing `job` queue by adding more fields such that a job may be considered periodic. But this conflates with what the `Scheduler` is doing now (executing tasks as soon as possible while being bound to some -resources) with a completely different subject. +resource limits) with a completely different subject. There will be a new `PeriodicScheduler` that works on a new table in the database that is representing periodic tasks. This table will -share fields with the `job` table to be able to create `RJob` -instances. This new component is only taking care of periodically -submitting jobs to the job queue such that the `Scheduler` will -eventually pick it up and run it. +share fields with the `job` table to be able to create `RJob` records. +This new component is only taking care of periodically submitting jobs +to the job queue such that the `Scheduler` will eventually pick it up +and run it. If the tasks cannot run (for example due to resource +limitation), the periodic scheduler can't do anything but wait and try +next time. ```sql CREATE TABLE "periodic_task" ( @@ -65,11 +68,11 @@ CREATE TABLE "periodic_task" ( ); ``` -Preparing for other features, periodic tasks will be created by users. -It should be possible to disable/enable them. The next 6 properties -are needed to insert jobs into the `job` table.
The `worker` field -(and `marked`) are used to mark a periodic job as "being worked on by -a job executor". +Preparing for other features, at some point periodic tasks will be +created by users. It should be possible to disable/enable them. The +next 6 properties are needed to insert jobs into the `job` table. The +`worker` field (and `marked`) are used to mark a periodic job as +"being worked on by a job executor". The `timer` is the schedule, which is a [systemd-like](https://man.cx/systemd.time#heading7) calendar event diff --git a/modules/microsite/docs/dev/adr/0013_archive_files.md b/modules/microsite/docs/dev/adr/0013_archive_files.md new file mode 100644 index 00000000..3a959c16 --- /dev/null +++ b/modules/microsite/docs/dev/adr/0013_archive_files.md @@ -0,0 +1,44 @@ +--- +layout: docs +title: Archive Files +--- + +# {{ page.title }} + + +## Context and Problem Statement + +Docspell should have support for files that contain the actual files +that matter, like zip files and other such things. It should extract +their contents automatically. + +Since docspell should never drop or modify user data, the archive file +must be present in the database. And it must be possible to download +the file unmodified. + +On the other hand, files in there need to be text analysed and +converted to pdf files. + +## Decision Outcome + +There is currently a table `attachment_source` which holds references +to "original" files. These are the files as uploaded by the user, +before being converted to pdf. Archive files add a subtlety to this: in case +of an archive, an `attachment_source` is the original (non-archive) +file inside an archive. + +The archive file itself will be stored in a separate table `attachment_archive`. + +Example: uploading a `files.zip` ZIP file containing `report.jpg`: + +- `attachment_source`: report.jpg +- `attachment`: report.pdf +- `attachment_archive`: files.zip + +An archive may contain other archives. Then the inner archives will not +be saved.
The archive file is extracted recursively, until there is no +known archive file found. + +## Initial Support + +Initial support is implemented for ZIP and EML (e-mail) files. diff --git a/modules/microsite/docs/doc/joex.md b/modules/microsite/docs/doc/joex.md index 96bca7b0..23309e6e 100644 --- a/modules/microsite/docs/doc/joex.md +++ b/modules/microsite/docs/doc/joex.md @@ -25,6 +25,15 @@ compete on getting the next job from the queue. After a job finishes and no job is waiting in the queue, joex will sleep until notified again. It will also periodically notify itself as a fallback. +## Task vs Job + +Just for the sake of this document, a task denotes the code that has +to be executed or the thing that has to be done. It becomes a job +once it is submitted into the queue, from where it will be picked +up and executed eventually. A job maintains a state and other things, +while a task is just code. + + ## Scheduler and Queue The scheduler is the part that runs and monitors the long running @@ -115,6 +124,15 @@ reach a joex component. This periodic wakup is just to ensure that jobs are eventually run. +## Periodic Tasks + +The job executor can execute tasks periodically. These tasks are +stored in the database such that they can be submitted into the job +queue. Multiple job executors can run at once, but only one is ever doing +something with a task. So a periodic task is never submitted twice. It +is also not submitted if a previous task has not finished yet. + + ## Starting on demand The job executor and rest server can be started multiple times. This @@ -129,6 +147,7 @@ all have unique `app-id`s. Once the files have been processced you can stop the additional executors.
+ ## Shutting down If a job executor is sleeping and not executing any jobs, you can just diff --git a/modules/microsite/docs/features.md b/modules/microsite/docs/features.md index 8390db7f..6571be01 100644 --- a/modules/microsite/docs/features.md +++ b/modules/microsite/docs/features.md @@ -28,6 +28,9 @@ title: Features and Limitations - Images (jpg, png, tiff) - HTML - text/* (treated as Markdown) + - zip + - [eml](https://en.wikipedia.org/wiki/Email#Filename_extensions) + (e-mail files in plain text MIME) - Tools: - Watch a folder: watch folders for changes and send files to docspell - Firefox plugin: right click on a link and send the file to docspell From b1a1a2b83701cc3138bcb8a522dff45ff4740f8f Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 19 Mar 2020 22:43:18 +0100 Subject: [PATCH 5/6] Add archives to collective insights --- .../src/main/scala/docspell/store/queries/QCollective.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala index 2bbb1b94..1e811660 100644 --- a/modules/store/src/main/scala/docspell/store/queries/QCollective.scala +++ b/modules/store/src/main/scala/docspell/store/queries/QCollective.scala @@ -39,6 +39,9 @@ object QCollective { union distinct select a.file_id,m.length from attachment_source a inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs) + union distinct + select a.file_id,m.length from attachment_archive a + inner join filemeta m on m.id = a.file_id where a.id in (select aid from attachs) ) as t""".query[Option[Long]].unique val q3 = fr"SELECT" ++ commas( From 74a6cf1dd17449194e623909e304a45111529628 Mon Sep 17 00:00:00 2001 From: Eike Kettner Date: Thu, 19 Mar 2020 22:43:41 +0100 Subject: [PATCH 6/6] Remove unused migration directory --- .../main/scala/docspell/store/migrate/FlywayMigrate.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/modules/store/src/main/scala/docspell/store/migrate/FlywayMigrate.scala b/modules/store/src/main/scala/docspell/store/migrate/FlywayMigrate.scala index 5fbcac31..0b56ebc2 100644 --- a/modules/store/src/main/scala/docspell/store/migrate/FlywayMigrate.scala +++ b/modules/store/src/main/scala/docspell/store/migrate/FlywayMigrate.scala @@ -13,10 +13,10 @@ object FlywayMigrate { val locations = jdbc.dbmsName match { case Some(dbtype) => val name = if (dbtype == "h2") "postgresql" else dbtype - List("classpath:db/migration/common", s"classpath:db/migration/${name}") + List(s"classpath:db/migration/${name}") case None => - logger.warn(s"Cannot read database name from jdbc url: ${jdbc.url}. Go with H2") - List("classpath:db/migration/common", "classpath:db/h2") + logger.warn(s"Cannot read database name from jdbc url: ${jdbc.url}. Go with PostgreSQL") + List("classpath:db/postgresql") } logger.info(s"Using migration locations: $locations")