From 347c84d33b9f851f3d6efd44a3789547d8b619ac Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 15 Mar 2024 20:59:50 +0100 Subject: [PATCH 01/24] Beginings --- .../__pycache__/suffix_array.cpython-310.pyc | Bin 0 -> 19494 bytes .../__pycache__/suffix_array.cpython-311.pyc | Bin 0 -> 38755 bytes string_indexing/fm_index.py | 134 ++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 string_indexing/__pycache__/suffix_array.cpython-310.pyc create mode 100644 string_indexing/__pycache__/suffix_array.cpython-311.pyc create mode 100644 string_indexing/fm_index.py diff --git a/string_indexing/__pycache__/suffix_array.cpython-310.pyc b/string_indexing/__pycache__/suffix_array.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c75b8d22e89c3dbec0396f1a730504495b483d88 GIT binary patch literal 19494 zcmb7s3y>VgdEPvCXCJpX91n-PBM6>A5aN*pz?TF{gb{)ODX|Eh832|N#NcXivv=6L zz1_Q>IgnWI+LVtnk51^4V>?R9vVADa77^QtEy;P6%Z?-YkyOcLJIce8hvT=Ys#H>i zq+&BNk?;F^W@l&jz)}u1y*)kMJ>CEN(S!EbSkb`W*S7vw{ojAcF#d=)gMT^PJdG=u zGY!Kxe6wj>(!b^oztQ*!P*Dv_R*NjU! zzvP$koA<~33VuC*k3Wvzg1^_Fz;DsN%b&z=$=~Ns;kWGX_owkY_7TIM@n>H%{Mjw* zQU!N+`v-7$0C#)*d;EjwGw$E(&*68k|33c^ekc6<{KNRY%fH`0g5OF1{r&^^-RIBy z58`*qf5?9rzx(~8{xSSc`^Wu9@H^u_>OY3xS-juH{?%jjq`RcBYM!#zv`&Ro=806em^0 z?M*APoAp*ysJAxP>Z%sj=bgyB5ZUd`FftpFdF`g5CeZPnmyWNs!sCm_H-owgj<40i zFl-z@TMt&l_WJP`RePnr8lGz`)?14!weWZlszz)1Xsx;2R*mrLS`fEX8!f;73LYH~ zHkSaRYE7xy^<(STqtbFctg8Ao%wG5K!hl7^95*LS$5fNJ$$)}8ASi5H!NVwg)3;tT zLL)Roi(uftv-o|(c->esL7dKOUe{m?5aljpCgUWW>;kS^vnU#X>E!~TX!fkG)di*R zH-5<4umC+%;3hD+X-C$vN7Oh5sJ$!*C|%rzT2wsgHx|Ri_S*WXJ-EMRp_nx~GgVb< ztsXnsY%kWD!Kq_uugs#@n+WGrx48dhE-KeEw}H|b;|FCq=__wqY8stnA)vGTT5WwD zaLz2J*$DJl|AvvEv|VM zSX1|*vPjS-f5d2?eBZ)NO1>as;A_7zNj#diZP`7$=k#2Vv@Ilk63EE)@?BT=Ty@n$ zU8C#F;3AP*R@d(4P8vODV_(WY|+%vxk#5kiQ}YKjvLn_Lx~wOl8}0s{UJh`MY}n+I`C zTOjU-(6~r+Cx5KxaS3rhhRQa;a>97Spp+$%3rRbud!gyOpwHZXY>vdOi5?`LUwFqn zoFORAM&yvngOs2WDjy~&U|$I8L3GzNo+WQ}FCOqxb0|79ty<$+eSoU_WRzJ1w?-Nu z)dUAn2vrv~Rr|)LE#Ssnvh-qZnhOHVgtiGi(6$rSUkOvf?k_|r`hXT}Z}UYM_dfhY zE*NgPj=M*AXSVd%kZ=#bKCTtF2wSxp&onPcpZD9N+iNUYrBuAo`umCMmgK)J3KPj7uxJgbatLP1B%-W z>%p8BopT~O=Qaay`P>UNwSM$tjb^j1j-GD^_0|=T++1J$DoRKZTnmcyG=dW90znD2 zilAgF5j0(FWj{8_gYky=NeTf) zlxIzW0S`b%mEZMj^=)X%V7hyqD!D&!FsDuO#lrry#?j4 z?f5ycsY4w5c|7;_8-8I65=>gW!Iu4?k|t+vdb6FQJu4rl`XArJGM zV^)5*FsolCo%LK$lnFBmBfBmcznAkX{vHSix0{PKpZ&UxKI3SOXMe+jp;bbk{_3>Q zfR5BVn~;@=mh9z4$lS%2Fc;e0d?H+WUYPIZx*p7m$u}V#=l5-X7|?o8^8Cu@)#~b3 zKHq57TSqU{U<0TirOpgBl8O>t98dre_&bIG9gN1U-T?K95`V|MFB+v~vT(p_4#lUa zG)u6JU>Nuq+r`)|^W%nTY*=Emfk+=&fazr~UR5xAl&>h2G%bn1BU?m;lgss1{grif zij*xn&U*Er(U}ic>o2ENTU~6oV8MiGOMl!vn)4G%t*JN=a7VOhORw$VlsaVhc!LfeLYH*du{as{%OoTzy?1v7ylYL&7% z;I7K%Bj(fQE`X43Zxh$4aFchbkD@I@QW*#h>m2L{#vOS|>l_F&PN304WGm4V{WXfV zd6MmqqUcO#F?6!8^|BTwmyoEvTcm1{v0kF~rh%kb|4wmgc^g*>YO!B@^fBE)N|e`= z-9*_Emo7#8YXZjZq&aI=OvS#7+qh zyVC|xe>W{F+WXs(Rx^-HbZuEb2}^3*oHn494RxOn714FK(2oB0enWf;co5KDXk!cP zE||=hpY0X4VUcwUwBCBfZn2xg__}UEnV*4O#$UdrTaePD^yCP661U;~d70N^X<2;N z#s_3ROLcUAlo%_cykx#E`kye^Hv|h|r+Mtn+Hy_w?dGu`8@v82=8kvG6(zgos#+7U zA@f~Nc2n*ZIN!8E_;7`~=f>waH|8VUWNyM|KDV%#pdk6B3=w9a9Y??r$TUIKQ8c7% z4d>TUxzvW~w%ahG=y{J{1uUJwj_R8q18HFRAZjkIQ^y*IRyMeq6o8mMU9ZSwd8jHB{>x zo3&s7jT z2h0jA%t=#mWJXuhkE9n&AP4V&&bMKNz+!|KAQmD+4WOMZcm$w+(7Erv=J~ld5zg`R z%aBOa){}4qA#5cuF$2W`N`tQ)+O!+BjdpnkPMzD!bshA|%^0h;Iz_*C8t>!19w8Kx zi$g;X`5wmPIS*$6U)(EmcrpiQ7IRz$4brNp`32AsJZN&BpU0d+(;Uqnpym9=$9#)E zP}hWo{0;CfXjZSj#F@U~=X-7!jw_!29_Ip`=D~GeMw{GC8=&KYJpU3}NXLB6I=y#k z0S9v(cs7k4@d^C@T}B0ezjZHYU%v%w#$146uK-IKZKF@NMf$+bFB1B~hlOOd!S_;j zd`SGe-S-xB5q}aq2^5PQ$@f7vK_1NjVjKUn3twTrv@m42Wb_jC85N;`?U3Z7+vxLr~*1%$1 zf?gsSZ)A0SK{1Y}TpgW4AM%&0ylE|@aWb`x9+>b7Cwz)Usw5k@?cx$8ISz>=9=p28 zUgGgd1j>s8gTAoTd~raRk>#T?=q^=ngVSC~R^X6o zXA%8eQ6A2nP27M6rC3r0nz-;j;IySin+C5L4H{6d;|eG? zb4Jm$Zd+#VcIR-sqhnRzyAf8asoqHKZ>=bDLMc!bR4C1$8X_bgq@je!m4;{xtEjHG zgGLAgC;+Lxj5lf`6H>j#Te2eDwD_H`;Z83MI4uIh<=dU{;f0OBobW!wkwhfKgt_Xp z5qeyaI$1~8XAoBj%@tnu5=1W8egy*%3b~G-sPy9bXP@C4SXGQsnJUC0Vh?M25)|8joW|tWkY#91b+a%=w|9)I*m{wmO6%egiG|ju0tde2BW=Q^)%j@OM403xjKfW<&)S1 zG#%&jigHAKHw)S>8hwP9#&?gk?_nVzN#XJe+I|36K%mh=orcXi4KB4_b-vlzmp~{( zLYcTW8HtwwY8w^kS_@k48Ab+IBqV$JNoZ4OY2Gs-Ltsc9_Lu<14(4$W&C!2GjTS#s z51WvzuxF9TS#{LuuCW7P@#zgSI3;(|7d&F@T?FM&kPg6v)BvMi z2>k&*nnw|tp?Z)vSrA2KO&;1(`g`aQu!9FFcL4u#=3!ibmf4w1pfxyPVLng3Y1P*1 z)oN6%R@d78W|Q@DwMsioKT+SsH*;NzC{dqckuD@fScnH(xP<*YEW^urPT5mzU&NL9 zOGKv%&|*H6$5q2Z1uOcn1Ro2~VQ}8i!KC@?agWSep>R+z#$- zq?#?pZ1x*15l4ulLlo`gh!_OFm_3ZQK6_=_0 zHj9gVRA|5(3ES;v5EW~Si<@hkO=LtP_iDZ7_qWbT)Mr?vv?=J?M%&NeinYQVG`n?s z(&|j~w+}pP0vMsa{x>5fwBTb&dEz&02x8NE``<=#zo;D4q41WQ*>#T641= z<0{H&fU6&5r|g=eJxJ+Q5s$6b{A;z=V*MA;M{*$Jkb|A+{>q2Cfuq&;a=I-RRE3&( ziGM{ns2^gHzARbukY8(2d!{}|@q*k%3;T$#x2fdHzAmx=&L%kMV<3{_wu zP!@O#B|pJ7(S1S?qWeTCiteLkoOhAUZ8Cor81;ex?w%~qfhXs%iLA6vHm`9LxxaVFz_9ct7+am;!jUyKdhTbT+2M&vLYY*5(2i5Q}OJ zOHnt}H1Irr7(YmIZ!SeHHBz9ivcW?d=~|<`8N+7?G*dXUAnJ=2WUzTfAAAUY4I{;# zn1c{30!AL-LJg^Y6i+f{7FoX!lL3*9pi6S717?*(sKuk)<#5M-h=YwiJlru4iAQ?T zth?W7FPu<9d{*k~<=zl9F*2@GNu>;5scjNp>sq{<22 zLLug?r97vD4Q+hNPTPBj1$@Lv)O~m9eC!9WH4vu zvh;I^`WfoUFxR!?7I5vB)v}R=!tC#o47dUzbWtyZ+*-9Uu!iKmuotypuYeW3BX9)d&h&~dGIcBNP zqn@z=Xq;1`3p0fvMg|1QJ}`l-En7Pe@WqibCee!`TZK4;z?(64oErv$eOBnM)=0` z_58-+jlFn&7h@2p%Wp7g&D{egvilkAULWOQNb)8FdHS#f?4>f+n(PDn zjc5N%Dr~~ZZW(ixAznn7P+WjEaPo+s(pY10jqteLAbUtlFFF!GIIo#4e)JVjG7- z#Jvcjl3!n{K?yLkr$cO7!^zNU!;dOSoQ;B@{z{a?Au#XYz$V(4 z2$|}Y>!Olqng-EC(jf(oj@sw+-Xn84^1^mi)}+Lc`B^T~fjJ!D5Kw)w8Xpq*DI%Cs zD5_-9@~3DEo6`GF@cyzSd;aAq};4MQeero&srG5cVKlvNfDD#O!hQ4QDX$3oWM} zf$fM$J~KTaZrHrK#-IvnuveE54`6OcY+>CKt!Yo)%Xg*+dWy$1bTX3CmNMyaTyk7X z&QnNGj|cV`(u>bhWJ1$&h64Ohz3tzHG{G=+iW z2KXa6c`;zY+3M#oiPG2&UaqYpz4=RQ`Ijv2XYtQbMDCJmw?ch5lcX*q)W2X&r1h_` zCUh_{x`Z6pIEfxpf^83YrB^i0IPU*ZU%`Wn2rr_+!zCj8zoNoCBXVu9V|ApK86L4{ z5Vz4MbeL_~!dV_fNFl<3`5EBb-EsTbPk1IgG1krbpaqm-!%(47V2Q|_>5qN9mkV<{ z5cUxC`s@x4VBdK(-c|xFk*birfp|yWP-c9wo0n%$1#(-^V92gkqMtN}3%vZB1wN zhSWb{ATr|*xx#UoYe(^ACr7t)2%hOk$!Ehc` ziUjj?*4Nt!x)@@_<>{NyK10OR;~33`kH^=7d{7O&RlOY<6k>46QBZN^uCBhDYLw zGT-thIPh{Qj_I0=8XyWGQGtyaorht3Li{ymIuVbc*9ybwVev`xqp6cV?=T82H0kur z^<7{77M>j@t&~wYXdZwRr=EsS%O@Gtp0SbQ=1|}rBHW}fO zo(|!?%DG-R7fh7#khUC@MISAp;e#?h1&?=|LIn|fq(B^XEOb`f79y4_IYcv&T$@Ht zpVZN{hu;!ZFg}mVWA7Ub-8!=dvgQ}`J;p#5b3PeQxs3D(w=jZP?oA}kVN?XOV_N9R z${_ZTu}HC-QRfM2X;ML46WAI7S^0CEK;~<#R}Ya4W7bC8xEeGE=rBREPSF*#@98k} zDrk>#sKEr#rfffSCFqab#r9?^L{LKr;DcO=2pO^VWI4ahHVO&Yc9|6*iK0E~GAQ5( z@P8Oqnq!TBbIx@v)KFJn$G8kdP*wCPf?_Z}e6N$EbUJMvaX{irv@&pR(fRaU;MSVt z$uY#FB==)6RR;RuIVMA$=>m5=M+8-K)ORHg3Cn=zoaFS7gBYjH7?;NE;Mm~)t}X6L z2Fc`SUV`K~R5bfSNXb_$@}fH-I6mtynEe`t-ZY1G8d>UY>63{8d6a>Y`8V}3W%%;e*YZ@5x?p;QJf%B*?t&FJEq$m zFn7@$|EA;0^F=(${87j~jVri_;#*Ip%lB*f`1>_4{(h|xf4^3Yzh5i)cL6PvBC;~5 z;m*kz1Y+$}kQ4DmTwyWdT{k>^n&)tAxBv~d_yAUY% zKD<5JCC`x-DyT3W4;la^RQ}3wI(KI`oAH-3Xd<)o5;VPpN&{cjA<&F09Mt_iFascf zwNPjJ^(0~vU{~h25n*EXOq=GHc@TO5Vhi`%kz|@vrrTp?~{Zb$zal)12glB z9e!0P>EsNG2YrzYAW0W}K>jn_U?T>>2PAV#5Ww{HL~apSkXhI=MBWNG`2Unk|$n}iinNA%M;MI{7})@VbZa{&`mclffxPe9KiS( zjycK<2-6^CfgUo`5(N{;}F@elR8ud@UH~w0!9K?M1?k_>`r4{L%qod z|B(d|15Ak=CH#5L?PUi1a_y$iUmL$-QbnfP?ORs$^BC12N{Ff)+$zf0NtjVIrH`4z z(q$wpcxTMeeZ;Q+|1kr&Y0L~@MPRuF+zJV&xF8XGyW--ZECy1{?2Zd9vkP|wzxq9F zM}vP?$g7CYB}??5S^F}I$ZYNk^M8zI%mrj%K5oi~YlQjSkgOYt(Pn_2QRindHGxj4 z2ZE)%vL0(|p-e`>)PG?y z#Na$;@IBmF5f!G9d0=w<-C-e_i6k$~mv=pLK+Uhd$b#?J^iC&HNLCSsimw}3f-FXi zqOan?X|7`u6*)=dc%E0lc_JHSsa(>g97TBYM~Olfq#_BfVysScQV^5s^e3E@EimgR z7*t>?blcoP2w?@Lyd0oD+~qks$Pp&IdDxrZv&DV#ai0@-jt}o7B`$UaoJv?rJbnzi zw^#C*|5{k!v0L74**(UqiaRCbg6y!2qY2D<#c5+XNX|pLHvdpU=rW5^c;@$hH9hPDAB5d1gL_IEK# zP9Hl)=ip#wV!)Y+&r?5#1na-Tkj#ro93+OvaoswILfkI-qFM8o5C1;>;1K>ZH?Saw zq9u~3egpSW+GnKY{K1f(QC>$Ph* zPiZ0bc~OW%)8zla5_I%xe0b`gBw#2LcMscEOh=ynVKiOyexRRFkqd;~q|5yue&EQ7%7rPJEQK~vy2!(Cn1kG+lBtZQBIrRktRKZvCIK3lCGdg z^NSOiOZfD$z7!(0+gQFD;+!g?AOgTXx)1}r($L?R)oW_;>RW^%y$YPy$3G951IH@= zUnkd@L#i^YHCjQ6!R*ixV&aZvw+B$+hg@3!G1yCitM67pi4x(^==}q9oyUJ0bK$g{ zsfnDI`NesExr$Qvi=?hcQR&Q%t||GnXJj?x3R%rzRJLKV#jD|GM_5f-M&EgRj@36=&?aN%+d9-ICbD)aPp03U7cU_!3C!LGUG!q8^k;T5p+pOSWm#dc^J6pv#RWlm;VnND^+yYoPCYezWP)sD{gy3b_yoH_C; zz3a*TG5h=O1qxLFsVK?hoGpSM3RSo6`+oO(*I&5Z4g-#V^POJ@|NT~j;cw_i?yzJ} zp6nA0hK~#ZLqHfdyw1Oc*9G?5_`1<(2nwf7xrYMAfGJ?OD}IbCpQu+~7YkCE1D1d_ zV4E|3Y(zTz^W$}Mz<$Z_x+UNMv<94jwtx%J9&iIX0wsXXKq;UrPzLBW8V(u)o=b*6 zc|lGk>`n#lR2JMR4OHRDvOqPUC$J2#Jm3Ya2-E;p25JGT0(F4Zf#rb90xJN$ft7$Y zfmMLDEbn^c?JLN;F3^Dc%L9#oD*{b`D+A4ds{+DNLu<>gsF}SP$175BaOmA*@0|-y zIA82}_2B;cqc8Qm_~IQQV-AJ=QaEGZGd^%87~VG|{R*9L_|<>G(Q3?C54`;9H+o*( zm$B{X+4};W4bbh}Va5sd`IBJ)jF@mz2y5<78a~FaPt+?DNIxZn)lo7S3RCPeyjk`J zMrX=6Wtubwj1$OrQkXR14D)6Oe$99$(XuPn`{Auk;U%1cLA4)*GSmL3C=rUy}Uqtv~l( z#(6Rr?w5k+k$X_8LWwr~hxP$P3~9R~YWi>>=9?az8Ju=S8XuM{iwo1kGsBM!f@v88 z7XlG~)RXp<$NH{TMfOI8XwS6y!jZ_4wA~eHdxCrny56VkS?_-jCzl24|4RmK|7&|X zOk3?JjEPjZhMt_V?KPzV?=*(MEz_b}s3y}Bw=p%wUlQV;NvVF&~M zrjre@FlC%HPGV8h5dVGSj0=M*@ZrShW{m4MO69mC(bFMnB}PxG#P@F;I|4%k;epY! z=XOoh^h^GcGwXK@j}G{UL%Y`JZ4FC~C%C&=kt~bSf@xEwZOo;VdHLI7UY9!ND zo2H9e?Q7`SlT>lXlrW7d*We~Ut{P{YM-5Ufelc*zBrT_NdNgA_>pyo6(B78wi)Pxf#If|LvGTJ8LHe{S2fDLYN~U1cWK` zlsIM5&g)5&J~c4NyKI74rmU0Zr{#Nwtq7@n(l9CJ$8=KUSZBaEX_~YU#+edl+9pL^ zP6c?4=M?f94VcpU+=NMUK5sqlM#;Lnljfq5%`+zp@>{HS?@gL83tgzHT&>2PS&0BmQvWYf;M zfC$88rf2mdT8Iw|XQm#L0$W;oe|k#BKRk<0@0&S&M9ps;RU!pSMPrRLY`<6G^k#MUw zSl!vC@+%ttJ4UI2iZx@?I2pvR&GbtcVL0+FzP)73th=PGxJ4K^bQl1IrHz(oX4yPpE`gVQ0a@;X8vH9NVe=zSS~GziS2uVw#LK3P(3HG>KTDm zKOVwnTK|GyI=Aknq2b}6wC>f>P;lgJAgy|pNR!qhXExxyQYTJzYl{kIyZRtbt{Jnd zYhB^4J}FF_Xl-j7IbG1mY}Y!89^k3-bAALw4V(4r(Hw!1si16Vzik!yYGrd@cr_!O z$r7d!P95;c-*}D@>-~Yz@wbQ3=EYWS3vvrR4?xSn)yqHkI->rVFcSdk?73e9k{4 zg`}Nya~D8}kN|d%=d+YWX+fo>=W(CVPv}E{uTjlMxdrslUJ?zXMtfp=X5O7%HM1)2 zi!?=ik-oIe9__(yvHyxaYES1{#jE;D^DhU!@cn%77lS`{|H6%Hva45d^+xtTbe6@c zrgzNjhZZWyNbyC8QI;5|<5^?Ux;w zotIslpq?^Do!A7xj9jvTDZyrN1}hcaP^Oz8U{my)!BN^&M82tLHxKQ)3z(6gg(h3T zs%K#JYih~oEzkBmm2kzOU3qL7yzrS#a>h9r~_N81B`^ZuntoeV#UMGT)AKZE&5UmpSa?Rx{#Y|u@+r%5;MPe zzw5`WC|7%S>`RcZz3}doHEKthrNFC1cV!Q4FHc(awbuVC%Fh-iQa*c3S|)9irn_bO zc>$E-8Q+E^Peqb{eM>rX=C5xJjRZ&59rc4>l|p$Osl$o1j2TWmfD@YV0b>GZ#Rq60 zY)pk!);EZz87e4(s%gyVaW8FiwA(&82JnJGKw03Z8X*l}iwPHTXs3CgfX2ooLkfL) zCk!76f?=kK$vTV~VPh{u7N&ty5?FsJ04``b9^o7$CMD1n=fUhdP6kJU@1B!(O|*v2 z1mDRc*#3dh5wP#!{1r7RZ7d<$QvM_UfONfE@kx%|sJL-7QE~mXx!01y^*7|2Hl?PG z-_t^lWrd7*dUR-{72g4W*dL;n^D1R*xsoTU7q1af8MqoSo}unBNc*NulZO08Hoe4DAWDP= zBVf|6oM*Yv^ZnQ9J6T`uMu$Mv*(meTbxs_Zms6t zUlH%Xs0=QIqG!-6`kV zd2zmPe(YYyXXEqV_}qPO?61cE+4%jwFFZd#{zcDEKls@PsSPhLIA2bQFSFV8D-m_f zQ(TPTl6rAXhhfxd*M>VPw@}KdE({G50BHvq{6ZO{oG2?UT-k)PMc4$dey!|K85<&5 zLd^g~9+j`>pG04*6Rf8%?!Vp~x6bZVZ?MKJA-8HlCTgYKITnY6k@qYJFM zIi`#jTgLQC$EM$<>f7*8$Ok}Gx2@)%xDc&?*vd0~e5O{t$XB=qeHv6`UXSO0o3t%o zU#X7Dq&Z!mMUfJ5J`;uAtBT!rQ2GV{B0f9#r%<+*z5J9)QU=tuTqrmtfRffp*SJuS zPk>sal^DCqs~KUK{I^t-h}951L|tiV;GeiBGM1I2}bl>qNXZtzdHN>kTpx= zl`+1u@poCWJ=BWalC^fH*eB+X8lxT2vFWZEJHG*lQCpO8hp-r(QM0z zgw&W!mvLcC9@py5vtrVVNF~u2XRJ_BoEr{?GIl9AFfN6LMn^KH7dtj(Ea7o>-T2~0 z=?zMLfG(>!l*xJ3;1)h{m z->bW4cVjWj)(*wm5ot*0raGU^^G1Bj&drK*bEGAmn?RT@H@4p%PoB6rb!$ql>r(2v za7ngyE7tBv!$W&%%oKYyu_a}1m+kF}y**`Y=WBx2LGB=&zyzJcz)aw51(GjC4NxtK zgC?kpY*XfGfhpRsqh+bU%4zVpB>tW@Olzn)&}>Mv0~ykF`kpjS3v?|YX2l`K0LXvv zh~|%R_Y?f{V_HCtjN+TJOQ@rPt>s#kl&=0^>&V0ubQ;x#!1^!?Ktrkk2Gm} z9J$-{bUXUGV#>~HgwdpN*Y$y*BUu?*rj+iV8#+H4 z?jMB6Dm>BB9}Y?*e(Ak_Qjz6ZyMlE4QBa7a?q7rIb}mmjm&g0!<8!ac&KAYlk`i0k z>f(DK35*!Ii%pS)W*N7xj8>;KNGVRzEmLqLkP%Kur|8NMT`{xTWt^;f{lT$u|8T}J z><@+cIcFeDXkv&2AaA`@UylzR97~_3j0o(*f9MO2rGcZ3f@wABk)HxA28%N~5bK#f zxnQnJnXA(E%?T+bu7J$UA2nY(74uvezVLFaFE)PlwY1q5?Yi*ISjWZwNPk*%L=HS^ z+?3cYH+CwGo%6wa$M5(3^iN`^qjiyHCWC{9s3yKwcGfG-`o{*FshWX^KkcYXIqKp) zasS*t+2KeK4B{~w{ytON3sYNxM7e=rp2v-B?V1mrYtqMCtL_bd!nbOUrN`nP5N(^&ez`af3`BVEpjGu21@dn`O5O>a(WJBC47k^ zx7P9J92d@_QDaw+r7dN#o(rRq(_cR{mO|0d6*e;-!yx48=P@*TbdW&q_su;JXavzSr89u?#(xED<{wc6H z@Hga=T4j}XU2E5-PS!y>1s{uk=42$C&X(~t*2Q}KzE55lOJiQevQ3++vIz>6v8vo zFaf4XqP*&J8n7^aJWCrUkuF4CMs4ulH-91g-2O$+&)v$d-bl7F7N+c#-b5nKUXgG; zW6K(71=tqa>e4V)v}b4BV5FqrNSKornUdVk(0ga!9vx+i@4)ozWcZXOB{K`l zSRi6XE}DkAkpEk3BPWp4oRt+0m^6iS{rC@^#0W6{E6+1oTpu30)(kOLM||vh*VPTP z8)SEb;%-Q_?ahBI`((>L#j=kW5gV*5nB?ff*@!RQ(3m)u^d)=lHr;wXwlcB~R%kbD zkRv+hoN`H%Qqn|B-3kUSaBDPcRlE}0>9PfDeac#&ZduJl5y$3zcfS$)R^)8DbUE7^ zeHVfk4n+<G?39nR?3m6m7=8Q*oxRK|oW-l&(^*Tu$Wk6raf zE$NcVxNzl6^i15Ju*?l5eLpxGKbx+pjd#pdLXFyT%P!ZnCcEc*<_AANEU(=eZAa~T zrki4x$0kE*Rq?Z5KeiZPjKZtrySinzE>vsf<^t%NQ*$pQeb--3 z_N6xVYCp2=WySV#%Jy>FQ5Nw(_86Ql-ipWN2D2sVxo{xT^LQEQH`@E>qfFM zd>!_ffFkLHU_mPaS^{REI!ngdfAqy;nD_&PJaUK1h2FvyGPfZLf@U~BrVWW)VYy4} z`(wa77b$>mg|-bms}?s0jN09O+Hjf}4R+|UwVtXeRAZqwDyPM`m!-@Y31O+?;2IJK zCwOTEu+WC>N6VO`ur_PE0~L^EDAYfEHk6YUGr5x(3Jwo0B6(`ccIx_mV5OmOe}A5d z$;-SiqY5FC9|9v{uxp=(fF=(y?(^bbI)CiE*YjD)=OvF#bPKD5pJ-EZHiuLitrmtH z!Q2q5G#u&=!+toFw-U&h*@cW7?dU%@8X5{i02RW<@f|#z-=p5*d<*m*+1gK(6}IOR z)t%5U>YgsEjO@E~I4#;9h}8>XwJdrS(VG&zU~mc=#jE|W!W;M8*qab*qd zq1&stSKf9i^^oCj`oB!Vvdu_u?&Q6uU8%HYc(38=M+VxvbH`~l9ZbU2wFNrKkT>1e zuTbJGCJp`S`qff~&z1up8?utp>&!vppM~&VQ?izK(O>79!u&X9Wnwi*rPd6yfaw$S zsq}Yupc7`c>GRd4RmZ(JC$43bDs_En$@S_Y<&0#dEnNFX^GssW3BtLpR%x0p{3(GF z0-FiY+~TX4W`;BapqoRNaABeoB7+^2JHYoagdxp`cvTf5*~ z3+?4YZ=;e=`}Jdct|&fp)m}&S)tP&M&0O%6#w-W+11(RD{#NI)ezsyxeV6f#rYTXI zikuaQAJC1rng@8Ig1fxbPy_N%am1o|#z`<@d~Cu4zzQ%du71K?VEoG%NKR7hD@tu8rx^ikS(n8&*o|6Z>u*Nsi5T+!Le{r7T7E%EJYQR*-T|5vnvF= zJSu7c|LT#O)V~{jXzS;WMxg=i#1xS${eD6SX%TOMLXQBxRpe@bZG#N=ijP7n3_|p! z7DRz)3QLKE*kD`SxeM+pywadL1dt+N=afKqTfH0STIL^~{i;D9iDB!nuGM8&)cjbInLlUSr@dhSb z245Zxrkpgsi@!Vuc#C)-aSi{U~M9ym|cj*+|@>$T3*;G&I%zwr@69`qq(4aGLl^;5SPaTeNn5B`o98#+50RD|e ztsM+)BO?}V3K(9KKsbA738d3`5}{xw>rpmpNrVAAvW!k%qXCWd4{#^r*mHdE3;U1t z?>qdeMCD002#_2lV;_PpB|JJh9LhNS0|Vn{$A{r=moc9T`UC0!QYO-$1L%kFI<8EV zt3!x&!50j%c%g3qVCbq=Vx)T0Ep4|({<%ZhawOG4AGnAdjdjj$ix0%5_`vLLh&)p6 zhID!LY#Z2cv*Sxo)wLm|x-HoyS9d7Y9kOSW;@L!F9>E_UJhhuqkcw_b@B`F7&0d`G z1LvCAo}a}uclG!5vmE!-;mr>!$$@h-pUrzxY5|{ogfG@dyvxn!{lnwI>W4vRzdvx^KQa)U@T&b;aOX36HcRxktR?Ib6K5UqZl$b278`*zE**)U z=j;&pqIlEXdZl)~EUxF5WF4!79ZR}O75+YfzfXW|PP8Z`8dm8q2-Fg&CcrkQYjj2+ zf&b7e01@@?vC%}1LMm{WOjwU9aEK=G94f%RwSo4nLcj~7B6Q*gS_?GXPkw^|0b8OD zr+xs#G!3#xe!%pC0w6X42(FR5Q4^Z>AS7IL=xU;_ zWc|t&3|iyhfDrcNv(ZjV{Zslnpe4tsC8t#^qOCD!pFqyS$1F=g?m*6tTsc}VxiAfL z_Bc&)O?G6NCEqjkmCol*Ur+Sa@n`BQby4eEt?#R6>#3(OkAI9l16ufF^jUsn89}bG z*kW`{30sexFi)U-i3XQ1Ubc)Gw(q8Ej9X+#>bvxFn!ptT#7nlC;chxizFQe938aRg zz_xOn72*=|KBOw7>)j+$A0G$?m>yobizGjxWZ%U41Uu=fjGdVM=Iu76p-XmkE3R(X z*Eq|uNz<|ifb3FqJ`n2`#Jcza$WDd1-eeCDmO?m-K53)2Tww2D5#Ksg`O}t08K^xAjZZD2U#$(m9 z%j5ppRkvRtM!?gR@^qyw?gy3?3zik}!MQU@-`q&jFIzS$mW?UP#;em%>^I+=a zJf0)vD`82B!l0#IfNQovJ+>5OZr0~4H7-e+i!dkBVbYKpEUt+uSfTcvG;5TT;>>oY zauYe-&NWy>@MI}?KJzc)0%NBj;Jkx#_)L!vlHcE1NHHN^>IG@cSLdhWA zaj=~&Qw=YF#-x$pGc^7-4;2uSXwUs6{zKmb05!9GWxVrtV`A{u8M$GT(y&Rc+XPKQ z=w8R?=VP{LRiruXEQyXqrKmLX9Bp^dz5rl8THTkYE>6YzWa~1;ir@eyb5q(~@xWcb z;I2==#W5#K5v&zyn;Uk)j;J|ZSD)~uEZ#^k+H>Jl%=UHKUJ}WgglTrbyy%0AI)9&- z4TUU#ek?KaOT}GILZKE!81uXue|63#m-v(tA7PYQ z1}<FV}Q*`QO%Vld;T8t@*S%s|=9SW_l zp6Jkb&=srJR%6;aZOv1TcMHe{ko;6)_L#O7+R0sk&BI9{YJp}1)*ITK3UwUBPQZ0u z-TFbg6nK4V8H&Pt@aV!`afa1Hs@M8EO`4wi9Fv#p7w|}lmXs+hX-bS*(HaZvCJ&FZ7so+wEF7@6s`+ceuM@c*P9Kvnnr z*6auJ8j`+n`rxz?!`R%sHWVm?;w)(>97>%ip&-{{fk)J{e_)QtUJDiB&7u z3;m>3Ry^(eDj;hX7*ZzTB2s}>hGAG~j(#$MOqq_PKtD2MfX6md67mAc*tiohL^Lib z$jJ{zxVUq_n0Il;35T&DE|E@7`b&D6b_PwDgq)4^Pw5^3+B`%5F8~=xr=gi;w$8vl zF(LX~r1wi_d8~7K=gdyvA9roMJ9$D`2hZC*vU`u>-V<5#u%sz*BKewJ(yf$q!}z!y zLPB*=X`s(58}{N`_U=;vJ^Lc-loJGvUii>-30$wi1{oA&tXCB z@%gj&d+zVKzenjlh--4$A*Jk4WbH$Hb?khyZvGhlr1@h?>$a4AyKLXC*te&Q+l$d7 zEh&Z`C7kGaFSgUD6gzQFq^i-<40<$42rD3-WPX2`sm!=Uk4Ori$I>=WbUn>8vUe0A zeO^BW_y2W!0rO6g`JIRd7J?R&jdT+#Aw376)sSRL3zo0+S4i={QL60#5yO}En)s%8 z)9tE6IN6ijliYLb-J7d!t&(duV*lSP*LG)R#P(f^eb>FQd(u5gvF}M4_wWwYbW+{| z@EowxWw*Xdwb-vPs1HO^M!g;^r?1dz)4)h-_Lo9jtnco9hM4f?eh_sax(Gw?BSF}* z=qSL`mPun6`$z7`i>tCMqjAzwuqzb#`=V^~lvT^j<(ZvuWiIDJ7ttyB&}wUY+L=p9 zR(knX;Frdp5zHWxX{RQLn(eTC6Z6b2&Q&H3Hgjx$%;%RsgN?dv(vkn2Ak0;C1)Hq9fj@F_Z*?qETu%$EkltyB7;^wBQol7@_W)r!r4U#iUuYk=pTS=8PV6? zTg===1o^E}NA?{l_MyNv(Vm$m{tJ+k5mgy)KR7yyg_LL4`pNGq1ml*h8iCOP8sio> z52)3DQW_mU7t$!w-$7$D&Vfz;mj0ZtNz$Tr_K0Fiv zfzuy4>mMHOhe6@VU~5T62xW{%U(DEY-I%G$(L()izsL9|5+R2&<+%j;&fz`?7T)%Z za57^HkM^@VNhI2lzM%Gr5L$;2OeZ+dACmqhu7n7+5u2tS&vS;kL_eNV2I>Bm9w50b z$ft+v4?MIWHk53sRxH(VWBkxpCSgMh40kp_kh2y+J5n`k5ygY7+OB~rS|?YoS1Q-b z&JBta(jkv&=T`*4IlCJY9g4g8fqV0Ud$a8BQrumU1LcXe(@NR1Uh%AtoJyC%hEXY9t(2~VH=1xaFyArX z_j%_%;j`}K8Ko2+aVBdk15sbPqFJd}pLVTOT$I>%C#bkOe{B*fyQ8*8wz8O4vDH4X z`4()xtj^fBMzO6)vCpGw$ii*boeV_Rq)V5@ZAz&xDyE$k*WQP9&VqAI%DE=(SpLAV zVZpIsUX&eOilZyVK52K=jXH!KSa7$c+-)q;`US`OWLS1|Dvr*SqccrKuUW9IVbxom zE^AQAI_8_^$39=Jlx;&L-8;TA8SOiNZ2*9vJqx~aqfTFyqH7*E7)(_Uj4caB3`bU` zl`biRpXRIahS>JVnn-ihlM}QFR}ah{0OPGV>qwsQ64ODyB#>Cc9`#)skJ%Qi-jvk~ zJzd2Ap$X=cmKn>1p|r($dHCWm>aRGOQ!QJRmV;jiO3M+&@d74;^(6tb0T4_(;Mb3G zOCF2-hAVOW)4lWNJ4fywx#zjlD|@#q-tD*{+jl5PXXI#LfOw+ZVZjNbQ+E&>53_3) z)ey#olZHaZ8Qbrl0_|)0CJkDKvw~5hFwz+J!x6xwb<6@O0{I5y+JsDt4hr|7-3s-n zncTm$cez6cu4x$EK^SRphp(AGd1@(|XP#b)K7W)65>8Xf4AUTrde3><6E@@j$tHkn zf*R_>GbNDASD{M&NAT&$hVdv@^@hBT6($ z{|dQeQDEpD|G8GXl%^{W3A7OSp8zuEL1}a(%#6jk$vYuU>E8o{>;Uy|Ww9hjk>$mW zkRBnKg{uAE;e@zMP2FN)R8w}gE6(=E20IyY0AP)|+%Ry7_bIMbk^P9>2cwo|7_>wV zrEMkAcNLqLgxfAy#+cjE7VG7~i-XbQ6dh_7CgU}Vr3U^|=~{Se%{9R)d`+S!;lH&n zRnr<7jJ^isoU&BI_WG$$@vwbkH*LD79#`b5_!JK3&#*9t_r6sn# zrJ7&7?foq>xs#3PNnAABO!Vv#+wj&t!<@)%dn>8ct<>u5w&&iGa&JKtWIqB4`Dae! zN5O}+u+;%A7a3WpyaDN3h+p6=(i~OqZwQdWE92q}4c~yJ|CX-Pp2eA={~l+p4!$$0 zwS6j1}-CP#3_%RZX{#rdqej&D)h`lBPEAmRCKmta_fW5_k|GDM(L1 zupwW1o3EG7m8OhUkDRW*xBJR&*|}VCE+-8xF#@Jm?7-r(2V&EL*o0EFb1XmG*x8Na zux{?V)tU4pJMLD_3wONoicQLjP3$I0wXZ>;_BEi(&8?uz&B$H_V1(S-!p$bA4*?>6 zX1m{X`J(j>;aumr6_W6|xBd1Gh-}QI*Or)bgl* zj4bWZb*qd^*)j1XjY@FAM_oEP9s%`Ye! zfhMZapW_6iKkMfk`)_ZCn?(1m?(4mCz36Vabgfdl_R}}-^xy4|A|efYs{qW{8{!Rf zW<)k9X_ZS_l@cHifeh>bE^vLf=#2JUI`TlQS`guwsQOsJ%__~>6?+Gble6Y20tIsb zAmnCd*2W#OSfz+nDfZDg5NcUlqS@GTF5`4PBF_X2ki?P{)<|)RwF5p~AbI8T$ur0T zeeq}7r5_bRfwE|<+GUNX$4qAbv^@39ZsFq7qY!5BN3}0H5VSuc*8=J{6UBASMkUaRl^gdCFYAXM;fr8 zZT4J*egsH9a-}3%g1B6~&MLfNhS#$1dhqs9_*pIkzp0UBfB%w%E=Bbo}yL3@Kh%{h(v+fwU!eLYB3N~To(`tzY}hO z+8cNfs_jW5t$42JW>HLxTAWs>(S(qmaVZ6|KoM=RVrHpD~FMgReWtl^SM#=I) zWs5IbwqE}eGU8kyp$N=%t@y8XaXw{{#k+-;!CK^yTfC0lDaY=sm2j2sL9n?~KVC7v zBHA$zNPoD6kA{??iNeSYSQGx=bk25pTFh*zVvVZY4d-D0Pu$9s5aZMjS>RbHW}tB* zv{?TZ+Q2vaU(w?Po}b%k$HdYTVQqg`Zi723-woz+?;r_50{=L zg1{wu2bTrV7O(=^12#Yhg1pc>xEz>hPDWknCL2u%wtA1TLdeL3{Tl}mL%3h93(j-Z zf#FHNy8ed{eiR0zhJJNz;GFVg2NQ7T%oOPvjByBGT=?r^5k8=W3qr@og~BBGh2hk9 z@D-x+vV(jXrxVpWsgLUOwTH%i6B2;P>+GKIyw`BA^4?fV+(`Z_@Y!(3!^*0S7~SNi zl-NXG?91RUTNlS4?{c+W$V2UoU*iYS{sAmE_&)33*s!F5Zx*+FQ|rs3#WJ9Zge9XUdB$IbX$M6yyiZ+6G*fh1djb_IB7 zj}N*;$jwh(JQWSkydU49ls3thX2sH+vNWe1B{OSdrxXWvO~uiW=#w2Si2Gqh+>gTi z9^1h9N4RaAZV?a(#umY2oKQqva(7SQy!iMSmp)N1gX7!-6H|5aW6LP;wIo6qT&{^2 zEZw#ML)Sb(MjK83*WzO9H(j&|VHnlYCPm71#yzw5X*HMx!=PQ1I42y+4z_2I{|Q(u z8Z|!d-E>CdD*YD%Y}~Wn$8^qgF#JC4E(LxH(!a&EiBx4DV5%J6KsS~pvVNl;Up)ucQut)z%_v&Rs~r*Ezk)YTTb zv&Wf=9gIuCJsMATO_LM5rpsHOA96wd zxm5J6QUc)U*&6;=xCqqy9enXc%cx)VR^bT#uSf?^L#>c~Jt^wGL4)=-Zeq}u*UueE ziprYp(Or@Kk8IU31XQ1ef~-EVe!dEtaj(*_jhqW%%84j}p4lxjw!)F*amck^lUH^> zs-eJz-b7c@b88EH<|6y?E~`V)AYvKw8OVP1$um`{82P3l4tlAsIWc?@zt8h|~0 z0!H;siGb3w^`1{@*&&NN6>(=O_u-wP?zwncMOb7-Uoi3%8BUJ(FV{|#QUmv8`QR-2bkxf|5ZnlKb4%Yd)niQ6SC91DC=xyjZ_ zhJ*hfO2kmIb{R8`IlHfJ#f8h5L}&2%)-7YLp5ZN;i^LXVE;h6%4Vz(~+;!j!kKA=6 zC9aAbqM5nsq1_cdHuE~YXXr*_JTQ0q`i8j;uoe;T9EO0pwhQ5)fBG#qEI>bK7 z2LJN#7-HAGJU|=qx2kY`ZF?jDWjnlX+Y)_q>txHClw}P`$a#w5^+FxZ5c&U7v|ea! zXbey^a|coz&B*6xME(r3TDu)<=r(NChIfV2D3%}C(lNsZjQLeaZld{|!uDLsViJP5 zR!$PYU&EPCr<=37+;R~%3nTgH{6-4aK+&Au$k<2F6mRVTuKGm9 zSHuzP`q;u&K?h=R>S_(s7)T-WeUfX%Al}TT>*pNqP`7GMUNLS1axp4cA%L6LK>*ZByot=-&aUUG>``|qZk=9^JctLse}31e9zr?l)C4jSi*Hi;rLz6gQ}f|FBX>vSvYkp98LgQSbr@h;Lan_ zL^j(VdslJRUtsSrr0AvoE6ven;Ihj@7l-JztYp0t8BM!O=v6?m9&%ZYdLS$?Q;<-W zIEn}8k71}@gtxtl%nRdoFix4+T+Sh|`Ao!?L}*{4sVPy-3j=!m4P7J9L$$t%lZg8F z*l0AZ$E3*t#FlbXY^f|LvzoR(HmIOnG{I_71uns~izM_}V6mD&2dSXkZh8^w0S&Cy zob&6|BDwPx1J(^(XsB-IsJ9dUCj38n72|c;eA%KG`WFh`bp$*~NOTwvFFG|}p%*dc zMDXOcY9uG%Y0KiazG2!evPV9wpq>p-Lyr30EAk~H7A1Gts%>fX^K!_zH2EAO|H!|YKF^>RsZTp z1hcJEtAJci6FbEYeHr=@FH~eRXK;G8C)L7UT#J#LPwhjjh-{fyA)nAHVaJsChxH+m z%KCo8t3P>9EmFs#o^qn6mYj3HPp|2FbkRPfmx(ShlG?E;Y!~ztZvOV+JQ1&x)&hHS z106yGWaq>}q#tAG7KV`j30JksgD`$itknAUXjEI-u8=RV?DEnO9_Eo)h?^tZ@F`i}sM7%8q=qQ*H*`Ycs> z9iGi8<7`_}wk`3VsE1ozLS3~I7YaU{@}?fl9fa2jCp_5*P#i#?dM|h6*<_AQpf2dZ zJ#6dK_+p+{cZ{t6nI@0h5JmV_{w$+#4fT3Lb@@7CK%lw4+2$MDmAdwY^7ibI9Mg>j zwSv_=KMy1iF7kbsnJ`k4EG_XY6}XP{xoHr{mZ>Z9zsm4Sl~l<;WbFR|)l)b2ds4PN zg=0_Gw4Xob*#9XyTB@dM(R|A3m!GB2=cqH55vazKt)dPKifDH=rzrDz3k8w~Ss23D?zI?A8P{(6xcmC1NEL>nd%pt<)8 zNdJytrA=|RrNp*JW^2@V>BNWc!+mAJT%R)6KP*`ue=Q+> zx?NejLoV5=l}ex!+7Ow!;FmgRxL#(cVct<+3b&r+D%4eX^ujI>(mP76!rkyazleYo%B=dj$;0|P0 zGO_+?!5m%~UDq=_E{#7miUXVeO{i~i6bGlqT~?~c>i9>rLbxjaOM6vJiUnY$-3hB{ zW72qQ%k}=be!04RUPy-IY6NnE8|Z%7zD=>iqBCB=Upd znqo}Yz^quf6LEhSb`Qmb$ZhIEBGiCmczABSz3Uq{`y*NOXw29tP$erYQ$h{~%wsv| zKaT*gM))fq)0F)d+j=u)g~dv%QOciFN$Yt@X`^_#{bKut^_SL1)~D^x2lmX3eig#4tY zAx+XVJ9yb4Ofs}YwKlp4x2w%sukgMS*+RsE=;1NZ#CH&(h>Mq0A1Fqi)E4(l?qyIv zrar0XeUhk5a0@bT^4NR>+}NAvTjpEtzOmrjnq8YZLXj2-Yf>*Y38vd*U|rB8lfFp> zzt}v|H4-@y(qSe#*$Kh1%|_Id1rXqIn>wi4Q~|sdO7~uCv_ni;#;vE+l+IGI*Y9!-hu1!Gh-SaeJ3NH z!U|&ntl%Z})?1(mIX1QBsv>nG?O#yYcd2YTxaY%Tqs`=@Nbp6V(qLZ}ElDlgTJVu= z+Z3o!8z15J%9M9c@kh4rRqT5sO%L4_YKw4kdw;SKfeM;Bm8MSolq0F{EEQe5MaaxQ4(y2VjawxIbsc1_v1}% zJly{-X)Ds_>Sa?`$DQ@k6eOR z7w?HU)9xB5x!;Jdo$t8ckXpSTAK86CaUX~{9*d#?A*>2qM!^T_GzV;Ux~qa}qi}=> z!aTHD1uulLIZ*BuAe&V|XN`a;7%J#)5S)(w4* zy7ue*jkXf1hYISgLMhcP3(7?*O$9EOfLC9sV7XD)%bKP_+e!h7LlrzPI0U%StH4_( zbU!wzpvgjQQ$d+sfE&FEs%-+IU8taPk03xGy*T`eUH`TA?jfUBsG@$&f=;8borWO` TY(}bz3T!0;-0q42TNnQy%dtv} literal 0 HcmV?d00001 diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py new file mode 100644 index 0000000..f929866 --- /dev/null +++ b/string_indexing/fm_index.py @@ -0,0 +1,134 @@ +from suffix_array import naive +#from compression import burrows_wheeler +#from compression.burrows_wheeler import transform_from_suffix_array +#from compression.burrows_wheeler import inverse_transform_naive +def transform_from_suffix_array(SA, text, n): + return '#' + ''.join( + text[SA[i] - 1] if SA[i] > 1 else '$' for i in range(n + 1)) + +def inverse_transform_naive(BWT, n): + reversal = [''] * (n + 1) + for _ in range(n + 1): + reversal = [c + r for (r, c) in zip(sorted(reversal), BWT[1:])] + return '#' + ''.join(sorted(reversal)[0][1:]) + +def get_L_from_SA(SA, text, n): + return '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) + + +class FMIndex: + + # all of strings beginns with # (idk why?) + # F is first characters from suffixes in order from suffix array with $ at the beggining + # L is result of BWT + def __init__ (self, F, L, n): + self.L = L + self.F = F + self.n = n + self.sampleSize = 8 # const for sampling + + #prepare char mapping for F + self.mapperOfChar = { F[2] : 0} + self.begginings = [2] + last = F[2] + lenOfBeginings = 1 + for i in range(3, n+2): + if F[i] != last: + last = F[i] + self.begginings.append(i) + self.mapperOfChar[last] = lenOfBeginings + lenOfBeginings += 1 + + self.lenOfAlphabet = len(self.mapperOfChar) + + #prepare closest samplings + currentSample = 1 + self.closestSample = [1, 1] + for i in range(2, n+2): + if abs(currentSample-i) > abs(currentSample + self.sampleSize-i) and i + self.sampleSize < self.n: + currentSample += self.sampleSize + self.closestSample.append(currentSample) + + #Generate values for occ for given samples O(|A|*n) + self.occInSampleForChar = { i: [0] for i in self.mapperOfChar} + for c in self.mapperOfChar: + currValue = 0 + nextSample = 1 + self.sampleSize + for i in range(2, n+2): + if L[i] == c: + currValue += 1 + if i == nextSample: + self.occInSampleForChar[c].append(currValue) + + print(self.begginings) + print(self.mapperOfChar) + + def count(self, p, size): + if size > self.n: + return 0 + + currChar = p[size-1] + if currChar not in self.mapperOfChar: + return 0 + + mapIdx = self.mapperOfChar[currChar] + l = self.begginings[mapIdx] + r = self.n + 1 + if mapIdx != self.lenOfAlphabet - 1: + r = self.begginings[mapIdx + 1] - 1 + + for i in range(size-2, -1, -1): + currChar = p[i] + if currChar not in self.mapperOfChar: + return 0 + occurencesBefore = self._getOcc(currChar, l - 1) + occurencesAfter = self._getOcc(currChar, r) + if occurencesBefore == occurencesAfter: + return 0 + mapIdx = self.mapperOfChar[currChar] + l = self.begginings[mapIdx] + occurencesBefore + r = l + occurencesAfter - 1 + return r - l + 1 + + + #Should be private + def _getOcc(self, c, i): + closestSample = self.closestSample[i] + toAdd = 0 + if closestSample < i: + for j in range(closestSample + 1, i + 1): + if self.L[j] == c: + toAdd += 1 + elif closestSample > i: + for j in range(i+1, closestSample + 1): + if self.L[j] == c: + toAdd -= 1 + + return self.occInSampleForChar[c][(closestSample-1)//self.sampleSize] + toAdd + + + def query(self, p, l): + return self.count(p, l) > 0 + + + +text = '#abaaba' +n = 6 +SA = naive(text, n) +bwt = transform_from_suffix_array(SA, text, n) +revBWT = inverse_transform_naive(bwt, n) +print(text) +print(SA) +print(bwt) +F = get_L_from_SA(SA, text, n) +L = bwt +print(F) +index = FMIndex(F, L, n) +print(index.count('aa', 2)) +print(index.count('aab', 3)) +print(index.count('a', 1)) +print(index.count('b', 1)) +print(index.count('aba', 3)) +print(index.count('c', 1)) +print(index.count('caab', 4)) +print(index.count('abaaba', 6)) From b495f666640e900b085c69aa15d5b7c94c7cda79 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 15 Mar 2024 21:19:56 +0100 Subject: [PATCH 02/24] It works --- string_indexing/fm_index.py | 52 +++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index f929866..04860c7 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -42,34 +42,35 @@ def __init__ (self, F, L, n): self.lenOfAlphabet = len(self.mapperOfChar) #prepare closest samplings - currentSample = 1 - self.closestSample = [1, 1] - for i in range(2, n+2): + currentSample = 0 + self.closestSample = [0] + for i in range(1, n+2): if abs(currentSample-i) > abs(currentSample + self.sampleSize-i) and i + self.sampleSize < self.n: currentSample += self.sampleSize self.closestSample.append(currentSample) #Generate values for occ for given samples O(|A|*n) - self.occInSampleForChar = { i: [0] for i in self.mapperOfChar} + self.occInSampleForChar = { L[i]: [0] for i in range(1, n+2)} for c in self.mapperOfChar: currValue = 0 - nextSample = 1 + self.sampleSize - for i in range(2, n+2): + nextSample = self.sampleSize + for i in range(1, n+2): if L[i] == c: currValue += 1 if i == nextSample: self.occInSampleForChar[c].append(currValue) - - print(self.begginings) - print(self.mapperOfChar) + #print(self.occInSampleForChar) + #print(self.closestSample) + #print(self.begginings) + #print(self.mapperOfChar) - def count(self, p, size): + def getRangeOfOccurence(self, p, size): if size > self.n: - return 0 + return [-1, -1] currChar = p[size-1] if currChar not in self.mapperOfChar: - return 0 + return [-1, -1] mapIdx = self.mapperOfChar[currChar] l = self.begginings[mapIdx] @@ -78,17 +79,28 @@ def count(self, p, size): r = self.begginings[mapIdx + 1] - 1 for i in range(size-2, -1, -1): + #print(l, r) currChar = p[i] if currChar not in self.mapperOfChar: - return 0 + return [-1, -1] occurencesBefore = self._getOcc(currChar, l - 1) occurencesAfter = self._getOcc(currChar, r) + #print('OCC ', occurencesBefore, occurencesAfter) if occurencesBefore == occurencesAfter: - return 0 + return [-1, -1] mapIdx = self.mapperOfChar[currChar] l = self.begginings[mapIdx] + occurencesBefore - r = l + occurencesAfter - 1 - return r - l + 1 + r = self.begginings[mapIdx] + occurencesAfter - 1 + if r < l: + return [-1, -1] + #print(l, r) + return [l, r] + + def count(self, p, size): + ran = self.getRangeOfOccurence(p, size) + if ran[0] == -1: + return 0 + return max(ran[1] - ran[0] + 1, 0) #Should be private @@ -104,7 +116,7 @@ def _getOcc(self, c, i): if self.L[j] == c: toAdd -= 1 - return self.occInSampleForChar[c][(closestSample-1)//self.sampleSize] + toAdd + return self.occInSampleForChar[c][(closestSample)//self.sampleSize] + toAdd def query(self, p, l): @@ -118,11 +130,11 @@ def query(self, p, l): bwt = transform_from_suffix_array(SA, text, n) revBWT = inverse_transform_naive(bwt, n) print(text) -print(SA) -print(bwt) +#print(SA) +#print(bwt) F = get_L_from_SA(SA, text, n) L = bwt -print(F) +#print(F) index = FMIndex(F, L, n) print(index.count('aa', 2)) print(index.count('aab', 3)) From 700fbb93257ed8d32896cc8184b3db219f8a7988 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 15 Mar 2024 21:38:43 +0100 Subject: [PATCH 03/24] Add locate function --- string_indexing/fm_index.py | 49 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 04860c7..92d71a1 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -12,29 +12,28 @@ def inverse_transform_naive(BWT, n): reversal = [c + r for (r, c) in zip(sorted(reversal), BWT[1:])] return '#' + ''.join(sorted(reversal)[0][1:]) -def get_L_from_SA(SA, text, n): - return '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) - class FMIndex: # all of strings beginns with # (idk why?) # F is first characters from suffixes in order from suffix array with $ at the beggining # L is result of BWT - def __init__ (self, F, L, n): - self.L = L - self.F = F + + def __init__ (self, SA, BWT, n): + self.L = BWT + self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) self.n = n + self.SA = SA self.sampleSize = 8 # const for sampling #prepare char mapping for F - self.mapperOfChar = { F[2] : 0} + self.mapperOfChar = { self.F[2] : 0} self.begginings = [2] - last = F[2] + last = self.F[2] lenOfBeginings = 1 for i in range(3, n+2): - if F[i] != last: - last = F[i] + if self.F[i] != last: + last = self.F[i] self.begginings.append(i) self.mapperOfChar[last] = lenOfBeginings lenOfBeginings += 1 @@ -50,12 +49,12 @@ def __init__ (self, F, L, n): self.closestSample.append(currentSample) #Generate values for occ for given samples O(|A|*n) - self.occInSampleForChar = { L[i]: [0] for i in range(1, n+2)} + self.occInSampleForChar = { self.L[i]: [0] for i in range(1, n+2)} for c in self.mapperOfChar: currValue = 0 nextSample = self.sampleSize for i in range(1, n+2): - if L[i] == c: + if self.L[i] == c: currValue += 1 if i == nextSample: self.occInSampleForChar[c].append(currValue) @@ -121,26 +120,26 @@ def _getOcc(self, c, i): def query(self, p, l): return self.count(p, l) > 0 - + + def get_all_occurrance(self, p, l): + arr = self.getRangeOfOccurence(p, l) + if arr[0] == -1: + return -1 + return [self.SA[i-1] for i in range(arr[0], arr[1] + 1)] -text = '#abaaba' -n = 6 +text = '#ababa' +n = 5 SA = naive(text, n) -bwt = transform_from_suffix_array(SA, text, n) -revBWT = inverse_transform_naive(bwt, n) +BWT = transform_from_suffix_array(SA, text, n) print(text) -#print(SA) -#print(bwt) -F = get_L_from_SA(SA, text, n) -L = bwt -#print(F) -index = FMIndex(F, L, n) -print(index.count('aa', 2)) +print(SA) +index = FMIndex(SA, BWT, n) +print(index.get_all_occurrance('aa', 2)) print(index.count('aab', 3)) print(index.count('a', 1)) print(index.count('b', 1)) -print(index.count('aba', 3)) +print(index.get_all_occurrance('aba', 3)) print(index.count('c', 1)) print(index.count('caab', 4)) print(index.count('abaaba', 6)) From 51882c63d660583212cbcfb17fd5f581c07000f7 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 15 Mar 2024 21:40:43 +0100 Subject: [PATCH 04/24] Fix for locate --- string_indexing/fm_index.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 92d71a1..805a873 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -16,8 +16,7 @@ def inverse_transform_naive(BWT, n): class FMIndex: # all of strings beginns with # (idk why?) - # F is first characters from suffixes in order from suffix array with $ at the beggining - # L is result of BWT + # i sppose that patterns do not starts with # def __init__ (self, SA, BWT, n): self.L = BWT @@ -124,7 +123,7 @@ def query(self, p, l): def get_all_occurrance(self, p, l): arr = self.getRangeOfOccurence(p, l) if arr[0] == -1: - return -1 + return [] return [self.SA[i-1] for i in range(arr[0], arr[1] + 1)] From 72fb4755a0a2bf9fdc86aa4a26491ba182d66c87 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Sat, 16 Mar 2024 09:45:56 +0100 Subject: [PATCH 05/24] Dodano unit testy --- .gitignore | 3 ++ string_indexing/fm_index.py | 58 +++++++++---------------------- test/test_fm_index.py | 69 +++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 42 deletions(-) create mode 100644 test/test_fm_index.py diff --git a/.gitignore b/.gitignore index bfc8703..793ce6e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ backup*/* benchar/cbenchar/build/* +.vscode +*/__pycache__ +*/*/__pycache__ diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 805a873..3086396 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -1,24 +1,10 @@ -from suffix_array import naive -#from compression import burrows_wheeler -#from compression.burrows_wheeler import transform_from_suffix_array -#from compression.burrows_wheeler import inverse_transform_naive -def transform_from_suffix_array(SA, text, n): - return '#' + ''.join( - text[SA[i] - 1] if SA[i] > 1 else '$' for i in range(n + 1)) - -def inverse_transform_naive(BWT, n): - reversal = [''] * (n + 1) - for _ in range(n + 1): - reversal = [c + r for (r, c) in zip(sorted(reversal), BWT[1:])] - return '#' + ''.join(sorted(reversal)[0][1:]) - class FMIndex: # all of strings beginns with # (idk why?) # i sppose that patterns do not starts with # - def __init__ (self, SA, BWT, n): + def __init__ (self, SA, BWT, text, n): self.L = BWT self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) self.n = n @@ -43,7 +29,7 @@ def __init__ (self, SA, BWT, n): currentSample = 0 self.closestSample = [0] for i in range(1, n+2): - if abs(currentSample-i) > abs(currentSample + self.sampleSize-i) and i + self.sampleSize < self.n: + if abs(currentSample-i) > abs(currentSample + self.sampleSize-i) and (i + self.sampleSize < self.n): currentSample += self.sampleSize self.closestSample.append(currentSample) @@ -57,11 +43,9 @@ def __init__ (self, SA, BWT, n): currValue += 1 if i == nextSample: self.occInSampleForChar[c].append(currValue) - #print(self.occInSampleForChar) - #print(self.closestSample) - #print(self.begginings) - #print(self.mapperOfChar) + nextSample = nextSample + self.sampleSize + # should be private def getRangeOfOccurence(self, p, size): if size > self.n: return [-1, -1] @@ -69,7 +53,7 @@ def getRangeOfOccurence(self, p, size): currChar = p[size-1] if currChar not in self.mapperOfChar: return [-1, -1] - + mapIdx = self.mapperOfChar[currChar] l = self.begginings[mapIdx] r = self.n + 1 @@ -77,13 +61,11 @@ def getRangeOfOccurence(self, p, size): r = self.begginings[mapIdx + 1] - 1 for i in range(size-2, -1, -1): - #print(l, r) currChar = p[i] if currChar not in self.mapperOfChar: return [-1, -1] occurencesBefore = self._getOcc(currChar, l - 1) occurencesAfter = self._getOcc(currChar, r) - #print('OCC ', occurencesBefore, occurencesAfter) if occurencesBefore == occurencesAfter: return [-1, -1] mapIdx = self.mapperOfChar[currChar] @@ -91,9 +73,9 @@ def getRangeOfOccurence(self, p, size): r = self.begginings[mapIdx] + occurencesAfter - 1 if r < l: return [-1, -1] - #print(l, r) return [l, r] + # O(|p|) def count(self, p, size): ran = self.getRangeOfOccurence(p, size) if ran[0] == -1: @@ -116,29 +98,21 @@ def _getOcc(self, c, i): return self.occInSampleForChar[c][(closestSample)//self.sampleSize] + toAdd - + #O(|p|) def query(self, p, l): return self.count(p, l) > 0 + # O(|p| + k) where k is the number or occurances of p in text def get_all_occurrance(self, p, l): arr = self.getRangeOfOccurence(p, l) if arr[0] == -1: return [] - return [self.SA[i-1] for i in range(arr[0], arr[1] + 1)] + return [self.SA[i-1] for i in range(arr[0], arr[1] + 1)] - -text = '#ababa' -n = 5 -SA = naive(text, n) -BWT = transform_from_suffix_array(SA, text, n) -print(text) -print(SA) -index = FMIndex(SA, BWT, n) -print(index.get_all_occurrance('aa', 2)) -print(index.count('aab', 3)) -print(index.count('a', 1)) -print(index.count('b', 1)) -print(index.get_all_occurrance('aba', 3)) -print(index.count('c', 1)) -print(index.count('caab', 4)) -print(index.count('abaaba', 6)) + # O(|p|) + def get_any_occurrance(self, p, l): + arr = self.getRangeOfOccurence(p, l) + if arr[0] == -1: + return -1 + return self.SA[arr[0]-1] + diff --git a/test/test_fm_index.py b/test/test_fm_index.py new file mode 100644 index 0000000..c04f4f2 --- /dev/null +++ b/test/test_fm_index.py @@ -0,0 +1,69 @@ +import itertools +import os +import unittest + +from compression import burrows_wheeler +from string_indexing import suffix_array +from string_indexing import fm_index +from generator import rand + +class TestFMIndex(unittest.TestCase): + run_large = unittest.skipUnless( + os.environ.get('LARGE', False), 'Skip test in small runs') + + def get_all_occurences_of_pattern_naive(self, text, n, pattern, l): + result = [] + for i in range(1, n-l + 2): + occurs = True + for j in range(0, l): + if text[i+j] != pattern[j]: + occurs = False + break + if occurs: + result.append(i) + return result + + + def check_fm_api_for_pattern(self, FMIndex, all_occurences_of_pattern, pattern, l): + cnt = FMIndex.count(pattern, l) + occurance = FMIndex.get_all_occurrance(pattern, l) + any_occurance = FMIndex.get_any_occurrance(pattern, l) + exists = FMIndex.query(pattern, l) + self.assertEqual(cnt, len(all_occurences_of_pattern)) + self.assertEqual(sorted(occurance), sorted(all_occurences_of_pattern)) + self.assertTrue((any_occurance in all_occurences_of_pattern) or (any_occurance == -1 and len(all_occurences_of_pattern) == 0)) + self.assertTrue(exists == (len(all_occurences_of_pattern) > 0)) + + + def check_patterns_for_text_naive(self, text, n, patterns): + SA = suffix_array.naive(text, n) + BWT = burrows_wheeler.transform_from_suffix_array(SA, text, n) + FMIndex = fm_index.FMIndex(SA, BWT, text, n) + for pattern in patterns: + l = len(pattern) + pattern_occurances = self.get_all_occurences_of_pattern_naive(text, n, pattern, l) + self.check_fm_api_for_pattern(FMIndex, pattern_occurances, pattern, l) + + + api_naive_test_cases = [ + ['#ababa', ['a', 'a', 'aba', 'aa', 'ba', 'ab', 'bb', 'c', 'abc', 'ababa', 'ababaa']], + ['#aaababcaaabba', ['a', 'b', 'c', 'aab', 'aabb', 'aaababcaaabba']], + ['#aaabaababaababaababaaababaaabaabaaa', ['a', 'ab', 'aab', 'aaab', 'aaaab', 'aba', 'abaa', + 'abaaa', 'aaba', 'aabaa', 'aabaaa', 'aaaba', 'aaabaa']] + ] + + def test_fm_api_naive(self): + for test_case in self.api_naive_test_cases: + n = len(test_case[0]) - 1 + self.check_patterns_for_text_naive(test_case[0], n, test_case[1]) + + + @run_large + def test_large_random(self): + n = 10000 + text = '#' + rand.random_word(n, ['a', 'b']) + q = 1000 + patterns = [rand.random_word(100, ['a', 'b']) for i in range(q)] + self.check_patterns_for_text_naive(text, n, patterns) + + From a85b9786863818e7d1af7d9c743f634e23ab5448 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Sat, 16 Mar 2024 12:01:16 +0100 Subject: [PATCH 06/24] delete cache files --- .../__pycache__/suffix_array.cpython-310.pyc | Bin 19494 -> 0 bytes .../__pycache__/suffix_array.cpython-311.pyc | Bin 38755 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 string_indexing/__pycache__/suffix_array.cpython-310.pyc delete mode 100644 string_indexing/__pycache__/suffix_array.cpython-311.pyc diff --git a/string_indexing/__pycache__/suffix_array.cpython-310.pyc b/string_indexing/__pycache__/suffix_array.cpython-310.pyc deleted file mode 100644 index c75b8d22e89c3dbec0396f1a730504495b483d88..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19494 zcmb7s3y>VgdEPvCXCJpX91n-PBM6>A5aN*pz?TF{gb{)ODX|Eh832|N#NcXivv=6L zz1_Q>IgnWI+LVtnk51^4V>?R9vVADa77^QtEy;P6%Z?-YkyOcLJIce8hvT=Ys#H>i zq+&BNk?;F^W@l&jz)}u1y*)kMJ>CEN(S!EbSkb`W*S7vw{ojAcF#d=)gMT^PJdG=u zGY!Kxe6wj>(!b^oztQ*!P*Dv_R*NjU! zzvP$koA<~33VuC*k3Wvzg1^_Fz;DsN%b&z=$=~Ns;kWGX_owkY_7TIM@n>H%{Mjw* zQU!N+`v-7$0C#)*d;EjwGw$E(&*68k|33c^ekc6<{KNRY%fH`0g5OF1{r&^^-RIBy z58`*qf5?9rzx(~8{xSSc`^Wu9@H^u_>OY3xS-juH{?%jjq`RcBYM!#zv`&Ro=806em^0 z?M*APoAp*ysJAxP>Z%sj=bgyB5ZUd`FftpFdF`g5CeZPnmyWNs!sCm_H-owgj<40i zFl-z@TMt&l_WJP`RePnr8lGz`)?14!weWZlszz)1Xsx;2R*mrLS`fEX8!f;73LYH~ zHkSaRYE7xy^<(STqtbFctg8Ao%wG5K!hl7^95*LS$5fNJ$$)}8ASi5H!NVwg)3;tT zLL)Roi(uftv-o|(c->esL7dKOUe{m?5aljpCgUWW>;kS^vnU#X>E!~TX!fkG)di*R zH-5<4umC+%;3hD+X-C$vN7Oh5sJ$!*C|%rzT2wsgHx|Ri_S*WXJ-EMRp_nx~GgVb< ztsXnsY%kWD!Kq_uugs#@n+WGrx48dhE-KeEw}H|b;|FCq=__wqY8stnA)vGTT5WwD zaLz2J*$DJl|AvvEv|VM zSX1|*vPjS-f5d2?eBZ)NO1>as;A_7zNj#diZP`7$=k#2Vv@Ilk63EE)@?BT=Ty@n$ zU8C#F;3AP*R@d(4P8vODV_(WY|+%vxk#5kiQ}YKjvLn_Lx~wOl8}0s{UJh`MY}n+I`C zTOjU-(6~r+Cx5KxaS3rhhRQa;a>97Spp+$%3rRbud!gyOpwHZXY>vdOi5?`LUwFqn zoFORAM&yvngOs2WDjy~&U|$I8L3GzNo+WQ}FCOqxb0|79ty<$+eSoU_WRzJ1w?-Nu z)dUAn2vrv~Rr|)LE#Ssnvh-qZnhOHVgtiGi(6$rSUkOvf?k_|r`hXT}Z}UYM_dfhY zE*NgPj=M*AXSVd%kZ=#bKCTtF2wSxp&onPcpZD9N+iNUYrBuAo`umCMmgK)J3KPj7uxJgbatLP1B%-W z>%p8BopT~O=Qaay`P>UNwSM$tjb^j1j-GD^_0|=T++1J$DoRKZTnmcyG=dW90znD2 zilAgF5j0(FWj{8_gYky=NeTf) zlxIzW0S`b%mEZMj^=)X%V7hyqD!D&!FsDuO#lrry#?j4 z?f5ycsY4w5c|7;_8-8I65=>gW!Iu4?k|t+vdb6FQJu4rl`XArJGM zV^)5*FsolCo%LK$lnFBmBfBmcznAkX{vHSix0{PKpZ&UxKI3SOXMe+jp;bbk{_3>Q zfR5BVn~;@=mh9z4$lS%2Fc;e0d?H+WUYPIZx*p7m$u}V#=l5-X7|?o8^8Cu@)#~b3 zKHq57TSqU{U<0TirOpgBl8O>t98dre_&bIG9gN1U-T?K95`V|MFB+v~vT(p_4#lUa zG)u6JU>Nuq+r`)|^W%nTY*=Emfk+=&fazr~UR5xAl&>h2G%bn1BU?m;lgss1{grif zij*xn&U*Er(U}ic>o2ENTU~6oV8MiGOMl!vn)4G%t*JN=a7VOhORw$VlsaVhc!LfeLYH*du{as{%OoTzy?1v7ylYL&7% z;I7K%Bj(fQE`X43Zxh$4aFchbkD@I@QW*#h>m2L{#vOS|>l_F&PN304WGm4V{WXfV zd6MmqqUcO#F?6!8^|BTwmyoEvTcm1{v0kF~rh%kb|4wmgc^g*>YO!B@^fBE)N|e`= z-9*_Emo7#8YXZjZq&aI=OvS#7+qh zyVC|xe>W{F+WXs(Rx^-HbZuEb2}^3*oHn494RxOn714FK(2oB0enWf;co5KDXk!cP zE||=hpY0X4VUcwUwBCBfZn2xg__}UEnV*4O#$UdrTaePD^yCP661U;~d70N^X<2;N z#s_3ROLcUAlo%_cykx#E`kye^Hv|h|r+Mtn+Hy_w?dGu`8@v82=8kvG6(zgos#+7U zA@f~Nc2n*ZIN!8E_;7`~=f>waH|8VUWNyM|KDV%#pdk6B3=w9a9Y??r$TUIKQ8c7% z4d>TUxzvW~w%ahG=y{J{1uUJwj_R8q18HFRAZjkIQ^y*IRyMeq6o8mMU9ZSwd8jHB{>x zo3&s7jT z2h0jA%t=#mWJXuhkE9n&AP4V&&bMKNz+!|KAQmD+4WOMZcm$w+(7Erv=J~ld5zg`R z%aBOa){}4qA#5cuF$2W`N`tQ)+O!+BjdpnkPMzD!bshA|%^0h;Iz_*C8t>!19w8Kx zi$g;X`5wmPIS*$6U)(EmcrpiQ7IRz$4brNp`32AsJZN&BpU0d+(;Uqnpym9=$9#)E zP}hWo{0;CfXjZSj#F@U~=X-7!jw_!29_Ip`=D~GeMw{GC8=&KYJpU3}NXLB6I=y#k z0S9v(cs7k4@d^C@T}B0ezjZHYU%v%w#$146uK-IKZKF@NMf$+bFB1B~hlOOd!S_;j zd`SGe-S-xB5q}aq2^5PQ$@f7vK_1NjVjKUn3twTrv@m42Wb_jC85N;`?U3Z7+vxLr~*1%$1 zf?gsSZ)A0SK{1Y}TpgW4AM%&0ylE|@aWb`x9+>b7Cwz)Usw5k@?cx$8ISz>=9=p28 zUgGgd1j>s8gTAoTd~raRk>#T?=q^=ngVSC~R^X6o zXA%8eQ6A2nP27M6rC3r0nz-;j;IySin+C5L4H{6d;|eG? zb4Jm$Zd+#VcIR-sqhnRzyAf8asoqHKZ>=bDLMc!bR4C1$8X_bgq@je!m4;{xtEjHG zgGLAgC;+Lxj5lf`6H>j#Te2eDwD_H`;Z83MI4uIh<=dU{;f0OBobW!wkwhfKgt_Xp z5qeyaI$1~8XAoBj%@tnu5=1W8egy*%3b~G-sPy9bXP@C4SXGQsnJUC0Vh?M25)|8joW|tWkY#91b+a%=w|9)I*m{wmO6%egiG|ju0tde2BW=Q^)%j@OM403xjKfW<&)S1 zG#%&jigHAKHw)S>8hwP9#&?gk?_nVzN#XJe+I|36K%mh=orcXi4KB4_b-vlzmp~{( zLYcTW8HtwwY8w^kS_@k48Ab+IBqV$JNoZ4OY2Gs-Ltsc9_Lu<14(4$W&C!2GjTS#s z51WvzuxF9TS#{LuuCW7P@#zgSI3;(|7d&F@T?FM&kPg6v)BvMi z2>k&*nnw|tp?Z)vSrA2KO&;1(`g`aQu!9FFcL4u#=3!ibmf4w1pfxyPVLng3Y1P*1 z)oN6%R@d78W|Q@DwMsioKT+SsH*;NzC{dqckuD@fScnH(xP<*YEW^urPT5mzU&NL9 zOGKv%&|*H6$5q2Z1uOcn1Ro2~VQ}8i!KC@?agWSep>R+z#$- zq?#?pZ1x*15l4ulLlo`gh!_OFm_3ZQK6_=_0 zHj9gVRA|5(3ES;v5EW~Si<@hkO=LtP_iDZ7_qWbT)Mr?vv?=J?M%&NeinYQVG`n?s z(&|j~w+}pP0vMsa{x>5fwBTb&dEz&02x8NE``<=#zo;D4q41WQ*>#T641= z<0{H&fU6&5r|g=eJxJ+Q5s$6b{A;z=V*MA;M{*$Jkb|A+{>q2Cfuq&;a=I-RRE3&( ziGM{ns2^gHzARbukY8(2d!{}|@q*k%3;T$#x2fdHzAmx=&L%kMV<3{_wu zP!@O#B|pJ7(S1S?qWeTCiteLkoOhAUZ8Cor81;ex?w%~qfhXs%iLA6vHm`9LxxaVFz_9ct7+am;!jUyKdhTbT+2M&vLYY*5(2i5Q}OJ zOHnt}H1Irr7(YmIZ!SeHHBz9ivcW?d=~|<`8N+7?G*dXUAnJ=2WUzTfAAAUY4I{;# zn1c{30!AL-LJg^Y6i+f{7FoX!lL3*9pi6S717?*(sKuk)<#5M-h=YwiJlru4iAQ?T zth?W7FPu<9d{*k~<=zl9F*2@GNu>;5scjNp>sq{<22 zLLug?r97vD4Q+hNPTPBj1$@Lv)O~m9eC!9WH4vu zvh;I^`WfoUFxR!?7I5vB)v}R=!tC#o47dUzbWtyZ+*-9Uu!iKmuotypuYeW3BX9)d&h&~dGIcBNP zqn@z=Xq;1`3p0fvMg|1QJ}`l-En7Pe@WqibCee!`TZK4;z?(64oErv$eOBnM)=0` z_58-+jlFn&7h@2p%Wp7g&D{egvilkAULWOQNb)8FdHS#f?4>f+n(PDn zjc5N%Dr~~ZZW(ixAznn7P+WjEaPo+s(pY10jqteLAbUtlFFF!GIIo#4e)JVjG7- z#Jvcjl3!n{K?yLkr$cO7!^zNU!;dOSoQ;B@{z{a?Au#XYz$V(4 z2$|}Y>!Olqng-EC(jf(oj@sw+-Xn84^1^mi)}+Lc`B^T~fjJ!D5Kw)w8Xpq*DI%Cs zD5_-9@~3DEo6`GF@cyzSd;aAq};4MQeero&srG5cVKlvNfDD#O!hQ4QDX$3oWM} zf$fM$J~KTaZrHrK#-IvnuveE54`6OcY+>CKt!Yo)%Xg*+dWy$1bTX3CmNMyaTyk7X z&QnNGj|cV`(u>bhWJ1$&h64Ohz3tzHG{G=+iW z2KXa6c`;zY+3M#oiPG2&UaqYpz4=RQ`Ijv2XYtQbMDCJmw?ch5lcX*q)W2X&r1h_` zCUh_{x`Z6pIEfxpf^83YrB^i0IPU*ZU%`Wn2rr_+!zCj8zoNoCBXVu9V|ApK86L4{ z5Vz4MbeL_~!dV_fNFl<3`5EBb-EsTbPk1IgG1krbpaqm-!%(47V2Q|_>5qN9mkV<{ z5cUxC`s@x4VBdK(-c|xFk*birfp|yWP-c9wo0n%$1#(-^V92gkqMtN}3%vZB1wN zhSWb{ATr|*xx#UoYe(^ACr7t)2%hOk$!Ehc` ziUjj?*4Nt!x)@@_<>{NyK10OR;~33`kH^=7d{7O&RlOY<6k>46QBZN^uCBhDYLw zGT-thIPh{Qj_I0=8XyWGQGtyaorht3Li{ymIuVbc*9ybwVev`xqp6cV?=T82H0kur z^<7{77M>j@t&~wYXdZwRr=EsS%O@Gtp0SbQ=1|}rBHW}fO zo(|!?%DG-R7fh7#khUC@MISAp;e#?h1&?=|LIn|fq(B^XEOb`f79y4_IYcv&T$@Ht zpVZN{hu;!ZFg}mVWA7Ub-8!=dvgQ}`J;p#5b3PeQxs3D(w=jZP?oA}kVN?XOV_N9R z${_ZTu}HC-QRfM2X;ML46WAI7S^0CEK;~<#R}Ya4W7bC8xEeGE=rBREPSF*#@98k} zDrk>#sKEr#rfffSCFqab#r9?^L{LKr;DcO=2pO^VWI4ahHVO&Yc9|6*iK0E~GAQ5( z@P8Oqnq!TBbIx@v)KFJn$G8kdP*wCPf?_Z}e6N$EbUJMvaX{irv@&pR(fRaU;MSVt z$uY#FB==)6RR;RuIVMA$=>m5=M+8-K)ORHg3Cn=zoaFS7gBYjH7?;NE;Mm~)t}X6L z2Fc`SUV`K~R5bfSNXb_$@}fH-I6mtynEe`t-ZY1G8d>UY>63{8d6a>Y`8V}3W%%;e*YZ@5x?p;QJf%B*?t&FJEq$m zFn7@$|EA;0^F=(${87j~jVri_;#*Ip%lB*f`1>_4{(h|xf4^3Yzh5i)cL6PvBC;~5 z;m*kz1Y+$}kQ4DmTwyWdT{k>^n&)tAxBv~d_yAUY% zKD<5JCC`x-DyT3W4;la^RQ}3wI(KI`oAH-3Xd<)o5;VPpN&{cjA<&F09Mt_iFascf zwNPjJ^(0~vU{~h25n*EXOq=GHc@TO5Vhi`%kz|@vrrTp?~{Zb$zal)12glB z9e!0P>EsNG2YrzYAW0W}K>jn_U?T>>2PAV#5Ww{HL~apSkXhI=MBWNG`2Unk|$n}iinNA%M;MI{7})@VbZa{&`mclffxPe9KiS( zjycK<2-6^CfgUo`5(N{;}F@elR8ud@UH~w0!9K?M1?k_>`r4{L%qod z|B(d|15Ak=CH#5L?PUi1a_y$iUmL$-QbnfP?ORs$^BC12N{Ff)+$zf0NtjVIrH`4z z(q$wpcxTMeeZ;Q+|1kr&Y0L~@MPRuF+zJV&xF8XGyW--ZECy1{?2Zd9vkP|wzxq9F zM}vP?$g7CYB}??5S^F}I$ZYNk^M8zI%mrj%K5oi~YlQjSkgOYt(Pn_2QRindHGxj4 z2ZE)%vL0(|p-e`>)PG?y z#Na$;@IBmF5f!G9d0=w<-C-e_i6k$~mv=pLK+Uhd$b#?J^iC&HNLCSsimw}3f-FXi zqOan?X|7`u6*)=dc%E0lc_JHSsa(>g97TBYM~Olfq#_BfVysScQV^5s^e3E@EimgR z7*t>?blcoP2w?@Lyd0oD+~qks$Pp&IdDxrZv&DV#ai0@-jt}o7B`$UaoJv?rJbnzi zw^#C*|5{k!v0L74**(UqiaRCbg6y!2qY2D<#c5+XNX|pLHvdpU=rW5^c;@$hH9hPDAB5d1gL_IEK# zP9Hl)=ip#wV!)Y+&r?5#1na-Tkj#ro93+OvaoswILfkI-qFM8o5C1;>;1K>ZH?Saw zq9u~3egpSW+GnKY{K1f(QC>$Ph* zPiZ0bc~OW%)8zla5_I%xe0b`gBw#2LcMscEOh=ynVKiOyexRRFkqd;~q|5yue&EQ7%7rPJEQK~vy2!(Cn1kG+lBtZQBIrRktRKZvCIK3lCGdg z^NSOiOZfD$z7!(0+gQFD;+!g?AOgTXx)1}r($L?R)oW_;>RW^%y$YPy$3G951IH@= zUnkd@L#i^YHCjQ6!R*ixV&aZvw+B$+hg@3!G1yCitM67pi4x(^==}q9oyUJ0bK$g{ zsfnDI`NesExr$Qvi=?hcQR&Q%t||GnXJj?x3R%rzRJLKV#jD|GM_5f-M&EgRj@36=&?aN%+d9-ICbD)aPp03U7cU_!3C!LGUG!q8^k;T5p+pOSWm#dc^J6pv#RWlm;VnND^+yYoPCYezWP)sD{gy3b_yoH_C; zz3a*TG5h=O1qxLFsVK?hoGpSM3RSo6`+oO(*I&5Z4g-#V^POJ@|NT~j;cw_i?yzJ} zp6nA0hK~#ZLqHfdyw1Oc*9G?5_`1<(2nwf7xrYMAfGJ?OD}IbCpQu+~7YkCE1D1d_ zV4E|3Y(zTz^W$}Mz<$Z_x+UNMv<94jwtx%J9&iIX0wsXXKq;UrPzLBW8V(u)o=b*6 zc|lGk>`n#lR2JMR4OHRDvOqPUC$J2#Jm3Ya2-E;p25JGT0(F4Zf#rb90xJN$ft7$Y zfmMLDEbn^c?JLN;F3^Dc%L9#oD*{b`D+A4ds{+DNLu<>gsF}SP$175BaOmA*@0|-y zIA82}_2B;cqc8Qm_~IQQV-AJ=QaEGZGd^%87~VG|{R*9L_|<>G(Q3?C54`;9H+o*( zm$B{X+4};W4bbh}Va5sd`IBJ)jF@mz2y5<78a~FaPt+?DNIxZn)lo7S3RCPeyjk`J zMrX=6Wtubwj1$OrQkXR14D)6Oe$99$(XuPn`{Auk;U%1cLA4)*GSmL3C=rUy}Uqtv~l( z#(6Rr?w5k+k$X_8LWwr~hxP$P3~9R~YWi>>=9?az8Ju=S8XuM{iwo1kGsBM!f@v88 z7XlG~)RXp<$NH{TMfOI8XwS6y!jZ_4wA~eHdxCrny56VkS?_-jCzl24|4RmK|7&|X zOk3?JjEPjZhMt_V?KPzV?=*(MEz_b}s3y}Bw=p%wUlQV;NvVF&~M zrjre@FlC%HPGV8h5dVGSj0=M*@ZrShW{m4MO69mC(bFMnB}PxG#P@F;I|4%k;epY! z=XOoh^h^GcGwXK@j}G{UL%Y`JZ4FC~C%C&=kt~bSf@xEwZOo;VdHLI7UY9!ND zo2H9e?Q7`SlT>lXlrW7d*We~Ut{P{YM-5Ufelc*zBrT_NdNgA_>pyo6(B78wi)Pxf#If|LvGTJ8LHe{S2fDLYN~U1cWK` zlsIM5&g)5&J~c4NyKI74rmU0Zr{#Nwtq7@n(l9CJ$8=KUSZBaEX_~YU#+edl+9pL^ zP6c?4=M?f94VcpU+=NMUK5sqlM#;Lnljfq5%`+zp@>{HS?@gL83tgzHT&>2PS&0BmQvWYf;M zfC$88rf2mdT8Iw|XQm#L0$W;oe|k#BKRk<0@0&S&M9ps;RU!pSMPrRLY`<6G^k#MUw zSl!vC@+%ttJ4UI2iZx@?I2pvR&GbtcVL0+FzP)73th=PGxJ4K^bQl1IrHz(oX4yPpE`gVQ0a@;X8vH9NVe=zSS~GziS2uVw#LK3P(3HG>KTDm zKOVwnTK|GyI=Aknq2b}6wC>f>P;lgJAgy|pNR!qhXExxyQYTJzYl{kIyZRtbt{Jnd zYhB^4J}FF_Xl-j7IbG1mY}Y!89^k3-bAALw4V(4r(Hw!1si16Vzik!yYGrd@cr_!O z$r7d!P95;c-*}D@>-~Yz@wbQ3=EYWS3vvrR4?xSn)yqHkI->rVFcSdk?73e9k{4 zg`}Nya~D8}kN|d%=d+YWX+fo>=W(CVPv}E{uTjlMxdrslUJ?zXMtfp=X5O7%HM1)2 zi!?=ik-oIe9__(yvHyxaYES1{#jE;D^DhU!@cn%77lS`{|H6%Hva45d^+xtTbe6@c zrgzNjhZZWyNbyC8QI;5|<5^?Ux;w zotIslpq?^Do!A7xj9jvTDZyrN1}hcaP^Oz8U{my)!BN^&M82tLHxKQ)3z(6gg(h3T zs%K#JYih~oEzkBmm2kzOU3qL7yzrS#a>h9r~_N81B`^ZuntoeV#UMGT)AKZE&5UmpSa?Rx{#Y|u@+r%5;MPe zzw5`WC|7%S>`RcZz3}doHEKthrNFC1cV!Q4FHc(awbuVC%Fh-iQa*c3S|)9irn_bO zc>$E-8Q+E^Peqb{eM>rX=C5xJjRZ&59rc4>l|p$Osl$o1j2TWmfD@YV0b>GZ#Rq60 zY)pk!);EZz87e4(s%gyVaW8FiwA(&82JnJGKw03Z8X*l}iwPHTXs3CgfX2ooLkfL) zCk!76f?=kK$vTV~VPh{u7N&ty5?FsJ04``b9^o7$CMD1n=fUhdP6kJU@1B!(O|*v2 z1mDRc*#3dh5wP#!{1r7RZ7d<$QvM_UfONfE@kx%|sJL-7QE~mXx!01y^*7|2Hl?PG z-_t^lWrd7*dUR-{72g4W*dL;n^D1R*xsoTU7q1af8MqoSo}unBNc*NulZO08Hoe4DAWDP= zBVf|6oM*Yv^ZnQ9J6T`uMu$Mv*(meTbxs_Zms6t zUlH%Xs0=QIqG!-6`kV zd2zmPe(YYyXXEqV_}qPO?61cE+4%jwFFZd#{zcDEKls@PsSPhLIA2bQFSFV8D-m_f zQ(TPTl6rAXhhfxd*M>VPw@}KdE({G50BHvq{6ZO{oG2?UT-k)PMc4$dey!|K85<&5 zLd^g~9+j`>pG04*6Rf8%?!Vp~x6bZVZ?MKJA-8HlCTgYKITnY6k@qYJFM zIi`#jTgLQC$EM$<>f7*8$Ok}Gx2@)%xDc&?*vd0~e5O{t$XB=qeHv6`UXSO0o3t%o zU#X7Dq&Z!mMUfJ5J`;uAtBT!rQ2GV{B0f9#r%<+*z5J9)QU=tuTqrmtfRffp*SJuS zPk>sal^DCqs~KUK{I^t-h}951L|tiV;GeiBGM1I2}bl>qNXZtzdHN>kTpx= zl`+1u@poCWJ=BWalC^fH*eB+X8lxT2vFWZEJHG*lQCpO8hp-r(QM0z zgw&W!mvLcC9@py5vtrVVNF~u2XRJ_BoEr{?GIl9AFfN6LMn^KH7dtj(Ea7o>-T2~0 z=?zMLfG(>!l*xJ3;1)h{m z->bW4cVjWj)(*wm5ot*0raGU^^G1Bj&drK*bEGAmn?RT@H@4p%PoB6rb!$ql>r(2v za7ngyE7tBv!$W&%%oKYyu_a}1m+kF}y**`Y=WBx2LGB=&zyzJcz)aw51(GjC4NxtK zgC?kpY*XfGfhpRsqh+bU%4zVpB>tW@Olzn)&}>Mv0~ykF`kpjS3v?|YX2l`K0LXvv zh~|%R_Y?f{V_HCtjN+TJOQ@rPt>s#kl&=0^>&V0ubQ;x#!1^!?Ktrkk2Gm} z9J$-{bUXUGV#>~HgwdpN*Y$y*BUu?*rj+iV8#+H4 z?jMB6Dm>BB9}Y?*e(Ak_Qjz6ZyMlE4QBa7a?q7rIb}mmjm&g0!<8!ac&KAYlk`i0k z>f(DK35*!Ii%pS)W*N7xj8>;KNGVRzEmLqLkP%Kur|8NMT`{xTWt^;f{lT$u|8T}J z><@+cIcFeDXkv&2AaA`@UylzR97~_3j0o(*f9MO2rGcZ3f@wABk)HxA28%N~5bK#f zxnQnJnXA(E%?T+bu7J$UA2nY(74uvezVLFaFE)PlwY1q5?Yi*ISjWZwNPk*%L=HS^ z+?3cYH+CwGo%6wa$M5(3^iN`^qjiyHCWC{9s3yKwcGfG-`o{*FshWX^KkcYXIqKp) zasS*t+2KeK4B{~w{ytON3sYNxM7e=rp2v-B?V1mrYtqMCtL_bd!nbOUrN`nP5N(^&ez`af3`BVEpjGu21@dn`O5O>a(WJBC47k^ zx7P9J92d@_QDaw+r7dN#o(rRq(_cR{mO|0d6*e;-!yx48=P@*TbdW&q_su;JXavzSr89u?#(xED<{wc6H z@Hga=T4j}XU2E5-PS!y>1s{uk=42$C&X(~t*2Q}KzE55lOJiQevQ3++vIz>6v8vo zFaf4XqP*&J8n7^aJWCrUkuF4CMs4ulH-91g-2O$+&)v$d-bl7F7N+c#-b5nKUXgG; zW6K(71=tqa>e4V)v}b4BV5FqrNSKornUdVk(0ga!9vx+i@4)ozWcZXOB{K`l zSRi6XE}DkAkpEk3BPWp4oRt+0m^6iS{rC@^#0W6{E6+1oTpu30)(kOLM||vh*VPTP z8)SEb;%-Q_?ahBI`((>L#j=kW5gV*5nB?ff*@!RQ(3m)u^d)=lHr;wXwlcB~R%kbD zkRv+hoN`H%Qqn|B-3kUSaBDPcRlE}0>9PfDeac#&ZduJl5y$3zcfS$)R^)8DbUE7^ zeHVfk4n+<G?39nR?3m6m7=8Q*oxRK|oW-l&(^*Tu$Wk6raf zE$NcVxNzl6^i15Ju*?l5eLpxGKbx+pjd#pdLXFyT%P!ZnCcEc*<_AANEU(=eZAa~T zrki4x$0kE*Rq?Z5KeiZPjKZtrySinzE>vsf<^t%NQ*$pQeb--3 z_N6xVYCp2=WySV#%Jy>FQ5Nw(_86Ql-ipWN2D2sVxo{xT^LQEQH`@E>qfFM zd>!_ffFkLHU_mPaS^{REI!ngdfAqy;nD_&PJaUK1h2FvyGPfZLf@U~BrVWW)VYy4} z`(wa77b$>mg|-bms}?s0jN09O+Hjf}4R+|UwVtXeRAZqwDyPM`m!-@Y31O+?;2IJK zCwOTEu+WC>N6VO`ur_PE0~L^EDAYfEHk6YUGr5x(3Jwo0B6(`ccIx_mV5OmOe}A5d z$;-SiqY5FC9|9v{uxp=(fF=(y?(^bbI)CiE*YjD)=OvF#bPKD5pJ-EZHiuLitrmtH z!Q2q5G#u&=!+toFw-U&h*@cW7?dU%@8X5{i02RW<@f|#z-=p5*d<*m*+1gK(6}IOR z)t%5U>YgsEjO@E~I4#;9h}8>XwJdrS(VG&zU~mc=#jE|W!W;M8*qab*qd zq1&stSKf9i^^oCj`oB!Vvdu_u?&Q6uU8%HYc(38=M+VxvbH`~l9ZbU2wFNrKkT>1e zuTbJGCJp`S`qff~&z1up8?utp>&!vppM~&VQ?izK(O>79!u&X9Wnwi*rPd6yfaw$S zsq}Yupc7`c>GRd4RmZ(JC$43bDs_En$@S_Y<&0#dEnNFX^GssW3BtLpR%x0p{3(GF z0-FiY+~TX4W`;BapqoRNaABeoB7+^2JHYoagdxp`cvTf5*~ z3+?4YZ=;e=`}Jdct|&fp)m}&S)tP&M&0O%6#w-W+11(RD{#NI)ezsyxeV6f#rYTXI zikuaQAJC1rng@8Ig1fxbPy_N%am1o|#z`<@d~Cu4zzQ%du71K?VEoG%NKR7hD@tu8rx^ikS(n8&*o|6Z>u*Nsi5T+!Le{r7T7E%EJYQR*-T|5vnvF= zJSu7c|LT#O)V~{jXzS;WMxg=i#1xS${eD6SX%TOMLXQBxRpe@bZG#N=ijP7n3_|p! z7DRz)3QLKE*kD`SxeM+pywadL1dt+N=afKqTfH0STIL^~{i;D9iDB!nuGM8&)cjbInLlUSr@dhSb z245Zxrkpgsi@!Vuc#C)-aSi{U~M9ym|cj*+|@>$T3*;G&I%zwr@69`qq(4aGLl^;5SPaTeNn5B`o98#+50RD|e ztsM+)BO?}V3K(9KKsbA738d3`5}{xw>rpmpNrVAAvW!k%qXCWd4{#^r*mHdE3;U1t z?>qdeMCD002#_2lV;_PpB|JJh9LhNS0|Vn{$A{r=moc9T`UC0!QYO-$1L%kFI<8EV zt3!x&!50j%c%g3qVCbq=Vx)T0Ep4|({<%ZhawOG4AGnAdjdjj$ix0%5_`vLLh&)p6 zhID!LY#Z2cv*Sxo)wLm|x-HoyS9d7Y9kOSW;@L!F9>E_UJhhuqkcw_b@B`F7&0d`G z1LvCAo}a}uclG!5vmE!-;mr>!$$@h-pUrzxY5|{ogfG@dyvxn!{lnwI>W4vRzdvx^KQa)U@T&b;aOX36HcRxktR?Ib6K5UqZl$b278`*zE**)U z=j;&pqIlEXdZl)~EUxF5WF4!79ZR}O75+YfzfXW|PP8Z`8dm8q2-Fg&CcrkQYjj2+ zf&b7e01@@?vC%}1LMm{WOjwU9aEK=G94f%RwSo4nLcj~7B6Q*gS_?GXPkw^|0b8OD zr+xs#G!3#xe!%pC0w6X42(FR5Q4^Z>AS7IL=xU;_ zWc|t&3|iyhfDrcNv(ZjV{Zslnpe4tsC8t#^qOCD!pFqyS$1F=g?m*6tTsc}VxiAfL z_Bc&)O?G6NCEqjkmCol*Ur+Sa@n`BQby4eEt?#R6>#3(OkAI9l16ufF^jUsn89}bG z*kW`{30sexFi)U-i3XQ1Ubc)Gw(q8Ej9X+#>bvxFn!ptT#7nlC;chxizFQe938aRg zz_xOn72*=|KBOw7>)j+$A0G$?m>yobizGjxWZ%U41Uu=fjGdVM=Iu76p-XmkE3R(X z*Eq|uNz<|ifb3FqJ`n2`#Jcza$WDd1-eeCDmO?m-K53)2Tww2D5#Ksg`O}t08K^xAjZZD2U#$(m9 z%j5ppRkvRtM!?gR@^qyw?gy3?3zik}!MQU@-`q&jFIzS$mW?UP#;em%>^I+=a zJf0)vD`82B!l0#IfNQovJ+>5OZr0~4H7-e+i!dkBVbYKpEUt+uSfTcvG;5TT;>>oY zauYe-&NWy>@MI}?KJzc)0%NBj;Jkx#_)L!vlHcE1NHHN^>IG@cSLdhWA zaj=~&Qw=YF#-x$pGc^7-4;2uSXwUs6{zKmb05!9GWxVrtV`A{u8M$GT(y&Rc+XPKQ z=w8R?=VP{LRiruXEQyXqrKmLX9Bp^dz5rl8THTkYE>6YzWa~1;ir@eyb5q(~@xWcb z;I2==#W5#K5v&zyn;Uk)j;J|ZSD)~uEZ#^k+H>Jl%=UHKUJ}WgglTrbyy%0AI)9&- z4TUU#ek?KaOT}GILZKE!81uXue|63#m-v(tA7PYQ z1}<FV}Q*`QO%Vld;T8t@*S%s|=9SW_l zp6Jkb&=srJR%6;aZOv1TcMHe{ko;6)_L#O7+R0sk&BI9{YJp}1)*ITK3UwUBPQZ0u z-TFbg6nK4V8H&Pt@aV!`afa1Hs@M8EO`4wi9Fv#p7w|}lmXs+hX-bS*(HaZvCJ&FZ7so+wEF7@6s`+ceuM@c*P9Kvnnr z*6auJ8j`+n`rxz?!`R%sHWVm?;w)(>97>%ip&-{{fk)J{e_)QtUJDiB&7u z3;m>3Ry^(eDj;hX7*ZzTB2s}>hGAG~j(#$MOqq_PKtD2MfX6md67mAc*tiohL^Lib z$jJ{zxVUq_n0Il;35T&DE|E@7`b&D6b_PwDgq)4^Pw5^3+B`%5F8~=xr=gi;w$8vl zF(LX~r1wi_d8~7K=gdyvA9roMJ9$D`2hZC*vU`u>-V<5#u%sz*BKewJ(yf$q!}z!y zLPB*=X`s(58}{N`_U=;vJ^Lc-loJGvUii>-30$wi1{oA&tXCB z@%gj&d+zVKzenjlh--4$A*Jk4WbH$Hb?khyZvGhlr1@h?>$a4AyKLXC*te&Q+l$d7 zEh&Z`C7kGaFSgUD6gzQFq^i-<40<$42rD3-WPX2`sm!=Uk4Ori$I>=WbUn>8vUe0A zeO^BW_y2W!0rO6g`JIRd7J?R&jdT+#Aw376)sSRL3zo0+S4i={QL60#5yO}En)s%8 z)9tE6IN6ijliYLb-J7d!t&(duV*lSP*LG)R#P(f^eb>FQd(u5gvF}M4_wWwYbW+{| z@EowxWw*Xdwb-vPs1HO^M!g;^r?1dz)4)h-_Lo9jtnco9hM4f?eh_sax(Gw?BSF}* z=qSL`mPun6`$z7`i>tCMqjAzwuqzb#`=V^~lvT^j<(ZvuWiIDJ7ttyB&}wUY+L=p9 zR(knX;Frdp5zHWxX{RQLn(eTC6Z6b2&Q&H3Hgjx$%;%RsgN?dv(vkn2Ak0;C1)Hq9fj@F_Z*?qETu%$EkltyB7;^wBQol7@_W)r!r4U#iUuYk=pTS=8PV6? zTg===1o^E}NA?{l_MyNv(Vm$m{tJ+k5mgy)KR7yyg_LL4`pNGq1ml*h8iCOP8sio> z52)3DQW_mU7t$!w-$7$D&Vfz;mj0ZtNz$Tr_K0Fiv zfzuy4>mMHOhe6@VU~5T62xW{%U(DEY-I%G$(L()izsL9|5+R2&<+%j;&fz`?7T)%Z za57^HkM^@VNhI2lzM%Gr5L$;2OeZ+dACmqhu7n7+5u2tS&vS;kL_eNV2I>Bm9w50b z$ft+v4?MIWHk53sRxH(VWBkxpCSgMh40kp_kh2y+J5n`k5ygY7+OB~rS|?YoS1Q-b z&JBta(jkv&=T`*4IlCJY9g4g8fqV0Ud$a8BQrumU1LcXe(@NR1Uh%AtoJyC%hEXY9t(2~VH=1xaFyArX z_j%_%;j`}K8Ko2+aVBdk15sbPqFJd}pLVTOT$I>%C#bkOe{B*fyQ8*8wz8O4vDH4X z`4()xtj^fBMzO6)vCpGw$ii*boeV_Rq)V5@ZAz&xDyE$k*WQP9&VqAI%DE=(SpLAV zVZpIsUX&eOilZyVK52K=jXH!KSa7$c+-)q;`US`OWLS1|Dvr*SqccrKuUW9IVbxom zE^AQAI_8_^$39=Jlx;&L-8;TA8SOiNZ2*9vJqx~aqfTFyqH7*E7)(_Uj4caB3`bU` zl`biRpXRIahS>JVnn-ihlM}QFR}ah{0OPGV>qwsQ64ODyB#>Cc9`#)skJ%Qi-jvk~ zJzd2Ap$X=cmKn>1p|r($dHCWm>aRGOQ!QJRmV;jiO3M+&@d74;^(6tb0T4_(;Mb3G zOCF2-hAVOW)4lWNJ4fywx#zjlD|@#q-tD*{+jl5PXXI#LfOw+ZVZjNbQ+E&>53_3) z)ey#olZHaZ8Qbrl0_|)0CJkDKvw~5hFwz+J!x6xwb<6@O0{I5y+JsDt4hr|7-3s-n zncTm$cez6cu4x$EK^SRphp(AGd1@(|XP#b)K7W)65>8Xf4AUTrde3><6E@@j$tHkn zf*R_>GbNDASD{M&NAT&$hVdv@^@hBT6($ z{|dQeQDEpD|G8GXl%^{W3A7OSp8zuEL1}a(%#6jk$vYuU>E8o{>;Uy|Ww9hjk>$mW zkRBnKg{uAE;e@zMP2FN)R8w}gE6(=E20IyY0AP)|+%Ry7_bIMbk^P9>2cwo|7_>wV zrEMkAcNLqLgxfAy#+cjE7VG7~i-XbQ6dh_7CgU}Vr3U^|=~{Se%{9R)d`+S!;lH&n zRnr<7jJ^isoU&BI_WG$$@vwbkH*LD79#`b5_!JK3&#*9t_r6sn# zrJ7&7?foq>xs#3PNnAABO!Vv#+wj&t!<@)%dn>8ct<>u5w&&iGa&JKtWIqB4`Dae! zN5O}+u+;%A7a3WpyaDN3h+p6=(i~OqZwQdWE92q}4c~yJ|CX-Pp2eA={~l+p4!$$0 zwS6j1}-CP#3_%RZX{#rdqej&D)h`lBPEAmRCKmta_fW5_k|GDM(L1 zupwW1o3EG7m8OhUkDRW*xBJR&*|}VCE+-8xF#@Jm?7-r(2V&EL*o0EFb1XmG*x8Na zux{?V)tU4pJMLD_3wONoicQLjP3$I0wXZ>;_BEi(&8?uz&B$H_V1(S-!p$bA4*?>6 zX1m{X`J(j>;aumr6_W6|xBd1Gh-}QI*Or)bgl* zj4bWZb*qd^*)j1XjY@FAM_oEP9s%`Ye! zfhMZapW_6iKkMfk`)_ZCn?(1m?(4mCz36Vabgfdl_R}}-^xy4|A|efYs{qW{8{!Rf zW<)k9X_ZS_l@cHifeh>bE^vLf=#2JUI`TlQS`guwsQOsJ%__~>6?+Gble6Y20tIsb zAmnCd*2W#OSfz+nDfZDg5NcUlqS@GTF5`4PBF_X2ki?P{)<|)RwF5p~AbI8T$ur0T zeeq}7r5_bRfwE|<+GUNX$4qAbv^@39ZsFq7qY!5BN3}0H5VSuc*8=J{6UBASMkUaRl^gdCFYAXM;fr8 zZT4J*egsH9a-}3%g1B6~&MLfNhS#$1dhqs9_*pIkzp0UBfB%w%E=Bbo}yL3@Kh%{h(v+fwU!eLYB3N~To(`tzY}hO z+8cNfs_jW5t$42JW>HLxTAWs>(S(qmaVZ6|KoM=RVrHpD~FMgReWtl^SM#=I) zWs5IbwqE}eGU8kyp$N=%t@y8XaXw{{#k+-;!CK^yTfC0lDaY=sm2j2sL9n?~KVC7v zBHA$zNPoD6kA{??iNeSYSQGx=bk25pTFh*zVvVZY4d-D0Pu$9s5aZMjS>RbHW}tB* zv{?TZ+Q2vaU(w?Po}b%k$HdYTVQqg`Zi723-woz+?;r_50{=L zg1{wu2bTrV7O(=^12#Yhg1pc>xEz>hPDWknCL2u%wtA1TLdeL3{Tl}mL%3h93(j-Z zf#FHNy8ed{eiR0zhJJNz;GFVg2NQ7T%oOPvjByBGT=?r^5k8=W3qr@og~BBGh2hk9 z@D-x+vV(jXrxVpWsgLUOwTH%i6B2;P>+GKIyw`BA^4?fV+(`Z_@Y!(3!^*0S7~SNi zl-NXG?91RUTNlS4?{c+W$V2UoU*iYS{sAmE_&)33*s!F5Zx*+FQ|rs3#WJ9Zge9XUdB$IbX$M6yyiZ+6G*fh1djb_IB7 zj}N*;$jwh(JQWSkydU49ls3thX2sH+vNWe1B{OSdrxXWvO~uiW=#w2Si2Gqh+>gTi z9^1h9N4RaAZV?a(#umY2oKQqva(7SQy!iMSmp)N1gX7!-6H|5aW6LP;wIo6qT&{^2 zEZw#ML)Sb(MjK83*WzO9H(j&|VHnlYCPm71#yzw5X*HMx!=PQ1I42y+4z_2I{|Q(u z8Z|!d-E>CdD*YD%Y}~Wn$8^qgF#JC4E(LxH(!a&EiBx4DV5%J6KsS~pvVNl;Up)ucQut)z%_v&Rs~r*Ezk)YTTb zv&Wf=9gIuCJsMATO_LM5rpsHOA96wd zxm5J6QUc)U*&6;=xCqqy9enXc%cx)VR^bT#uSf?^L#>c~Jt^wGL4)=-Zeq}u*UueE ziprYp(Or@Kk8IU31XQ1ef~-EVe!dEtaj(*_jhqW%%84j}p4lxjw!)F*amck^lUH^> zs-eJz-b7c@b88EH<|6y?E~`V)AYvKw8OVP1$um`{82P3l4tlAsIWc?@zt8h|~0 z0!H;siGb3w^`1{@*&&NN6>(=O_u-wP?zwncMOb7-Uoi3%8BUJ(FV{|#QUmv8`QR-2bkxf|5ZnlKb4%Yd)niQ6SC91DC=xyjZ_ zhJ*hfO2kmIb{R8`IlHfJ#f8h5L}&2%)-7YLp5ZN;i^LXVE;h6%4Vz(~+;!j!kKA=6 zC9aAbqM5nsq1_cdHuE~YXXr*_JTQ0q`i8j;uoe;T9EO0pwhQ5)fBG#qEI>bK7 z2LJN#7-HAGJU|=qx2kY`ZF?jDWjnlX+Y)_q>txHClw}P`$a#w5^+FxZ5c&U7v|ea! zXbey^a|coz&B*6xME(r3TDu)<=r(NChIfV2D3%}C(lNsZjQLeaZld{|!uDLsViJP5 zR!$PYU&EPCr<=37+;R~%3nTgH{6-4aK+&Au$k<2F6mRVTuKGm9 zSHuzP`q;u&K?h=R>S_(s7)T-WeUfX%Al}TT>*pNqP`7GMUNLS1axp4cA%L6LK>*ZByot=-&aUUG>``|qZk=9^JctLse}31e9zr?l)C4jSi*Hi;rLz6gQ}f|FBX>vSvYkp98LgQSbr@h;Lan_ zL^j(VdslJRUtsSrr0AvoE6ven;Ihj@7l-JztYp0t8BM!O=v6?m9&%ZYdLS$?Q;<-W zIEn}8k71}@gtxtl%nRdoFix4+T+Sh|`Ao!?L}*{4sVPy-3j=!m4P7J9L$$t%lZg8F z*l0AZ$E3*t#FlbXY^f|LvzoR(HmIOnG{I_71uns~izM_}V6mD&2dSXkZh8^w0S&Cy zob&6|BDwPx1J(^(XsB-IsJ9dUCj38n72|c;eA%KG`WFh`bp$*~NOTwvFFG|}p%*dc zMDXOcY9uG%Y0KiazG2!evPV9wpq>p-Lyr30EAk~H7A1Gts%>fX^K!_zH2EAO|H!|YKF^>RsZTp z1hcJEtAJci6FbEYeHr=@FH~eRXK;G8C)L7UT#J#LPwhjjh-{fyA)nAHVaJsChxH+m z%KCo8t3P>9EmFs#o^qn6mYj3HPp|2FbkRPfmx(ShlG?E;Y!~ztZvOV+JQ1&x)&hHS z106yGWaq>}q#tAG7KV`j30JksgD`$itknAUXjEI-u8=RV?DEnO9_Eo)h?^tZ@F`i}sM7%8q=qQ*H*`Ycs> z9iGi8<7`_}wk`3VsE1ozLS3~I7YaU{@}?fl9fa2jCp_5*P#i#?dM|h6*<_AQpf2dZ zJ#6dK_+p+{cZ{t6nI@0h5JmV_{w$+#4fT3Lb@@7CK%lw4+2$MDmAdwY^7ibI9Mg>j zwSv_=KMy1iF7kbsnJ`k4EG_XY6}XP{xoHr{mZ>Z9zsm4Sl~l<;WbFR|)l)b2ds4PN zg=0_Gw4Xob*#9XyTB@dM(R|A3m!GB2=cqH55vazKt)dPKifDH=rzrDz3k8w~Ss23D?zI?A8P{(6xcmC1NEL>nd%pt<)8 zNdJytrA=|RrNp*JW^2@V>BNWc!+mAJT%R)6KP*`ue=Q+> zx?NejLoV5=l}ex!+7Ow!;FmgRxL#(cVct<+3b&r+D%4eX^ujI>(mP76!rkyazleYo%B=dj$;0|P0 zGO_+?!5m%~UDq=_E{#7miUXVeO{i~i6bGlqT~?~c>i9>rLbxjaOM6vJiUnY$-3hB{ zW72qQ%k}=be!04RUPy-IY6NnE8|Z%7zD=>iqBCB=Upd znqo}Yz^quf6LEhSb`Qmb$ZhIEBGiCmczABSz3Uq{`y*NOXw29tP$erYQ$h{~%wsv| zKaT*gM))fq)0F)d+j=u)g~dv%QOciFN$Yt@X`^_#{bKut^_SL1)~D^x2lmX3eig#4tY zAx+XVJ9yb4Ofs}YwKlp4x2w%sukgMS*+RsE=;1NZ#CH&(h>Mq0A1Fqi)E4(l?qyIv zrar0XeUhk5a0@bT^4NR>+}NAvTjpEtzOmrjnq8YZLXj2-Yf>*Y38vd*U|rB8lfFp> zzt}v|H4-@y(qSe#*$Kh1%|_Id1rXqIn>wi4Q~|sdO7~uCv_ni;#;vE+l+IGI*Y9!-hu1!Gh-SaeJ3NH z!U|&ntl%Z})?1(mIX1QBsv>nG?O#yYcd2YTxaY%Tqs`=@Nbp6V(qLZ}ElDlgTJVu= z+Z3o!8z15J%9M9c@kh4rRqT5sO%L4_YKw4kdw;SKfeM;Bm8MSolq0F{EEQe5MaaxQ4(y2VjawxIbsc1_v1}% zJly{-X)Ds_>Sa?`$DQ@k6eOR z7w?HU)9xB5x!;Jdo$t8ckXpSTAK86CaUX~{9*d#?A*>2qM!^T_GzV;Ux~qa}qi}=> z!aTHD1uulLIZ*BuAe&V|XN`a;7%J#)5S)(w4* zy7ue*jkXf1hYISgLMhcP3(7?*O$9EOfLC9sV7XD)%bKP_+e!h7LlrzPI0U%StH4_( zbU!wzpvgjQQ$d+sfE&FEs%-+IU8taPk03xGy*T`eUH`TA?jfUBsG@$&f=;8borWO` TY(}bz3T!0;-0q42TNnQy%dtv} From d92588a7a53d56d860f663786eb603c13e942607 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Sun, 24 Mar 2024 19:54:35 +0100 Subject: [PATCH 07/24] CV changes --- .gitignore | 3 - string_indexing/fm_index.py | 152 ++++++++++++----------------- test/test_exact_string_matching.py | 10 +- test/test_fm_index.py | 69 ------------- 4 files changed, 70 insertions(+), 164 deletions(-) delete mode 100644 test/test_fm_index.py diff --git a/.gitignore b/.gitignore index 793ce6e..bfc8703 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,2 @@ backup*/* benchar/cbenchar/build/* -.vscode -*/__pycache__ -*/*/__pycache__ diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 3086396..ce3c6ba 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -1,118 +1,88 @@ -class FMIndex: - - # all of strings beginns with # (idk why?) - # i sppose that patterns do not starts with # - +class _FMIndex: def __init__ (self, SA, BWT, text, n): self.L = BWT self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) self.n = n self.SA = SA - self.sampleSize = 8 # const for sampling + self.sample_size = 8 # const for sampling #prepare char mapping for F - self.mapperOfChar = { self.F[2] : 0} - self.begginings = [2] + self.mapper_of_chars = { self.F[2] : 0} + self.beginnings = [2] last = self.F[2] - lenOfBeginings = 1 for i in range(3, n+2): if self.F[i] != last: last = self.F[i] - self.begginings.append(i) - self.mapperOfChar[last] = lenOfBeginings - lenOfBeginings += 1 + self.beginnings.append(i) + self.mapper_of_chars[last] = len(self.beginnings) - 1 - self.lenOfAlphabet = len(self.mapperOfChar) + self.len_of_alphabet = len(self.mapper_of_chars) #prepare closest samplings - currentSample = 0 - self.closestSample = [0] + current_sample = 0 + self.closest_sample = [0] for i in range(1, n+2): - if abs(currentSample-i) > abs(currentSample + self.sampleSize-i) and (i + self.sampleSize < self.n): - currentSample += self.sampleSize - self.closestSample.append(currentSample) + if abs(current_sample-i) > abs(current_sample + self.sample_size-i) and (i + self.sample_size < self.n): + current_sample += self.sample_size + self.closest_sample.append(current_sample) #Generate values for occ for given samples O(|A|*n) - self.occInSampleForChar = { self.L[i]: [0] for i in range(1, n+2)} - for c in self.mapperOfChar: - currValue = 0 - nextSample = self.sampleSize + self.occ_in_sample_for_char = { self.L[i]: [0] for i in range(1, n+2)} + for c in self.mapper_of_chars: + current_value = 0 + next_sample = self.sample_size for i in range(1, n+2): if self.L[i] == c: - currValue += 1 - if i == nextSample: - self.occInSampleForChar[c].append(currValue) - nextSample = nextSample + self.sampleSize - - # should be private - def getRangeOfOccurence(self, p, size): - if size > self.n: - return [-1, -1] - - currChar = p[size-1] - if currChar not in self.mapperOfChar: - return [-1, -1] + current_value += 1 + if i == next_sample: + self.occ_in_sample_for_char[c].append(current_value) + next_sample = next_sample + self.sample_size + +def from_suffix_array_and_bwt (SA, BWT, text, n): + return _FMIndex(SA, BWT, text, n) - mapIdx = self.mapperOfChar[currChar] - l = self.begginings[mapIdx] - r = self.n + 1 - if mapIdx != self.lenOfAlphabet - 1: - r = self.begginings[mapIdx + 1] - 1 - - for i in range(size-2, -1, -1): - currChar = p[i] - if currChar not in self.mapperOfChar: - return [-1, -1] - occurencesBefore = self._getOcc(currChar, l - 1) - occurencesAfter = self._getOcc(currChar, r) - if occurencesBefore == occurencesAfter: - return [-1, -1] - mapIdx = self.mapperOfChar[currChar] - l = self.begginings[mapIdx] + occurencesBefore - r = self.begginings[mapIdx] + occurencesAfter - 1 - if r < l: - return [-1, -1] - return [l, r] +# O(|p|) +def count(fm, p, size): + (low, high) = _get_range_of_occurrences(fm, p, size) + return max(high - low + 1, 0) if low > -1 else 0 - # O(|p|) - def count(self, p, size): - ran = self.getRangeOfOccurence(p, size) - if ran[0] == -1: - return 0 - return max(ran[1] - ran[0] + 1, 0) +# O(|p| + k) where k is the number or occurances of p in text +def contains(fm, p, l): + (low, high) = _get_range_of_occurrences(fm, p, l) + yield from sorted([fm.SA[i-1] for i in range(low, high + 1) if low > -1]) - #Should be private - def _getOcc(self, c, i): - closestSample = self.closestSample[i] - toAdd = 0 - if closestSample < i: - for j in range(closestSample + 1, i + 1): - if self.L[j] == c: - toAdd += 1 - elif closestSample > i: - for j in range(i+1, closestSample + 1): - if self.L[j] == c: - toAdd -= 1 - - return self.occInSampleForChar[c][(closestSample)//self.sampleSize] + toAdd - - #O(|p|) - def query(self, p, l): - return self.count(p, l) > 0 +def _get_occ(fm, c, i): + if fm.closest_sample[i] < i: + to_add = sum(1 for c_prim in fm.L[fm.closest_sample[i] + 1:i + 1] if c_prim == c) + else: + to_add = sum(-1 for c_prim in fm.L[i + 1:fm.closest_sample[i] + 1] if c_prim == c) + return fm.occ_in_sample_for_char[c][fm.closest_sample[i] // fm.sample_size] + to_add - # O(|p| + k) where k is the number or occurances of p in text - def get_all_occurrance(self, p, l): - arr = self.getRangeOfOccurence(p, l) - if arr[0] == -1: - return [] - return [self.SA[i-1] for i in range(arr[0], arr[1] + 1)] +def _get_range_of_occurrences(fm, p, size): + if size > fm.n or size == 0: + return (-1, -1) + + if p[-1] not in fm.mapper_of_chars: + return (-1, -1) - # O(|p|) - def get_any_occurrance(self, p, l): - arr = self.getRangeOfOccurence(p, l) - if arr[0] == -1: - return -1 - return self.SA[arr[0]-1] + map_idx = fm.mapper_of_chars[p[-1]] + l = fm.beginnings[map_idx] + r = fm.n + 1 + if map_idx != fm.len_of_alphabet - 1: + r = fm.beginnings[map_idx + 1] - 1 + for i in range(size-1, 0, -1): + if p[i] not in fm.mapper_of_chars: + return (-1, -1) + occurencesBefore = _get_occ(fm, p[i], l - 1) + occurencesAfter = _get_occ(fm, p[i], r) + if occurencesBefore == occurencesAfter: + return (-1, -1) + map_idx = fm.mapper_of_chars[p[i]] + l = fm.beginnings[map_idx] + occurencesBefore + r = fm.beginnings[map_idx] + occurencesAfter - 1 + if r < l: + return (-1, -1) + return (l, r) \ No newline at end of file diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index b92cef8..a509e5f 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -6,13 +6,20 @@ from generator import rand from exact_string_matching import forward, backward, other -from string_indexing import lcp, suffix_tree, suffix_array +from string_indexing import lcp, suffix_tree, suffix_array, fm_index +from compression import burrows_wheeler def lcp_lr_contains(t, w, n, m): SA = suffix_array.skew(t, n) LCP_LR = lcp.build_lcp_lr(lcp.kasai(SA, t, n), n) return lcp.contains(SA, LCP_LR, t, w, n, m) +def fm_index_contains(t, w, n, m): + SA = suffix_array.skew(t, n) + BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) + fm = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n) + return fm_index.contains(fm, w, m) + EXACT_STRING_MATCHING_ALGORITHMS = [ [ 'Morris-Pratt', forward.morris_pratt ], [ 'Knuth-Morris-Pratt', forward.knuth_morris_pratt ], @@ -45,6 +52,7 @@ def lcp_lr_contains(t, w, n, m): suffix_array.prefix_doubling(t, n), t, w, n, m), ], [ 'lcp-lr array', lcp_lr_contains ], + [ 'Fm index', fm_index_contains] ] class TestExactStringMatching(unittest.TestCase): diff --git a/test/test_fm_index.py b/test/test_fm_index.py deleted file mode 100644 index c04f4f2..0000000 --- a/test/test_fm_index.py +++ /dev/null @@ -1,69 +0,0 @@ -import itertools -import os -import unittest - -from compression import burrows_wheeler -from string_indexing import suffix_array -from string_indexing import fm_index -from generator import rand - -class TestFMIndex(unittest.TestCase): - run_large = unittest.skipUnless( - os.environ.get('LARGE', False), 'Skip test in small runs') - - def get_all_occurences_of_pattern_naive(self, text, n, pattern, l): - result = [] - for i in range(1, n-l + 2): - occurs = True - for j in range(0, l): - if text[i+j] != pattern[j]: - occurs = False - break - if occurs: - result.append(i) - return result - - - def check_fm_api_for_pattern(self, FMIndex, all_occurences_of_pattern, pattern, l): - cnt = FMIndex.count(pattern, l) - occurance = FMIndex.get_all_occurrance(pattern, l) - any_occurance = FMIndex.get_any_occurrance(pattern, l) - exists = FMIndex.query(pattern, l) - self.assertEqual(cnt, len(all_occurences_of_pattern)) - self.assertEqual(sorted(occurance), sorted(all_occurences_of_pattern)) - self.assertTrue((any_occurance in all_occurences_of_pattern) or (any_occurance == -1 and len(all_occurences_of_pattern) == 0)) - self.assertTrue(exists == (len(all_occurences_of_pattern) > 0)) - - - def check_patterns_for_text_naive(self, text, n, patterns): - SA = suffix_array.naive(text, n) - BWT = burrows_wheeler.transform_from_suffix_array(SA, text, n) - FMIndex = fm_index.FMIndex(SA, BWT, text, n) - for pattern in patterns: - l = len(pattern) - pattern_occurances = self.get_all_occurences_of_pattern_naive(text, n, pattern, l) - self.check_fm_api_for_pattern(FMIndex, pattern_occurances, pattern, l) - - - api_naive_test_cases = [ - ['#ababa', ['a', 'a', 'aba', 'aa', 'ba', 'ab', 'bb', 'c', 'abc', 'ababa', 'ababaa']], - ['#aaababcaaabba', ['a', 'b', 'c', 'aab', 'aabb', 'aaababcaaabba']], - ['#aaabaababaababaababaaababaaabaabaaa', ['a', 'ab', 'aab', 'aaab', 'aaaab', 'aba', 'abaa', - 'abaaa', 'aaba', 'aabaa', 'aabaaa', 'aaaba', 'aaabaa']] - ] - - def test_fm_api_naive(self): - for test_case in self.api_naive_test_cases: - n = len(test_case[0]) - 1 - self.check_patterns_for_text_naive(test_case[0], n, test_case[1]) - - - @run_large - def test_large_random(self): - n = 10000 - text = '#' + rand.random_word(n, ['a', 'b']) - q = 1000 - patterns = [rand.random_word(100, ['a', 'b']) for i in range(q)] - self.check_patterns_for_text_naive(text, n, patterns) - - From acb7f7f6041950c736c6a41c60b066aac7d01847 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Sun, 24 Mar 2024 20:03:36 +0100 Subject: [PATCH 08/24] Small naming changes --- string_indexing/fm_index.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index ce3c6ba..b85f867 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -76,13 +76,13 @@ def _get_range_of_occurrences(fm, p, size): for i in range(size-1, 0, -1): if p[i] not in fm.mapper_of_chars: return (-1, -1) - occurencesBefore = _get_occ(fm, p[i], l - 1) - occurencesAfter = _get_occ(fm, p[i], r) - if occurencesBefore == occurencesAfter: + occurrences_before = _get_occ(fm, p[i], l - 1) + occurrences_after = _get_occ(fm, p[i], r) + if occurrences_before == occurrences_after: return (-1, -1) map_idx = fm.mapper_of_chars[p[i]] - l = fm.beginnings[map_idx] + occurencesBefore - r = fm.beginnings[map_idx] + occurencesAfter - 1 + l = fm.beginnings[map_idx] + occurrences_before + r = fm.beginnings[map_idx] + occurrences_after - 1 if r < l: return (-1, -1) return (l, r) \ No newline at end of file From af7a06f95537fdc6f9a5b762ee84fdbc0878bab1 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Sun, 24 Mar 2024 20:40:01 +0100 Subject: [PATCH 09/24] whitespace fix --- string_indexing/fm_index.py | 138 ++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index b85f867..60921d3 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -1,88 +1,88 @@ class _FMIndex: - def __init__ (self, SA, BWT, text, n): - self.L = BWT - self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) - self.n = n - self.SA = SA - self.sample_size = 8 # const for sampling + def __init__ (self, SA, BWT, text, n): + self.L = BWT + self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) + self.n = n + self.SA = SA + self.sample_size = 8 # const for sampling - #prepare char mapping for F - self.mapper_of_chars = { self.F[2] : 0} - self.beginnings = [2] - last = self.F[2] - for i in range(3, n+2): - if self.F[i] != last: - last = self.F[i] - self.beginnings.append(i) - self.mapper_of_chars[last] = len(self.beginnings) - 1 + #prepare char mapping for F + self.mapper_of_chars = { self.F[2] : 0} + self.beginnings = [2] + last = self.F[2] + for i in range(3, n+2): + if self.F[i] != last: + last = self.F[i] + self.beginnings.append(i) + self.mapper_of_chars[last] = len(self.beginnings) - 1 - self.len_of_alphabet = len(self.mapper_of_chars) - - #prepare closest samplings - current_sample = 0 - self.closest_sample = [0] + self.len_of_alphabet = len(self.mapper_of_chars) + + #prepare closest samplings + current_sample = 0 + self.closest_sample = [0] + for i in range(1, n+2): + if abs(current_sample-i) > abs(current_sample + self.sample_size-i) and (i + self.sample_size < self.n): + current_sample += self.sample_size + self.closest_sample.append(current_sample) + + #Generate values for occ for given samples O(|A|*n) + self.occ_in_sample_for_char = { self.L[i]: [0] for i in range(1, n+2)} + for c in self.mapper_of_chars: + current_value = 0 + next_sample = self.sample_size for i in range(1, n+2): - if abs(current_sample-i) > abs(current_sample + self.sample_size-i) and (i + self.sample_size < self.n): - current_sample += self.sample_size - self.closest_sample.append(current_sample) + if self.L[i] == c: + current_value += 1 + if i == next_sample: + self.occ_in_sample_for_char[c].append(current_value) + next_sample = next_sample + self.sample_size - #Generate values for occ for given samples O(|A|*n) - self.occ_in_sample_for_char = { self.L[i]: [0] for i in range(1, n+2)} - for c in self.mapper_of_chars: - current_value = 0 - next_sample = self.sample_size - for i in range(1, n+2): - if self.L[i] == c: - current_value += 1 - if i == next_sample: - self.occ_in_sample_for_char[c].append(current_value) - next_sample = next_sample + self.sample_size - def from_suffix_array_and_bwt (SA, BWT, text, n): - return _FMIndex(SA, BWT, text, n) + return _FMIndex(SA, BWT, text, n) # O(|p|) def count(fm, p, size): - (low, high) = _get_range_of_occurrences(fm, p, size) - return max(high - low + 1, 0) if low > -1 else 0 + (low, high) = _get_range_of_occurrences(fm, p, size) + return max(high - low + 1, 0) if low > -1 else 0 # O(|p| + k) where k is the number or occurances of p in text def contains(fm, p, l): - (low, high) = _get_range_of_occurrences(fm, p, l) - yield from sorted([fm.SA[i-1] for i in range(low, high + 1) if low > -1]) + (low, high) = _get_range_of_occurrences(fm, p, l) + yield from sorted([fm.SA[i-1] for i in range(low, high + 1) if low > -1]) def _get_occ(fm, c, i): - if fm.closest_sample[i] < i: - to_add = sum(1 for c_prim in fm.L[fm.closest_sample[i] + 1:i + 1] if c_prim == c) - else: - to_add = sum(-1 for c_prim in fm.L[i + 1:fm.closest_sample[i] + 1] if c_prim == c) - return fm.occ_in_sample_for_char[c][fm.closest_sample[i] // fm.sample_size] + to_add - + if fm.closest_sample[i] < i: + to_add = sum(1 for c_prim in fm.L[fm.closest_sample[i] + 1:i + 1] if c_prim == c) + else: + to_add = sum(-1 for c_prim in fm.L[i + 1:fm.closest_sample[i] + 1] if c_prim == c) + return fm.occ_in_sample_for_char[c][fm.closest_sample[i] // fm.sample_size] + to_add + def _get_range_of_occurrences(fm, p, size): - if size > fm.n or size == 0: - return (-1, -1) + if size > fm.n or size == 0: + return (-1, -1) - if p[-1] not in fm.mapper_of_chars: - return (-1, -1) + if p[-1] not in fm.mapper_of_chars: + return (-1, -1) - map_idx = fm.mapper_of_chars[p[-1]] - l = fm.beginnings[map_idx] - r = fm.n + 1 - if map_idx != fm.len_of_alphabet - 1: - r = fm.beginnings[map_idx + 1] - 1 - - for i in range(size-1, 0, -1): - if p[i] not in fm.mapper_of_chars: - return (-1, -1) - occurrences_before = _get_occ(fm, p[i], l - 1) - occurrences_after = _get_occ(fm, p[i], r) - if occurrences_before == occurrences_after: - return (-1, -1) - map_idx = fm.mapper_of_chars[p[i]] - l = fm.beginnings[map_idx] + occurrences_before - r = fm.beginnings[map_idx] + occurrences_after - 1 - if r < l: - return (-1, -1) - return (l, r) \ No newline at end of file + map_idx = fm.mapper_of_chars[p[-1]] + l = fm.beginnings[map_idx] + r = fm.n + 1 + if map_idx != fm.len_of_alphabet - 1: + r = fm.beginnings[map_idx + 1] - 1 + + for i in range(size-1, 0, -1): + if p[i] not in fm.mapper_of_chars: + return (-1, -1) + occurrences_before = _get_occ(fm, p[i], l - 1) + occurrences_after = _get_occ(fm, p[i], r) + if occurrences_before == occurrences_after: + return (-1, -1) + map_idx = fm.mapper_of_chars[p[i]] + l = fm.beginnings[map_idx] + occurrences_before + r = fm.beginnings[map_idx] + occurrences_after - 1 + if r < l: + return (-1, -1) + return (l, r) From f0a12561018b0d65c1bcafaf35aa55c312c8d3ac Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 7 Jun 2024 17:59:19 +0200 Subject: [PATCH 10/24] Added wavelet_tree.py Added lz_index.py Added test/test_wavelet_tree.py Tested FmIndex and Wavelet tree LzIndex has to be debugged yet --- common/wavelet_tree.py | 106 +++++++++++ string_indexing/fm_index.py | 74 ++++---- string_indexing/lz_index.py | 282 +++++++++++++++++++++++++++++ test/test_exact_string_matching.py | 11 +- test/test_wavelet_tree.py | 258 ++++++++++++++++++++++++++ 5 files changed, 696 insertions(+), 35 deletions(-) create mode 100644 common/wavelet_tree.py create mode 100644 string_indexing/lz_index.py create mode 100644 test/test_wavelet_tree.py diff --git a/common/wavelet_tree.py b/common/wavelet_tree.py new file mode 100644 index 0000000..9eceebb --- /dev/null +++ b/common/wavelet_tree.py @@ -0,0 +1,106 @@ + +class wavelet_tree: + def __init__(self, t, n, sorted_alphabet_list = None): + self.t = t + t = t[1:] + if sorted_alphabet_list is not None: + self.alphabet = sorted_alphabet_list + else: + self.alphabet = set(t) + sorted_alphabet_list = sorted(list(self.alphabet)) + self.n = n + self.smallest = sorted_alphabet_list[0] + self.biggest = sorted_alphabet_list[-1] + if len(sorted_alphabet_list) == 1: + self.leaf = True + return + self.leaf = False + left_alphabet = sorted_alphabet_list[:(len(sorted_alphabet_list) + 1)//2] + right_alphabet = sorted_alphabet_list[(len(sorted_alphabet_list) + 1)//2:] + self.zero_indexed = set(left_alphabet) + self.one_indexed = set(right_alphabet) + value_arr = [1 if c in self.one_indexed else 0 for c in t ] + self.prefix_sum = [0] + for i in range(n): + self.prefix_sum.append(self.prefix_sum[i] + value_arr[i]) + self.left_indexes = [0] + self.rigth_indexes = [0] + for i in range(n): + self.left_indexes.append(i+1) if t[i] in self.zero_indexed else self.rigth_indexes.append(i+1) + left_node_text = '#' + ''.join(c for c in t if c in self.zero_indexed) + rigth_node_text = '#' + ''.join(c for c in t if c in self.one_indexed) + self.left = wavelet_tree(left_node_text, len(left_node_text) - 1, left_alphabet) + self.right = wavelet_tree(rigth_node_text, len(rigth_node_text) - 1, right_alphabet) + + def _left_tree_range(self, l, r): + return l - self.prefix_sum[l-1], r - self.prefix_sum[r] + + def _right_tree_range(self, l, r): + return self.prefix_sum[l] + (1 if self.t[l] in self.zero_indexed else 0), self.prefix_sum[r] + + def rank(self, c, l, r): + if c not in self.alphabet or l > r or l > self.n or r < 1: + return 0 + if self.leaf: + return r-l+1 + if c in self.zero_indexed: + new_l, new_r = self._left_tree_range(l, r) + return self.left.rank(c, new_l, new_r) + new_l, new_r = self._right_tree_range(l, r) + return self.right.rank(c, new_l, new_r) + + def preifx_rank(self, c, r): + return self.rank(c, 1, r) + + def select(self, c, k, l, r): + if c not in self.alphabet or l > r or l > self.n or r < 1 : + return None + if self.leaf: + return k+l-1 if k <= r-l+1 else None + if c in self.zero_indexed: + new_l, new_r = self._left_tree_range(l, r) + recursion_result = self.left.select(c, k, new_l, new_r) + return self.left_indexes[recursion_result] if recursion_result is not None else None + new_l, new_r = self._right_tree_range(l, r) + recursion_result = self.right.select(c, k, new_l, new_r) + return self.rigth_indexes[recursion_result] if recursion_result is not None else None + + def quantile(self, k, l, r): + if k < 1 or k > r-l+1: + return None + if self.leaf: + return self.smallest if k <= self.n else None + left_num = self.prefix_sum[r] - self.prefix_sum[l-1] + if r-l+1-left_num >= k: + new_l, new_r = self._left_tree_range(l, r) + return self.left.quantile(k, new_l, new_r) + new_l, new_r = self._right_tree_range(l, r) + return self.right.quantile(k-r+l-1+left_num, new_l, new_r) + + def _does_one_range_end_in_another(self, l, r, i, j): + return (i <= l and j >= l) or (i <= r and j >= r) + + def _ranges_intersect(self, l, r, i, j): + return self._does_one_range_end_in_another(l, r, i ,j) or \ + self._does_one_range_end_in_another(i, j, l, r) + + def range_count(self, l, r, x, y): + if l > r or l > self.n or l < 1 or x > y: + return 0 + if y < self.smallest or x > self.biggest: + return 0 + if x <= self.smallest and self.biggest <= y: + return r-l+1 + if self.leaf: + return 0 + if self._ranges_intersect(self.left.smallest, self.left.biggest, x, y) and \ + self._ranges_intersect(self.right.smallest, self.right.biggest, x, y): + new_left_l, new_left_r = self._left_tree_range(l, r) + new_right_l, new_right_r = self._right_tree_range(l, r) + return self.left.range_count(new_left_l, new_left_r, x, y) \ + + self.right.range_count(new_right_l, new_right_r, x, y) + if self._ranges_intersect(self.right.smallest, self.right.biggest, x, y): + new_l, new_r = self._right_tree_range(l, r) + return self.right.range_count(new_l, new_r, x, y) + new_l, new_r = self._left_tree_range(l, r) + return self.left.range_count(new_l, new_r, x, y) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 60921d3..b47ecce 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -1,46 +1,60 @@ -class _FMIndex: - def __init__ (self, SA, BWT, text, n): - self.L = BWT - self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) - self.n = n - self.SA = SA +class _RankSearcher: + def __init__(self, L, mapper_of_chars, n): + self.sample_size = 8 # const for sampling - - #prepare char mapping for F - self.mapper_of_chars = { self.F[2] : 0} - self.beginnings = [2] - last = self.F[2] - for i in range(3, n+2): - if self.F[i] != last: - last = self.F[i] - self.beginnings.append(i) - self.mapper_of_chars[last] = len(self.beginnings) - 1 - - self.len_of_alphabet = len(self.mapper_of_chars) - + self.L = L + #prepare closest samplings current_sample = 0 self.closest_sample = [0] for i in range(1, n+2): - if abs(current_sample-i) > abs(current_sample + self.sample_size-i) and (i + self.sample_size < self.n): + if abs(current_sample-i) > abs(current_sample + self.sample_size-i) and (i + self.sample_size < n): current_sample += self.sample_size self.closest_sample.append(current_sample) #Generate values for occ for given samples O(|A|*n) self.occ_in_sample_for_char = { self.L[i]: [0] for i in range(1, n+2)} - for c in self.mapper_of_chars: + for c in mapper_of_chars: current_value = 0 next_sample = self.sample_size for i in range(1, n+2): - if self.L[i] == c: + if L[i] == c: current_value += 1 if i == next_sample: self.occ_in_sample_for_char[c].append(current_value) next_sample = next_sample + self.sample_size -def from_suffix_array_and_bwt (SA, BWT, text, n): - return _FMIndex(SA, BWT, text, n) + def prefix_rank(self, c, i): + if self.closest_sample[i] < i: + to_add = sum(1 for c_prim in self.L[self.closest_sample[i] + 1:i + 1] if c_prim == c) + else: + to_add = sum(-1 for c_prim in self.L[i + 1:self.closest_sample[i] + 1] if c_prim == c) + return self.occ_in_sample_for_char[c][self.closest_sample[i] // self.sample_size] + to_add + + +class _FMIndex: + def __init__ (self, SA, BWT, text, n, rank_searcher = None): + self.L = BWT + self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) + self.n = n + self.SA = SA + + #prepare char mapping for F + self.mapper_of_chars = { self.F[2] : 0} + self.beginnings = [2] + last = self.F[2] + for i in range(3, n+2): + if self.F[i] != last: + last = self.F[i] + self.beginnings.append(i) + self.mapper_of_chars[last] = len(self.beginnings) - 1 + + self.len_of_alphabet = len(self.mapper_of_chars) + self.rank_searcher = _RankSearcher(self.L, self.mapper_of_chars, n) if rank_searcher is None else rank_searcher + +def from_suffix_array_and_bwt(SA, BWT, text, n, rank_searcher = None): + return _FMIndex(SA, BWT, text, n, rank_searcher) # O(|p|) def count(fm, p, size): @@ -52,14 +66,6 @@ def contains(fm, p, l): (low, high) = _get_range_of_occurrences(fm, p, l) yield from sorted([fm.SA[i-1] for i in range(low, high + 1) if low > -1]) - -def _get_occ(fm, c, i): - if fm.closest_sample[i] < i: - to_add = sum(1 for c_prim in fm.L[fm.closest_sample[i] + 1:i + 1] if c_prim == c) - else: - to_add = sum(-1 for c_prim in fm.L[i + 1:fm.closest_sample[i] + 1] if c_prim == c) - return fm.occ_in_sample_for_char[c][fm.closest_sample[i] // fm.sample_size] + to_add - def _get_range_of_occurrences(fm, p, size): if size > fm.n or size == 0: return (-1, -1) @@ -76,8 +82,8 @@ def _get_range_of_occurrences(fm, p, size): for i in range(size-1, 0, -1): if p[i] not in fm.mapper_of_chars: return (-1, -1) - occurrences_before = _get_occ(fm, p[i], l - 1) - occurrences_after = _get_occ(fm, p[i], r) + occurrences_before = fm.rank_searcher.prefix_rank(p[i], l - 1) + occurrences_after = fm.rank_searcher.prefix_rank(p[i], r) if occurrences_before == occurrences_after: return (-1, -1) map_idx = fm.mapper_of_chars[p[i]] diff --git a/string_indexing/lz_index.py b/string_indexing/lz_index.py new file mode 100644 index 0000000..1128c7d --- /dev/null +++ b/string_indexing/lz_index.py @@ -0,0 +1,282 @@ +#also a bug here +"""def naive_lz78_compress(text): + text = text[1:] + text = text + '$' #for last block uniqness + dictionary = {} + compressed = [] + phrase = '' + + for char in text: + next_phrase = phrase + char + exists = dictionary.get(next_phrase) + if exists is None: + if phrase == '': + compressed.append((0, next_phrase[-1])) + dictionary[next_phrase] = 1 + phrase = '' + else: + index = dictionary.get(phrase) + compressed.append((index, next_phrase[-1])) + dictionary[next_phrase] = index + 1 + phrase = '' + else: + phrase = next_phrase + + return compressed""" + +class _LZTreeNode: + def __init__(self, parent, character, id, position): + self.parent = parent + self.position = position + if parent is not None: + parent.children[character] = self + self.depth = parent.depth + 1 + else: + self.depth = 0 + self.id = id + self.left_rank = self + self.right_rank = self + self.children = {} + self.character = character + + def set_ranks(self, rank): + if self.id is not None: + self.rank = rank + rank = rank + 1 + else: + self.rank = None + if len(self.children) > 0: + for child_key in sorted(self.children): + rank = self.children[child_key].set_ranks(rank) + min_key = min(self.children) + max_key = max(self.children) + self.left_rank = self.children[min_key].left_rank \ + if self.rank is None or self.children[min_key].left_rank.rank < self.rank \ + else self + self.right_rank = self.children[max_key].right_rank \ + if self.rank is None or self.children[max_key].right_rank.rank < self.rank \ + else self + return rank + + #left rank seems to be useless + def print_node(self) -> None: + if self.parent is not None: + print("(",self.id, ", ", self.parent.id, ", ", self.character, ", ", self.rank, ", ", self.left_rank.rank, ", ", self.right_rank.rank, "), ") + for child in self.children.values(): + child.print_node() + + def get_left_rank(self) -> int: + return self.left_rank.rank + + def get_right_rank(self) -> int: + return self.right_rank.rank + + def get_id(self) -> int: + return self.id + + def get_children(self) -> dict: + return self.children + + + +class LZTrieBase: + + def __init__(self) -> None: + pass + + #normal string with hash at the begining + def search(self, t, n): + return self._search_internal(t, n, self.root) + + def _search_internal(self, t, idx, node : _LZTreeNode): + if idx == 0: + return node + if t[idx] not in node.children: + return None + return self._search_internal(t, idx - 1, node.children[t[idx]]) + + def get_size(self) -> int: + return self.size + + def debug(self) -> None: + self.root.print_node() + + + +class LZTrie(LZTrieBase): + def __init__(self, t : str, n : int): + t += '$' #guaranting unique last node + self.root = _LZTreeNode(None, '#', 0, None) + current_node = self.root + id = 1 + for i in range(1, n+1): + current_char = t[i] + if current_char not in current_node.children: + _LZTreeNode(current_node, current_char, id, i) + id += 1 + current_node = self.root + else: + current_node = current_node.children[current_char] + self.size = id + self.root.set_ranks(0) + + +class NodeMapper: + def __init__(self, lz_trie, size): + self.arr = [None] * size + self._map_tree_to_list(lz_trie.root) + + def _map_tree_to_list(self, node) -> None: + if node.id is not None: + self.arr[node.id] = node + for child in node.children.values(): + self._map_tree_to_list(child) + + def get_node_by_id(self, id : int) -> _LZTreeNode: + return self.arr[id] + +class RangeSearcher: + def __init__(self, points): + self.points = points + + def search_in_range(self, l1, r1, l2, r2): + result = [] + for (x, y) in self.points: + if l1 <= x and x <= l2 and r1 <= y and y <= r2: + result.append((x, y)) + return result + + +class RevLZTrie(LZTrieBase): + + def __init__(self, lz_trie : LZTrie): + self.root = _LZTreeNode(None, '#', 0) + self._add_recursive(lz_trie.root) + self.root.set_ranks(0) + + def _add_recursive(self, node): + for child in node.get_children().values(): + self._add_recursive(child) + self._add_block(child, self.root, child.id) + + def _add_block(self, lz_node : _LZTreeNode, rev_node : _LZTreeNode, id : int) -> None: + if lz_node.parent is None or lz_node.parent.character is '#': + if lz_node.character in rev_node.get_children(): + rev_node.children[lz_node.character].id = id + else: + rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, id, None) + else: + if lz_node.character not in rev_node.get_children(): + rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, None, None) + self._add_block(lz_node.parent, rev_node.children[lz_node.character], id) + + + +class LZIndex: + def __init__(self, lz_trie : LZTrie, rev_lz_trie : RevLZTrie, node_mapper : NodeMapper, range_searcher : RangeSearcher): + self.lz_trie = lz_trie + self.rev_lz_trie = rev_lz_trie + self.node_mapper = node_mapper + self.range_searcher = range_searcher + +def create_lz_index(t, n): + lz_trie = LZTrie(t, n) + rev_trie = RevLZTrie(lz_trie) + rev_trie.debug() + lz_node_mapper = NodeMapper(lz_trie, lz_trie.get_size()) + rev_node_mapper = NodeMapper(rev_trie, lz_trie.get_size()) + + for node in lz_node_mapper.arr: + if node.parent is not None: + print((node.parent.id), node.character) + points = [] + for i in range(1, lz_trie.get_size() - 1): + points.append((rev_node_mapper.get_node_by_id(i).rank, lz_node_mapper.get_node_by_id(i+1).rank)) + range_searcher = RangeSearcher(points) + return LZIndex(lz_trie, rev_trie, rev_node_mapper, range_searcher) + +def lz_index_search(lz_index : LZIndex, s, m): + + # single block case + v = '#' + (s[::-1])[:-1] + root = lz_index.rev_lz_trie.search(v, m) + result = [] + for i in range(root.get_left_rank(), root.get_right_rank() + 1): + node = lz_index.node_mapper.get_node_by_id(i) + result.append((node.id, m + root.depth - node.depth)) + print("case 1", (node.id, m + root.depth - node.depth)) + + # two block case + for i in range(1, m): + rev_prefix = '#' + (s[::-1])[m-i:m] + sufix = '#' + s[i+1:] + rev_node = lz_index.rev_lz_trie.search(rev_prefix, i) + node = lz_index.lz_trie.search(sufix, m-i) + for (x, y) in lz_index.range_searcher.search_in_range(rev_node.get_left_rank(), rev_node.get_right_rank(), node.get_left_rank(), node.get_right_rank()): + result.append((x, i)) + print("case 2", (x, i)) + + # other case + used = [[False]*(m+1)]*(m+1) + existance = [[None]*(m+1)]*(m+1) + arr = [{}] + for i in range(1, m+1): + recorded = {} + current_node = lz_index.lz_trie.root + for j in range(i, m+1): + if current_node is not None and s[i] not in current_node.children: + current_node = None + elif current_node is not None: + current_node = current_node.children[s[j]] + existance[i][j] = current_node + if current_node is not None: + recorded[current_node.id] = j + arr.append(recorded) + + for i in range(1, m+1): + for j in range(i, m+1): + if existance[i][j] is None or used[i][j] == True: + continue + start_id = existance[i][j].id + current_id = start_id - 1 + current_end = j - 1 + while current_end < m and (current_id + 1) in arr[current_end+1]: + current_id = current_id + 1 + used[current_end + 1][arr[current_end + 1][current_id]] = True + current_end = arr[current_end + 1][current_id] + size = current_id - start_id + 1 + if i > 1: + size = size + 1 + if current_end < m: + size = size + 1 + if size < 3 or (current_end != m and existance[current_end+1][m] is None): + continue + if current_end == m or (existance[current_end+1][m].get_left_rank() <= lz_index.node_mapper.get_node_by_id(current_id + 1) and lz_index.node_mapper.get_node_by_id(current_id + 1) <= existance[current_end+1][m].get_left_rank()): + if start_id == 1 and i == 1: + result.append(start_id, i) + continue + elif start_id == 1: + continue + current_node = lz_index.node_mapper.get_node_by_id(start_id - 1) + prev = i - 1 + while prev > 0 and current_node.parent != None and s[prev] in current_node.parent.children and current_node.parent.children[s[prev]] == current_node: + prev = prev - 1 + current_node = current_node.parent + if prev == 0: + result.append(start_id - 1, i - 1) + return result + + + +t = '#aabbababa' +#LZIndex(t) #blocks are not unique in default LZ78 compress (I think it's a bug) +#LZIndex('#aabbababa') #bugged string +#trie = LZTrie(t, len(t)-1) +#trie.debug() + +lz_index = create_lz_index(t, len(t) - 1) + +print(lz_index_search(lz_index, '#aba', 3)) + + + diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index a509e5f..69bd30c 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -8,12 +8,20 @@ from exact_string_matching import forward, backward, other from string_indexing import lcp, suffix_tree, suffix_array, fm_index from compression import burrows_wheeler +from common import wavelet_tree def lcp_lr_contains(t, w, n, m): SA = suffix_array.skew(t, n) LCP_LR = lcp.build_lcp_lr(lcp.kasai(SA, t, n), n) return lcp.contains(SA, LCP_LR, t, w, n, m) +def fm_index_wavelet_contains(t, w, n, m): + SA = suffix_array.skew(t, n) + BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) + rank_searcher = wavelet_tree.wavelet_tree(t, n) + fm = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n, rank_searcher) + return fm_index.contains(fm, w, m) + def fm_index_contains(t, w, n, m): SA = suffix_array.skew(t, n) BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) @@ -52,7 +60,8 @@ def fm_index_contains(t, w, n, m): suffix_array.prefix_doubling(t, n), t, w, n, m), ], [ 'lcp-lr array', lcp_lr_contains ], - [ 'Fm index', fm_index_contains] + [ 'fm index', fm_index_contains], + [ 'fm index with wavelet tree', fm_index_contains] ] class TestExactStringMatching(unittest.TestCase): diff --git a/test/test_wavelet_tree.py b/test/test_wavelet_tree.py new file mode 100644 index 0000000..3c46d4f --- /dev/null +++ b/test/test_wavelet_tree.py @@ -0,0 +1,258 @@ +import os +import unittest + +from common import wavelet_tree +from generator import rand + +class dummy_solver: + def __init__(self, t, n): + self.t = t + self.n = n + + def rank(self, c, l ,r): + if l > r or l > self.n or l < 1: + return 0 + return sum([1 if self.t[i] == c else 0 for i in range(l, r+1)]) + + def preifx_rank(self, c, r): + return self.rank(c, 1, r) + + def select(self, c, k, l, r): + current_occurrence = 0 + if l > r or l > self.n or l < 1: + return None + for i in range(l, r+1): + if self.t[i] == c: + current_occurrence = current_occurrence + 1 + if current_occurrence == k: + return i + return None + + def quantile(self, k, l, r): + if l > r or l > self.n or l < 1 or k > r-l+1: + return None + substring = self.t[l : r+1] + return sorted(substring)[k-1] + + def range_count(self, l, r, x, y): + if l > r or l > self.n or l < 1: + return None + result = 0 + for i in range(l, r+1): + if x <= self.t[i] <= y: + result = result + 1 + return result + +def rank_result(solver, queries): + return [solver.rank(c, l, r) for (c, l, r) in queries] + +def select_result(solver, queries): + return [solver.select(c, k, l, r) for (c, k, l, r) in queries] + +def range_count_result(solver, queries): + return [solver.range_count(l, r, x, y) for (l, r, x, y) in queries] + +def quantile_result(solver, queries): + return [solver.quantile(k, l, r) for (k, l, r) in queries] + +def create_range_for_query(n): + l = rand.random.randint(1, n) + r = rand.random.randint(l, n) + return (l, r) + +def create_rank_query(n, alphabet): + l, r = create_range_for_query(n) + return (rand.random.choice(alphabet), l, r) + +def create_select_query(n, alphabet): + l, r = create_range_for_query(n) + return (rand.random.choice(alphabet), rand.random.randint(1, r-l+1), l, r) + +def create_quantile_query(n, alphabet): + l, r = create_range_for_query(n) + return (rand.random.randint(1, r-l+1), l, r) + +def create_range_count_query(n, alphabet): + l, r = create_range_for_query(n) + x = rand.random.choice(alphabet) + y = rand.random.choice(alphabet) + if x > y: + x, y = y, x + return (l, r, x, y) + + + +class TestWaveletTree(unittest.TestCase): + run_large = unittest.skipUnless( + os.environ.get('LARGE', False), 'Skip test in small runs') + + model_solver = type(dummy_solver) + + test_classes = [wavelet_tree.wavelet_tree] + + runner_functions = [ + (create_rank_query, rank_result), + (create_select_query, select_result), + (create_quantile_query, quantile_result), + (create_range_count_query, range_count_result) + ] + + random_small_test_data = [ + (12, 10, ['a', 'b', 'c']), + (10, 20, ['a', 'b', 'c']), + (5, 12, ['a', 'b']), + (7, 12, ['a', 'c']), + (5, 25, ['a', 'b', 'c', 'd', 'w', 'e']) + ] + + def create_queries(self, n, q, alphabet, genaration_function): + return [genaration_function(n, alphabet) for _ in range(q)] + + def test_tree_api_handmade(self): + for test_idx in range(len(self.test_inputs)): + for cls in self.test_classes: + text, test_cases = self.test_inputs[test_idx] + solver = cls(text, len(text)-1) + for i in range(len(self.runner_functions)): + _, runner = self.runner_functions[i] + result = runner(solver, test_cases[i]) + self.assertEqual(self.test_expected_outputs[test_idx][i], result) + + def test_small_random(self): + for (n, q, alphabet) in self.random_small_test_data: + self.tree_api_random_test(n, q, alphabet) + + def tree_api_random_test(self, n, q, alphabet): + text = rand.random_word(n, alphabet) + model_solver = dummy_solver(text, n) + runners_and_queries = [(runner, self.create_queries(n, q, alphabet, fun)) for (fun, runner) in self.runner_functions] + model_results = [runner(model_solver, queries) for (runner, queries) in runners_and_queries] + for cls in self.test_classes: + solver = cls(text, n) + results = [runner(solver, queries) for (runner, queries) in runners_and_queries] + self.assertEqual(model_results, results) + + + large_test_case_data = [ + (1000, 10000, ['a', 'b']), + (1000, 10000, 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM[];,./?><+_)(*&^%$#@!1234567890-=)'.split()), + ] + + @run_large + def test_large_random(self): + for (n, q, alphabet) in self.large_test_case_data: + self.tree_api_random_test(n, q, alphabet) + + test_expected_outputs = [ + [ + [2, 1, 3, 0, 1], + [3, 5, None, 4, None], + ['a', 'a', 'a', 'b', None], + [5, 3, 1, 1, 3, 0, 0] + ], + [ + [4, 1, 4, 1, 1, 0, 0, 3, 1, 2, 1, 0], + [None, None, 3, None, 5, 6, 10, None, None, 7, 8, 10], + ['c', 'a', 'c', 'a', 'a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], + [0, 1, 10, 2, 5, 1, 1, 3, 3, 1, 3, 1] + ] + ] + + test_inputs = [ + ( + '#ababa', + [ + [ + ('a', 1, 3), + ('b', 1, 3), + ('a',1 , 5), + ('c', 1, 5), + ('a', 3, 3) + ], + [ + ('a', 1, 3, 3), + ('a', 3, 1, 5), + ('b', 1, 3, 3), + ('b', 2, 2, 4), + ('c', 1, 1, 5) + ], + [ + (1, 1, 5), + (1, 3, 3), + (3, 1, 5), + (4, 1, 4), + (2, 1, 1) + ], + [ + (1, 5, 'a', 'b'), + (1, 5, 'a', 'a'), + (2, 4, 'a', 'a'), + (3, 3, 'a', 'a'), + (1, 3, ' ', 'c'), + (1, 3, ' ', ' '), + (2, 4, 'c', 'c') + ] + ] + ), + ( + '#bcbbbaabca', + [ + [ + ('b', 3, 10), + ('a', 6, 6), + ('b', 3, 8), + ('a', 9, 10), + ('c', 6, 9), + ('a', 8, 8), + ('c', 4, 4), + ('b', 4, 10), + ('a', 9, 10), + ('a', 5, 8), + ('c', 1, 2), + ('c', 3, 5) + ], + [ + ('c', 2, 8, 10), + ('c', 2, 4, 5), + ('b', 1, 3, 9), + ('c', 1, 5, 8), + ('b', 1, 5, 5), + ('a', 1, 5, 8), + ('a', 1, 10, 10), + ('c', 6, 3, 8), + ('a', 1, 3, 3), + ('a', 2, 6, 9), + ('b', 4, 3, 9), + ('a', 1, 10, 10) + ], + [ + (1, 9, 9), + (2, 6, 8), + (1, 9, 9), + (2, 6, 8), + (2, 4, 7), + (2, 1, 5), + (2, 4, 6), + (1, 10, 10), + (2, 5, 7), + (2, 1, 2), + (3, 8, 10), + (4, 7, 10) + ], + [ + (10, 10, 'b', 'b'), + (3, 3, 'b', 'b'), + (1, 10, 'a', 'c'), + (6, 7, 'a', 'c'), + (1, 6, 'a', 'b'), + (5, 5, 'a', 'b'), + (9, 10, 'b', 'c'), + (3, 7, 'b', 'c'), + (4, 8, 'b', 'c'), + (10, 10, 'a', 'b'), + (4, 6, 'a', 'b'), + (9, 9, 'a', 'c') + ] + ] + ) + ] \ No newline at end of file From 11f445d58adc81b40b7ad09609be224f737a4ecb Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 7 Jun 2024 18:25:25 +0200 Subject: [PATCH 11/24] Style fixes --- string_indexing/fm_index.py | 79 +++++++++++++++--------------- test/test_exact_string_matching.py | 10 ++-- 2 files changed, 44 insertions(+), 45 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index b47ecce..1595577 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -1,94 +1,93 @@ class _RankSearcher: + SAMPLE_SIZE = 8 + def __init__(self, L, mapper_of_chars, n): - - self.sample_size = 8 # const for sampling self.L = L #prepare closest samplings current_sample = 0 self.closest_sample = [0] for i in range(1, n+2): - if abs(current_sample-i) > abs(current_sample + self.sample_size-i) and (i + self.sample_size < n): - current_sample += self.sample_size + if abs(current_sample-i) > abs(current_sample + self.SAMPLE_SIZE-i) and (i + self.SAMPLE_SIZE < n): + current_sample += self.SAMPLE_SIZE self.closest_sample.append(current_sample) #Generate values for occ for given samples O(|A|*n) self.occ_in_sample_for_char = { self.L[i]: [0] for i in range(1, n+2)} for c in mapper_of_chars: - current_value = 0 - next_sample = self.sample_size + current_value, next_sample = 0, self.SAMPLE_SIZE for i in range(1, n+2): if L[i] == c: current_value += 1 if i == next_sample: self.occ_in_sample_for_char[c].append(current_value) - next_sample = next_sample + self.sample_size + next_sample = next_sample + self.SAMPLE_SIZE def prefix_rank(self, c, i): if self.closest_sample[i] < i: to_add = sum(1 for c_prim in self.L[self.closest_sample[i] + 1:i + 1] if c_prim == c) else: to_add = sum(-1 for c_prim in self.L[i + 1:self.closest_sample[i] + 1] if c_prim == c) - return self.occ_in_sample_for_char[c][self.closest_sample[i] // self.sample_size] + to_add + return self.occ_in_sample_for_char[c][self.closest_sample[i] // self.SAMPLE_SIZE] + to_add class _FMIndex: def __init__ (self, SA, BWT, text, n, rank_searcher = None): self.L = BWT - self.F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) + F = '#$' + ''.join(text[SA[i]] for i in range(1, n + 1)) self.n = n self.SA = SA #prepare char mapping for F - self.mapper_of_chars = { self.F[2] : 0} + self.mapper_of_chars = { F[2] : 0} self.beginnings = [2] - last = self.F[2] + last = F[2] for i in range(3, n+2): - if self.F[i] != last: - last = self.F[i] + if F[i] != last: + last = F[i] self.beginnings.append(i) self.mapper_of_chars[last] = len(self.beginnings) - 1 self.len_of_alphabet = len(self.mapper_of_chars) - self.rank_searcher = _RankSearcher(self.L, self.mapper_of_chars, n) if rank_searcher is None else rank_searcher + self.rank_searcher = _RankSearcher(self.L, self.mapper_of_chars, n) \ + if rank_searcher is None else rank_searcher def from_suffix_array_and_bwt(SA, BWT, text, n, rank_searcher = None): return _FMIndex(SA, BWT, text, n, rank_searcher) # O(|p|) -def count(fm, p, size): - (low, high) = _get_range_of_occurrences(fm, p, size) +def count(FM, p, size): + low, high = _get_range_of_occurrences(FM, p, size) return max(high - low + 1, 0) if low > -1 else 0 # O(|p| + k) where k is the number or occurances of p in text -def contains(fm, p, l): - (low, high) = _get_range_of_occurrences(fm, p, l) - yield from sorted([fm.SA[i-1] for i in range(low, high + 1) if low > -1]) +def contains(FM, p, l): + low, high = _get_range_of_occurrences(FM, p, l) + yield from sorted([FM.SA[i-1] for i in range(low, high + 1) if low > -1]) -def _get_range_of_occurrences(fm, p, size): - if size > fm.n or size == 0: - return (-1, -1) +def _get_range_of_occurrences(FM, p, size): + if size > FM.n or size == 0: + return -1, -1 - if p[-1] not in fm.mapper_of_chars: - return (-1, -1) + if p[-1] not in FM.mapper_of_chars: + return -1, -1 - map_idx = fm.mapper_of_chars[p[-1]] - l = fm.beginnings[map_idx] - r = fm.n + 1 - if map_idx != fm.len_of_alphabet - 1: - r = fm.beginnings[map_idx + 1] - 1 + map_idx = FM.mapper_of_chars[p[-1]] + l= FM.beginnings[map_idx] + r = FM.beginnings[map_idx + 1] - 1 if map_idx != FM.len_of_alphabet - 1 else FM.n + 1 - for i in range(size-1, 0, -1): - if p[i] not in fm.mapper_of_chars: - return (-1, -1) - occurrences_before = fm.rank_searcher.prefix_rank(p[i], l - 1) - occurrences_after = fm.rank_searcher.prefix_rank(p[i], r) + for c in p[-2:0:-1]: + if c not in FM.mapper_of_chars: + return -1, -1 + occurrences_before = FM.rank_searcher.prefix_rank(c, l - 1) + occurrences_after = FM.rank_searcher.prefix_rank(c, r) if occurrences_before == occurrences_after: - return (-1, -1) - map_idx = fm.mapper_of_chars[p[i]] - l = fm.beginnings[map_idx] + occurrences_before - r = fm.beginnings[map_idx] + occurrences_after - 1 + return -1, -1 + map_idx = FM.mapper_of_chars[c] + l = FM.beginnings[map_idx] + occurrences_before + r = FM.beginnings[map_idx] + occurrences_after - 1 if r < l: - return (-1, -1) - return (l, r) + return -1, -1 + + return l, r diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index 69bd30c..cad1e5b 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -18,15 +18,15 @@ def lcp_lr_contains(t, w, n, m): def fm_index_wavelet_contains(t, w, n, m): SA = suffix_array.skew(t, n) BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) - rank_searcher = wavelet_tree.wavelet_tree(t, n) - fm = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n, rank_searcher) - return fm_index.contains(fm, w, m) + RS = wavelet_tree.wavelet_tree(t, n) + FM = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n, RS) + return fm_index.contains(FM, w, m) def fm_index_contains(t, w, n, m): SA = suffix_array.skew(t, n) BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) - fm = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n) - return fm_index.contains(fm, w, m) + FM = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n) + return fm_index.contains(FM, w, m) EXACT_STRING_MATCHING_ALGORITHMS = [ [ 'Morris-Pratt', forward.morris_pratt ], From e05c1d38b6828f2f274e6c8aeff9376b18a4a501 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 7 Jun 2024 18:56:41 +0200 Subject: [PATCH 12/24] Make check fix --- common/wavelet_tree.py | 63 ++++++++++++++++-------------- test/test_exact_string_matching.py | 2 +- test/test_wavelet_tree.py | 44 ++++++++++----------- 3 files changed, 56 insertions(+), 53 deletions(-) diff --git a/common/wavelet_tree.py b/common/wavelet_tree.py index 9eceebb..a5f17cc 100644 --- a/common/wavelet_tree.py +++ b/common/wavelet_tree.py @@ -1,13 +1,13 @@ - -class wavelet_tree: +# pylint: disable=too-many-instance-attributes +class WaveletTree: def __init__(self, t, n, sorted_alphabet_list = None): self.t = t t = t[1:] if sorted_alphabet_list is not None: - self.alphabet = sorted_alphabet_list + self.alphabet = sorted_alphabet_list else: - self.alphabet = set(t) - sorted_alphabet_list = sorted(list(self.alphabet)) + self.alphabet = set(t) + sorted_alphabet_list = sorted(list(self.alphabet)) self.n = n self.smallest = sorted_alphabet_list[0] self.biggest = sorted_alphabet_list[-1] @@ -26,17 +26,21 @@ def __init__(self, t, n, sorted_alphabet_list = None): self.left_indexes = [0] self.rigth_indexes = [0] for i in range(n): - self.left_indexes.append(i+1) if t[i] in self.zero_indexed else self.rigth_indexes.append(i+1) - left_node_text = '#' + ''.join(c for c in t if c in self.zero_indexed) - rigth_node_text = '#' + ''.join(c for c in t if c in self.one_indexed) - self.left = wavelet_tree(left_node_text, len(left_node_text) - 1, left_alphabet) - self.right = wavelet_tree(rigth_node_text, len(rigth_node_text) - 1, right_alphabet) + if t[i] in self.zero_indexed: + self.left_indexes.append(i+1) + else: + self.rigth_indexes.append(i+1) + left_text = '#' + ''.join(c for c in t if c in self.zero_indexed) + rigth_text = '#' + ''.join(c for c in t if c in self.one_indexed) + self.left = WaveletTree(left_text, len(left_text) - 1, left_alphabet) + self.right = WaveletTree(rigth_text, len(rigth_text) - 1, right_alphabet) def _left_tree_range(self, l, r): return l - self.prefix_sum[l-1], r - self.prefix_sum[r] - + def _right_tree_range(self, l, r): - return self.prefix_sum[l] + (1 if self.t[l] in self.zero_indexed else 0), self.prefix_sum[r] + return self.prefix_sum[l] + (1 if self.t[l] in self.zero_indexed else 0) \ + , self.prefix_sum[r] def rank(self, c, l, r): if c not in self.alphabet or l > r or l > self.n or r < 1: @@ -51,7 +55,7 @@ def rank(self, c, l, r): def preifx_rank(self, c, r): return self.rank(c, 1, r) - + def select(self, c, k, l, r): if c not in self.alphabet or l > r or l > self.n or r < 1 : return None @@ -59,11 +63,11 @@ def select(self, c, k, l, r): return k+l-1 if k <= r-l+1 else None if c in self.zero_indexed: new_l, new_r = self._left_tree_range(l, r) - recursion_result = self.left.select(c, k, new_l, new_r) - return self.left_indexes[recursion_result] if recursion_result is not None else None + rec_result = self.left.select(c, k, new_l, new_r) + return self.left_indexes[rec_result] if rec_result is not None else None new_l, new_r = self._right_tree_range(l, r) - recursion_result = self.right.select(c, k, new_l, new_r) - return self.rigth_indexes[recursion_result] if recursion_result is not None else None + rec_result = self.right.select(c, k, new_l, new_r) + return self.rigth_indexes[rec_result] if rec_result is not None else None def quantile(self, k, l, r): if k < 1 or k > r-l+1: @@ -76,29 +80,28 @@ def quantile(self, k, l, r): return self.left.quantile(k, new_l, new_r) new_l, new_r = self._right_tree_range(l, r) return self.right.quantile(k-r+l-1+left_num, new_l, new_r) - + def _does_one_range_end_in_another(self, l, r, i, j): - return (i <= l and j >= l) or (i <= r and j >= r) + return (i <= l <= j) or (i <= r <= j) def _ranges_intersect(self, l, r, i, j): return self._does_one_range_end_in_another(l, r, i ,j) or \ - self._does_one_range_end_in_another(i, j, l, r) - + self._does_one_range_end_in_another(i, j, l, r) + def range_count(self, l, r, x, y): if l > r or l > self.n or l < 1 or x > y: return 0 - if y < self.smallest or x > self.biggest: - return 0 if x <= self.smallest and self.biggest <= y: return r-l+1 - if self.leaf: + if self.leaf or y < self.smallest or x > self.biggest: return 0 - if self._ranges_intersect(self.left.smallest, self.left.biggest, x, y) and \ - self._ranges_intersect(self.right.smallest, self.right.biggest, x, y): - new_left_l, new_left_r = self._left_tree_range(l, r) - new_right_l, new_right_r = self._right_tree_range(l, r) - return self.left.range_count(new_left_l, new_left_r, x, y) \ - + self.right.range_count(new_right_l, new_right_r, x, y) + l_node, r_node = self.left, self.right + if self._ranges_intersect(l_node.smallest, l_node.biggest, x, y) and \ + self._ranges_intersect(r_node.smallest, r_node.biggest, x, y): + new_left_l, new_left_r = self._left_tree_range(l, r) + new_right_l, new_right_r = self._right_tree_range(l, r) + return self.left.range_count(new_left_l, new_left_r, x, y) \ + + self.right.range_count(new_right_l, new_right_r, x, y) if self._ranges_intersect(self.right.smallest, self.right.biggest, x, y): new_l, new_r = self._right_tree_range(l, r) return self.right.range_count(new_l, new_r, x, y) diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index cad1e5b..749ebe7 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -18,7 +18,7 @@ def lcp_lr_contains(t, w, n, m): def fm_index_wavelet_contains(t, w, n, m): SA = suffix_array.skew(t, n) BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) - RS = wavelet_tree.wavelet_tree(t, n) + RS = wavelet_tree.WaveletTree(t, n) FM = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n, RS) return fm_index.contains(FM, w, m) diff --git a/test/test_wavelet_tree.py b/test/test_wavelet_tree.py index 3c46d4f..8833b84 100644 --- a/test/test_wavelet_tree.py +++ b/test/test_wavelet_tree.py @@ -4,7 +4,7 @@ from common import wavelet_tree from generator import rand -class dummy_solver: +class DummySolver: def __init__(self, t, n): self.t = t self.n = n @@ -12,11 +12,11 @@ def __init__(self, t, n): def rank(self, c, l ,r): if l > r or l > self.n or l < 1: return 0 - return sum([1 if self.t[i] == c else 0 for i in range(l, r+1)]) - + return sum([1 if self.t[i] == c else 0 for i in range(l, r+1)]) + def preifx_rank(self, c, r): return self.rank(c, 1, r) - + def select(self, c, k, l, r): current_occurrence = 0 if l > r or l > self.n or l < 1: @@ -27,13 +27,13 @@ def select(self, c, k, l, r): if current_occurrence == k: return i return None - + def quantile(self, k, l, r): if l > r or l > self.n or l < 1 or k > r-l+1: return None substring = self.t[l : r+1] return sorted(substring)[k-1] - + def range_count(self, l, r, x, y): if l > r or l > self.n or l < 1: return None @@ -42,7 +42,7 @@ def range_count(self, l, r, x, y): if x <= self.t[i] <= y: result = result + 1 return result - + def rank_result(solver, queries): return [solver.rank(c, l, r) for (c, l, r) in queries] @@ -68,6 +68,7 @@ def create_select_query(n, alphabet): l, r = create_range_for_query(n) return (rand.random.choice(alphabet), rand.random.randint(1, r-l+1), l, r) +# pylint: disable=unused-argument def create_quantile_query(n, alphabet): l, r = create_range_for_query(n) return (rand.random.randint(1, r-l+1), l, r) @@ -80,16 +81,13 @@ def create_range_count_query(n, alphabet): x, y = y, x return (l, r, x, y) - class TestWaveletTree(unittest.TestCase): run_large = unittest.skipUnless( os.environ.get('LARGE', False), 'Skip test in small runs') - model_solver = type(dummy_solver) + test_classes = [wavelet_tree.WaveletTree] - test_classes = [wavelet_tree.wavelet_tree] - runner_functions = [ (create_rank_query, rank_result), (create_select_query, select_result), @@ -107,12 +105,14 @@ class TestWaveletTree(unittest.TestCase): def create_queries(self, n, q, alphabet, genaration_function): return [genaration_function(n, alphabet) for _ in range(q)] - + def test_tree_api_handmade(self): + # pylint: disable=consider-using-enumerate for test_idx in range(len(self.test_inputs)): for cls in self.test_classes: text, test_cases = self.test_inputs[test_idx] solver = cls(text, len(text)-1) + # pylint: disable=consider-using-enumerate for i in range(len(self.runner_functions)): _, runner = self.runner_functions[i] result = runner(solver, test_cases[i]) @@ -124,15 +124,15 @@ def test_small_random(self): def tree_api_random_test(self, n, q, alphabet): text = rand.random_word(n, alphabet) - model_solver = dummy_solver(text, n) - runners_and_queries = [(runner, self.create_queries(n, q, alphabet, fun)) for (fun, runner) in self.runner_functions] - model_results = [runner(model_solver, queries) for (runner, queries) in runners_and_queries] + model_solver = DummySolver(text, n) + runners_args = [(runner, self.create_queries(n, q, alphabet, fun)) for (fun, runner) in self.runner_functions] + model_results = [runner(model_solver, queries) for (runner, queries) in runners_args] for cls in self.test_classes: solver = cls(text, n) - results = [runner(solver, queries) for (runner, queries) in runners_and_queries] + results = [runner(solver, queries) for (runner, queries) in runners_args] self.assertEqual(model_results, results) - + large_test_case_data = [ (1000, 10000, ['a', 'b']), (1000, 10000, 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM[];,./?><+_)(*&^%$#@!1234567890-=)'.split()), @@ -152,8 +152,8 @@ def test_large_random(self): ], [ [4, 1, 4, 1, 1, 0, 0, 3, 1, 2, 1, 0], - [None, None, 3, None, 5, 6, 10, None, None, 7, 8, 10], - ['c', 'a', 'c', 'a', 'a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], + [None, None, 3, None, 5, 6, 10, None, None, 7, 8, 10], + ['c', 'a', 'c', 'a', 'a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], [0, 1, 10, 2, 5, 1, 1, 3, 3, 1, 3, 1] ] ] @@ -194,7 +194,7 @@ def test_large_random(self): ] ] ), - ( + ( '#bcbbbaabca', [ [ @@ -253,6 +253,6 @@ def test_large_random(self): (4, 6, 'a', 'b'), (9, 9, 'a', 'c') ] - ] + ] ) - ] \ No newline at end of file + ] From 2354d3a467f1650df6174662917a1ce47ad70c9b Mon Sep 17 00:00:00 2001 From: prolik123 Date: Sat, 8 Jun 2024 00:33:04 +0200 Subject: [PATCH 13/24] LZIndex works + little refactor --- string_indexing/fm_index.py | 6 +- string_indexing/lz_index.py | 302 ++++++++++++----------------- test/test_exact_string_matching.py | 9 +- 3 files changed, 138 insertions(+), 179 deletions(-) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 1595577..31ee5d6 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -4,12 +4,12 @@ class _RankSearcher: def __init__(self, L, mapper_of_chars, n): self.L = L - #prepare closest samplings current_sample = 0 self.closest_sample = [0] for i in range(1, n+2): - if abs(current_sample-i) > abs(current_sample + self.SAMPLE_SIZE-i) and (i + self.SAMPLE_SIZE < n): + if abs(current_sample-i) > abs(current_sample + self.SAMPLE_SIZE-i) and \ + (i + self.SAMPLE_SIZE < n): current_sample += self.SAMPLE_SIZE self.closest_sample.append(current_sample) @@ -89,5 +89,5 @@ def _get_range_of_occurrences(FM, p, size): r = FM.beginnings[map_idx] + occurrences_after - 1 if r < l: return -1, -1 - + return l, r diff --git a/string_indexing/lz_index.py b/string_indexing/lz_index.py index 1128c7d..2e2b75d 100644 --- a/string_indexing/lz_index.py +++ b/string_indexing/lz_index.py @@ -1,31 +1,6 @@ -#also a bug here -"""def naive_lz78_compress(text): - text = text[1:] - text = text + '$' #for last block uniqness - dictionary = {} - compressed = [] - phrase = '' - - for char in text: - next_phrase = phrase + char - exists = dictionary.get(next_phrase) - if exists is None: - if phrase == '': - compressed.append((0, next_phrase[-1])) - dictionary[next_phrase] = 1 - phrase = '' - else: - index = dictionary.get(phrase) - compressed.append((index, next_phrase[-1])) - dictionary[next_phrase] = index + 1 - phrase = '' - else: - phrase = next_phrase - - return compressed""" class _LZTreeNode: - def __init__(self, parent, character, id, position): + def __init__(self, parent, character, idx, position): self.parent = parent self.position = position if parent is not None: @@ -33,15 +8,15 @@ def __init__(self, parent, character, id, position): self.depth = parent.depth + 1 else: self.depth = 0 - self.id = id - self.left_rank = self - self.right_rank = self + self.idx = idx self.children = {} self.character = character def set_ranks(self, rank): - if self.id is not None: + if self.idx is not None: self.rank = rank + self.left_rank = rank + self.right_rank = rank rank = rank + 1 else: self.rank = None @@ -51,232 +26,211 @@ def set_ranks(self, rank): min_key = min(self.children) max_key = max(self.children) self.left_rank = self.children[min_key].left_rank \ - if self.rank is None or self.children[min_key].left_rank.rank < self.rank \ - else self + if self.rank is None or self.children[min_key].left_rank < self.rank \ + else self.rank self.right_rank = self.children[max_key].right_rank \ - if self.rank is None or self.children[max_key].right_rank.rank < self.rank \ - else self + if self.rank is None or self.children[max_key].right_rank > self.rank \ + else self.rank return rank - #left rank seems to be useless - def print_node(self) -> None: - if self.parent is not None: - print("(",self.id, ", ", self.parent.id, ", ", self.character, ", ", self.rank, ", ", self.left_rank.rank, ", ", self.right_rank.rank, "), ") - for child in self.children.values(): - child.print_node() - - def get_left_rank(self) -> int: - return self.left_rank.rank - - def get_right_rank(self) -> int: - return self.right_rank.rank - - def get_id(self) -> int: - return self.id - - def get_children(self) -> dict: - return self.children - - - -class LZTrieBase: - - def __init__(self) -> None: - pass - - #normal string with hash at the begining - def search(self, t, n): - return self._search_internal(t, n, self.root) - - def _search_internal(self, t, idx, node : _LZTreeNode): - if idx == 0: - return node - if t[idx] not in node.children: - return None - return self._search_internal(t, idx - 1, node.children[t[idx]]) - - def get_size(self) -> int: - return self.size +#normal string with hash at the begining +def search(tree, t, n): + return _search_internal(t, 0, n, tree.root) - def debug(self) -> None: - self.root.print_node() - - +def _search_internal(t, idx, n, node): + if idx == n: + return node + if t[idx + 1] not in node.children: + return None + return _search_internal(t, idx + 1, n, node.children[t[idx + 1]]) -class LZTrie(LZTrieBase): - def __init__(self, t : str, n : int): +class _LZTrie: + def __init__(self, t, n): t += '$' #guaranting unique last node self.root = _LZTreeNode(None, '#', 0, None) current_node = self.root - id = 1 - for i in range(1, n+1): + idx, position = 1, 1 + for i in range(1, n+2): current_char = t[i] if current_char not in current_node.children: - _LZTreeNode(current_node, current_char, id, i) - id += 1 + _LZTreeNode(current_node, current_char, idx, position) + idx += 1 current_node = self.root + position = i+1 else: current_node = current_node.children[current_char] - self.size = id + self.size = idx self.root.set_ranks(0) - -class NodeMapper: +class _NodeMapper: def __init__(self, lz_trie, size): self.arr = [None] * size self._map_tree_to_list(lz_trie.root) def _map_tree_to_list(self, node) -> None: - if node.id is not None: - self.arr[node.id] = node + if node.idx is not None: + self.arr[node.idx] = node for child in node.children.values(): self._map_tree_to_list(child) - def get_node_by_id(self, id : int) -> _LZTreeNode: - return self.arr[id] - -class RangeSearcher: - def __init__(self, points): + def get_node_by_idx(self, idx): + return self.arr[idx] + +class _RangeSearcher: + def __init__(self, points): self.points = points def search_in_range(self, l1, r1, l2, r2): result = [] for (x, y) in self.points: - if l1 <= x and x <= l2 and r1 <= y and y <= r2: + if l1 <= x <= r1 and l2 <= y <= r2: result.append((x, y)) return result +class _RankMapper: + def __init__(self, lz_trie, size): + self.arr = [None] * size + self._map_tree_to_list(lz_trie.root) + + def _map_tree_to_list(self, node) -> None: + if node.rank is not None: + self.arr[node.rank] = node + for child in node.children.values(): + self._map_tree_to_list(child) -class RevLZTrie(LZTrieBase): + def get_node_by_rank(self, rank): + return self.arr[rank] - def __init__(self, lz_trie : LZTrie): - self.root = _LZTreeNode(None, '#', 0) +class _RevLZTrie: + def __init__(self, lz_trie): + self.root = _LZTreeNode(None, '#', 0, None) self._add_recursive(lz_trie.root) self.root.set_ranks(0) def _add_recursive(self, node): - for child in node.get_children().values(): + for child in node.children.values(): self._add_recursive(child) - self._add_block(child, self.root, child.id) + self._add_block(child, self.root, child.idx) - def _add_block(self, lz_node : _LZTreeNode, rev_node : _LZTreeNode, id : int) -> None: - if lz_node.parent is None or lz_node.parent.character is '#': - if lz_node.character in rev_node.get_children(): - rev_node.children[lz_node.character].id = id + def _add_block(self, lz_node, rev_node, idx): + if lz_node.parent is None or lz_node.parent.character == '#': + if lz_node.character in rev_node.children: + rev_node.children[lz_node.character].idx = idx else: - rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, id, None) + rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, idx, None) else: - if lz_node.character not in rev_node.get_children(): + if lz_node.character not in rev_node.children: rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, None, None) - self._add_block(lz_node.parent, rev_node.children[lz_node.character], id) - - + self._add_block(lz_node.parent, rev_node.children[lz_node.character], idx) -class LZIndex: - def __init__(self, lz_trie : LZTrie, rev_lz_trie : RevLZTrie, node_mapper : NodeMapper, range_searcher : RangeSearcher): +class _LZIndex: + def __init__(self, lz_trie, rev_lz_trie, lz_node_mapper, rev_lz_node_mapper, \ + range_searcher, lz_rank_mapper, rev_lz_rank_mapper): self.lz_trie = lz_trie self.rev_lz_trie = rev_lz_trie - self.node_mapper = node_mapper + self.lz_node_mapper = lz_node_mapper self.range_searcher = range_searcher + self.rev_lz_node_mapper = rev_lz_node_mapper + self.lz_rank_mapper = lz_rank_mapper + self.rev_lz_rank_mapper = rev_lz_rank_mapper def create_lz_index(t, n): - lz_trie = LZTrie(t, n) - rev_trie = RevLZTrie(lz_trie) - rev_trie.debug() - lz_node_mapper = NodeMapper(lz_trie, lz_trie.get_size()) - rev_node_mapper = NodeMapper(rev_trie, lz_trie.get_size()) - - for node in lz_node_mapper.arr: - if node.parent is not None: - print((node.parent.id), node.character) - points = [] - for i in range(1, lz_trie.get_size() - 1): - points.append((rev_node_mapper.get_node_by_id(i).rank, lz_node_mapper.get_node_by_id(i+1).rank)) - range_searcher = RangeSearcher(points) - return LZIndex(lz_trie, rev_trie, rev_node_mapper, range_searcher) - -def lz_index_search(lz_index : LZIndex, s, m): - + lz_trie = _LZTrie(t, n) + rev_trie = _RevLZTrie(lz_trie) + lz_node_mapper = _NodeMapper(lz_trie, lz_trie.size) + rev_node_mapper = _NodeMapper(rev_trie, lz_trie.size) + + points = [(rev_node_mapper.get_node_by_idx(i).rank, lz_node_mapper.get_node_by_idx(i+1).rank) \ + for i in range(1, lz_trie.size - 1)] + range_searcher = _RangeSearcher(points) + lz_rank_mapper = _RankMapper(lz_trie, lz_trie.size) + rev_lz_rank_mapper = _RankMapper(rev_trie, lz_trie.size) + return _LZIndex(lz_trie, rev_trie, lz_node_mapper, rev_node_mapper, \ + range_searcher, lz_rank_mapper, rev_lz_rank_mapper) + +def contains(lz_index, s, m): + yield from sorted(_contains_internal(lz_index, s, m)) + +def _contains_internal(lz_index : _LZIndex, s, m): # single block case v = '#' + (s[::-1])[:-1] - root = lz_index.rev_lz_trie.search(v, m) + root = search(lz_index.rev_lz_trie, v, m) result = [] - for i in range(root.get_left_rank(), root.get_right_rank() + 1): - node = lz_index.node_mapper.get_node_by_id(i) - result.append((node.id, m + root.depth - node.depth)) - print("case 1", (node.id, m + root.depth - node.depth)) + if root is not None: + for i in range(root.left_rank, root.right_rank + 1): + rev_node = lz_index.rev_lz_rank_mapper.get_node_by_rank(i) + node = lz_index.lz_node_mapper.get_node_by_idx(rev_node.idx) + for j in range(node.left_rank, node.right_rank + 1): + result_node = lz_index.lz_rank_mapper.get_node_by_rank(j) + result.append(result_node.position + node.depth - m) # two block case for i in range(1, m): rev_prefix = '#' + (s[::-1])[m-i:m] sufix = '#' + s[i+1:] - rev_node = lz_index.rev_lz_trie.search(rev_prefix, i) - node = lz_index.lz_trie.search(sufix, m-i) - for (x, y) in lz_index.range_searcher.search_in_range(rev_node.get_left_rank(), rev_node.get_right_rank(), node.get_left_rank(), node.get_right_rank()): - result.append((x, i)) - print("case 2", (x, i)) + rev_node = search(lz_index.rev_lz_trie, rev_prefix, i) + node = search(lz_index.lz_trie, sufix, m-i) + if rev_node is None or node is None: + continue + for (x, _) in lz_index.range_searcher.search_in_range(rev_node.left_rank, \ + rev_node.right_rank, node.left_rank, node.right_rank): + rev_node = lz_index.rev_lz_rank_mapper.get_node_by_rank(x) + node = lz_index.lz_node_mapper.get_node_by_idx(rev_node.idx) + result.append(node.position + node.depth - i) # other case - used = [[False]*(m+1)]*(m+1) - existance = [[None]*(m+1)]*(m+1) + used = [[False]*(m+1) for _ in range(m+1)] + existance = [[None]*(m+1) for _ in range(m+1)] arr = [{}] for i in range(1, m+1): recorded = {} current_node = lz_index.lz_trie.root for j in range(i, m+1): - if current_node is not None and s[i] not in current_node.children: + if current_node is not None and s[j] not in current_node.children: current_node = None elif current_node is not None: current_node = current_node.children[s[j]] existance[i][j] = current_node if current_node is not None: - recorded[current_node.id] = j + recorded[current_node.idx] = j arr.append(recorded) for i in range(1, m+1): for j in range(i, m+1): - if existance[i][j] is None or used[i][j] == True: + if existance[i][j] is None or used[i][j] is True: continue - start_id = existance[i][j].id - current_id = start_id - 1 - current_end = j - 1 - while current_end < m and (current_id + 1) in arr[current_end+1]: - current_id = current_id + 1 - used[current_end + 1][arr[current_end + 1][current_id]] = True - current_end = arr[current_end + 1][current_id] - size = current_id - start_id + 1 + start_idx = existance[i][j].idx + current_idx = start_idx + current_end = j + while current_end < m and (current_idx + 1) in arr[current_end+1]: + current_idx = current_idx + 1 + used[current_end + 1][arr[current_end + 1][current_idx]] = True + current_end = arr[current_end + 1][current_idx] + size = current_idx - start_idx + 1 if i > 1: size = size + 1 if current_end < m: size = size + 1 if size < 3 or (current_end != m and existance[current_end+1][m] is None): continue - if current_end == m or (existance[current_end+1][m].get_left_rank() <= lz_index.node_mapper.get_node_by_id(current_id + 1) and lz_index.node_mapper.get_node_by_id(current_id + 1) <= existance[current_end+1][m].get_left_rank()): - if start_id == 1 and i == 1: - result.append(start_id, i) + if lz_index.lz_trie.size > current_idx + 1 and \ + ( current_end == m or \ + ( existance[current_end+1][m].left_rank <= \ + lz_index.lz_node_mapper.get_node_by_idx(current_idx + 1).rank <= \ + existance[current_end+1][m].right_rank )): + if i == 1: + result.append(lz_index.lz_node_mapper.get_node_by_idx(start_idx).position) continue - elif start_id == 1: + if start_idx == 1: continue - current_node = lz_index.node_mapper.get_node_by_id(start_id - 1) + current_node = lz_index.lz_node_mapper.get_node_by_idx(start_idx - 1) prev = i - 1 - while prev > 0 and current_node.parent != None and s[prev] in current_node.parent.children and current_node.parent.children[s[prev]] == current_node: + while prev > 0 and current_node.parent is not None and \ + s[prev] in current_node.parent.children and \ + current_node.parent.children[s[prev]] == current_node: prev = prev - 1 current_node = current_node.parent if prev == 0: - result.append(start_id - 1, i - 1) - return result - - - -t = '#aabbababa' -#LZIndex(t) #blocks are not unique in default LZ78 compress (I think it's a bug) -#LZIndex('#aabbababa') #bugged string -#trie = LZTrie(t, len(t)-1) -#trie.debug() - -lz_index = create_lz_index(t, len(t) - 1) - -print(lz_index_search(lz_index, '#aba', 3)) - - - + node = lz_index.lz_node_mapper.get_node_by_idx(start_idx) + result.append(node.position - i + 1) + return sorted(result) diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index 749ebe7..9b6545b 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -6,7 +6,7 @@ from generator import rand from exact_string_matching import forward, backward, other -from string_indexing import lcp, suffix_tree, suffix_array, fm_index +from string_indexing import lcp, suffix_tree, suffix_array, fm_index, lz_index from compression import burrows_wheeler from common import wavelet_tree @@ -28,6 +28,10 @@ def fm_index_contains(t, w, n, m): FM = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n) return fm_index.contains(FM, w, m) +def lz_index_contains(t, w, n, m): + LZ = lz_index.create_lz_index(t, n) + return lz_index.contains(LZ, w, m) + EXACT_STRING_MATCHING_ALGORITHMS = [ [ 'Morris-Pratt', forward.morris_pratt ], [ 'Knuth-Morris-Pratt', forward.knuth_morris_pratt ], @@ -61,7 +65,8 @@ def fm_index_contains(t, w, n, m): ], [ 'lcp-lr array', lcp_lr_contains ], [ 'fm index', fm_index_contains], - [ 'fm index with wavelet tree', fm_index_contains] + [ 'fm index with wavelet tree', fm_index_contains], + [ 'lz index', lz_index_contains] ] class TestExactStringMatching(unittest.TestCase): From 55c7193afe80a4dae88d6fac536c42c150f41a95 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Thu, 13 Jun 2024 20:47:33 +0200 Subject: [PATCH 14/24] Refactor and make check pass --- common/wavelet_tree.py | 16 ++--- string_indexing/fm_index.py | 30 +++++---- string_indexing/lz_index.py | 122 +++++++++++++++++++++--------------- test/test_wavelet_tree.py | 11 ++-- 4 files changed, 103 insertions(+), 76 deletions(-) diff --git a/common/wavelet_tree.py b/common/wavelet_tree.py index a5f17cc..9de02e7 100644 --- a/common/wavelet_tree.py +++ b/common/wavelet_tree.py @@ -39,8 +39,8 @@ def _left_tree_range(self, l, r): return l - self.prefix_sum[l-1], r - self.prefix_sum[r] def _right_tree_range(self, l, r): - return self.prefix_sum[l] + (1 if self.t[l] in self.zero_indexed else 0) \ - , self.prefix_sum[r] + return (self.prefix_sum[l] + (1 if self.t[l] in self.zero_indexed else 0), + self.prefix_sum[r]) def rank(self, c, l, r): if c not in self.alphabet or l > r or l > self.n or r < 1: @@ -85,8 +85,8 @@ def _does_one_range_end_in_another(self, l, r, i, j): return (i <= l <= j) or (i <= r <= j) def _ranges_intersect(self, l, r, i, j): - return self._does_one_range_end_in_another(l, r, i ,j) or \ - self._does_one_range_end_in_another(i, j, l, r) + return (self._does_one_range_end_in_another(l, r, i ,j) or + self._does_one_range_end_in_another(i, j, l, r)) def range_count(self, l, r, x, y): if l > r or l > self.n or l < 1 or x > y: @@ -96,12 +96,12 @@ def range_count(self, l, r, x, y): if self.leaf or y < self.smallest or x > self.biggest: return 0 l_node, r_node = self.left, self.right - if self._ranges_intersect(l_node.smallest, l_node.biggest, x, y) and \ - self._ranges_intersect(r_node.smallest, r_node.biggest, x, y): + if (self._ranges_intersect(l_node.smallest, l_node.biggest, x, y) and + self._ranges_intersect(r_node.smallest, r_node.biggest, x, y)): new_left_l, new_left_r = self._left_tree_range(l, r) new_right_l, new_right_r = self._right_tree_range(l, r) - return self.left.range_count(new_left_l, new_left_r, x, y) \ - + self.right.range_count(new_right_l, new_right_r, x, y) + return (self.left.range_count(new_left_l, new_left_r, x, y) + + self.right.range_count(new_right_l, new_right_r, x, y)) if self._ranges_intersect(self.right.smallest, self.right.biggest, x, y): new_l, new_r = self._right_tree_range(l, r) return self.right.range_count(new_l, new_r, x, y) diff --git a/string_indexing/fm_index.py b/string_indexing/fm_index.py index 31ee5d6..e9d451b 100644 --- a/string_indexing/fm_index.py +++ b/string_indexing/fm_index.py @@ -1,4 +1,5 @@ - +#pylint: disable=too-few-public-methods +#pylint: disable=invalid-name class _RankSearcher: SAMPLE_SIZE = 8 @@ -8,30 +9,34 @@ def __init__(self, L, mapper_of_chars, n): current_sample = 0 self.closest_sample = [0] for i in range(1, n+2): - if abs(current_sample-i) > abs(current_sample + self.SAMPLE_SIZE-i) and \ - (i + self.SAMPLE_SIZE < n): + if (abs(current_sample-i) > abs(current_sample + self.SAMPLE_SIZE-i) and + (i + self.SAMPLE_SIZE < n)): current_sample += self.SAMPLE_SIZE self.closest_sample.append(current_sample) #Generate values for occ for given samples O(|A|*n) - self.occ_in_sample_for_char = { self.L[i]: [0] for i in range(1, n+2)} + self.occ_for_char = { self.L[i]: [0] for i in range(1, n+2)} for c in mapper_of_chars: current_value, next_sample = 0, self.SAMPLE_SIZE for i in range(1, n+2): if L[i] == c: current_value += 1 if i == next_sample: - self.occ_in_sample_for_char[c].append(current_value) + self.occ_for_char[c].append(current_value) next_sample = next_sample + self.SAMPLE_SIZE def prefix_rank(self, c, i): if self.closest_sample[i] < i: - to_add = sum(1 for c_prim in self.L[self.closest_sample[i] + 1:i + 1] if c_prim == c) + to_add = sum( + 1 for c_it in self.L[self.closest_sample[i] + 1:i + 1] if c_it == c) else: - to_add = sum(-1 for c_prim in self.L[i + 1:self.closest_sample[i] + 1] if c_prim == c) - return self.occ_in_sample_for_char[c][self.closest_sample[i] // self.SAMPLE_SIZE] + to_add - + to_add = sum( + -1 for c_it in self.L[i + 1:self.closest_sample[i] + 1] if c_it == c) + return (self.occ_for_char[c][self.closest_sample[i] // self.SAMPLE_SIZE] + + to_add) +#pylint: disable=too-few-public-methods +#pylint: disable=invalid-name class _FMIndex: def __init__ (self, SA, BWT, text, n, rank_searcher = None): self.L = BWT @@ -50,8 +55,8 @@ def __init__ (self, SA, BWT, text, n, rank_searcher = None): self.mapper_of_chars[last] = len(self.beginnings) - 1 self.len_of_alphabet = len(self.mapper_of_chars) - self.rank_searcher = _RankSearcher(self.L, self.mapper_of_chars, n) \ - if rank_searcher is None else rank_searcher + self.rank_searcher = (_RankSearcher(self.L, self.mapper_of_chars, n) + if rank_searcher is None else rank_searcher) def from_suffix_array_and_bwt(SA, BWT, text, n, rank_searcher = None): return _FMIndex(SA, BWT, text, n, rank_searcher) @@ -75,7 +80,8 @@ def _get_range_of_occurrences(FM, p, size): map_idx = FM.mapper_of_chars[p[-1]] l= FM.beginnings[map_idx] - r = FM.beginnings[map_idx + 1] - 1 if map_idx != FM.len_of_alphabet - 1 else FM.n + 1 + r = (FM.beginnings[map_idx + 1] - 1 + if map_idx != FM.len_of_alphabet - 1 else FM.n + 1) for c in p[-2:0:-1]: if c not in FM.mapper_of_chars: diff --git a/string_indexing/lz_index.py b/string_indexing/lz_index.py index 2e2b75d..f3586fe 100644 --- a/string_indexing/lz_index.py +++ b/string_indexing/lz_index.py @@ -1,4 +1,5 @@ - +#pylint: disable=too-many-instance-attributes +#pylint: disable=too-few-public-methods class _LZTreeNode: def __init__(self, parent, character, idx, position): self.parent = parent @@ -11,6 +12,9 @@ def __init__(self, parent, character, idx, position): self.idx = idx self.children = {} self.character = character + self.rank = None + self.left_rank = None + self.right_rank = None def set_ranks(self, rank): if self.idx is not None: @@ -18,22 +22,21 @@ def set_ranks(self, rank): self.left_rank = rank self.right_rank = rank rank = rank + 1 - else: - self.rank = None if len(self.children) > 0: for child_key in sorted(self.children): rank = self.children[child_key].set_ranks(rank) min_key = min(self.children) max_key = max(self.children) - self.left_rank = self.children[min_key].left_rank \ - if self.rank is None or self.children[min_key].left_rank < self.rank \ - else self.rank - self.right_rank = self.children[max_key].right_rank \ - if self.rank is None or self.children[max_key].right_rank > self.rank \ - else self.rank + self.left_rank = (self.children[min_key].left_rank + if (self.rank is None or + self.children[min_key].left_rank < self.rank) + else self.rank) + self.right_rank = (self.children[max_key].right_rank + if (self.rank is None or + self.children[max_key].right_rank > self.rank) + else self.rank) return rank -#normal string with hash at the begining def search(tree, t, n): return _search_internal(t, 0, n, tree.root) @@ -44,6 +47,7 @@ def _search_internal(t, idx, n, node): return None return _search_internal(t, idx + 1, n, node.children[t[idx + 1]]) +#pylint: disable=too-few-public-methods class _LZTrie: def __init__(self, t, n): t += '$' #guaranting unique last node @@ -62,12 +66,13 @@ def __init__(self, t, n): self.size = idx self.root.set_ranks(0) +#pylint: disable=too-few-public-methods class _NodeMapper: def __init__(self, lz_trie, size): self.arr = [None] * size self._map_tree_to_list(lz_trie.root) - def _map_tree_to_list(self, node) -> None: + def _map_tree_to_list(self, node): if node.idx is not None: self.arr[node.idx] = node for child in node.children.values(): @@ -76,6 +81,7 @@ def _map_tree_to_list(self, node) -> None: def get_node_by_idx(self, idx): return self.arr[idx] +#pylint: disable=too-few-public-methods class _RangeSearcher: def __init__(self, points): self.points = points @@ -87,12 +93,13 @@ def search_in_range(self, l1, r1, l2, r2): result.append((x, y)) return result +#pylint: disable=too-few-public-methods class _RankMapper: def __init__(self, lz_trie, size): self.arr = [None] * size self._map_tree_to_list(lz_trie.root) - def _map_tree_to_list(self, node) -> None: + def _map_tree_to_list(self, node): if node.rank is not None: self.arr[node.rank] = node for child in node.children.values(): @@ -101,6 +108,7 @@ def _map_tree_to_list(self, node) -> None: def get_node_by_rank(self, rank): return self.arr[rank] +#pylint: disable=too-few-public-methods class _RevLZTrie: def __init__(self, lz_trie): self.root = _LZTreeNode(None, '#', 0, None) @@ -117,15 +125,19 @@ def _add_block(self, lz_node, rev_node, idx): if lz_node.character in rev_node.children: rev_node.children[lz_node.character].idx = idx else: - rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, idx, None) + rev_node.children[lz_node.character] = (_LZTreeNode(rev_node, + lz_node.character, idx, None)) else: if lz_node.character not in rev_node.children: - rev_node.children[lz_node.character] = _LZTreeNode(rev_node, lz_node.character, None, None) - self._add_block(lz_node.parent, rev_node.children[lz_node.character], idx) + rev_node.children[lz_node.character] = (_LZTreeNode(rev_node, + lz_node.character, None, None)) + self._add_block(lz_node.parent, + rev_node.children[lz_node.character], idx) +#pylint: disable=too-few-public-methods class _LZIndex: - def __init__(self, lz_trie, rev_lz_trie, lz_node_mapper, rev_lz_node_mapper, \ - range_searcher, lz_rank_mapper, rev_lz_rank_mapper): + def __init__(self, lz_trie, rev_lz_trie, lz_node_mapper, rev_lz_node_mapper, + range_searcher, lz_rank_mapper, rev_lz_rank_mapper): self.lz_trie = lz_trie self.rev_lz_trie = rev_lz_trie self.lz_node_mapper = lz_node_mapper @@ -134,37 +146,24 @@ def __init__(self, lz_trie, rev_lz_trie, lz_node_mapper, rev_lz_node_mapper, \ self.lz_rank_mapper = lz_rank_mapper self.rev_lz_rank_mapper = rev_lz_rank_mapper -def create_lz_index(t, n): - lz_trie = _LZTrie(t, n) - rev_trie = _RevLZTrie(lz_trie) - lz_node_mapper = _NodeMapper(lz_trie, lz_trie.size) - rev_node_mapper = _NodeMapper(rev_trie, lz_trie.size) - - points = [(rev_node_mapper.get_node_by_idx(i).rank, lz_node_mapper.get_node_by_idx(i+1).rank) \ - for i in range(1, lz_trie.size - 1)] - range_searcher = _RangeSearcher(points) - lz_rank_mapper = _RankMapper(lz_trie, lz_trie.size) - rev_lz_rank_mapper = _RankMapper(rev_trie, lz_trie.size) - return _LZIndex(lz_trie, rev_trie, lz_node_mapper, rev_node_mapper, \ - range_searcher, lz_rank_mapper, rev_lz_rank_mapper) +def _contains_internal(lz_index : _LZIndex, s, m): + yield from _contains_in_single_block(lz_index, s, m) + yield from _contains_within_two_blocks(lz_index, s, m) + yield from _contains_within_three_or_more_blocks(lz_index, s, m) -def contains(lz_index, s, m): - yield from sorted(_contains_internal(lz_index, s, m)) -def _contains_internal(lz_index : _LZIndex, s, m): - # single block case +def _contains_in_single_block(lz_index : _LZIndex, s, m): v = '#' + (s[::-1])[:-1] root = search(lz_index.rev_lz_trie, v, m) - result = [] if root is not None: for i in range(root.left_rank, root.right_rank + 1): rev_node = lz_index.rev_lz_rank_mapper.get_node_by_rank(i) node = lz_index.lz_node_mapper.get_node_by_idx(rev_node.idx) for j in range(node.left_rank, node.right_rank + 1): result_node = lz_index.lz_rank_mapper.get_node_by_rank(j) - result.append(result_node.position + node.depth - m) + yield result_node.position + node.depth - m - # two block case +def _contains_within_two_blocks(lz_index : _LZIndex, s, m): for i in range(1, m): rev_prefix = '#' + (s[::-1])[m-i:m] sufix = '#' + s[i+1:] @@ -172,13 +171,13 @@ def _contains_internal(lz_index : _LZIndex, s, m): node = search(lz_index.lz_trie, sufix, m-i) if rev_node is None or node is None: continue - for (x, _) in lz_index.range_searcher.search_in_range(rev_node.left_rank, \ - rev_node.right_rank, node.left_rank, node.right_rank): + for (x, _) in (lz_index.range_searcher.search_in_range(rev_node.left_rank, + rev_node.right_rank, node.left_rank, node.right_rank)): rev_node = lz_index.rev_lz_rank_mapper.get_node_by_rank(x) node = lz_index.lz_node_mapper.get_node_by_idx(rev_node.idx) - result.append(node.position + node.depth - i) + yield node.position + node.depth - i - # other case +def _prepare_structures_for_third_case(lz_index : _LZIndex, s, m): used = [[False]*(m+1) for _ in range(m+1)] existance = [[None]*(m+1) for _ in range(m+1)] arr = [{}] @@ -194,7 +193,10 @@ def _contains_internal(lz_index : _LZIndex, s, m): if current_node is not None: recorded[current_node.idx] = j arr.append(recorded) + return used, existance, arr +def _contains_within_three_or_more_blocks(lz_index : _LZIndex, s, m): + used, existance, arr = _prepare_structures_for_third_case(lz_index, s, m) for i in range(1, m+1): for j in range(i, m+1): if existance[i][j] is None or used[i][j] is True: @@ -213,24 +215,40 @@ def _contains_internal(lz_index : _LZIndex, s, m): size = size + 1 if size < 3 or (current_end != m and existance[current_end+1][m] is None): continue - if lz_index.lz_trie.size > current_idx + 1 and \ - ( current_end == m or \ - ( existance[current_end+1][m].left_rank <= \ - lz_index.lz_node_mapper.get_node_by_idx(current_idx + 1).rank <= \ - existance[current_end+1][m].right_rank )): + if (lz_index.lz_trie.size > current_idx + 1 and (current_end == m or + (existance[current_end+1][m].left_rank <= + lz_index.lz_node_mapper.get_node_by_idx(current_idx + 1).rank <= + existance[current_end+1][m].right_rank ))): if i == 1: - result.append(lz_index.lz_node_mapper.get_node_by_idx(start_idx).position) + yield lz_index.lz_node_mapper.get_node_by_idx(start_idx).position continue if start_idx == 1: continue current_node = lz_index.lz_node_mapper.get_node_by_idx(start_idx - 1) prev = i - 1 - while prev > 0 and current_node.parent is not None and \ - s[prev] in current_node.parent.children and \ - current_node.parent.children[s[prev]] == current_node: + while (prev > 0 and current_node.parent is not None and + s[prev] in current_node.parent.children and + current_node.parent.children[s[prev]] == current_node): prev = prev - 1 current_node = current_node.parent if prev == 0: node = lz_index.lz_node_mapper.get_node_by_idx(start_idx) - result.append(node.position - i + 1) - return sorted(result) + yield node.position - i + 1 + +def create_lz_index(t, n): + lz_trie = _LZTrie(t, n) + rev_trie = _RevLZTrie(lz_trie) + lz_node_mapper = _NodeMapper(lz_trie, lz_trie.size) + rev_node_mapper = _NodeMapper(rev_trie, lz_trie.size) + + points = [(rev_node_mapper.get_node_by_idx(i).rank, + lz_node_mapper.get_node_by_idx(i+1).rank) + for i in range(1, lz_trie.size - 1)] + range_searcher = _RangeSearcher(points) + lz_rank_mapper = _RankMapper(lz_trie, lz_trie.size) + rev_lz_rank_mapper = _RankMapper(rev_trie, lz_trie.size) + return _LZIndex(lz_trie, rev_trie, lz_node_mapper, rev_node_mapper, + range_searcher, lz_rank_mapper, rev_lz_rank_mapper) + +def contains(lz_index, s, m): + yield from sorted(_contains_internal(lz_index, s, m)) diff --git a/test/test_wavelet_tree.py b/test/test_wavelet_tree.py index 8833b84..0ba62ea 100644 --- a/test/test_wavelet_tree.py +++ b/test/test_wavelet_tree.py @@ -12,7 +12,7 @@ def __init__(self, t, n): def rank(self, c, l ,r): if l > r or l > self.n or l < 1: return 0 - return sum([1 if self.t[i] == c else 0 for i in range(l, r+1)]) + return sum(1 if x == c else 0 for x in self.t[l:r+1]) def preifx_rank(self, c, r): return self.rank(c, 1, r) @@ -125,8 +125,10 @@ def test_small_random(self): def tree_api_random_test(self, n, q, alphabet): text = rand.random_word(n, alphabet) model_solver = DummySolver(text, n) - runners_args = [(runner, self.create_queries(n, q, alphabet, fun)) for (fun, runner) in self.runner_functions] - model_results = [runner(model_solver, queries) for (runner, queries) in runners_args] + runners_args = [(runner, self.create_queries(n, q, alphabet, fun)) + for (fun, runner) in self.runner_functions] + model_results = [runner(model_solver, queries) + for (runner, queries) in runners_args] for cls in self.test_classes: solver = cls(text, n) results = [runner(solver, queries) for (runner, queries) in runners_args] @@ -135,7 +137,8 @@ def tree_api_random_test(self, n, q, alphabet): large_test_case_data = [ (1000, 10000, ['a', 'b']), - (1000, 10000, 'qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM[];,./?><+_)(*&^%$#@!1234567890-=)'.split()), + (1000, 10000, '''qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM +[];,./?><+_)(*&^%$#@!1234567890-=)'''.split()), ] @run_large From e7a6db0c6a43eaa289d309f8aecfcf129a579b5b Mon Sep 17 00:00:00 2001 From: prolik123 Date: Mon, 17 Jun 2024 00:52:32 +0200 Subject: [PATCH 15/24] optimize wavelete_tree --- common/wavelet_tree.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/common/wavelet_tree.py b/common/wavelet_tree.py index 9de02e7..b875b52 100644 --- a/common/wavelet_tree.py +++ b/common/wavelet_tree.py @@ -1,7 +1,6 @@ # pylint: disable=too-many-instance-attributes class WaveletTree: def __init__(self, t, n, sorted_alphabet_list = None): - self.t = t t = t[1:] if sorted_alphabet_list is not None: self.alphabet = sorted_alphabet_list @@ -39,8 +38,7 @@ def _left_tree_range(self, l, r): return l - self.prefix_sum[l-1], r - self.prefix_sum[r] def _right_tree_range(self, l, r): - return (self.prefix_sum[l] + (1 if self.t[l] in self.zero_indexed else 0), - self.prefix_sum[r]) + return (self.prefix_sum[l-1] + 1, self.prefix_sum[r]) def rank(self, c, l, r): if c not in self.alphabet or l > r or l > self.n or r < 1: From 7f301cbca40cce8d856ead1ad39935b3b141d485 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Mon, 17 Jun 2024 01:02:36 +0200 Subject: [PATCH 16/24] Name fix --- common/wavelet_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/wavelet_tree.py b/common/wavelet_tree.py index b875b52..fb30233 100644 --- a/common/wavelet_tree.py +++ b/common/wavelet_tree.py @@ -51,7 +51,7 @@ def rank(self, c, l, r): new_l, new_r = self._right_tree_range(l, r) return self.right.rank(c, new_l, new_r) - def preifx_rank(self, c, r): + def prefix_rank(self, c, r): return self.rank(c, 1, r) def select(self, c, k, l, r): From 356fdf68cb4a9801efdc63dd3b01b22159480162 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Tue, 18 Jun 2024 19:15:02 +0200 Subject: [PATCH 17/24] Add optimal range_search function --- common/wavelet_tree.py | 60 +++++++++++++++++++++--------- string_indexing/lz_index.py | 45 ++++++++++++++++------ test/test_exact_string_matching.py | 2 +- 3 files changed, 76 insertions(+), 31 deletions(-) diff --git a/common/wavelet_tree.py b/common/wavelet_tree.py index fb30233..c2e5221 100644 --- a/common/wavelet_tree.py +++ b/common/wavelet_tree.py @@ -3,13 +3,13 @@ class WaveletTree: def __init__(self, t, n, sorted_alphabet_list = None): t = t[1:] if sorted_alphabet_list is not None: - self.alphabet = sorted_alphabet_list + self.alphabet = set(sorted_alphabet_list) else: self.alphabet = set(t) sorted_alphabet_list = sorted(list(self.alphabet)) self.n = n self.smallest = sorted_alphabet_list[0] - self.biggest = sorted_alphabet_list[-1] + self.largest = sorted_alphabet_list[-1] if len(sorted_alphabet_list) == 1: self.leaf = True return @@ -18,21 +18,21 @@ def __init__(self, t, n, sorted_alphabet_list = None): right_alphabet = sorted_alphabet_list[(len(sorted_alphabet_list) + 1)//2:] self.zero_indexed = set(left_alphabet) self.one_indexed = set(right_alphabet) - value_arr = [1 if c in self.one_indexed else 0 for c in t ] + value_array = [1 if c in self.one_indexed else 0 for c in t ] self.prefix_sum = [0] for i in range(n): - self.prefix_sum.append(self.prefix_sum[i] + value_arr[i]) - self.left_indexes = [0] - self.rigth_indexes = [0] + self.prefix_sum.append(self.prefix_sum[i] + value_array[i]) + self.left_indices = [0] + self.right_indices = [0] for i in range(n): if t[i] in self.zero_indexed: - self.left_indexes.append(i+1) + self.left_indices.append(i+1) else: - self.rigth_indexes.append(i+1) - left_text = '#' + ''.join(c for c in t if c in self.zero_indexed) - rigth_text = '#' + ''.join(c for c in t if c in self.one_indexed) + self.right_indices.append(i+1) + left_text = ['#'] + [c for c in t if c in self.zero_indexed] + right_text = ['#'] + [c for c in t if c in self.one_indexed] self.left = WaveletTree(left_text, len(left_text) - 1, left_alphabet) - self.right = WaveletTree(rigth_text, len(rigth_text) - 1, right_alphabet) + self.right = WaveletTree(right_text, len(right_text) - 1, right_alphabet) def _left_tree_range(self, l, r): return l - self.prefix_sum[l-1], r - self.prefix_sum[r] @@ -62,10 +62,10 @@ def select(self, c, k, l, r): if c in self.zero_indexed: new_l, new_r = self._left_tree_range(l, r) rec_result = self.left.select(c, k, new_l, new_r) - return self.left_indexes[rec_result] if rec_result is not None else None + return self.left_indices[rec_result] if rec_result is not None else None new_l, new_r = self._right_tree_range(l, r) rec_result = self.right.select(c, k, new_l, new_r) - return self.rigth_indexes[rec_result] if rec_result is not None else None + return self.right_indices[rec_result] if rec_result is not None else None def quantile(self, k, l, r): if k < 1 or k > r-l+1: @@ -89,19 +89,43 @@ def _ranges_intersect(self, l, r, i, j): def range_count(self, l, r, x, y): if l > r or l > self.n or l < 1 or x > y: return 0 - if x <= self.smallest and self.biggest <= y: + if x <= self.smallest and self.largest <= y: return r-l+1 - if self.leaf or y < self.smallest or x > self.biggest: + if self.leaf or y < self.smallest or x > self.largest: return 0 l_node, r_node = self.left, self.right - if (self._ranges_intersect(l_node.smallest, l_node.biggest, x, y) and - self._ranges_intersect(r_node.smallest, r_node.biggest, x, y)): + if (self._ranges_intersect(l_node.smallest, l_node.largest, x, y) and + self._ranges_intersect(r_node.smallest, r_node.largest, x, y)): new_left_l, new_left_r = self._left_tree_range(l, r) new_right_l, new_right_r = self._right_tree_range(l, r) return (self.left.range_count(new_left_l, new_left_r, x, y) + self.right.range_count(new_right_l, new_right_r, x, y)) - if self._ranges_intersect(self.right.smallest, self.right.biggest, x, y): + if self._ranges_intersect(self.right.smallest, self.right.largest, x, y): new_l, new_r = self._right_tree_range(l, r) return self.right.range_count(new_l, new_r, x, y) new_l, new_r = self._left_tree_range(l, r) return self.left.range_count(new_l, new_r, x, y) + + def range_search(self, l, r, x, y): + if l > r or l > self.n or l < 1 or x > y: + return [] + if x <= self.smallest and self.largest <= y: + return list(range(l, r+1)) + if self.leaf or y < self.smallest or x > self.largest: + return [] + l_node, r_node = self.left, self.right + if (self._ranges_intersect(l_node.smallest, l_node.largest, x, y) and + self._ranges_intersect(r_node.smallest, r_node.largest, x, y)): + new_left_l, new_left_r = self._left_tree_range(l, r) + new_right_l, new_right_r = self._right_tree_range(l, r) + return (([self.left_indices[x] for x in + self.left.range_search(new_left_l, new_left_r, x, y)]) + + ([self.right_indices[x] for x in + self.right.range_search(new_right_l, new_right_r, x, y)])) + if self._ranges_intersect(self.right.smallest, self.right.largest, x, y): + new_l, new_r = self._right_tree_range(l, r) + return ([self.right_indices[x] for x in + self.right.range_search(new_l, new_r, x, y)]) + new_l, new_r = self._left_tree_range(l, r) + return ([self.left_indices[x] for x in + self.left.range_search(new_l, new_r, x, y)]) diff --git a/string_indexing/lz_index.py b/string_indexing/lz_index.py index f3586fe..9b6b173 100644 --- a/string_indexing/lz_index.py +++ b/string_indexing/lz_index.py @@ -1,3 +1,5 @@ +from common import wavelet_tree + #pylint: disable=too-many-instance-attributes #pylint: disable=too-few-public-methods class _LZTreeNode: @@ -81,18 +83,6 @@ def _map_tree_to_list(self, node): def get_node_by_idx(self, idx): return self.arr[idx] -#pylint: disable=too-few-public-methods -class _RangeSearcher: - def __init__(self, points): - self.points = points - - def search_in_range(self, l1, r1, l2, r2): - result = [] - for (x, y) in self.points: - if l1 <= x <= r1 and l2 <= y <= r2: - result.append((x, y)) - return result - #pylint: disable=too-few-public-methods class _RankMapper: def __init__(self, lz_trie, size): @@ -108,6 +98,37 @@ def _map_tree_to_list(self, node): def get_node_by_rank(self, rank): return self.arr[rank] +#pylint: disable=too-few-public-methods +class _RangeSearcher: + def __init__(self, points): + self.points = sorted(points, key= lambda x: x[0]) + values = ['#'] + [y for x, y in self.points] + self.wavelet_tree = wavelet_tree.WaveletTree(values, len(values)-1) + + def search_in_range(self, l1, r1, l2, r2): + l, r = 0, len(self.points) + while l < r: + s = (l+r)//2 + x, _ = self.points[s] + if x < l1: + l = s + 1 + else: + r = s + left = l + l, r = -1, len(self.points) - 1 + while l < r: + s = (l+r+1)//2 + x, _ = self.points[s] + if x <= r1: + l = s + else: + r = s - 1 + right = l + if left > right or left == len(self.points) or right == -1: + return [] + return ([self.points[x-1] for x in + self.wavelet_tree.range_search(left + 1, right + 1, l2, r2)]) + #pylint: disable=too-few-public-methods class _RevLZTrie: def __init__(self, lz_trie): diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index 9b6545b..b6baf2b 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -71,7 +71,7 @@ def lz_index_contains(t, w, n, m): class TestExactStringMatching(unittest.TestCase): run_large = unittest.skipUnless( - os.environ.get('LARGE', False), 'Skip test in small runs') + True, 'Skip test in small runs') def check_first_exact_match(self, t, w, n, m, reference, algorithm): self.assertEqual(next(algorithm(t, w, n, m)), reference) From 7d91b4c6d4852c82018d5e128bfdbbf815728d28 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Tue, 18 Jun 2024 19:16:04 +0200 Subject: [PATCH 18/24] revert LARGE test comment --- test/test_exact_string_matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index b6baf2b..9b6545b 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -71,7 +71,7 @@ def lz_index_contains(t, w, n, m): class TestExactStringMatching(unittest.TestCase): run_large = unittest.skipUnless( - True, 'Skip test in small runs') + os.environ.get('LARGE', False), 'Skip test in small runs') def check_first_exact_match(self, t, w, n, m, reference, algorithm): self.assertEqual(next(algorithm(t, w, n, m)), reference) From 07e0a6c2397e3a12f85938849c1ac38ed320bd1c Mon Sep 17 00:00:00 2001 From: prolik123 Date: Thu, 20 Jun 2024 11:25:19 +0200 Subject: [PATCH 19/24] test fix --- test/test_exact_string_matching.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_exact_string_matching.py b/test/test_exact_string_matching.py index 9b6545b..3138ba7 100644 --- a/test/test_exact_string_matching.py +++ b/test/test_exact_string_matching.py @@ -18,8 +18,8 @@ def lcp_lr_contains(t, w, n, m): def fm_index_wavelet_contains(t, w, n, m): SA = suffix_array.skew(t, n) BWT = burrows_wheeler.transform_from_suffix_array(SA, t, n) - RS = wavelet_tree.WaveletTree(t, n) - FM = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n, RS) + FM = fm_index.from_suffix_array_and_bwt(SA, BWT, t, n, 0) + FM.rank_searcher = wavelet_tree.WaveletTree(FM.L, len(FM.L) - 1) return fm_index.contains(FM, w, m) def fm_index_contains(t, w, n, m): @@ -65,7 +65,7 @@ def lz_index_contains(t, w, n, m): ], [ 'lcp-lr array', lcp_lr_contains ], [ 'fm index', fm_index_contains], - [ 'fm index with wavelet tree', fm_index_contains], + [ 'fm index with wavelet tree', fm_index_wavelet_contains], [ 'lz index', lz_index_contains] ] From 0fc0941e13dd38372cc4cb6cb5013d7801d69859 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Fri, 28 Jun 2024 16:00:29 +0200 Subject: [PATCH 20/24] Add Naive RangeSearcher --- string_indexing/lz_index.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/string_indexing/lz_index.py b/string_indexing/lz_index.py index 9b6b173..73d2aa5 100644 --- a/string_indexing/lz_index.py +++ b/string_indexing/lz_index.py @@ -98,6 +98,14 @@ def _map_tree_to_list(self, node): def get_node_by_rank(self, rank): return self.arr[rank] +#pylint: disable=too-few-public-methods +class _NaiveRangeSearcher: + def __init__(self, points): + self.points = points + + def search_in_range(self, l1, r1, l2, r2): + return [(x, y) for (x, y) in self.points if l1 <= x <= r1 and l2 <= y <= r2] + #pylint: disable=too-few-public-methods class _RangeSearcher: def __init__(self, points): From 92c15d9a31667e0ff1993749e3422c95de93d906 Mon Sep 17 00:00:00 2001 From: prolik123 Date: Mon, 1 Jul 2024 21:23:38 +0200 Subject: [PATCH 21/24] Add Text --- text/LZIndexFMIndex/LZIndexFMIndex.pdf | Bin 0 -> 546098 bytes .../chapters/Comparation/Compare.tex | 3 + .../chapters/Comparation/Sections/Comp.tex | 476 ++++++++++++++++++ .../Comparation/Sections/FMDetails.tex | 0 .../Comparation/Sections/LZDetails.tex | 0 .../chapters/FM-Index/FMIndex.tex | 5 + .../chapters/FM-Index/Sections/FM1.tex | 107 ++++ .../chapters/FM-Index/Sections/FM2.tex | 57 +++ .../chapters/FM-Index/Sections/FM3.tex | 37 ++ .../chapters/Intro/Introduction.tex | 6 + .../chapters/Intro/Sections/Intro1.tex | 44 ++ .../chapters/Intro/Sections/Intro2.tex | 18 + .../chapters/Intro/Sections/Intro3.tex | 39 ++ .../chapters/Intro/Sections/Intro4.tex | 451 +++++++++++++++++ .../chapters/LZ-Index/LZIndex.tex | 7 + .../chapters/LZ-Index/Sections/LZ1.tex | 404 +++++++++++++++ .../chapters/LZ-Index/Sections/LZ2.tex | 225 +++++++++ .../chapters/LZ-Index/Sections/LZ3.tex | 0 .../chapters/LZ-Index/Sections/LZ4.tex | 0 text/LZIndexFMIndex/config.tex | 28 ++ text/LZIndexFMIndex/main.tex | 23 + text/LZIndexFMIndex/packages.sty | 44 ++ text/LZIndexFMIndex/references.bib | 286 +++++++++++ text/LZIndexFMIndex/titlepage.tex | 29 ++ 24 files changed, 2289 insertions(+) create mode 100644 text/LZIndexFMIndex/LZIndexFMIndex.pdf create mode 100644 text/LZIndexFMIndex/chapters/Comparation/Compare.tex create mode 100644 text/LZIndexFMIndex/chapters/Comparation/Sections/Comp.tex create mode 100644 text/LZIndexFMIndex/chapters/Comparation/Sections/FMDetails.tex create mode 100644 text/LZIndexFMIndex/chapters/Comparation/Sections/LZDetails.tex create mode 100644 text/LZIndexFMIndex/chapters/FM-Index/FMIndex.tex create mode 100644 text/LZIndexFMIndex/chapters/FM-Index/Sections/FM1.tex create mode 100644 text/LZIndexFMIndex/chapters/FM-Index/Sections/FM2.tex create mode 100644 text/LZIndexFMIndex/chapters/FM-Index/Sections/FM3.tex create mode 100644 text/LZIndexFMIndex/chapters/Intro/Introduction.tex create mode 100644 text/LZIndexFMIndex/chapters/Intro/Sections/Intro1.tex create mode 100644 text/LZIndexFMIndex/chapters/Intro/Sections/Intro2.tex create mode 100644 text/LZIndexFMIndex/chapters/Intro/Sections/Intro3.tex create mode 100644 text/LZIndexFMIndex/chapters/Intro/Sections/Intro4.tex create mode 100644 text/LZIndexFMIndex/chapters/LZ-Index/LZIndex.tex create mode 100644 text/LZIndexFMIndex/chapters/LZ-Index/Sections/LZ1.tex create mode 100644 text/LZIndexFMIndex/chapters/LZ-Index/Sections/LZ2.tex create mode 100644 text/LZIndexFMIndex/chapters/LZ-Index/Sections/LZ3.tex create mode 100644 text/LZIndexFMIndex/chapters/LZ-Index/Sections/LZ4.tex create mode 100644 text/LZIndexFMIndex/config.tex create mode 100644 text/LZIndexFMIndex/main.tex create mode 100644 text/LZIndexFMIndex/packages.sty create mode 100644 text/LZIndexFMIndex/references.bib create mode 100644 text/LZIndexFMIndex/titlepage.tex diff --git a/text/LZIndexFMIndex/LZIndexFMIndex.pdf b/text/LZIndexFMIndex/LZIndexFMIndex.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b55c83d37a2ebe9bf9a4201f1e7ccc6808bda8fe GIT binary patch literal 546098 zcmeFYWpLy`)~0J}Gqu@nL$#Ti?KU$rGc!Y*nVGT8%*@Qp%*@Q}>z4<%V_*Vk0%)WGOkW!`f&f-#W)@}ujntP7pi%fL zPtVLq3*h2{HncMMTPfiG)biC4`u|Ty6kKc!0W>OdhDKlAFtjpuFaglfesx61)WX5g z?rUwK>tHBgsBdlXRqUT@8C^R|d%#~6_^mA*Ev@VUUpOh3xmD^8w;Dhz5xRh7bhbt zgZ@_uIzu{UMkXd^U414)MixUudKMNvLpFK_Juc0^r@`LA&QRA9`ggj%o`KGmv5uaO z4l5LXvzCv@z%C%xkWHlxQh>f4IL37WFtdgrK>iLMGj`fVzKAbUj0c$c2`fUcM(5qX z_*3ELfS#7_WAg*vf*y32lub-7$lwpll@$h25Z*AW^VT}87`z7(Q05s5=-*TSuV(*G zUj+ZNmjAK;qriU@_>ThrQQ$uc{6~TROBDDby}vy5U&tz@Yiam}Y7DghWGrbLLn~f= z2UBY+0P7c`{)57#9UUybz}6nXLjNx=lhHLcl+e{PwD>3RNjZLzpZZtfzw{^n1%!W5 zkM7?r`hT3wp z3j9|${#!7W|AN*pto*`lI=a7r{V$SbWn}tyw$su10=umX107u*9Sb8w18)%8v;-fx zA%o^FFjW6($f_;=Ty8&EeJ(#2Zy~>UL}QY$Z}J$sTmc0L1!&|jeMVJ%Rp|zA6QnOD zz2NHL`AFd44}1$$l7Av`VR~qun2NNvS#;$M4oqIwk|(NUyBqoG162L;!xU@D{AJI1 z55FON`-a|4fo-i?mU@Osgf`Y0?ckI0>9pLL`#2x9f8lxTGS?<`(3cz1PGgMoUiUP8 zUvFKSTS2eljx?;2t=F*BRbc6}jO8h~Shdtplzyxs8QV5XuT81$=-P|PpQbcDL{G*P ziBdbXABN_^YUSR8o=2&6|LjLCUR5J``EYyZcRvwGuI~6!MJup;Hi8$5Xjtl0$xT_? zpS=$~RmUEg$8}ieqNx+3M3+^=9!X>bV8WAHg(hv80J5#ULnyTYD-iWrR(o~fA!qO+ zm27G@@DW0D;yK9in~&B|fXemY&^Ihdo}I_*B$!kM_!+{K}6CYs>Ejn<*QzMZG#h!5xOR3 z*CrWcryX7Khlobec{OH$^=J-K@*r=rVlhcEG5#p}{ucX&-HGR|p~K1SgKz5xv2?BT zaN;M%^8k`HU#iOP4SkcW>r3JN6I21pj+`{-5t6K6V@NO1WbkJbXp!J+7LqciaP080 z@2*&lBvSjTOXyKXl(!2@l25ZCQ}h9Zp1h3}wH_kS!5jH{T!AwRM~Y=~c}h9a>RG1B z1Mo8QTIVvCT}f?*@wk3%jLX?j7I~hR#zm`Gv4d!aWK}tH?aCMsd?RN6D9Ng%X8-SS zQxJLXS=@qOV{|-_j1VTis37#^I%>c){K*O!80CgP`@^oj0fhmzb^rf^m%oAT|9hMA zTU$98S~>i6IL!aNy(PZ_L|s1XzhU5C7sUJ(C$g~7{|yDd0?MzV0A{v-IRZIDduvBK z{VzYn`d>f(r*HV{NB+$*{H^+b=@|ay2pH*D|IHBu#EQ#f|3vh@e5GLR0p5Ebn#Mq8 zQp$H|!?4dXvQ1i9#thTdThqovl3%)|Ds%O6%OGwknfpeZAZS&;0?EuJr*;0sUT;A~&}Bu0bal18M|Wsii1ul0yip zBHL4Mu_m6`OOI#P;g^|M$8UHa+|5zJ+EaG+Beb0tldt|8Q3x`*20zX#t}R@nZnE4q z9q=vl*!8?15OGL!sxnYwC}b%xH6e)rsKXFz(;itnzlt5O^pNBCV^I!Gg){|_pu*>9 z@j@{ubvw!c@l><@K^JuG(BaK6I$>6-h83hjiBiW47CP8oMj>stN~SiEHxQ_y|Gf7Z`B;`J^C9aeKl zQp#Jq*~Pq7bev0>$R0;gMaWZ|PiSl3@;AOfUE0<=|94l{-;3ja<6P7IV?g=W2tfZe z9?*R~@A#MbcR2j7v4M_`h5jE8VE#wsZ|PzvqdDF{b#XBP?%UehYJLUuV@buv`D=l> z0CR@AxVXUe9UXn1_;t8Y?ppsLfvIUfHeK7&Y9<>iy1dM8{(H$!1yeOe;1n$9TkP$UW>e`%{bJz^sc>52bBJ4 zt_u2Tj`_LxV!bf^zKMnmLP60rH`9ly154jnC#dt0WL48(ZsO>aCH#aBBf`jBCurk)a+<~5_L9?E+-qh<2K2P z$gdy#ME~T5R5jkx{v}{J{z*P`a~Yi<7oX2Dq|iU}T)Ah&bJ{8Qj=Tu+?&kHOc(hsi znF#W>t}M@?ZXWeX_SBf-H9}Wf6jEGNI{WDi`ynd6lDd?lwvq}u_ve!w+JW)idMmRI zdg^AA;SP`dGldN3^z$W@$5>e0*&7#TwlmOwUwhN5%=$A73Axt(QO`i%5Q4U*)(%*u zS*01ct&EfFGpHgYESF$t;TdoF^Wg^XGt(BtXN{K_vQ*rO_^n&Xl$(FlgDA|1JnPuF zAiQISrrMWA+DZE~TOD3484FhfY0VUIul475X$TCM3k{3W4W>BIT^$WF zYx}4n=guTyKY3zcEgcaa`EKW)7=U5lX8#ntPXw318n28qzdmdv}t0C4v5_HBbZVzL0Z(h3Zz5ofRQ^0{kn}}RGqg(8;>6@MWcIhJgiS#1A*yUeZIYZ!~(=8n}CpCQ)*mA!*SgREGuilwO`rw`dn(J*33m$6f642cRsDbmp8t) z*3$sH0M?C%(ihK3LYYj(ZVxD8Ob;DhhCLX9%iyzOQ26)j+k(q&QRG(=w(f$DOgk?2U6b2hKOHBYbaxhCtK|@2AELsnI2laMwJjpJrVhn(U|of{z-@gt~b~B38v%^ ztRhZWCydu-5E2gCdh1IRa720P5zVrx48D7KG2#-cNN9h4w)wW&jJ@J?Ail0G607Qm zhITENb4lNuvWi)Rx*djpht!qqqZ!?wf#(`HpdompE3hj)5(009CGDXKmC2NOtTk30 zt|aSwa^-5kS37)EfYiuB>BnyaDqbeXMr`XHYv5gv63s@WGaZ&HS(Dw5ksQ3JFlOUH zq%0y)m1)$mTwLkx=hJJQb2rrJe{-jR1qs<{g335Slmur=BQ%H?f@*1U$QN0@1MT<- zzvxUDYutbft|cHh8zYchEfQT8EC4Mk((xc3irg6K3>t#+)kVO^TZYv{gj1)?NnvW$ z#T$Yf7MF_E)grtsXqCe4_8~EgBj#@8Cp6F>3JmYS$(qrVLwizz%+JP0<3>YBfs5m# z5DMGpbxKgR`d!9z5I-bm~&66XBMbCmR zAR7H=J9n>cYRQD-0b;jA%`wouf;oMNG}!&p$w1uVxF6TsKy(yYe~Cxb4sdNt z_cJ17aSC~BW41-PG7y=40hbo<Hz%qc2N*!EOSu73KP2@Z!M{ZukaE0 zcvc`+Q^v0Zwtda1<%w;o2U90X=AfWuY~wu9bM0xr23-qOe($A7U2J}m{6cUo^sbfu zjLW;eG!{L0SpG6>2=-TwW@WWT=HUR+FM1E#AQ zP4@|AhJc~85n&V7BiEH?x~j5R0sd`Fdh8FQj2_`r2Go_4n(HB=?jlc78W*-p#A~lS z0*yfG_sTbN>9RVyAwT)?KhMn*%o`Y0!r_rUl^*NkHBBIUmj$ukx76Vy+~Wn{UQZlk zSd;n9BuYnpK8IRfNk>oB5F$?JOFscdXsJ0@`V;R%OsM)`zYsmD#X<8u`^4~^bHw_` zOVQnPqq4ryw>-P!ki)l={!u zN@V|#BMUJKg@K8S!ehr*xPfBuK=+Aaws0)W>PxHd`*z1K;%=U>ZH-d z;9Z>mY*?5*&Byi`K7^2;_j6fk77x0rJxJpCel|YfIFiVrGo>Bla##O&xBX07E^bsE zkE^46Txq?X%6z%h`vfyzA=`BT7yBK0!Tf6tlnk*pU*Zc@4)%vxFtN` zIQSy#Aq`jOAK0y2px7)66l-)G>L^?}ZlSN$&{Sm3f6GE3SO>a&xxX?Ge4eo-Fzk0u3mko>F^=Xk|}8osobyr9N1(h@o|^ zB{B#E|E5Ov=XoCXLI>i>ZwK*pR_QQhY09PojE!<53VcqV&iZf3j~b(?;J4`UwY`to zW#NlF{KO2bZR6d-pi^s?Rp*vkqk(0j6&=Bjlx$!X{!!oeSIVw=+%d8;MF z`4pk*OEX7^&&#?85(%ZWBYUatVuz}T?onaTjCnY2E49X=9cnso9m6!?2sRcbvm3+I z1}=NNN`$v@{Pwpe0cX;j9P#F<0?uk9syFA52KAr{!z|9a0R2o`bCuaurVXY1JX}{L zOSt8E0%uAM`*i{ye2x)HNXPEbIVHfhtsLsgAZ%Tt`kWZ9UYbM&0P-hBaNl8V_02uT z@apU|frv22wbNkMI?a-&fjEeXh$TZ4JY7O0iTU?aeNI_;5hDU9CNx7`ghKdlBp5{z z(WU*!zJT`Ns6g7abhwdL57j4vvP>fEh!xz9;XV3njbDkp+lVxQnFG>z3eO4_=&KCB zM31}MPp@fx>iH%>AUczVc*0@6O@PP`##AB-y7%59QLR{u1{I0~YWXs*k8R$J>)sM{ zi&4S3(mkMiY)esJnl{pwr&TMYj6{wLk4?$!Q-g!0=Xcx-ff4rX-&Hbo$hE ztq`vGt9x2RPm>c#TW>#!Y(>|+&CU#1hJLVcw3GsYNu)&uInrhqtQt+ znb@^kTPS$tz|Rnhcd!$1eu40vec#M$6ynscIC>oX+W|8K!3J%zjcZtn{KOAPT+74^ zcnbq)dz&8|)!z|3%)DOViSx12eYNgQCyGI5obd4Im~0K<_Zkg`W13TiYW(XYdA`$b z2%`ucHzKCC6{@t95&mFW&rvpZSuM}(ZWq)Uk9+ESYJN+tqm|1SBoX)8<~mW)I8SMv zN*BqZk&ePZgC5sGtlsiEro%hU377<;Qw++N&|l=ZAr`nlG?lI!nza1>uo}=;`TZFP zjC`DJEBAZZ903`Hf}jRbZ9^j`DpC1@DjQ|@VS$I0*&!#;NpA9TFN{c?%dVV~6nq8m ztAxURK{@yLoA1rhdFaiLTaTU+1?OV3IBNG9vH?LmxsK1Xh0BRj}%TTC{vDDw%&9XvG0f z;yAh+BZ^8pMdJwMknlN^r-7roRAKA5*k-WV zx@wg|{Px_Ozlv^PeSJY!$DKqF8ijnh$z4R|kc{LoJU|K?d_4)jIJhxxjLIp2!8-L6 zV-Fd1Un$fgBfQxb>NHV^9yzem*|c3%y3)f=^?;PJksoBAG5Z-W5RPF;3l+NqDbm}8qrIAzCD}X85^K7Se`^ezU#?`VZoGzXjmVtyToZ{QC;OWrsScbQckgJ8FEe-IU9lW zludInxbeHk%KB)iG~EO8R3qm>+A(sW(4>t2?2V_nN3_%$6Q2F^zPCP8UZqVU42 zRiyS?l)n6TB~4BF%0rajzo-+72d7hDWHPHh>ow;IM;wsZNJ|8^2mP@x@B~Rc&%H0O zF4$C+YOTA2-%8~QoK|e?Jl(W0+!>b*5<=`YD2uxS`UE6d-@;?qe8y6p?sI)xts3p} z)csN;?9B>-ZgMb+i45V)o`z#drbh&X=mTqE@>jaJnjgygeuC~fRS33U;*9Y^w2c{*Fk@7p*N69qW^EHgOF( zq%LoGR>WcXu#{XF$^ z)*sWFDBPZmT~o-Ja&gF5LHwxJU>tb*9Me)s>u2lAjn8n~KN@k#8XP*gjAuWU*c*g+uK22?DER*y&wy={Qs%r#1t85ey z-YPRjKU+^`J1fnlvt|=8k4oj<_)TiEg_~w22R(F%h-pb-xn=bPS{Ne%EQt{+GL~B> z`596faH@mAI=iuu2@Kk2)Y}m@-9pnODWm6?D%I{dRcj|0Dsd$MQTvvKrTszf!fy1! zaoJ`64dP}OL~ugL&h-K#zQCu8A41u%+*fGu&fVmiOt|=mZS$Oz{a7c#3asI|D+{Dr zFzcZ9yKv8JK2}BzFCM+`meByq&#QuY0;*_1J<70njhEbF{$70TX}$-T`8A+s#xoO^Nzj@idB6P7}gDJW(`8Q1wNkY$^XoA471 ziP^}`Yc^qHZ${c!d@9CxFkgKm!_agQXNlc+v+P8)HW<&&hk1HM=*g_Q{zE!i%b!K9 zIgkWi$q`y4Gfb9RVsd|HB>pOD@xwPRB#;W}4=s_An^9HuOddzT(T6naLj-bD_vNH( z%k1J(N1V$W<|2ir&eul00TwP635gLkq1zGW)(`xC1t2scZPyT(qrpT=%)d^B>FGD;oi^qP;h z{)(oz**L>@@Q9=*N%sB^=%9KS;ZddUme!nEihCIq3qMCIHnF9{$(!Vnw;Nk}i$?Gz z9$0Jf4?{JAdLVt?Y7K^5TgPeAuJY5uU4;77lX;Dv^i+^F5_?iPyxmi}!rw#VmH2E| zb7;)vjCL>72K#XZBqv?(elMc}nN1(2q-7in5W~3E5W4V@Kh~mPQ09o^CZ3r6bufN+ zN%r+l{x+MwiqNOW9cgN>jp(pbfe=Tv7`jv(lv0Y|xMqTLW53bqcV@dU3WsIcz zs8AEkRlprQGUJZS3XY(C307srNLn({|FWK{^AT^Zz6{FTD>l!_Vk-0Cq_g*Ubcdt~ zj7)l?YfC}oC-LqJ5ek`H#av&6m5;73f{VAWKE*os3R}*Ga*WT!dp+k1WRV0IuZ<-7(81 zGO-&8;^Vkry%xK6;eVJ~s_+jz?jA4_?7B;se+In|1Cuy)LQGt|UoI~7(?p!LI`te3 zF5HAhe+=eKsHwGGW8b$&-l?rsyA>;1jM5f42i`Ltp|d?l z2cIBeu|2VpYCf9Cjxr4n_$SRfZQ%W2n^gELnF~w!ZU_G1|=G z*t?3CREw*>T5E>`SDt)1G>*zV!9a?xx``mgWlkCPZgx?EAmXA5c__N2?%x0BKz&&U6citB;c*eDc7_F91Y|R8T|V>b{;VVx za@51PF*yf?7;0Z-Ge2CJdBp7p@(T@0=v`LUty~O08YRL+QIn=I$~ErKxQYI-I&xq= zo|Mr|jBoRz(f9a-c}l5F-B99k>UBZs!R+Mjq4lDX0tP1(G;z-$~b8IzZACc0cs1N^c8CFo&9_V>03=ps1@U6<8V#qXHK7 z@W}^+hm;n*8?DwTjYW@Y4ycK88H-OtU*PWGvEkD8aLT~??YhT>gKd#nn%zpQR2gTz!re|P%qYVg|zQ9c6}7MgmLNXz_AI7k79lR)cTYS*9I`#{Nb#O@dS65zU01Lb=R z$@>B7f@-$PvLn66ylD5o_F*M5eXcQUdxmBMqbNx=z|^5Hgu$6`0uozA!A)f= z+)lo?Yj)BBOW(Orm44@Ee{dcBK7@(0)81;6q2J;hTMf#TM^>ASU?lq++Jpm!DD<1ZegDA=|6-GnZJGId(4@k=3I|DqCMp>Pos|^N>E*$J0iK zI>YK@gc+?q5;a`p=Vd_bTjUQlew$I6QAoBD;W5E9yv`?m;Tck(v&H#mH$j%sA#f_q z(X;#p#)QwuGxmm|W`X`XX6JKdsU}Q0&mc}Vp9FhylGOT;U${)KHW|o>{7nFx^4Qxd zS3H8u_NSJa1MhBrkSwJotogi=Y!v%)Chj)J0msr$kCG^~V-^T>4D2-X!&CaXs;55d7TQ+=p{jlg z$;Q`s?O4PH&src2jTjoU{UT<8^>ABz{A%%e3|z<*nrYR8Yx(bfO&a7qZb@@38F43j zK+>)@uWV~CkUSrgP@dpq^`4J7-+FKBU)1W_N$o#1(Xio3b{DM}YVh@4l#FQ&nnTZO z4htjGR@Q6AiXc{?t>=1=I<7#tj~$NsH<9pof8TnB5J}elrfN9&Mgwkkylnq|-+-D@B4s3A>SpfN3**le->F$-X4T>pPP0Okv7*4f0+8xj|A1yO5I?C+blzD>!yh+wGulN z`;Z-^+MBL@hpM8iM$OnbY00M;H`dQ4fYU zj6)GXB(1*?DnEP^(0Qkd%k+g3Tz5E^f%1o z^4X0Kin;Y5%>8!(tORC1wgAXzDO25P11BpOVq~ojZZ|gz{Zt@iNVWuM3$DfR)6mRG znjE=?pciW9Z;mmgdUx=@TMaUPn&Xh{Pkt8&fIkehk)JcVIxP~^Anq268EwT(9R$%I$=B!G`?g5bLM*Nn5cfaKL_xIh7iyPqa@y%S&J^pK#-rM4}`^$LVF)+kyAW zQf{n(mKad`^W*7|Vr=3ktJvieDG$H{5I#h+ys(W}00^ zZAB{Mh|-B-H`Ke6R9-AomtE2*7}0CB4O49+Ih9t$b1#b%^=j3onYm9bHt(TfDED3x zv}s(r*T(0GpxL*u3v;^c%S;}>&+H6I_mO-@BsikuhzU zarA2Z!c*s|PSTKqET5McsjMJCfsueLBwxo~fW%O8)Oj=4+Y-D2fk~XqY$i|vqK#15 zXg*T@i6%rOsR;n%8`FgwbVaxkRVU##iushd4#(J9X}fvVpA=WrOZknZeo*R4oO+KkDpla}}e) zd`LV%`?Q0nSI@Ofs!Ry>J?9fX%Ess(i6*cVX&ZI&o7b%AoL?T59W9~>J1}*X zk{(*j3{jJ-R(K~Fr5bMJB@k$X7k8UMqq6lm+yV&V?tZV6xCW?J()_YNyeS}`%tmEk3hu9wi+}ZJOA3sZWR#;w@j9$u_ zrH>EaOR5zG%}?Vc)5X$8YHOmT`Rr2SrLuxFhs*&1)id_DEPJw@#0SL|M(A@JNN?>< zXfXD&E0B!u8fut|x(iQ^>w-}hsBy~thgV``+YHyj#8C~-uqTZP2^7fZd9+k`x8u2v zZmEE~*V5jUrB*hNo@+_O&0~~jXGyD#i{-ZHJ2OK!qlTRFUcEYN2YkCr@nJ3A!_f%~ zp6?nXdo}gkRyF(a9F769o;W8e*}N#I@H|de`8#Z~5C)(oEEFU5WQ*fFO|l=C5f|@5 z&o^(NM2cWpFf%4cwR;0hrZeRrin)CbX5QO39+@k^kp7RGH{MuavvD)}!Z&nZ;6ZA3?qihHtmJhCvh?amMX%zh3FrMt=8 z(pfu)HK1%TSy9F9KhLrL*nl*wN5jJfYCoieG1`ZGj?fD#L##y!yaDz zDg>qfbrhJABoQ^e>6)+_83hFLilaFN%t1OB6pP$og(@&zjg%c<`!Ybub0q!@0q5Ri z^8qMVncG6vj5Ig98B>;y;@cy=(q53gT^#a{=LMrS?hQF9zbmoCZuaviEz$w(xo$$w-Q$aC1!jfk<8?SRmJ|=e z=PfSYL>kmv8jE)s66Jo4%q2pGH!6z@CbN?}Q0;Fs2JdhFPjA~}v)kz1i8P?T(@UQ1 z5EUsHcT${zzk;!Jci8LibT#$|S}KCw>jRQVBFRvkfSgSE?(TFzIqdDQ7!vYpB%4Yg@=M|fqkfk68< zYqOCEOe*k*n?sP~s#1IUt)QQL#Wv|L_(nEiS|^rVB@zOVPSoS$@(zp)!S{eOXl9YW zeKN6+EG|VySvUuO8SsqAM7bur@{2S9E1jOLYv)p$n;~?IfF3cRyorg{d|&{Vy3(@; zK^QfTqVe7@W2Pxxdj4_mE804XyBMw8^XT3A^^o1V+-R@T0#SAzAsA^pF{Go(F<`lY zcT0z{cDST3|6L5J2Js^;m4tr5=8H7eMlf;LeP^uw8Lar_(+hO&8L-?2PB@eT>-$)W+nPP0V{} z(?gw2YC?_a(|4HYq|X%?qz=@vy)%aEg2@F>1*IYPq(yIO)C#-$=mkWi>{6yz5AvK0 z3o~aIf8cid-_Vi^wdLUQSVrIo*qz?rkMvGTUg!$vX5IDR9vWbXnC)LviGPzW80ODY$%SEAAZ&JN$CHkOW%xi^5V_5vOR#N4w=n& zc?wBgId7-rwr=s>)FWu(TN)IF!hn@tj$PQ}!)%F?!s=9K?9SjtOQ;q(1Zuc|IXHY) z@iDPU(RX=-Y6>pjIRi@Eea3PJk+ZLH8gfwL@D6B-<%SBR!?xLf zW){i(h_G^YC>OqTZ~6Vi3LVxI1aly6DkyriwyOXE&9`9rYWT;;EG~!Hu_|ZHp!*fe z9k1RqGmRw^vE9^>>V-$3s-^j-0#r(AXM`D?J8Gid2r!LNhTB!gU8D%Fc*bhIlT_wQ zT&ao7TOF6cy$zOQbR38jGMH~R0jsNXq?@5-Gw$$;nX?~TS>&48P@XQFX%nJ4D{p4q zpY3408)?v0CZSU zB+kNPvdI1LRXbAB9oVts0_g&V^Otc?I#BEZEf-C}C-q&*3^1iwWx^L$A7WlKjM`*b zHccDEm!3W{q(hP=!OnIKm-C>Xn8S(%l zy}Q)fkONu5xxBY@@D$;m^FH@m*uuS^e{nUyXA_3sJvWLgs4`7?S%TX z>IjVTS?(H?7OGy^mYJYn5Ue`foFNmHN?v%8{f&rKqjS}uW=Wn-dc71S=SZxv7gKYzGO}|q{ zUv>g^Bk;ZRtD;FP&X%^|SzRU4!jwOlmiujdYYr*~X`>u#6IA%fU2GnoaUW z3qLO)ginc<^_YVsf5rvBDl_m$f;i+^-+FR-$@${wvGY&}Y6hR<_>UK_fHIpE8X13! z;+9~YjINaRady=%<(o1dW!=bP$jJAS6LigjZdLEPR^pF_^k53q0D58Ro8ucdOIZ|@ z>|%D&eB(|HbpLNt$Nx@d`rk|)|2v!iAJgE!QpXwS{!Z!tr}=NG;|wgUEdMs2z_Ll- z#@ZT*3VU@x^d`)5wc+n4jiljE++N?;w(BQ@y|LALN#jG({CerBYJ0{8-UBfaN^PUP z?6m`}VV+^0{uT&n7}+p=O%*K=#!<4ah-knc2{=v4^K$`MO^IfplLBHOLwN`w0%Y$H zAD{1mC;*Ggtz(0)tZ|Sm+JZM{q%0$S6FpP&V=$T8#%kwU#s;9Prsig-CbrfYw&t7Z z_t`uPGiaM?+Hc9KE6E`K`>2dW`+dMDJ9Z^Nw(=bEB>X?TeM@5^^67X6W|rnAmgA9h zjK1C&JWB)7HZ(F+za{b!8h|CMV6_5;ON>*U>l}UV zr{(=g`Q?4iZHlZM^qKhaeeW%b3#gwPR}Mr%V*Eq5b!BM?b0rmo)c@QPM?+KM4^Yhm zae6DZmpHUk6$|NN697$e3J|-zYBR<WqI%}^*CuSu)y{&K%#DV3QOLKmtPCZ3QAyk(RN7o9+CZ;uUm1Z! z{C1{sCcIP+hTWCijwB)0ha_-FveoiJR!)7Lw`!+A>tGT%()26VUBw#pSr^!7HmIol zMkXU!V{rIqQH(9|8b6F-03+V{!H>X_q>P$#B}D!tHrK$^qc`P3ei1OP=8vZNbasyGZz1Zv<7k zRdyxQq1BBXxD+IU0Nyrh5`s7}tdJlAgRW#Hx=&245@)D5!MS8soenavEzT-W3->oH z%dB{rA2LlOC+3&)zhV%A!QAK&6kCLGoMP?}M#N$A41EsVb&Z_|a)*GF*vP$He6LbR z9K+D#EV`9t_X;1ccnC1MnQwl6%w3x8v*gzC9135^&EHU%uti+;)3uIx90izULF*4W z>4E*plX8G6;7i!oO!@)E28)i}ppzb1-QvTb z$WmIiWo;nHHYiT`%W0J{KM0Uyg9mol5Z@md^rpbhyqA^-P)>R zORMshE-gdTWdKc2!l@xS7NPk#Y5c5IK)X)iCY%Oi^X^@0Vn&R8aZC!g)g!oz|IkWk zKGnDSBP6ctRqVJNf}L5YAgcI^`f3kVAJ(#Q++pmI&Ix;r%;h*93pEGYeK0=3$wkaEmubY+s&s75Cbc*`}t`s6+hi? zFyn2Vj*Henj*HVNX*Vmsf30`dfeXt@nPxH%R6xPq&&h*1#Rf3#Xh-d@nRn_@d8}c& z6P*}Z)vw(Kq+&11chZJlu-*0)0ZlTFFZJa~WeqKKfpd3KbP&-L#C@j?DJCla1S5Dl zK;(T}0b)XF4@M>LXIlks>oHKri`-6&3p~0p8N~dFz$JwYvN+~vq_q_H-D5heP3Du@ zqse9P0QBgjhz&2!>ldsSCOh4$`dtRg#Xy#_q*#Pzi>#V3^9 zgbuQF7zCa~tf*1g7N1UGm9AWyk2_u!ucsGmCc_43gu;4JeS>$V*o`15_cc+Nzc_%& zKTxq)#!@Onmi)jgeuEDBxX>Henstp_{D`@yJrXsGeK~7kvE95^`@4PBx&6DY6co+t z>Rq?tJR5!ah4ZMM?(UKKd$ZQw!J9BE<#l!cInQFu(Zz_r9DieAr@r??;8VTVrv_9}ZtsUgKEmR@Kvc*E<`t$9K88X0x^*to;x<&&%wIt?xW;Nr3a+ z8m!NwcvuNwa%V~f$4u137RkOVgzDe@D@eQaeG|xi@iJ}wmvL8l;-2^Hbc>p_?GzZ| z1P$V@aI|^r0<8CL&ul}tTCt9G&;onx18e9ohcm|E>Z@e0`GUX8RM8qonJt6U2pT9G z78S4;6i*@#{ZIfIb=}-Q5zZ*^q&jmMuY=R6x}5aYNZZyNA32S81W&cno~=t0o>w-X zo1X@qBHPsh&tPJPhi^BNe;YGruI<0>h>=HkSUgQt3D`F=q07?}pV%N(LOODei!-2x=a+Mb{=~WdEvI}}+Fz(%@iQ<>9#t^^c zpZNz3^`N196e@ZnQuEy#@#Po0g?IYh<8RRIuCHJDM zke{zSGGs9S1CN9C)L5oL2v$x~4mbR6?lG|;WZk#WD;~&C_sd4nn6mHGcs0N?^7edZ z`a<&2fOzK{f?1Cc{}VF&_K|#4@93VR6!_^fn}p)w>x6U9C*8b}=?pSaNdvVX8TX2n zqBZNIko)?PWRaxQOeOfih(F#^j2*H@AGwnK8h!^80LrLkE08b5FZpRUm$f&?HXek$ zhb-FsUcp-@^TgwJQSey(_x(~9T%PlP?K^X$+N+S{2JM}Rh`6951Prs?By$=g8;KWj zBcnhB1(Pb8;Col{aB%#pXce(MqhFe9<5+hgiWtAE2Sw?24bbsFxE!gY1CNmvNK0ZG z4ArT6UW>HA99UHDiQDhpJs37{ z-8D1%T>W8iR$NXoZ)`5i!?D)b&wckW1H+2*))T}1%tFLgW`r3T#3YdSp|{H{!VgWu zxM1VHnR4X~Uw7m*xX)HbXn6dv0k;Q zjeqE&-Y)5;oS@)=T$^@vq+3<`$ZamqyAVE zHANGTcJYRLMi-zH_x&p)Svjgi4NaK{nnkwjhevwH9Ohk~xB<b? zpVvEXKlMk{sH)5vF)QR+8+g@2N2=DDow5mij=S^O*+V~Iq~Sqt$VyWr;B-UGQ9$z* z9MnKGICJzmb-pelnc+YoQ|8x?FyzvE$rGyq*7Pv2gj}n)se=-aB5L&_T2B_0v3xP+ za3scWpWwQmcGfZ-knrpj^7*H}`xO{rYC753bl1Q`}ERTtE`{aH_f)tp~&B`G0O zOA}>6B5p+JPYmC*dYBcX>cHKNaQQevPtj9svU0b>2~#Lu74von?l2SVJBp(kbs4Ml z`LD~>-{COoAtVEPPi2!zyj$)S7)C*fg zC&cn%r)&!@R9JOG5E$~-NzzFW$C5pEOjzTS7>F$VOe8a5X(75i4>?Gz!^CV&ZJxhM zRRb@Oedkt|onCVUXc%e?{#Ali4s6VcMkiv%`m5D|ij+ov4JuK*tOwP;|LdnSE79O2 zpM)PE{OFsUB$b_-Pb;sT!P>$|gZ7(#E8-UBJkRm91#H4iFiFReL4vdg3*2)a-yV2OaVw)hd08)m2JtY3?Mc=#kIE1? zjehWOMKz6xP_KIV2)iK2^Os&&<;WN9B_pch!pEZ>GCyKyd}Y#raxWv99b=*PF=0mL z3j+HvPmtTh5N8^QuUwP?S{OM7vFQF*<@94*4|q)?GP{2I9NyR2#s-cl)p9Zi;_IZKt64EM}nh-Jw^bxh{&f@ zQi9ne$+jYsx`Ei(BkjJ3foTrTs)COszC0erC)308Q?Hp`To{BUb4EOOc!iZ^8+SP4 z1XGd-L_D!d0qc-&o~vN~!5lj`8tCdm=raJ}pVGbp6|c@wBxX>5d_YFNAe>I7q_X4Q zkn4C+fOFq}en4~aI}cat{2RjQdAVCNpUsojspL$0$2^Lb zX8tC)7b}|x0immoP^_ceIv$a4_-f0LL$&w3V+# z5RA6W|A&~%@-^hhIDtlKb3#?uad^WB!tUc{75jo|z0`uj-3iW`7GvqeW=ZO$!dy}eI zTNf%lBl~B%=d9ads4^KVvm&Z?E?J9*Mx9qDw@@(BdmiC;SOmRf*;V-HC~Q+@E0FCM zzlWM9Vp&#YKDIc1vkb=vf98iIQ!yZ&Ao+^-Pxz$6cf;lzH>OPTt=IR+6=ul>2lsO5 zcJ{VQEI;r_lu)X5mjWX#q{TJhz%p3&$)cNpB|@nfEXOiaNnD5=zoND#BI5-n6S3_Y znQ@Opf|&B*y_{rzRwqlS9qoWny>cz7{HW-K*a6`P)@;$VJYcau!9+_I$ryMpov&{~ z)RSCs1U(-kp#IcrZkLkZC7`yY1h$@yk_I=EAd0UF1KWXWmAibwULx7~%c#UKhkoyw zNaR1ig~g_2PFYVe_7v)h##N@{E}8W~3cZ!D>>q_znFWv<$S0z$_m`ae9Ay(V zQ!mCE^q*hD@i%>YMlkY=A6+=qNy>b_*OZ(JIXrnQ#YWG2whYv0@%9^BJV@@yF%{hB za0|W#KsH-SCMuSnp3h@*UEBAH{kR*hpm9@FUB5*wYZUm88$xhI!%jKEQ!ocpM@$=4 z4EgCpG3G9{ht=Qr;`m5cuIm@!55|@ox8aTO{!fQpg?^wZ_c0t7^?cY{B80|%#5!wT zjJ;-T%db5$TeTz=zkvR75MWkx}FMUXss;U^Z`&kzesQ^L<+QR&p zv15TLkJSTM+Y>Y#yQqjv5@D78^mD84{jNs=M=?LkVLB+TGL?W`fjF~&?%`=ccID^9 zAKm|CjmIqN;QGVzkTo5OjVWLG@_7JhJ$I>OlT%Ih$g5FncN(oPXEZK`fLLaXq_Rs3 z=^3-lWuY?1ws%E_B)HF>^7Nr>&WieYa1;SKnL<&UV;vzVB+@WI=$Kvn2vRGsjHaMrN%3>682M zrsi22=TBzab#BopuU6Y2Ll}>B!;oFfk#T`xD~SSq>u^khTeU?xGK(FX6uom;DkmLc zvC8)Qh8UjrG8R8BLa8$pWFm}+UWUUCSd5_09@rt-P|*H7pVEHjb<>wmOZ9!Q7n)x3 zr$w3KBgvji2!mtGd@2fcifrMpOzMnSf^Q!q&3fAM{2+|-B$|nyN&u$|2v*6ntQ^C4G zP78g*=j$m4Bk^Cu8X_2Yi91-=hfn;zN;#BliH)J#yJuL~eT&^zULnfq2ux{1{0%no zgZb+X?ex1C@E14d+U<#KbsB%gxX{ekcN00Z#F~y^2F7;5qeAMiq1okI6^IjX*s}-L zCBqK=>|`ZAhG42kJCD*z6!n6J$tgGAlhJasqnzjeaJVc5kOh?`xb0{fFNM>*E=9p7>F=?3LD8G*3))iBN9%8e&K*G3u@ z7PEG|IMZAL?%p*_g7z4Ai92mvD2wZdNP`aD$b&-V1BwEa6cDs~%rPSe^#Mgu+YzDL zkd|jD6H6-|eIQ($G!5#s68T1%a~)OJx*&`p4VS<>H$TX@>!o2OTHb^)z0do^^UD7% zhji)lZd8fkN+))W<)@wXM~Py@2?bIfvo?{lvMFKyNXhZo=pGcL8o4`&8$1FRf{ZOW z!PZvDg*#ie40M_K+=&roB9acrk?yhuC-mGfg(R2DI=`@qhI$M#lg1g}?G`xFGpPP| zF(Wv|#2HVSm7pxZ@x4JBK_1p}E)PTYnm>zm@wJH3HC9sT+8np7e?*U( zHs7GlOpLv=R8g?FZ&p`c9^CO8R5Yq~%4Vokf39&Lg1J7H!}?2^q=IxKFZQ`ZTT1M! z%{t#|2adgEK!2}d1cm($^a4e2n>-h+?$fHcl+a|8%9O!W4L_|;0?Ygp^Z2QrqUInR z5LU!ozBjToFnUI%pPy7=w+JDlbDko?xt6IGplw2GcKr|Qy80JbY0cavc6M|Ee3|$} z({~6mf+R_FV*VBeCCu|dao!1EWIwQ*$>7(o!GuC0rRKx?B}J9@K6Et|dO>^rR5FpA zlnL{Nqc&Ts&clUv9rraP#B2XU1k&C2*Y?{T*IQgKCdj6%R|m#}rJ5nIq2K7)aN>IW zP>wmYSnv9h4Pm`k4`Nh0YN~>u>qC?!DBr2M$Th-4M8W3!bhUo8+K8a$%&Zuilz|IG zyCm(%V(?IQ&q99RT<=Caq5Ly_+X7y##?auX94H0R|0NfWb`fv=aY&MDM}*`1?@NcQfpjxGJ>8T1)gAzyill+Y$KW|h85?$VJ{4~oy5nG3l>Cly z2XSUjQwmQBmYKP1m!|lT)cGo@_myVxD7J--btM}*c`!aw9N(r(LtjY&JTS~*b{Y(E zEXGjn05<8OghG2E72e2b1fd&PcFX~6A-g>NSl)~Gg;L4}12{?gH2B=_#NAR}5v*IX zdn9}vX#KY`kWo#7r_K`4y;kH9Y5BT0E#fjGOY?6^D6{4A_fX|SK=?BSUH4@Q2la&m znS=_sZsER43-K*vP7IfOrV(&uvnN&VFctDd6dPtu+95ihh?#5V{YQj9)l9>Rm)qqz zz$m-6Ii-(my7wGR#9h_`0R)Rqu;zJ{?U}4gn3#XxLNzfcZiR{%fpeez4fbp2e79f(a|Vr(`Kmgt<#ti z&tv|qgGCC7`hC>O?PYhH8jjf&%$9h4Xs;MG=X9zmV;FT;;XD){&pPVn{+w^ZK+ynB zOA$-RLkjNKy{%||jD=sK^NkLvM_hxPFmt#Mhe;ZXYTsFV;`aL0-2l*%bem!-*yjN0p;V=CZMfk8a!pWD8~Dyy6Hf`u(40VawH- zv7txP&pGC2#8nz8N?vti$5F%SCe1AC8j_Rh_e*E+sk9@{EemPTo zc*wIwy`h=(8AvO;eAk0J{OPJ`yu= zi5-4(h?t|h5vo6~Ow1XPsuCv4f(}aQHBv(M4a-^Jar*#|VSg=G5N1BWl_Kx+Q3s#4 zX>6EFx2rW0Rp(|K9Xmp6y+>et0fqch1NNLEo-(N81l}2vdSHW~_)njkg;2=ASE`dS zPTj4h;;Lb;Z58=BomsFa)q+#6Y8pJu41EaWSb8juk&(GL1xFCt;VE@p*u*R(pG*i> zLneu~QZk6G#r=YxJs~nuqu(ibXy%kk^sxXbqmWByPPaBGry(Dyj=`WL-B7dlIk+Po zwfGMRtf1st(b=8XqgDqQ1ngu;OJYk5Vs|^+#6$=MT|!3ZlgknIRW{~uJC+zobC0CF z()clX=Hl|RAaE)B1L<&n_~6Xv+Hndq3+f7W;R*9gD zKramfs~?fC(*TxfgX4lmdSGASIFn6Bqh1x@P3Xk<{X2T*E_JyUX0+(( z-GBK;F)K<4djK5L{fb||r6BtJm0#rKsHCk08r6C>9}&#@sJFzIJ_GGxxrr^jMZ@Ya@fD&_a3Xg+yF*?@?b%d zTA~vE8`4%#Ofqbf2>oe6n@5l~4*`(8A`G>DRlj;0awQ1v#*Si!hbQ zjn+fHw-xK!6O5zG$6s2}4yzW`LXitzOy!00aGitBI*miUPQqixz&KJC!2!~O&O)?NyB3L|trJ9W^q3nAuzz&o#j z%)1TRoCSmDd%mc6rGX4&3(&C-DBW-WYJnp}gol=e?)8cIHCt#E(P3zHtEoKXaJ|vK z-SD1X?=Yh$Z+aR7*9|Yu$B>LursVR5u9E(<8 zo7v`i9Tx|W@N2uIdu{iH;;q&Fc$RspSeO?F)61c72SlaU*nDzm$BXy}cCzwJvN4*| z2%7gfro(`YKrV<|vwby(2TC0&b3$Gfv~ZD<5~o9?XY1Zx_}CE(`-PF<#3NTpzo2~j<*t>s*otobWGyjQv*Eh%ISx91Dn!(SP9Q1GBw=n| zByiBpGmm}ThFlm$qj!X2)^HE2ShZbyqlqabKer@ zkoMG3?qL?nl3taWj4cO%CcYzi=J|y%5d7C4DvjF!3ONb;j4bfG* z7ihx$iVN|Q*npx$@_(Ean)U_{iyEb7>KWPX;NF==9sByc``e>(>=$kAN z?bl@vBv<@yh72$RB1Km#ML2#AVF|-}h+KLCh5@X~6O4!oJG3w!(Z@OmliPZfLYJ&o zV{!BPfasMRmxRD)K{J{{;67%tk~1&fng6Jk_Ya7ye@FmHd6?qEbRfWi+LBc|+UuYH ze!6P2@OMXIrZMk`a3fq+Y~FfL^kKa#m&q=OO?{+N$rDYN-PEvP?Yvt|Yl%m%<)xqw zy68TBMU9y-_y2R+)IL6InuiY~V{k_|t8A*-pL?-=lPX6;f9yDKjnl5dl;!@(UlDY3-^ zqv7!~i8$<|VwXC!6^g2A#oK>gnaegu4(DkX7$7h7KbJ81Keni2K%JIili*uU*W$?n zf#04ZiVzSA?6oyj@`1|EI!L4f^tZ9ebm)B((_1m&D}Lqv4%ai1jK$nG8|>tZCZ@t9 zS|Hf7gr}li0qPYYU`lrVtPY?QzyA9Yt1b;Yh4L=TGPAlK1gL4!lR@Lq$3{g?I)D+A zURV>4rnC(D?LH=@mh-Q+$aP2`#r;U^V9pVdYGGVht)Anks9M}lk?CD$?W$wBI+F1F zIwqysJXbH-b>E~fmbro3UoR9Af0ZUdTCFKajh2fFxsP11-jF6EM|h)+0bi)8Ih52I z(4*~JdmGCju%1APaqj#t#Kg#*xR6xDNI@~%p{`MUqrP2I1WQM;QVsm{)I-!CQNn`+ zIT+VuMxZsISqN-opBx~hFKYG!S>{RgzK$e1feZV{eSnabl-yjez+k%+g1M}%o?5Kp zSP>gv4Z4YIV+?j3(3aIf;ujKC`X7Cph0WXGy;SxunC7L~GAmEtt0uFwSliQQ%I~QP zRv}V#Wuob{^8iL%mxvx>PKLA!OnY6GEqDlLuot~i8KnahOT+&2ailY3Uuw2uJMPa4 zH8#NxBtN+yn$@kov<;jY@mkg-!17=kz<-{-4kc1ft&{XFulIiuw8U<29ghV+!mtYdqx_JW?m{xe$XMzg&5?Qp3s=vL&sb- zj#Cmq&?(MZmHjXEiTD{Eqj#aB)tbDt8GHVx(w%aIt!Z9434+GF?~re(YZ~RqE9y6? zVzYFFcblml8lwnQ@d(9{<+xacOSnlFIgMSC`q@iM5rtp}|C zx%FZc?e@8bDYV;S=YJ|Az_?jGHt!A-!?j^uTMp)0k6As8ukvknDE+Vs`j8Mb8>+&u zJg^z3>{&^IH~}NKA63tBbnHIdomO|inI8 zy(c17I2kS2%yGzN>=h2&;Vj2MYkLB{@^@C|?Kv)^DopbfqLG{SkL(xx9uHE$kiR^z>pw($uR>!7 zJ9`fk`llW0&fP?qsP^Tb+Ixe%9OQh>kn`I5^kPfnzHyd{KDR8SRSW{fPV5*}gUv1H z&l6qNRm3qcM4j#i{N$u@1MdS*e6F&|LOn&xhmu^o5;VL{MO#$taiWp-_q|3c;XmU_ zm5Qk!<+F569wn#2DS}y+YNJOuCFno(XuL@FsuES)jda0KznNDP$ecG*kA>2 zjtM+&KkXOn**{oxr7-u(CUfj8(hO8MTN|?eD!A}p!ipB9YnQswS&cGXlHh7arQp(% zB$Uf2$zrpVuy~G~v5*&2DTtK50P+5se#6R90LM!3w+Jo!3Os-P)^Mzh0iasch`^Um zUA1MD6iwI(fK&JWYRhCrc0NN#34p5Qy_$u|+=8EcVm4EZqBREo0fWy73ktxE#=IIm za#zz!KbtcaJ|8lMU{Ov0XgK2nflgrc^TF3=`aU)lq+dQ3*iMd|;TVaIJ)>|ASx>{; z=5f_CqgJf`RM0hWc{ye*fLgBk2f#0O>Sf2}T8xz{AKq!gj+q^rPPGbae~IvO1}6{U zed`8t)w6RfqK$!Hv)$?NXU{NjQ2dGGyh}e8V(J}f>i@CJ?8LU>Q9^%4r-v6pPDnGa zg=TR2L>~?MaIYZO&=Fy|_B@my$nue~lik`De(6X9N2|k1^jU#srU0PbN&I7cA|G@O zOXo=p_3IK7gE&wDmUan%>EtNx0F3J(Md9IH92J!LZ!R;?)&EJxD?6e)DDsuvLYEu3 zasGYMcD6gtR27as4i6v8gD6tVM$Ruy{UncIP#qAV4a^DRvg9Pc6o`WfsVO&4l)|)Q z&!8bvSN6&VyMs>n+{g-8%K8&C8GK55;UpKk#?V9}0jKCwsbEgQtuuwl5k3F?5Ag;4VJaN3f) z5>g$~wpOieoNaWSn*ce>!xa4^QrbsCw--u#TStSO80}_U4Pkv&)e5%d!B%Bf|D?4^ z$S#ndd4)Nq0>Q_y`a+zOa(yeYZL8!{6*9T7XI0B#3+b7d0(-G*OPSs+v#im-_z+#q zPf$gb_78(i+M~bi8}K#X@OJRns1)gW(=Q7dG?AY$d6P zubhUT$pS1^hQeu^rO5fihk7!G?2e)1AIjh?LW}e>tZ7d2L;qrvkYIrK;{)Xq6t<59r$;S%Ec|DV?S0m~P)yo*iD~!5SUf80F2m)d* z#I}FK?qo-R?pJLJ(0yu!lks(l$sK=2j4FKvdebyVwYR-z3=dFr>|=o|zkDU=vPCxv z*!PkugJ#JK;iLJtv<2pmF#E={K?V%W)MwAG9W`y7Lb)1D2rt=`FPidIF#l2@~htE%pQg7N_Q+74DD#3$)zD1~<(ys|OU&d3HvabJRhW}t#K#w*R;`zO?jtvJ3CvZBw)Iw|@Tcb|ll zOhSKULDPSLh*xlyS-J@s{}>6;Nd+yD^Kt^`i1@Tj;^T!77M%JPBo5T0-@R4J+aI9k$0tvcJx5Lgc}HkILdVjf(=3Qajh z=C#Kt&8B3 zHszpJik_joLryV*Hd%SLL+dn?fccC{;PjE|YlI-t@MIdg*>W<{591phMzt6|P5@uA zka>D#v#7hsh%2*vX?4Vq0A=0H$H)4!Qh8JZ!Z%~Tb;$M-mmgr2?xm}=yy z)!!XZIZ^dnZ*hK1LBBE z?Yp%xH@u#(Cd%BTv~{<1gz0>GUv418K<#IrEuUUm?xa!C=7JpsK}LWz%OF~?3h-Mn z9?`xUR6IZ+c_|Ml_;O2^vEoRCQ7F*%@V0q*&!e!crh8SSKTA4e!xAWiQ~b7pb+q1Yd4e6}AXvg^1$Q|Jc6-jUPy8klJvE^Rvbp zFgdEcr(Uv!<$SM(2VZDA`VDEz8hIa|ep6CzeIbvIFAMkhRol36rOz5J{$U`b6w5qG zHQ^ra3*EJc8@ohS+?oruzWGvQ=4q6Ng>5COYnA6WM~y&`n~ouvspxsjU4bG+A>2SK z;Y=l+?TvpmVJ|W~pCA8o6*elf-zZ=ATZP-QqGbyqs3V|6avpME%;)YD+i^n!i8q~d zLuB%RuIhvTP1BYhmnm5lN@MJ*0WS2Mt5b2{S_V4=@w8kcsr?ytKv_wnZ>*DVfwI29 zVmo<$5*d-^|CQk2$iLmxab;X~QDxbZ!`YiEoQHnv_m;-VvKac#*M`4aA@bICD*R01 z@F~2lON*w;jNbQuOV=>3_V8;Ggr@YJNHVa8d2Cd-L89+SP*Ahq=~&kaY|ub1FU~Eb z>i-Jk4$ADm1Q)Fia!P{+z1e>T(~C&kIg(kFZd7Ef5zP2t>YyJ^HxKS8mJr?*P0nz z>J!|t-F6&;b6f(tlapWhVMLF0GVy^n#e$~g5*QM2$k8O0E)%kajLxu8o^4vri{DUh zp;_tzP{&@S&WL_CR9u z;>;Ak1lE7i{gJg0h()jlpNm@`{rZc;cbkc3VfteOyWjVrdb{@zbY#S>*O{kAYEQrt zh4sXw)b}7~fiO=$IuyS&Mq)q+%$Tc$kVvOD0VAO2@5n=Y)G*l9YszB2n78%(T*bnND{8rHfETrI;QJBGGz8l?kag|3Vwi zFil(${=+%WiOl`k)yB@2MD-$v;JHmk1&|1Fgx!REWjHvPW~EwEK>;|Zs-~MUj(yxc zpt9M9`jgfoq`JpJ=cAM<1DVo~ED=MC>r-^~V*A~nZM01z4f3$={Z z$ji+(q({N5)DuEiveVA_Mp-5zIsrPHiS&$~S6d5b6avLGCoE+;4!K+QQ(m=Z>+A)# zr+|W9o+nmOMA=f%VL1JsNwJ4g^F}CFLwQd&o?y#q#&u#KHmN?g zH-nwu12taUvFIKhb+5HuysIx_gK1;uS%=L?d5&EP_OQA5)}f`n#C>aZs*1xMZs*lM z;dGP5;s^rSCA%&i@z)OuU?8Y+Esgw+FSbe}0{D1S<-MqwiD=m`T;a>0^^r&yq`P=U zs}49l^^?W3hut|$qi$5Qm~#@=F|$#fFLS=)4^M#@Ip!agkKyCw%jZIi>)avp#n1U2 zKqpJ>Ub&d;e&ipouOVZK*~2sw69~)Lgs-i~#?s-tuU4Rb^;6Ftg|L3`%{UMVWr55L zK3E-Q=qIB0*z(j!QgQ^x$sp|1LaqG%Neg2ho=#vxp}?^Y;+~?bMm45v?s3@o z#qh8d2Q8kd5)AW}MZl!NbZ-D&{Az5{{^2~}Z%T?0!{_YA&@fHj`)fNy>}e-Et5|IF z0xgsv$j5#?RxA=hteK3Iw2x7IE;@S4Lv&`Nc~P@|@xPbyZSC$5DLubkB=}T#R$mvY zNGKPHBq@RPzJ$`+R+Sv4L)MbtP5YO&rFbf1)6X7^G z+SUwY*1_#OmCRmL%gGnws0=sQYg|Nw02Lh$H-t>0qqp0B>F$+k^VZ$6*?N1ZngU4s zkz?EIj=XuDZ5EyoGHPGXBKL=GV?wH_nNp%nK|0>B7{`KV{dzhB;Yh7fpX=UNOeY$!i!Auodzj(C{r`JC<1Mv)oR zu|W9QxY75)=jdmg*WBbeF)ziw&Dw;{U`eQWnTVIh!K`LR28qyz5yTGh8;y;j!D?8FEwe>Y=2AG8*M(&s4@_ozrzLp&D0 zA$#QCd5);PIE`+I5Ign}{NR#Mi=}xMiWEi%iEGEc7F!gw<&m$#KXeeOt*CGYjp>^In_?vd8ZDVK#Yp}W(%L~Y z6SE`028KprC8emqfH68XxYxrmIsv`_vIUp|C>M~1Cct$K4Gly}KqI)gJOXNHX>@>C z${T&bfUw%e%+y>|h=K!yt9>(5E69C!XJ=$LXE#%zPAH}yN+nPrK)N)6fX-A}1cI`P z653({(1OJOcE}QdH-QAk+5t(`k+rFm0&`O+7I!cupc&i&0B!!<0LtLr%+-GmPG+Cc z0lnA=$B*F|oD+j^1oVYuM3ofZKo zrv~;{AA9P*mPhr_nW_C1(G^ikf87B7rKH!gxHC1^v;b9f{w*wEnLpU?)U_ryFK@5k z!l(JRKn=g~^!C?4O^*QQO$R}$0G_6_FjiaNDyqtXe%-tOrl zVL-dX(vUG281QB$mt~UhZyYw7;=ASyw~)yVSbYq`$PVv7rLeT3Eh*{4^N*yz>k_Xz zUko=^?wW|)ns7U7{Prai{i>L9n}&4?z?XA)HECqq^l3Zw4LX0?78%SrjWr2u&s2P& ziywF)D#sz_TNt*D0RTD(HMQ+FfQDz=6!GB;zpL(A)ZFdU&gc{MxjC9uWMdtepPc{; zg(jA+Nop@dU=jSdnk^)~&~gA8@dU8IU~p}WBQSA2YE=Hd1YoL>j@@>HD#ykh8nN0b zs(#fUhI`K}EKu>-38!(Yx}0U8h}~?bUTttK9$GAh%8u$Jry{#_R&^O-z$ds&2-}hu z#x5pugW~c%!ld*UPL!l5Bu0zt-cT3&tS~%)PE%r!ATi)VS9rq~5cb@sDz~MbV*l4- z7Kc4&(-^?ZoyWKM!y8$QBH==WW(;~C{0O_3J?Zw3r(Tc zCQ2%QG2Fdr3H?Yey2bO90)GSC;U7yvp5IfeVw)CkS5mTNjTF}sTC+Kw zrCzC>;FRBuY&ln=RB`-irQ6whW<9ZJ_<@yaLBc76eA8b$@$cg*yyGZ{M%I4n1#bFr zFyhYm!$Yfm#$gvA?jKgF9cih*jT$03iEHqqn?rHA*4}8d=Dk8h(b5D}Y=zaY>h4-v z7I3Ilbd`;}!b%Y;xJzTF_=%?qLi7f^9ObH?pYnN?LB6vJL0jzXM)e0`=qB%Kl(~ba z{7bA0R+Q-R5m=3-7X0P2x-<>X33AkaW9`A?J&L+*fHzR9gKP2-5*t2zI4Ot19MmZ| zp3*mG>70D@gK=Qi(idEWyw0mp%$gM3jU~w`v$0hS1VOoTe&#;t_Z*?W5IkyD;|x;R zl|ofvS%!A+JqzMHyaf9asOA;+7H3o!bl}0@1S*}!#$wK6nE0NJTX%R9taPw_2!W6q%ieN;>xqO!VPROi+!TYg_8}Mivj&60-=e$9kG3W>E*t7y; zUal~>z&Pcz`7jef;Fh8FBjTE;Dosz7iSEi6>^jsX$0E|Xpvv=r151D=HR}k50cU7`rNW}I z=QJ98`{?CbFO8y>Isog?7YpMlIsfK?QTm-62_A%Q!|?{r#hT99X^3Tzf!v9@D4wAl zs%{AK@8A6In7_zQ+D$tPC~H}DKV=X)7j|vftGGiD3dT|mwzaWDna%?70xQegok;2< zwv{CGzx3Sv1A4L;m-o!lFies6qig4Nk*af3qi8$E!GGU2_K>u87}iz8CG@_1Rz#RoCO|6_~c;Ob&W2 z^2$RV&G-KM*VG_(Ukn#zTgz*Gm->@VogEi!Mfb58Fe;&6i^~_}!Rqys&1y`nhC^!@ z!DQh9W@bmsa_~3|q+C}^!Xz+I?$way4g^|&Xz|)JF;3O1*9E=DEQsaV&T5!R@9TyP#$eVN{Mi}&6fg43F9bsi3`nG zuHKffRnGJ2W()kk{5<7jrGi{O4RL&U7UAMvp`zO+)BIkfDKu}cC2KXun>UgD1B(-p z&N=0}GP>q@#J6q#7$RCblfbhD$(Bb22d`Q|o`&4y_NAI}qN4MoSw(s88Vy+MQ*Dg9 zC&o%}q3?gvKUiHGI;tcof_1{q7WykJ%kH~qLY&ws-fn$Lvhk@8C2+68wP$ZvO-6ixc#++F8V2~d5L#gdD z8WY~*YkGUCL9So^Wm>@pY`{i-<~8nKB`d&~2W5gR`!ESEB-xo4FF2{-JkS9sfipZA zJFXs%4)C20abrifU~p9UU4$A6Oo@y}T%O0xA~cHo19x+rCk=7*_tN<^VgiI+ZWqXU zq6*~6ZIucL`W<}`y^r+maXbv`!?55KrYRjI{D~YLsSO@aSkV4laSD+#CUj2V(mS1Y znumpvQCjJ+R7!|-pPT5|P7N5s2K=jP_({ zY@0x_@y|N+hR->>@6>ea(w0rFG#?~Ftb1rE3(-qx>l-|w*6pGccMEL8yQ zwAF5kJzdPCS#pOtpyU6xnqI@ih`o_=2KpYlZcX!q5M*j(xy&=NXj7oo5fn#R3!^u!xxCYj&UY60h*^Hd7|_fXb-K< z(=xy&*fj?KF=6Y|hDwxRtvr6GTV8f~;*)fqN(!W18;m{vP54G(=lf2=a|-z{;m^>g zRH+mnPN}I1Rs-{o=WOT}Hu_iL^VX!AA z4?chwgBB`a^Am8zo5a2bcUQ7Q|{i zA=`oz%xPkNLqe>A8%0-&Nqu>w`xq0IkC{fBZ7+!7ED{&#LE~(Z9n}uYB$|z!xj_pF!vtK@#aX<&Eo!U+ZuW z_P{8t;q}J;ic;cRcr+ZD*XGQ99?k9YdvVY?urZ7p-HXjaCznv@M=2yS^!oN_KpPxLwfCbR zBCcQM*q0S+jhLUVh7XD7AbPr9nv2agFq9TNwe|!HA+>UFf8MCDf#F|h$&F<#8g=Sy zkDVozYq3@xtD$q%PM+46qMNwM-e@qgk3u=*9+UQ8KgKqyI1Xp-KN3*bS}cZ)30N7z z=QJE