From 92f76dc624c277a7c731733a4e51997c0e9ad981 Mon Sep 17 00:00:00 2001 From: Allan Jude Date: Fri, 27 May 2016 02:42:46 +0000 Subject: [PATCH] Import Skein 1.3 Bruce Schneier's hashing algorithm Used by newer versions of ZFS --- Additional_Implementations/Atmel_AVR.c | 77 + .../skein_8bit_estimates.xls | Bin 0 -> 26112 bytes .../skein_MSC_v9_perf.txt | 129 + .../skein_block_x64.asm | 1335 ++++++ Additional_Implementations/skein_block_x64.s | 1328 ++++++ .../skein_block_x86.asm | 1180 +++++ .../skein_block_xmm32.asm | 1167 +++++ .../skein_block_xmm32.s | 1110 +++++ .../skein_perf_core2.txt | 1440 ++++++ .../skein_rot_search2.c | 2538 +++++++++++ Additional_Implementations/skein_test.c | 1380 ++++++ Optimized_32bit/SHA3api_ref.c | 115 + Optimized_32bit/SHA3api_ref.h | 66 + Optimized_32bit/brg_endian.h | 148 + Optimized_32bit/brg_types.h | 188 + Optimized_32bit/skein.c | 753 +++ Optimized_32bit/skein.h | 327 ++ Optimized_32bit/skein_block.c | 689 +++ Optimized_32bit/skein_debug.c | 247 + Optimized_32bit/skein_debug.h | 48 + Optimized_32bit/skein_iv.h | 199 + Optimized_32bit/skein_port.h | 124 + Optimized_64bit/SHA3api_ref.c | 115 + Optimized_64bit/SHA3api_ref.h | 66 + Optimized_64bit/brg_endian.h | 148 + Optimized_64bit/brg_types.h | 188 + Optimized_64bit/skein.c | 753 +++ Optimized_64bit/skein.h | 327 ++ Optimized_64bit/skein_block.c | 689 +++ Optimized_64bit/skein_debug.c | 247 + Optimized_64bit/skein_debug.h | 48 + Optimized_64bit/skein_iv.h | 199 + Optimized_64bit/skein_port.h | 124 + README/readme.txt | 166 + Reference_Implementation/SHA3api_ref.c | 115 + Reference_Implementation/SHA3api_ref.h | 66 + Reference_Implementation/brg_endian.h | 148 + Reference_Implementation/brg_types.h | 188 + Reference_Implementation/skein.c | 747 +++ Reference_Implementation/skein.h | 327 ++ Reference_Implementation/skein_block.c | 369 ++ Reference_Implementation/skein_debug.c | 247 + Reference_Implementation/skein_debug.h | 48 + Reference_Implementation/skein_port.h | 44 + 
.../Skein Cover Sheet.pdf | Bin 0 -> 44070 bytes .../Skein_Implementation_Statement.pdf | Bin 0 -> 43116 bytes .../Skein_Submitter_Statement.pdf | Bin 0 -> 22471 bytes Supporting_Documentation/skein1.3.pdf | Bin 0 -> 479368 bytes Supporting_Documentation/skeinround3Mods.pdf | Bin 0 -> 33906 bytes Supporting_Documentation/tex/key_recover.pdf | Bin 0 -> 5877 bytes .../tex/reverserounds256.pdf | Bin 0 -> 7745 bytes Supporting_Documentation/tex/skein-21.mps | 161 + Supporting_Documentation/tex/skein-22.mps | 832 ++++ Supporting_Documentation/tex/skein-23.mps | 327 ++ Supporting_Documentation/tex/skein-24.mps | 398 ++ Supporting_Documentation/tex/skein-25.mps | 1440 ++++++ Supporting_Documentation/tex/skein-31.mps | 161 + Supporting_Documentation/tex/skein-32.mps | 812 ++++ Supporting_Documentation/tex/skein-33.mps | 1384 ++++++ Supporting_Documentation/tex/skein-41.mps | 349 ++ Supporting_Documentation/tex/skein-42.mps | 163 + Supporting_Documentation/tex/skein-51.mps | 200 + Supporting_Documentation/tex/skein-52.mps | 334 ++ Supporting_Documentation/tex/skein-53.mps | 259 ++ Supporting_Documentation/tex/skein-61.mps | 247 + Supporting_Documentation/tex/skein-71.mps | 90 + Supporting_Documentation/tex/skein-81.mps | 279 ++ Supporting_Documentation/tex/skein1.3.tex | 4025 +++++++++++++++++ .../tex/skeinround3Mods.tex | 76 + 69 files changed, 31494 insertions(+) create mode 100644 Additional_Implementations/Atmel_AVR.c create mode 100644 Additional_Implementations/skein_8bit_estimates.xls create mode 100644 Additional_Implementations/skein_MSC_v9_perf.txt create mode 100644 Additional_Implementations/skein_block_x64.asm create mode 100644 Additional_Implementations/skein_block_x64.s create mode 100644 Additional_Implementations/skein_block_x86.asm create mode 100644 Additional_Implementations/skein_block_xmm32.asm create mode 100644 Additional_Implementations/skein_block_xmm32.s create mode 100644 Additional_Implementations/skein_perf_core2.txt create mode 100644 
Additional_Implementations/skein_rot_search2.c create mode 100644 Additional_Implementations/skein_test.c create mode 100644 Optimized_32bit/SHA3api_ref.c create mode 100644 Optimized_32bit/SHA3api_ref.h create mode 100644 Optimized_32bit/brg_endian.h create mode 100644 Optimized_32bit/brg_types.h create mode 100644 Optimized_32bit/skein.c create mode 100644 Optimized_32bit/skein.h create mode 100644 Optimized_32bit/skein_block.c create mode 100644 Optimized_32bit/skein_debug.c create mode 100644 Optimized_32bit/skein_debug.h create mode 100644 Optimized_32bit/skein_iv.h create mode 100644 Optimized_32bit/skein_port.h create mode 100644 Optimized_64bit/SHA3api_ref.c create mode 100644 Optimized_64bit/SHA3api_ref.h create mode 100644 Optimized_64bit/brg_endian.h create mode 100644 Optimized_64bit/brg_types.h create mode 100644 Optimized_64bit/skein.c create mode 100644 Optimized_64bit/skein.h create mode 100644 Optimized_64bit/skein_block.c create mode 100644 Optimized_64bit/skein_debug.c create mode 100644 Optimized_64bit/skein_debug.h create mode 100644 Optimized_64bit/skein_iv.h create mode 100644 Optimized_64bit/skein_port.h create mode 100644 README/readme.txt create mode 100644 Reference_Implementation/SHA3api_ref.c create mode 100644 Reference_Implementation/SHA3api_ref.h create mode 100644 Reference_Implementation/brg_endian.h create mode 100644 Reference_Implementation/brg_types.h create mode 100644 Reference_Implementation/skein.c create mode 100644 Reference_Implementation/skein.h create mode 100644 Reference_Implementation/skein_block.c create mode 100644 Reference_Implementation/skein_debug.c create mode 100644 Reference_Implementation/skein_debug.h create mode 100644 Reference_Implementation/skein_port.h create mode 100644 Supporting_Documentation/Skein Cover Sheet.pdf create mode 100644 Supporting_Documentation/Skein_Implementation_Statement.pdf create mode 100644 Supporting_Documentation/Skein_Submitter_Statement.pdf create mode 100644 
Supporting_Documentation/skein1.3.pdf create mode 100644 Supporting_Documentation/skeinround3Mods.pdf create mode 100644 Supporting_Documentation/tex/key_recover.pdf create mode 100644 Supporting_Documentation/tex/reverserounds256.pdf create mode 100644 Supporting_Documentation/tex/skein-21.mps create mode 100644 Supporting_Documentation/tex/skein-22.mps create mode 100644 Supporting_Documentation/tex/skein-23.mps create mode 100644 Supporting_Documentation/tex/skein-24.mps create mode 100644 Supporting_Documentation/tex/skein-25.mps create mode 100644 Supporting_Documentation/tex/skein-31.mps create mode 100644 Supporting_Documentation/tex/skein-32.mps create mode 100644 Supporting_Documentation/tex/skein-33.mps create mode 100644 Supporting_Documentation/tex/skein-41.mps create mode 100644 Supporting_Documentation/tex/skein-42.mps create mode 100644 Supporting_Documentation/tex/skein-51.mps create mode 100644 Supporting_Documentation/tex/skein-52.mps create mode 100644 Supporting_Documentation/tex/skein-53.mps create mode 100644 Supporting_Documentation/tex/skein-61.mps create mode 100644 Supporting_Documentation/tex/skein-71.mps create mode 100644 Supporting_Documentation/tex/skein-81.mps create mode 100644 Supporting_Documentation/tex/skein1.3.tex create mode 100644 Supporting_Documentation/tex/skeinround3Mods.tex diff --git a/Additional_Implementations/Atmel_AVR.c b/Additional_Implementations/Atmel_AVR.c new file mode 100644 index 000000000000..11cfdd8d74f8 --- /dev/null +++ b/Additional_Implementations/Atmel_AVR.c @@ -0,0 +1,77 @@ +#include +#include "skein.h" + +#define SKEIN_CODE_SIZE (1) /* instantiate code size routines */ +#define SKEIN_LOOP (111) /* unroll only 8 rounds */ +#define SKEIN_USE_ASM (512+1024) /* what to exclude here */ +#include "skein.c" +#include "skein_block.c" + +/* for code size limitations, make "dummy" versions of unused block functions */ +#if SKEIN_USE_ASM & 256 +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t 
*blkPtr,size_t blkCnt,size_t byteCntAdd) { } +#endif +#if SKEIN_USE_ASM & 512 +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) { } +#endif +#if SKEIN_USE_ASM & 1024 +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) { } +#endif + +const u08b_t msg[1] = + { + 0 + }; + +int main(int argc,char *argv[]) + { + u08b_t hash[1024/8]; + u08b_t i,x; + static size_t aBytes,bBytes,uCount; + +#if !(SKEIN_USE_ASM & 256) + Skein_256_Ctxt_t ctx; + + aBytes = 2*Skein_256_API_CodeSize(); + bBytes = 2*Skein_256_Process_Block_CodeSize(); + uCount = Skein_256_Unroll_Cnt(); + + Skein_256_Init (&ctx,256); + Skein_256_Update(&ctx,msg,sizeof(msg)); + Skein_256_Final (&ctx,hash); + + Skein_256_Process_Block(&ctx,msg,1,256); +#endif + +#if !(SKEIN_USE_ASM & 512) + Skein_512_Ctxt_t ctx; + + aBytes = 2*Skein_512_API_CodeSize(); + bBytes = 2*Skein_512_Process_Block_CodeSize(); + uCount = Skein_512_Unroll_Cnt(); + + Skein_512_Init (&ctx,512); + Skein_512_Update(&ctx,msg,sizeof(msg)); + Skein_512_Final (&ctx,hash); + + Skein_512_Process_Block(&ctx,msg,1,512); +#endif + +#if !(SKEIN_USE_ASM & 1024) + Skein1024_Ctxt_t ctx; + + aBytes = 2*Skein1024_API_CodeSize(); + bBytes = 2*Skein1024_Process_Block_CodeSize(); + uCount = Skein1024_Unroll_Cnt(); + + Skein1024_Init (&ctx,1024); + Skein1024_Update(&ctx,msg,sizeof(msg)); + Skein1024_Final (&ctx,hash); + + Skein1024_Process_Block(&ctx,msg,1,1024); +#endif + printf("API size = %4d bytes. Block size = %4d bytes. 
Unroll=%d\n", + aBytes,bBytes,uCount); + for (i=x=0;i<5;i++) + printf("hash[%d] = %02X [%02X]\n",i,hash[i],x ^= hash[i]); + } diff --git a/Additional_Implementations/skein_8bit_estimates.xls b/Additional_Implementations/skein_8bit_estimates.xls new file mode 100644 index 0000000000000000000000000000000000000000..ecc66a28f2054a8a19f5ac2405f70e971f95e33f GIT binary patch literal 26112 zcmeHPdvqLEdB6M6YGuh@Nw(ydERFTBBwKc5$#ERVvDUU?>_m=*Y-0x;thF?<7p=8o zcNNDpE=ogA4^2u_I}e(;B!SWcga$}>ro?Re2at0NmI+TSdi%=pc z2m#T6(ulGcWeG|XN;Aq*lw~N(QC6U=L}@`;h0==BhN7XYMrlXsK-cgWnZYF|DX7*f$Elk8x}2P!1_Y z|Lm~BtP%&roH&JFOxArbtS}{YA|xZ?IG*uf+5`E_fown2!2`A6(o4o!l^BME38{lF zJy4lGTOi-8d%5yyuX-wtttO0C9I6r@ZTnjF^T_5QT>QT9i%$lh!(NDgML8;F+1`hf zb0-q9w08JpJQJTeq5WF7ol=LGk2++b>-9MDi3@<%Bc~7w&ttz>?mr>%z9{$CqCjkrX>7LCi| z7HJK(AVZ@!ZtraG=%q9re-!`UC-dc#%jsXF>ZWC!k ze<2^7lT|lVY7v;Aro42B3X?JM=o%6Fmkg8!#xq zu;5saoo=mWCt9l&Pz+({u(eQ_szISVshUmX&`@6JW{f~EPO^lW^p;$cSc=Y5z9dIQ zLi|roLtL6$RwJBaEeZeG75E!r2U^i5G}D;KcMdy$+PD;aw9x(*{BKIYKUM<%WC{2q zCE%YZ0snXj_+OTQ-%|o!N}p2vpO$<$-k0{b?C^mSaHWqMU+Lq9|5XV-pDF?WbP4#y z67WAM0sle?c%}M$M)Ik4*+<1m*o5ET(C|?0L$xCKpv32#N!s)Nsi`lueW^_ZKPTZ; z$RQH`zd(?i8K1o+;C&_FYm34;Pb=1rICm@buZm~R=Op}FVibp%k|}*S$FspfuHc;a z8E_4U7!o?*jy%RWqQtlCqvD_QMuX2d4lzAcd^nesa4Qa4@)@b zohE-}XJ!8?jA$g>VQ0=!CETIs5)=L{bm(HcWe3h>i{S%|JcAi4E5gmlGdz^}N5h|p zI^e+X=dctB%85@XHv>+6B%hu_J~p^Sci}T2xmkQT!qwEo-3I5Op(jipb?f7hYvDWi zTksw(0~F;$6iXkj3rM*RK3pr1_FP6}Y=-_kErw4N2_GL9te%OFBNILjxT9XM|Lk}X z{MEv@rI=p;FX`W6e89gS3fbi5{Z$NK!vaSkd$X?pf{Ui6nnyzS3k(VO1e{_H)y>AmgGm3wb`Z~JrA z-<#gs{=8%0P48`g-fhU! 
zfAwYQz3tzA8G3cUsdW44xGcT5{W~v9?`{9C%hG$>f6Zm-z3tz9S$c2#_gM5Caa<6g zoH>4B{1cY@%GYyw``_#6KYD=?lKls9e|c88=wE&HRg1pb;Q8)%zdNTHTet|U#s&jh zxh*IHI~#WD!@Gb=AC8K+IqUP`4}aLD&(P42OCLMT)Q2#|m20%hK6aRU%(o8crTSv$ zSdw=x%f;m#&T?@#RDgSxg-d_ZPp19ceSbeNRPG@zX2lk+89QJ8FMI4b#||4UdisjR zl*O3fxp(fx%CIf>x#@F5-t>c)p?Btfr@uCVKJNp&e0N-44cvS?85+9HWvkq!zi`Fn z8`n;pw%=^k-Pvx<_L!UZS^Btre@y}X1>NoY+>bBV=$-9!i)9bm%|353V<(rpWYL>> zRuwcLlg_ssMLS1-N@Fz`d~o z+?y)E9jO3!UjZ)T-|URsF?n+VZeineb`tw5z#Xjs_stdHj#YqrpaR^372qDK0QW5w z;2y33_ece}w^V@p)(UXnRsrtO3UF_&0Qc<`;2x_0cf11JSOvJpE5Mzo0C%ziT)iaj zd4D7B@nA7;pubC7ukDza&}T9m&xQAhbDX;wn=ja!TZo4l`tk6hh6fZiu>%iuO$hZk zml_KD@bF}HP@npzSALSK;xSVTdQwVga7q|(OKA6ypa#oQLX|%C>i53ulF;asFzA-B z#zTS{PD=?Ped^zT@;#S?#ZC#E+!EG$NZ^2*i~F!lwLbOAOaI}Lu*50hYPW<99uhcA zHzb7NhyVPkOG1-V!e+OG0S^gk;4SM=qffo|Z!fweG*iMR?^tSsgiU3{(xs8lUwYXk zVX0F>kyz^RkYJCcOCvx0!slHQmN_L9iKT823HDgJH1fjBUvx=W?vzj@mezSlu*cG+ zkxSX{xFoD_N+=RbS9wUV$I_*d&wlSmE(t505{ksqpoau|EL|G;)we(GlF&j4o4sR6 zgM`gx#L~dv15dx8BrqvqV_sV2lu#s=Iz1%VV`*UU#fKhnNoaLSC=yFO9un-aG%)yg z|M;*=LYq@Ukyz^UkYJCcfx)L9|B6e3=9Ew*mij#;*kfs6@ch?|+-mc~YNv!Ev9!rU zf<2Z727j5o&ryduJhO+TOsjX>=IGs4<*G~eXhQ72^H~?ajyylajFaDL2S0n92$3D# z=i=9y=Z6S!^6PT&vqyyxkNoW|E`D8keuxJrzg`DFdkhFMKlwoyzcqP&@V=AZdIvwd z$A!4*<6n01>(29oubuohI{4ZBE5y$q-0k2O#)}k~jqi+2o(pd(^sR4YE+BSdeUIZM z)jL*kf{%q^j#7Bch2!)7dOUH7o29#AiHW&HEE7-8oD1Jk$jjfHs}+1CHX$EpPv9v3 z_VNqu5k+odNG=fdxD zsI)Y<04kltP;3J47-%@wpu)@>g#eK#U-6-+TzTJr3Lu-nq)-YT|83-<$v_n)UNR(5!LL1ao1~ z%;7oYlpJ(U81HaU%7*!BP;78eROjkk6os;y#8rH}>4w{96S0|CCYd^;9n^2noD1(P zWaYm)SC3amZpSME^!SYQI$!BH19KmQI=3SVxZq{_oPIv2dC zqHXV4G^j-wo}0;xd zZTltPO?JLWYL12;SX_iJ6Pt2OwPAAH)LiJZFFnC z6>*K*^bOa*%~n)-v#!UG1*v8vJ~ORP8tr6+^~wBv)SxC~gK%)-Q#qVXm+xMy!0BwY-3pakelqFWfE6$HaCs zA~+@AEFu@f(mou@iC+!gZ~*nq$&7w3eB4pN1!w{!&p;Kvl*d_x%*@1`6=o4|U%thw92_Oz|?ol4X^&MTqLOeNHrsf0SC z#kS5@!D5|-(yOXq`B79iWlf!FC`ROUjL3uWOag}5SV$F!z#j(@2^n~brc&NFePVWLC_zUbvyt?&&Z>Fs4=q|-(R~Hwu_6~ zWXR+(*hWsE|H~lfZ5HD`F5urSR*SdtCIsH2IxwxrXS6NrkH<6GuF*r9p3cNi#WH&O 
z;qWr?uuTmWwG3h*K7?1sK4iN_R`(*5efT|zm+|l)T#7eAOq%C>VeuSJR{hu~ekJw? zc#nc94UssOoST_UqZ7YLKcmgYQt`|goE$toZJeNej3i@|aQ^{xK{~S3j*&y$Pfh}F zZ2t&WWsaZ8@WPQ~3KJsoNl%>AC+8CSRyu!0 z$zBsF4R;=7plB0`dOliEab63HnAJQxf4WK>USKto@| zBUvpR8w$oV27D(uqB3mrlryTQ)A3Vr7=1#6@+7sC<>h32CIi^%RBRR*aks8dP9;;P zA-}=Y5$@8po>OUkq8Boh-pN^NG?GltA|!#icV;3nH>sz!O_~|;Ul{dU}P;ht7Xp2>af*lEEPMYL$fq!88#D%X>BT{zf9jTr(-=zisv4!~4(+yBVh**9B~lm`(EVCUKap0<;iG0$O?q8hrzF9d zbF?!aSUPw*nfE}Nq^3cn9r4~H;=*zf?vyIspa~pv3f9%HVq~aFTa8J>MQumsl%CLb zyk(3H4gT4$%!x*zold4E)6C1WBwi0s&m|IPv^nV&m<91_1@%?&8HW}*yxJ5Ne}Gv4 zLPmTU|6DllfpTBK|9kPj0*%j)+p_2J{}}!^A`Q&rwrB%PJP~e20ivkHm2t@STc?6L1bxws2&h97`7P*W5ABxD{q=GMhNS%iIe;>vTuB{)WeiMu5B zd*pr%u&3nq!y&%1W*+xgTz)jvYdhX2c7y#%jCwTW*D3tsvX6QP@-N?<=J+Emd#*V4 zX;Tr$zc08t`dx9dI`A*^SpB8t1Soyb=ymj;*Z#dZRWPp80Ln@rqu* zK?@)*D#-`D8B{7;^kF8$F92}xxgV`Rfze%`7)A!TO|As|I`YA9&~jHqU$MaNl?5v4 z-X6zNyHED|909QB$5cpe|1f0!<0}_f;k%F@(73F%jk71Y9RiQAI4ehfLVTr(<0c`` z$$!hD{CO27weR#Dv95m@=77D% zg@0@}sc(?^853N$Bb-4sf_8?nDBxVOG2vnHX8CO@VzHfU7v0$ZA(n@@!nY5{^-2l#__c>d5_n<56tt9)@0T zm6F4dwp}i=(=cI67PVpnjNsOS(y<2oj!LttYXRs%)2Xu{&RBDDuffql1J0K}`vE8S z8gS2R%b4AWwW-+;9<|_6O{uL|*?bnCHmbFhRH`cL*IG(|z*2(sY(ua?iCS{DD^XjZM4eP( zy;P!wwkfa`Oo650VOFD&AB(9obO5@v=;l_SW_ zuNDES0I-$kUKs6C@J0-LH<#!mMbj{Bo~u*ZU<+y@cwi-1+-;KWWh0vjq?cDJN8 zsYt6u6>YtXwn5Ue{WJWz+@fgdV+Y4jR`l!dwj&?`r9f2I(Dk zXRKeNl*0~+l>Bg4UQVM!PLqlyATgFYfNjbVVr!J~?8mbXh#@vzq{3)bfC@A^6ljzR zv>6I4mI^SLF^%z?VWo_;*y3a6@+Fd%y)|k1W>B8ibdfSJzv!dGD0w$Gfp!SGv2AxC zJpE!E$K2Sqn`Kon$O6->3`+wofZLyHQ)mIySfUwGX`t~gsoHLcl3UP3GlEh^Op{dV zMk6Najbq;LSScG;(8PM| zbKr%ftFTxhiN(gb6qPA#{>z|4vF6WlR0J9;y_Z9o2$G8GsQ7g|6=6aq*X7XL>ZpY5 zsFqu8hAkBcR!gM^;X1Zd@X4aPsl#PcRy^r zD{P{Po)^tN666LtZlY243Ww|!QuaDS_DU(6#$eN0Y01v#y-iUztvXyYo0h7>O38;q zMLNC(e0GC_YGy6qV@5Qy`YI@e_OqMN z7Yvaxb4_BHyj1XU5Xm8A6C#@E`G^KTB+Q^fHzV4P-VJmsw8tL+_jaH+kh>Y$a)=6N zE&^&{d^h7uu2l3G8t#qgmh4G}nKuIWs|vN*D%8OgZdOQ)LL{QdO6gT9=is{~9J!c{ zPBsKo7?1M|gF4%&a#9N`=fII86_92dnP1wZ%Ixbg7=wap8;4Nj8}{LCxL}PiI5=yQ 
zK4J2I0gG_2U^bV)O^Y>nCU-4Hn%uV_`Mh0x1~*c(;{Axa8*z8}q_`9NZQ=p(qBsZG zJ>m|u$!~~{qH>(@Y7p$4)T}N7YcB%pC<5y&0_!RQTT=wq?SL`jG{7QouyPkd+K4}n zI5o>%Dp3YtNj~YGxBN{}MrF{L)G%ptQWH|yT9Ynd?qGwW3p%4Tv}PHaj#>>Nx5}2Z z8u$m$=obKHPE)8NO%s9fm5N+Pi5R(l6Ruei75x~L=|HHo8lf_bjvyNt#+VR$v?c6O zqii4qXJWtyC(G@z4@$a#qGMY@*C6S*N28K-*eklAN%w(gC0$U_vC%9QK}p9wT9>55 zp0#Y3wqQ77fNOI2>LcLLt-?_nDH~ElPuhTwW~$*NZNN~7z%I%L9X1=Zv(9F#iarC5 zov;CVGMENBVdj>V{auHx#!mqYW>HwIaTv8UunOyk@vy>T{lch|?1wvKZKx$5_{@)b z7@QI03#DbJP0LPdWoSA7=r^ZpNcmv*d^(ijreTG z0ZcUWavAIqyThNni_cw~qh9E^mxx+Swau4pc-os(6wT+?CV(P&40%#Js_n|CA zxq!kI%a5Q?(#s&?(5)WD@lol^!uQ1|Qpt33Dx=+S`-Gm5I*42D`gtF|FCGOy@u@G> zKHc%WkMUg2=2d_Hx4-u73maC}pMMDT@B77*tp7_WT*PAiM^QMDKZ3$#w*(4D@w{`- zy4``odcOyStFHH=&>r`r&_2(g*zG~sp7_*EpN2VQRQv^=#uoJ>upPwILmEd`Om|6}tZGX#(z*fOx3E;(cspWV#Id4KJnnu<^8%5B;} zfB&FnX&^Js&iK^S7TIe^&u%zC-8O6yKfCs+cPfTg_HVoed>B6S$@EzaZja+0b)_2@ cbL?5M9D25&R5^%c;#4ZXOtxjC{VvP@KN&5jAOHXW literal 0 HcmV?d00001 diff --git a/Additional_Implementations/skein_MSC_v9_perf.txt b/Additional_Implementations/skein_MSC_v9_perf.txt new file mode 100644 index 000000000000..9e8f125a45c5 --- /dev/null +++ b/Additional_Implementations/skein_MSC_v9_perf.txt @@ -0,0 +1,129 @@ +File STDIN: + 1_ || 2802.00 2814.00 | 5952.00 5952.00 | 30606.00 30606.00 | //: 32-bit, MSC_v9.00 [ C =...] + 10_ || 278.40 278.40 | 593.40 593.40 | 3063.00 3063.00 | //: 32-bit, MSC_v9.00 [ C =...] + 100_ || 65.52 65.58 | 88.02 88.08 | 306.30 306.30 | //: 32-bit, MSC_v9.00 [ C =...] + 1000_ || 41.26 41.41 | 47.96 47.96 | 135.28 135.29 | //: 32-bit, MSC_v9.00 [ C =...] + 10000_ || 38.86 39.08 | 44.13 44.21 | 119.88 120.11 | //: 32-bit, MSC_v9.00 [ C =...] + 100000_ || 38.85 39.09 | 43.56 43.77 | 105.79 114.18 | //: 32-bit, MSC_v9.00 [ C =...] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =...] + Block || 10192 bytes | 22960 bytes | 53072 bytes | //: 32-bit, MSC_v9.00 [ C =...] + 1_ || 780.00 786.00 | 1110.00 1110.00 | 3288.00 3318.00 | //: 64-bit, MSC_v9.00 [ C =...] 
+ 10_ || 78.60 79.80 | 109.80 109.80 | 331.20 331.80 | //: 64-bit, MSC_v9.00 [ C =...] + 100_ || 16.74 16.80 | 15.54 15.54 | 33.30 33.30 | //: 64-bit, MSC_v9.00 [ C =...] + 1000_ || 9.88 10.67 | 7.38 7.38 | 14.16 14.17 | //: 64-bit, MSC_v9.00 [ C =...] + 10000_ || 9.21 9.22 | 6.60 6.60 | 12.27 12.39 | //: 64-bit, MSC_v9.00 [ C =...] + 100000_ || 9.98 10.01 | 7.04 7.08 | 12.36 13.14 | //: 64-bit, MSC_v9.00 [ C =...] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =...] + Block || 2272 bytes | 4944 bytes | 15264 bytes | //: 64-bit, MSC_v9.00 [ C =...] + 1_ || 2484.00 2490.00 | 4830.00 4836.00 | 22182.00 22188.00 | //: 32-bit, MSC_v9.00 [asm=...] + 10_ || 250.20 252.00 | 485.40 488.40 | 1936.80 1959.00 | //: 32-bit, MSC_v9.00 [asm=...] + 100_ || 58.62 58.68 | 70.74 70.80 | 221.76 221.76 | //: 32-bit, MSC_v9.00 [asm=...] + 1000_ || 34.12 34.16 | 35.44 35.44 | 85.27 85.31 | //: 32-bit, MSC_v9.00 [asm=...] + 10000_ || 34.78 34.98 | 35.36 35.36 | 86.31 86.35 | //: 32-bit, MSC_v9.00 [asm=...] + 100000_ || 32.96 33.40 | 33.29 33.60 | 75.79 76.81 | //: 32-bit, MSC_v9.00 [asm=...] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=...] + Block || 7588 bytes | 16636 bytes | 38262 bytes | //: 32-bit, MSC_v9.00 [asm=...] + 1_ || 672.00 672.00 | 1068.00 1068.00 | 1920.00 1926.00 | //: 64-bit, MSC_v9.00 [asm=...] + 10_ || 64.80 65.40 | 107.40 108.00 | 192.00 192.60 | //: 64-bit, MSC_v9.00 [asm=...] + 100_ || 15.54 15.60 | 16.20 16.26 | 21.06 21.06 | //: 64-bit, MSC_v9.00 [asm=...] + 1000_ || 8.18 8.18 | 6.97 6.97 | 7.77 7.78 | //: 64-bit, MSC_v9.00 [asm=...] + 10000_ || 7.59 7.59 | 6.23 6.23 | 6.69 6.69 | //: 64-bit, MSC_v9.00 [asm=...] + 100000_ || 7.55 7.71 | 6.14 6.38 | 6.56 6.86 | //: 64-bit, MSC_v9.00 [asm=...] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=...] + Block || 2323 bytes | 4733 bytes | 11817 bytes | //: 64-bit, MSC_v9.00 [asm=...] 
+ 1_ || 2952.00 2958.00 | 6030.00 6036.00 | 13668.00 13674.00 | //: 32-bit, MSC_v9.00 [ C =111] + 10_ || 295.80 295.80 | 603.00 603.60 | 1366.80 1366.80 | //: 32-bit, MSC_v9.00 [ C =111] + 100_ || 69.96 70.02 | 88.98 89.04 | 136.92 137.52 | //: 32-bit, MSC_v9.00 [ C =111] + 1000_ || 43.90 43.96 | 48.78 48.85 | 60.08 60.11 | //: 32-bit, MSC_v9.00 [ C =111] + 10000_ || 41.53 41.59 | 44.76 44.80 | 53.01 53.01 | //: 32-bit, MSC_v9.00 [ C =111] + 100000_ || 41.32 41.60 | 44.52 44.62 | 51.75 51.92 | //: 32-bit, MSC_v9.00 [ C =111] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =111] + Block || 1712 bytes | 3664 bytes | 7200 bytes | //: 32-bit, MSC_v9.00 [ C =111] + 1_ || 780.00 786.00 | 1422.00 1434.00 | 3810.00 3816.00 | //: 64-bit, MSC_v9.00 [ C =111] + 10_ || 75.60 76.20 | 140.40 140.40 | 380.40 381.00 | //: 64-bit, MSC_v9.00 [ C =111] + 100_ || 17.16 17.22 | 20.52 21.00 | 38.22 38.28 | //: 64-bit, MSC_v9.00 [ C =111] + 1000_ || 9.69 9.69 | 10.42 10.42 | 16.51 16.51 | //: 64-bit, MSC_v9.00 [ C =111] + 10000_ || 8.97 8.97 | 9.38 9.38 | 14.38 14.40 | //: 64-bit, MSC_v9.00 [ C =111] + 100000_ || 9.18 9.71 | 9.35 9.49 | 14.79 14.99 | //: 64-bit, MSC_v9.00 [ C =111] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =111] + Block || 704 bytes | 1456 bytes | 2976 bytes | //: 64-bit, MSC_v9.00 [ C =111] + 1_ || 2580.00 2598.00 | 4842.00 4848.00 | 10578.00 10602.00 | //: 32-bit, MSC_v9.00 [asm=111] + 10_ || 259.80 259.80 | 484.20 484.20 | 1059.60 1060.20 | //: 32-bit, MSC_v9.00 [asm=111] + 100_ || 57.18 57.24 | 66.42 66.48 | 98.40 98.46 | //: 32-bit, MSC_v9.00 [asm=111] + 1000_ || 35.56 35.59 | 35.96 35.96 | 42.79 42.80 | //: 32-bit, MSC_v9.00 [asm=111] + 10000_ || 33.69 36.50 | 33.29 33.42 | 37.98 41.34 | //: 32-bit, MSC_v9.00 [asm=111] + 100000_ || 33.96 34.57 | 33.93 35.69 | 38.04 38.20 | //: 32-bit, MSC_v9.00 [asm=111] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=111] + Block || 1276 bytes | 2532 
bytes | 4983 bytes | //: 32-bit, MSC_v9.00 [asm=111] + 1_ || 678.00 678.00 | 1098.00 1098.00 | 2034.00 2040.00 | //: 64-bit, MSC_v9.00 [asm=111] + 10_ || 66.60 66.60 | 109.80 109.80 | 204.00 204.00 | //: 64-bit, MSC_v9.00 [asm=111] + 100_ || 15.48 16.68 | 16.98 16.98 | 22.38 22.38 | //: 64-bit, MSC_v9.00 [asm=111] + 1000_ || 8.45 8.45 | 7.93 7.93 | 8.39 8.39 | //: 64-bit, MSC_v9.00 [asm=111] + 10000_ || 7.81 7.81 | 6.50 6.50 | 7.18 7.18 | //: 64-bit, MSC_v9.00 [asm=111] + 100000_ || 8.08 8.09 | 6.40 6.71 | 6.98 7.21 | //: 64-bit, MSC_v9.00 [asm=111] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=111] + Block || 664 bytes | 1074 bytes | 2221 bytes | //: 64-bit, MSC_v9.00 [asm=111] + 1_ || 2988.00 2994.00 | 6240.00 6246.00 | 13794.00 13800.00 | //: 32-bit, MSC_v9.00 [ C =332] + 10_ || 297.60 299.40 | 623.40 624.00 | 1379.40 1380.00 | //: 32-bit, MSC_v9.00 [ C =332] + 100_ || 70.26 70.32 | 91.92 91.92 | 138.00 138.06 | //: 32-bit, MSC_v9.00 [ C =332] + 1000_ || 44.88 44.89 | 50.20 50.20 | 60.44 60.45 | //: 32-bit, MSC_v9.00 [ C =332] + 10000_ || 42.42 42.42 | 46.30 46.31 | 53.29 53.31 | //: 32-bit, MSC_v9.00 [ C =332] + 100000_ || 42.21 42.50 | 43.60 45.77 | 49.55 50.03 | //: 32-bit, MSC_v9.00 [ C =332] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =332] + Block || 4560 bytes | 9232 bytes | 12560 bytes | //: 32-bit, MSC_v9.00 [ C =332] + 1_ || 780.00 798.00 | 1920.00 1920.00 | 3732.00 3732.00 | //: 64-bit, MSC_v9.00 [ C =332] + 10_ || 76.80 76.80 | 189.00 191.40 | 402.60 402.60 | //: 64-bit, MSC_v9.00 [ C =332] + 100_ || 17.10 17.16 | 27.66 27.90 | 37.62 37.62 | //: 64-bit, MSC_v9.00 [ C =332] + 1000_ || 9.98 10.12 | 14.23 14.25 | 16.13 16.13 | //: 64-bit, MSC_v9.00 [ C =332] + 10000_ || 9.27 9.28 | 12.89 12.99 | 13.98 13.98 | //: 64-bit, MSC_v9.00 [ C =332] + 100000_ || 9.32 9.56 | 13.12 13.19 | 14.15 14.23 | //: 64-bit, MSC_v9.00 [ C =332] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =332] 
+ Block || 1200 bytes | 2928 bytes | 5008 bytes | //: 64-bit, MSC_v9.00 [ C =332] + 1_ || 2598.00 2604.00 | 4866.00 4878.00 | 10614.00 10632.00 | //: 32-bit, MSC_v9.00 [asm=332] + 10_ || 260.40 261.00 | 490.20 490.20 | 1067.40 1067.40 | //: 32-bit, MSC_v9.00 [asm=332] + 100_ || 60.78 60.78 | 72.00 72.00 | 106.86 106.92 | //: 32-bit, MSC_v9.00 [asm=332] + 1000_ || 38.38 38.42 | 39.17 39.19 | 46.49 46.61 | //: 32-bit, MSC_v9.00 [asm=332] + 10000_ || 40.98 47.69 | 35.81 35.86 | 40.96 43.93 | //: 32-bit, MSC_v9.00 [asm=332] + 100000_ || 34.46 36.34 | 34.07 37.16 | 39.60 43.18 | //: 32-bit, MSC_v9.00 [asm=332] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=332] + Block || 3060 bytes | 6300 bytes | 8835 bytes | //: 32-bit, MSC_v9.00 [asm=332] + 1_ || 684.00 690.00 | 1104.00 1104.00 | 2028.00 2034.00 | //: 64-bit, MSC_v9.00 [asm=332] + 10_ || 70.80 70.80 | 120.00 120.00 | 219.00 219.00 | //: 64-bit, MSC_v9.00 [asm=332] + 100_ || 15.72 15.72 | 16.74 16.74 | 22.20 22.20 | //: 64-bit, MSC_v9.00 [asm=332] + 1000_ || 8.42 8.42 | 7.22 7.22 | 8.30 8.30 | //: 64-bit, MSC_v9.00 [asm=332] + 10000_ || 7.85 8.51 | 6.58 6.58 | 7.11 7.12 | //: 64-bit, MSC_v9.00 [asm=332] + 100000_ || 7.80 9.43 | 6.90 7.71 | 7.18 8.48 | //: 64-bit, MSC_v9.00 [asm=332] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=332] + Block || 1288 bytes | 2182 bytes | 3449 bytes | //: 64-bit, MSC_v9.00 [asm=332] + 1_ || 2994.00 2994.00 | 6240.00 6240.00 | 14598.00 14604.00 | //: 32-bit, MSC_v9.00 [ C =335] + 10_ || 300.60 301.20 | 624.00 624.60 | 1459.20 1461.00 | //: 32-bit, MSC_v9.00 [ C =335] + 100_ || 70.62 70.68 | 91.86 91.92 | 146.10 146.16 | //: 32-bit, MSC_v9.00 [ C =335] + 1000_ || 44.65 44.65 | 50.20 50.20 | 62.74 62.76 | //: 32-bit, MSC_v9.00 [ C =335] + 10000_ || 42.16 42.42 | 46.31 46.73 | 55.11 55.13 | //: 32-bit, MSC_v9.00 [ C =335] + 100000_ || 40.09 40.55 | 45.76 45.97 | 51.00 53.08 | //: 32-bit, MSC_v9.00 [ C =335] + API || 864 bytes | 704 bytes | 
720 bytes | //: 32-bit, MSC_v9.00 [ C =335] + Block || 4560 bytes | 9232 bytes | 29280 bytes | //: 32-bit, MSC_v9.00 [ C =335] + 1_ || 780.00 798.00 | 1890.00 1920.00 | 3498.00 3498.00 | //: 64-bit, MSC_v9.00 [ C =335] + 10_ || 77.40 78.00 | 190.80 195.00 | 350.40 379.20 | //: 64-bit, MSC_v9.00 [ C =335] + 100_ || 17.10 17.10 | 27.72 28.08 | 35.28 35.28 | //: 64-bit, MSC_v9.00 [ C =335] + 1000_ || 9.95 10.00 | 14.23 14.24 | 15.09 15.10 | //: 64-bit, MSC_v9.00 [ C =335] + 10000_ || 9.30 10.06 | 12.94 14.10 | 13.07 14.36 | //: 64-bit, MSC_v9.00 [ C =335] + 100000_ || 9.33 9.58 | 13.94 13.95 | 13.24 13.92 | //: 64-bit, MSC_v9.00 [ C =335] + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =335] + Block || 1200 bytes | 2928 bytes | 10880 bytes | //: 64-bit, MSC_v9.00 [ C =335] + 1_ || 2586.00 2592.00 | 4896.00 4902.00 | 10668.00 10668.00 | //: 32-bit, MSC_v9.00 [asm=335] + 10_ || 263.40 263.40 | 489.60 489.60 | 1069.20 1069.80 | //: 32-bit, MSC_v9.00 [asm=335] + 100_ || 61.08 61.14 | 72.30 72.36 | 107.04 107.10 | //: 32-bit, MSC_v9.00 [asm=335] + 1000_ || 35.57 35.57 | 36.11 36.12 | 43.07 43.12 | //: 32-bit, MSC_v9.00 [asm=335] + 10000_ || 33.68 34.51 | 33.29 36.32 | 37.91 39.80 | //: 32-bit, MSC_v9.00 [asm=335] + 100000_ || 36.32 36.43 | 35.91 35.98 | 38.02 38.19 | //: 32-bit, MSC_v9.00 [asm=335] + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=335] + Block || 3060 bytes | 6300 bytes | 20391 bytes | //: 32-bit, MSC_v9.00 [asm=335] + 1_ || 684.00 690.00 | 1104.00 1104.00 | 2022.00 2022.00 | //: 64-bit, MSC_v9.00 [asm=335] + 10_ || 65.40 65.40 | 109.80 109.80 | 201.60 202.20 | //: 64-bit, MSC_v9.00 [asm=335] + 100_ || 15.78 15.78 | 16.80 16.80 | 22.02 22.08 | //: 64-bit, MSC_v9.00 [asm=335] + 1000_ || 8.41 8.42 | 7.21 7.22 | 8.24 8.26 | //: 64-bit, MSC_v9.00 [asm=335] + 10000_ || 7.84 7.84 | 6.45 6.50 | 7.12 7.12 | //: 64-bit, MSC_v9.00 [asm=335] + 100000_ || 8.11 8.11 | 6.49 6.74 | 6.95 7.26 | //: 64-bit, MSC_v9.00 [asm=335] 
+ API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=335] + Block || 1288 bytes | 2182 bytes | 7133 bytes | //: 64-bit, MSC_v9.00 [asm=335] diff --git a/Additional_Implementations/skein_block_x64.asm b/Additional_Implementations/skein_block_x64.asm new file mode 100644 index 000000000000..b5221ae423ad --- /dev/null +++ b/Additional_Implementations/skein_block_x64.asm @@ -0,0 +1,1335 @@ +; +;---------------------------------------------------------------- +; 64-bit x86 assembler code (Microsoft ML64) for Skein block functions +; +; Author: Doug Whiting, Hifn +; +; This code is released to the public domain. +;---------------------------------------------------------------- +; + .code +; +_MASK_ALL_ equ (256+512+1024) ;all three algorithm bits +_MAX_FRAME_ equ 240 +; +;;;;;;;;;;;;;;;;; +ifndef SKEIN_USE_ASM +_USE_ASM_ = _MASK_ALL_ +elseif SKEIN_USE_ASM and _MASK_ALL_ +_USE_ASM_ = SKEIN_USE_ASM +else +_USE_ASM_ = _MASK_ALL_ +endif +;;;;;;;;;;;;;;;;; +ifndef SKEIN_LOOP ;configure loop unrolling +_SKEIN_LOOP = 0 ;default is all fully unrolled +else +_SKEIN_LOOP = SKEIN_LOOP +endif +; the unroll counts (0 --> fully unrolled) +SKEIN_UNROLL_256 = (_SKEIN_LOOP / 100) mod 10 +SKEIN_UNROLL_512 = (_SKEIN_LOOP / 10) mod 10 +SKEIN_UNROLL_1024 = (_SKEIN_LOOP ) mod 10 +; +SKEIN_ASM_UNROLL = 0 + irp _NN_,<256,512,1024> + if (SKEIN_UNROLL_&_NN_) eq 0 +SKEIN_ASM_UNROLL = SKEIN_ASM_UNROLL + _NN_ + endif + endm +;;;;;;;;;;;;;;;;; +; +ifndef SKEIN_ROUNDS +ROUNDS_256 = 72 +ROUNDS_512 = 72 +ROUNDS_1024 = 80 +else +ROUNDS_256 = 8*((((SKEIN_ROUNDS / 100) + 5) mod 10) + 5) +ROUNDS_512 = 8*((((SKEIN_ROUNDS / 10) + 5) mod 10) + 5) +ROUNDS_1024 = 8*((((SKEIN_ROUNDS ) + 5) mod 10) + 5) +endif +; +irp _NN_,<256,512,1024> + if _USE_ASM_ and _NN_ + irp _RR_,<%(ROUNDS_&_NN_)> + if _NN_ eq 1024 +%out +++ SKEIN_ROUNDS_&_NN_ = _RR_ + else +%out +++ SKEIN_ROUNDS_&_NN_ = _RR_ + endif + endm + endif +endm +;;;;;;;;;;;;;;;;; +; +ifndef SKEIN_CODE_SIZE +ifdef SKEIN_PERF +SKEIN_CODE_SIZE 
equ (1) +endif +endif +; +;;;;;;;;;;;;;;;;; +; +ifndef SKEIN_DEBUG +_SKEIN_DEBUG = 0 +else +_SKEIN_DEBUG = 1 +endif +;;;;;;;;;;;;;;;;; +; +; define offsets of fields in hash context structure +; +HASH_BITS = 0 ;# bits of hash output +BCNT = 8 + HASH_BITS ;number of bytes in BUFFER[] +TWEAK = 8 + BCNT ;tweak values[0..1] +X_VARS = 16 + TWEAK ;chaining vars +; +;(Note: buffer[] in context structure is NOT needed here :-) +; +r08 equ +r09 equ +; +KW_PARITY = 01BD11BDAA9FC1A22h ;overall parity of key schedule words +FIRST_MASK = NOT (1 SHL 62) +; +; rotation constants for Skein +; +RC_256_0_0 = 14 +RC_256_0_1 = 16 + +RC_256_1_0 = 52 +RC_256_1_1 = 57 + +RC_256_2_0 = 23 +RC_256_2_1 = 40 + +RC_256_3_0 = 5 +RC_256_3_1 = 37 + +RC_256_4_0 = 25 +RC_256_4_1 = 33 + +RC_256_5_0 = 46 +RC_256_5_1 = 12 + +RC_256_6_0 = 58 +RC_256_6_1 = 22 + +RC_256_7_0 = 32 +RC_256_7_1 = 32 + +RC_512_0_0 = 46 +RC_512_0_1 = 36 +RC_512_0_2 = 19 +RC_512_0_3 = 37 + +RC_512_1_0 = 33 +RC_512_1_1 = 27 +RC_512_1_2 = 14 +RC_512_1_3 = 42 + +RC_512_2_0 = 17 +RC_512_2_1 = 49 +RC_512_2_2 = 36 +RC_512_2_3 = 39 + +RC_512_3_0 = 44 +RC_512_3_1 = 9 +RC_512_3_2 = 54 +RC_512_3_3 = 56 + +RC_512_4_0 = 39 +RC_512_4_1 = 30 +RC_512_4_2 = 34 +RC_512_4_3 = 24 + +RC_512_5_0 = 13 +RC_512_5_1 = 50 +RC_512_5_2 = 10 +RC_512_5_3 = 17 + +RC_512_6_0 = 25 +RC_512_6_1 = 29 +RC_512_6_2 = 39 +RC_512_6_3 = 43 + +RC_512_7_0 = 8 +RC_512_7_1 = 35 +RC_512_7_2 = 56 +RC_512_7_3 = 22 + +RC_1024_0_0 = 24 +RC_1024_0_1 = 13 +RC_1024_0_2 = 8 +RC_1024_0_3 = 47 +RC_1024_0_4 = 8 +RC_1024_0_5 = 17 +RC_1024_0_6 = 22 +RC_1024_0_7 = 37 + +RC_1024_1_0 = 38 +RC_1024_1_1 = 19 +RC_1024_1_2 = 10 +RC_1024_1_3 = 55 +RC_1024_1_4 = 49 +RC_1024_1_5 = 18 +RC_1024_1_6 = 23 +RC_1024_1_7 = 52 + +RC_1024_2_0 = 33 +RC_1024_2_1 = 4 +RC_1024_2_2 = 51 +RC_1024_2_3 = 13 +RC_1024_2_4 = 34 +RC_1024_2_5 = 41 +RC_1024_2_6 = 59 +RC_1024_2_7 = 17 + +RC_1024_3_0 = 5 +RC_1024_3_1 = 20 +RC_1024_3_2 = 48 +RC_1024_3_3 = 41 +RC_1024_3_4 = 47 +RC_1024_3_5 = 28 +RC_1024_3_6 = 16 
+RC_1024_3_7 = 25 + +RC_1024_4_0 = 41 +RC_1024_4_1 = 9 +RC_1024_4_2 = 37 +RC_1024_4_3 = 31 +RC_1024_4_4 = 12 +RC_1024_4_5 = 47 +RC_1024_4_6 = 44 +RC_1024_4_7 = 30 + +RC_1024_5_0 = 16 +RC_1024_5_1 = 34 +RC_1024_5_2 = 56 +RC_1024_5_3 = 51 +RC_1024_5_4 = 4 +RC_1024_5_5 = 53 +RC_1024_5_6 = 42 +RC_1024_5_7 = 41 + +RC_1024_6_0 = 31 +RC_1024_6_1 = 44 +RC_1024_6_2 = 47 +RC_1024_6_3 = 46 +RC_1024_6_4 = 19 +RC_1024_6_5 = 42 +RC_1024_6_6 = 44 +RC_1024_6_7 = 25 + +RC_1024_7_0 = 9 +RC_1024_7_1 = 48 +RC_1024_7_2 = 35 +RC_1024_7_3 = 52 +RC_1024_7_4 = 23 +RC_1024_7_5 = 31 +RC_1024_7_6 = 37 +RC_1024_7_7 = 20 +; +; Input: reg +; Output: <<< RC_BlkSize_roundNum_mixNum, BlkSize=256/512/1024 +; +RotL64 macro reg,BLK_SIZE,ROUND_NUM,MIX_NUM +_RCNT_ = ( RC_&BLK_SIZE&_&ROUND_NUM&_&MIX_NUM AND 63 ) + if _RCNT_ ;is there anything to do? + rol reg,_RCNT_ + endif +endm +; +;---------------------------------------------------------------- +; +; MACROS: define local vars and configure stack +; +;---------------------------------------------------------------- +; declare allocated space on the stack +StackVar macro localName,localSize +localName = _STK_OFFS_ +_STK_OFFS_ = _STK_OFFS_+(localSize) +endm ;StackVar +; +;---------------------------------------------------------------- +; +; MACRO: Configure stack frame, allocate local vars +; +Setup_Stack macro BLK_BITS,KS_CNT,NO_FRAME,debugCnt + WCNT = (BLK_BITS)/64 +; +_PushCnt_ = 0 ;save nonvolatile regs on stack + irp _reg_, + push _reg_ + .pushreg _reg_ ;pseudo-op push for exception handling +_PushCnt_ = _PushCnt_ + 1 ;track count to keep alignment + endm +; +_STK_OFFS_ = 0 ;starting offset from rsp + ;---- local variables ;<-- rsp + StackVar X_stk ,8*(WCNT) ;local context vars + StackVar ksTwk ,8*3 ;key schedule: tweak words + StackVar ksKey ,8*(WCNT)+8 ;key schedule: key words + if (SKEIN_ASM_UNROLL and (BLK_BITS)) eq 0 + StackVar ksRot ,16*(KS_CNT+0);leave space for "rotation" to happen + endif + StackVar Wcopy ,8*(WCNT) ;copy of input block + 
if _SKEIN_DEBUG + ifnb ;temp location for debug X[] info + StackVar xDebug_&BLK_BITS ,8*(debugCnt) + endif + endif + if ((8*_PushCnt_ + _STK_OFFS_) and 8) eq 0 + StackVar align16,8 ;keep 16-byte aligned (adjust for retAddr?) +tmpStk_&BLK_BITS = align16 ;use this + endif +LOCAL_SIZE = _STK_OFFS_ ;size of local vars + ;---- + StackVar savRegs,8*_PushCnt_ ;saved registers + StackVar retAddr,8 ;return address + ;---- caller parameters + StackVar ctxPtr ,8 ;context ptr + StackVar blkPtr ,8 ;pointer to block data + StackVar blkCnt ,8 ;number of full blocks to process + StackVar bitAdd ,8 ;bit count to add to tweak + ;---- caller's stack frame +; +; set up the stack frame pointer (rbp) +; +FRAME_OFFS = ksTwk + 128 ;allow short (negative) offset to ksTwk, ksKey + if FRAME_OFFS gt _STK_OFFS_ ;keep rbp in the "locals" range +FRAME_OFFS = _STK_OFFS_ + endif + if FRAME_OFFS gt _MAX_FRAME_ ;keep Microsoft .setframe happy +FRAME_OFFS = _MAX_FRAME_ + endif +; +ifdef SKEIN_ASM_INFO + if FRAME_OFFS+128 lt savRegs +%out +++ SKEIN_&BLK_BITS: Unable to reach all of Wcopy with short offset from rbp. + elseif FRAME_OFFS+128 lt Wcopy +%out +++ SKEIN_&BLK_BITS: Unable to reach end of Wcopy with short offset from rbp. + elseif FRAME_OFFS+128 lt _STK_OFFS_ +%out +++ SKEIN_&BLK_BITS: Unable to reach caller parms with short offset from rbp + endif +endif + ;put some useful defines in the .lst file (for grep) +__STK_LCL_SIZE_&BLK_BITS = LOCAL_SIZE +__STK_TOT_SIZE_&BLK_BITS = _STK_OFFS_ +__STK_FRM_OFFS_&BLK_BITS = FRAME_OFFS +; +; Notes on stack frame setup: +; * the most frequently used variable is X_stk[], based at [rsp+0] +; * the next most used are the key schedule arrays, ksKey and ksTwk +; so rbp is "centered" there, allowing short offsets to the key +; schedule even in 1024-bit Skein case +; * the Wcopy variables are infrequently accessed, but they have long +; offsets from both rsp and rbp only in the 1024-bit case. 
+; * all other local vars and calling parameters can be accessed +; with short offsets, except in the 1024-bit case +; + sub rsp,LOCAL_SIZE ;make room for the locals + .allocstack LOCAL_SIZE ;pseudo op for exception handling + lea rbp,[rsp+FRAME_OFFS] ;maximize use of short offsets + ifb + .setframe rbp, FRAME_OFFS ;pseudo op for exception handling + endif + mov [FP_+ctxPtr],rcx ;save caller's parameters on the stack + mov [FP_+blkPtr],rdx + mov [FP_+blkCnt],r08 + mov [FP_+bitAdd],r09 + .endprolog ;pseudo op to support exception handling + + mov rdi,[FP_+ctxPtr ] ;rdi --> context +; +endm ;Setup_Stack +; +FP_ equ ;keep as many short offsets as possible +; +;---------------------------------------------------------------- +; +Reset_Stack macro procStart + add rsp,LOCAL_SIZE ;get rid of locals (wipe??) + irp _reg_, + pop _reg_ +_PushCnt_ = _PushCnt_ - 1 + endm + if _PushCnt_ + .err "Mismatched push/pops?" + endif + + ;display code size in bytes to stdout + irp _BCNT_,<%($+1-procStart)> ;account for return opcode +_ProcBytes_ = _BCNT_ +if _BCNT_ ge 10000 +%out procStart code size = _BCNT_ bytes +elseif _BCNT_ ge 1000 +%out procStart code size = _BCNT_ bytes +else +%out procStart code size = _BCNT_ bytes +endif + endm ;irp _BCNT_ +endm ; Reset_Stack +; +;---------------------------------------------------------------- +; macros to help debug internals +; +if _SKEIN_DEBUG + extrn Skein_Show_Block:proc ;calls to C routines + extrn Skein_Show_Round:proc +; +SKEIN_RND_SPECIAL = 1000 +SKEIN_RND_KEY_INITIAL = SKEIN_RND_SPECIAL+0 +SKEIN_RND_KEY_INJECT = SKEIN_RND_SPECIAL+1 +SKEIN_RND_FEED_FWD = SKEIN_RND_SPECIAL+2 +; +Skein_Debug_Block macro BLK_BITS +; +;void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X, +; const u08b_t *blkPtr, const u64b_t *wPtr, +; const u64b_t *ksPtr,const u64b_t *tsPtr); +; + irp _reg_, + push _reg_ ;save all volatile regs on stack before the call + endm + ; get and push call parameters + lea rax,[FP_+ksTwk] ;tweak pointer + 
push rax + lea rax,[FP_+ksKey] ;key pointer + push rax + lea rax,[FP_+Wcopy] ;wPtr + push rax + mov r09,[FP_+blkPtr] ;blkPtr + push r09 ;(push register parameters anyway to make room on stack) + mov rdx,[FP_+ctxPtr] + lea r08,[rdx+X_VARS] ;X (pointer) + push r08 + push rdx ;h (pointer) + mov rcx, BLK_BITS ;bits + push rdx + call Skein_Show_Block ;call external debug handler + add rsp,7*8 ;discard parameters on stack + irp _reg_, + pop _reg_ ;restore regs + endm +endm ; Skein_Debug_Block +; +; +; the macro to "call" to debug a round +; +Skein_Debug_Round macro BLK_BITS,R,RDI_OFFS,afterOp + ; call the appropriate (local) debug function + push r08 + if (SKEIN_ASM_UNROLL and BLK_BITS) or (R ge SKEIN_RND_SPECIAL) + mov r08, R + else ;compute round number using edi +_rOffs_ = RDI_OFFS + 0 + if BLK_BITS eq 1024 + mov r08,[rsp+8+rIdx_offs] ;get rIdx off the stack (adjust for push r08) + lea r08,[4*r08+1+(((R)-1) and 3)+_rOffs_] + else + lea r08,[4*rdi+1+(((R)-1) and 3)+_rOffs_] + endif + endif + call Skein_Debug_Round_&BLK_BITS + pop r08 +; + afterOp +endm ; Skein_Debug_Round +else ;------- _SKEIN_DEBUG (dummy macros if debug not enabled) +Skein_Debug_Block macro BLK_BITS,afterOp +endm +; +Skein_Debug_Round macro BLK_BITS,R,RDI_OFFS,afterOp +endm +; +endif ; _SKEIN_DEBUG +; +;---------------------------------------------------------------- +; +addReg macro dstReg,srcReg_A,srcReg_B,useAddOp,immOffs + ifnb + lea dstReg,[srcReg_A&&srcReg_B + dstReg + immOffs] + elseif ((useAddOp + 0) eq 0) + ifndef ASM_NO_LEA + ;lea seems to be faster on Core 2 Duo CPUs! 
+ lea dstReg,[srcReg_A&&srcReg_B + dstReg] + else + add dstReg, srcReg_A&&srcReg_B + endif + else + add dstReg, srcReg_A&&srcReg_B + endif +endm +; +;=================================== Skein_256 ============================================= +; +if _USE_ASM_ and 256 + public Skein_256_Process_Block +; +; void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; +; code +; +Skein_256_Process_Block proc frame + Setup_Stack 256,((ROUNDS_256/8)+1) + mov r14,[rdi+TWEAK+8] + jmp short Skein_256_block_loop + align 16 + ; main hash loop for Skein_256 +Skein_256_block_loop: + ; + ; general register usage: + ; RAX..RDX = X0..X3 + ; R08..R12 = ks[0..4] + ; R13..R15 = ts[0..2] + ; RSP, RBP = stack/frame pointers + ; RDI = round counter or context pointer + ; RSI = temp + ; + mov r13,[rdi+TWEAK+0] + add r13,[FP_+bitAdd] ;computed updated tweak value T0 + mov r15,r14 + xor r15,r13 ;now r13.r15 is set as the tweak + + mov r12,KW_PARITY + mov r08,[rdi+X_VARS+ 0] + mov r09,[rdi+X_VARS+ 8] + mov r10,[rdi+X_VARS+16] + mov r11,[rdi+X_VARS+24] + mov [rdi+TWEAK+0],r13 ;save updated tweak value ctx->h.T[0] + xor r12,r08 ;start accumulating overall parity + + mov rsi,[FP_+blkPtr ] ;esi --> input block + xor r12,r09 + mov rax,[rsi+ 0] ;get X[0..3] + xor r12,r10 + mov rbx,[rsi+ 8] + xor r12,r11 + mov rcx,[rsi+16] + mov rdx,[rsi+24] + + mov [FP_+Wcopy+ 0],rax ;save copy of input block + mov [FP_+Wcopy+ 8],rbx + mov [FP_+Wcopy+16],rcx + mov [FP_+Wcopy+24],rdx + + add rax, r08 ;initial key injection + add rbx, r09 + add rcx, r10 + add rdx, r11 + add rbx, r13 + add rcx, r14 + +if _SKEIN_DEBUG + mov [rdi+TWEAK+ 8],r14 ;save updated tweak T[1] (start bit cleared?) 
+ mov [FP_+ksKey+ 0],r08 ;save key schedule on stack for Skein_Debug_Block + mov [FP_+ksKey+ 8],r09 + mov [FP_+ksKey+16],r10 + mov [FP_+ksKey+24],r11 + mov [FP_+ksKey+32],r12 + + mov [FP_+ksTwk+ 0],r13 + mov [FP_+ksTwk+ 8],r14 + mov [FP_+ksTwk+16],r15 + + mov [rsp+X_stk + 0],rax ;save X[] on stack for Skein_Debug_Block + mov [rsp+X_stk + 8],rbx + mov [rsp+X_stk +16],rcx + mov [rsp+X_stk +24],rdx + + Skein_Debug_Block 256 ;debug dump + Skein_Debug_Round 256,SKEIN_RND_KEY_INITIAL +endif +; +if ((SKEIN_ASM_UNROLL and 256) eq 0) + mov [FP_+ksKey+40],r08 ;save key schedule on stack for looping code + mov [FP_+ksKey+ 8],r09 + mov [FP_+ksKey+16],r10 + mov [FP_+ksKey+24],r11 + mov [FP_+ksKey+32],r12 + + mov [FP_+ksTwk+24],r13 + mov [FP_+ksTwk+ 8],r14 + mov [FP_+ksTwk+16],r15 +endif + add rsi, WCNT*8 ;skip the block + mov [FP_+blkPtr ],rsi ;update block pointer +; +opLoop macro op1,op2 + if (SKEIN_ASM_UNROLL and 256) eq 0 + op1 + else + op2 + endif +endm +; + ; + ; now the key schedule is computed. 
Start the rounds + ; +if SKEIN_ASM_UNROLL and 256 +_UNROLL_CNT = ROUNDS_256/8 +else +_UNROLL_CNT = SKEIN_UNROLL_256 + if ((ROUNDS_256/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_256" + endif + xor rdi,rdi ;rdi = iteration count +Skein_256_round_loop: +endif +_Rbase_ = 0 +rept _UNROLL_CNT*2 + ; all X and ks vars in regs ; (ops to "rotate" ks vars, via mem, if not unrolled) + ; round 4*_RBase_ + 0 + addReg rax, rbx + RotL64 rbx, 256,%((4*_RBase_+0) and 7),0 + addReg rcx, rdx + opLoop + xor rbx, rax + RotL64 rdx, 256,%((4*_RBase_+0) and 7),1 + xor rdx, rcx + if SKEIN_ASM_UNROLL and 256 + irp _r0_,<%(08+(_Rbase_+3) mod 5)> + irp _r1_,<%(13+(_Rbase_+2) mod 3)> + lea rdi,[r&_r0_+r&_r1_] ;precompute key injection value for rcx + endm + endm + endif + opLoop + Skein_Debug_Round 256,%(4*_RBase_+1) + + ; round 4*_RBase_ + 1 + addReg rax, rdx + RotL64 rdx, 256,%((4*_RBase_+1) and 7),0 + xor rdx, rax + opLoop + addReg rcx, rbx + RotL64 rbx, 256,%((4*_RBase_+1) and 7),1 + xor rbx, rcx + opLoop + Skein_Debug_Round 256,%(4*_RBase_+2) + if SKEIN_ASM_UNROLL and 256 + irp _r0_,<%(08+(_Rbase_+2) mod 5)> + irp _r1_,<%(13+(_Rbase_+1) mod 3)> + lea rsi,[r&_r0_+r&_r1_] ;precompute key injection value for rbx + endm + endm + endif + ; round 4*_RBase_ + 2 + addReg rax, rbx + RotL64 rbx, 256,%((4*_RBase_+2) and 7),0 + addReg rcx, rdx + opLoop + xor rbx, rax + RotL64 rdx, 256,%((4*_RBase_+2) and 7),1 + xor rdx, rcx + opLoop ;"rotate" the key + opLoop ;precompute key + tweak + Skein_Debug_Round 256,%(4*_RBase_+3) + ; round 4*_RBase_ + 3 + addReg rax, rdx + RotL64 rdx, 256,%((4*_RBase_+3) and 7),0 + addReg rcx, rbx + opLoop ;precompute key + tweak + opLoop ;"rotate" the tweak + xor rdx, rax + RotL64 rbx, 256,%((4*_RBase_+3) and 7),1 + xor rbx, rcx + Skein_Debug_Round 256,%(4*_RBase_+4) + opLoop ;precompute key+tweak + ;inject key schedule words +_Rbase_ = _Rbase_+1 + if SKEIN_ASM_UNROLL and 256 + addReg rax,r,%(08+((_Rbase_+0) mod 5)) + addReg rbx,rsi + addReg rcx,rdi + addReg 
rdx,r,%(08+((_Rbase_+3) mod 5)),,_Rbase_ + else + inc rdi + addReg rax,r08 + addReg rcx,r10 + addReg rbx,r09 + addReg rdx,r11 + endif + Skein_Debug_Round 256,SKEIN_RND_KEY_INJECT +endm ;rept _UNROLL_CNT + +; +if (SKEIN_ASM_UNROLL and 256) eq 0 + cmp rdi,2*(ROUNDS_256/8) + jb Skein_256_round_loop +endif ; (SKEIN_ASM_UNROLL and 256) eq 0 + mov rdi,[FP_+ctxPtr ] ;restore rdi --> context + + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..3} + xor rax,[FP_+Wcopy + 0] + mov r14,FIRST_MASK + xor rbx,[FP_+Wcopy + 8] + xor rcx,[FP_+Wcopy +16] + xor rdx,[FP_+Wcopy +24] + mov [rdi+X_VARS+ 0],rax ;store final result + and r14,[rdi+TWEAK + 8] + dec qword ptr [FP_+blkCnt] ;set zero flag + mov [rdi+X_VARS+ 8],rbx + mov [rdi+X_VARS+16],rcx + mov [rdi+X_VARS+24],rdx + + Skein_Debug_Round 256,SKEIN_RND_FEED_FWD,, + + ; go back for more blocks, if needed + jnz Skein_256_block_loop + mov [rdi+TWEAK + 8],r14 + Reset_Stack Skein_256_Process_Block + ret + + if _SKEIN_DEBUG +Skein_Debug_Round_256: + mov [FP_+X_stk+ 0],rax ;first, save X[] state on stack so debug routines can access it + mov [FP_+X_stk+ 8],rbx ;(use FP_ since rsp has changed!) 
+ mov [FP_+X_stk+16],rcx + mov [FP_+X_stk+24],rdx + push rdx ;save two regs for BLK_BITS-specific parms + push rcx + mov rdx,[FP_+ctxPtr] ;ctx_hdr_ptr + mov rcx, 256 + jmp Skein_Debug_Round_Common + endif + +Skein_256_Process_Block endp +; +ifdef SKEIN_CODE_SIZE + public Skein_256_Process_Block_CodeSize +Skein_256_Process_Block_CodeSize proc + mov rax,_ProcBytes_ + ret +Skein_256_Process_Block_CodeSize endp +; + public Skein_256_Unroll_Cnt +Skein_256_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_256/8 + mov rax,_UNROLL_CNT + else + xor rax,rax + endif + ret +Skein_256_Unroll_Cnt endp +endif +; +endif ;_USE_ASM_ and 256 +; +;=================================== Skein_512 ============================================= +; +if _USE_ASM_ and 512 + public Skein_512_Process_Block +; +; void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +rX_512_0 equ r08 ;register assignments for X[] values during rounds +rX_512_1 equ r09 +rX_512_2 equ r10 +rX_512_3 equ r11 +rX_512_4 equ r12 +rX_512_5 equ r13 +rX_512_6 equ r14 +rX_512_7 equ r15 +; +;;;;;;;;;;;;;;;;; +; MACRO: one round for 512-bit blocks +; +R_512_OneRound macro r0,r1,r2,r3,r4,r5,r6,r7,_Rn_,op1,op2,op3,op4 +; + addReg rX_512_&r0, rX_512_&r1 + RotL64 rX_512_&r1, 512,%((_Rn_) and 7),0 + xor rX_512_&r1, rX_512_&r0 + op1 + addReg rX_512_&r2, rX_512_&r3 + RotL64 rX_512_&r3, 512,%((_Rn_) and 7),1 + xor rX_512_&r3, rX_512_&r2 + op2 + addReg rX_512_&r4, rX_512_&r5 + RotL64 rX_512_&r5, 512,%((_Rn_) and 7),2 + xor rX_512_&r5, rX_512_&r4 + op3 + addReg rX_512_&r6, rX_512_&r7 + RotL64 rX_512_&r7, 512,%((_Rn_) and 7),3 + xor rX_512_&r7, rX_512_&r6 + op4 + Skein_Debug_Round 512,%(_Rn_+1),-4 +; +endm ;R_512_OneRound +; +;;;;;;;;;;;;;;;;; +; MACRO: four rounds for 512-bit blocks +; +R_512_FourRounds macro _RR_ ;RR = base round number (0 mod 4) + if SKEIN_ASM_UNROLL and 512 + ; here for fully unrolled case. 
+ _II_ = ((_RR_)/4) + 1 ;key injection counter + R_512_OneRound 0,1,2,3,4,5,6,7,%((_RR_)+0),,, + R_512_OneRound 2,1,4,7,6,5,0,3,%((_RR_)+1),,, + R_512_OneRound 4,1,6,3,0,5,2,7,%((_RR_)+2),,, + R_512_OneRound 6,1,0,7,2,5,4,3,%((_RR_)+3),, + ; inject the key schedule + add r08,[FP_+ksKey+8*(((_II_)+0) mod 9)] + addReg r11,rax + add r09,[FP_+ksKey+8*(((_II_)+1) mod 9)] + addReg r12,rbx + add r10,[FP_+ksKey+8*(((_II_)+2) mod 9)] + addReg r13,rcx + addReg r14,rdx + addReg r15,rsi,,,(_II_) + else + ; here for looping case ;"rotate" key/tweak schedule (move up on stack) + inc rdi ;bump key injection counter + R_512_OneRound 0,1,2,3,4,5,6,7,%((_RR_)+0),, , + R_512_OneRound 2,1,4,7,6,5,0,3,%((_RR_)+1),,, + R_512_OneRound 4,1,6,3,0,5,2,7,%((_RR_)+2),, , + R_512_OneRound 6,1,0,7,2,5,4,3,%((_RR_)+3),, + ; inject the key schedule + add r08,[FP_+ksKey+8*rdi+8*0] + addReg r11,rax + addReg r12,rbx + add r09,[FP_+ksKey+8*rdi+8*1] + addReg r13,rcx + addReg r14,rdx + add r10,[FP_+ksKey+8*rdi+8*2] + addReg r15,rsi + addReg r15,rdi ;inject the round number + endif + ;show the result of the key injection + Skein_Debug_Round 512,SKEIN_RND_KEY_INJECT +endm ;R_512_FourRounds +; +;;;;;;;;;;;;;;;;; +; instantiated code +; +Skein_512_Process_Block proc frame + Setup_Stack 512,ROUNDS_512/8 + mov rbx,[rdi+TWEAK+ 8] + jmp short Skein_512_block_loop + align 16 + ; main hash loop for Skein_512 +Skein_512_block_loop: + ; general register usage: + ; RAX..RDX = temps for key schedule pre-loads + ; R08..R15 = X0..X7 + ; RSP, RBP = stack/frame pointers + ; RDI = round counter or context pointer + ; RSI = temp + ; + mov rax,[rdi+TWEAK+ 0] + add rax,[FP_+bitAdd] ;computed updated tweak value T0 + mov rcx,rbx + xor rcx,rax ;rax/rbx/rcx = tweak schedule + mov [rdi+TWEAK+ 0],rax ;save updated tweak value ctx->h.T[0] + mov [FP_+ksTwk+ 0],rax + mov rdx,KW_PARITY + mov rsi,[FP_+blkPtr ] ;rsi --> input block + mov [FP_+ksTwk+ 8],rbx + mov [FP_+ksTwk+16],rcx + + irp _Rn_,<0,1,2,3,4,5,6,7> + mov 
rX_512_&_Rn_,[rdi+X_VARS+8*(_Rn_)] + xor rdx,rX_512_&_Rn_ ;compute overall parity + mov [FP_+ksKey+8*(_Rn_)],rX_512_&_Rn_ + endm ;load state into r08..r15, compute parity + mov [FP_+ksKey+8*(8)],rdx ;save key schedule parity + + addReg rX_512_5,rax ;precompute key injection for tweak + addReg rX_512_6,rbx +if _SKEIN_DEBUG + mov [rdi+TWEAK+ 8],rbx ;save updated tweak value ctx->h.T[1] for Skein_Debug_Block below +endif + mov rax,[rsi+ 0] ;load input block + mov rbx,[rsi+ 8] + mov rcx,[rsi+16] + mov rdx,[rsi+24] + addReg r08,rax ;do initial key injection + addReg r09,rbx + mov [FP_+Wcopy+ 0],rax ;keep local copy for feedforward + mov [FP_+Wcopy+ 8],rbx + addReg r10,rcx + addReg r11,rdx + mov [FP_+Wcopy+16],rcx + mov [FP_+Wcopy+24],rdx + + mov rax,[rsi+32] + mov rbx,[rsi+40] + mov rcx,[rsi+48] + mov rdx,[rsi+56] + addReg r12,rax + addReg r13,rbx + addReg r14,rcx + addReg r15,rdx + mov [FP_+Wcopy+32],rax + mov [FP_+Wcopy+40],rbx + mov [FP_+Wcopy+48],rcx + mov [FP_+Wcopy+56],rdx + +if _SKEIN_DEBUG + irp _Rn_,<0,1,2,3,4,5,6,7> ;save values on stack for debug output + mov [rsp+X_stk+8*(_Rn_)],rX_512_&_Rn_ + endm + + Skein_Debug_Block 512 ;debug dump + Skein_Debug_Round 512,SKEIN_RND_KEY_INITIAL +endif + add rsi, 8*WCNT ;skip the block + mov [FP_+blkPtr ],rsi ;update block pointer + ; + ;;;;;;;;;;;;;;;;; + ; now the key schedule is computed. 
Start the rounds + ; +if SKEIN_ASM_UNROLL and 512 +_UNROLL_CNT = ROUNDS_512/8 +else +_UNROLL_CNT = SKEIN_UNROLL_512 + if ((ROUNDS_512/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_512" + endif + xor rdi,rdi ;rdi = round counter +Skein_512_round_loop: +endif +; +_Rbase_ = 0 +rept _UNROLL_CNT*2 + R_512_FourRounds %(4*_Rbase_+00) +_Rbase_ = _Rbase_+1 +endm ;rept _UNROLL_CNT +; +if (SKEIN_ASM_UNROLL and 512) eq 0 + cmp rdi,2*(ROUNDS_512/8) + jb Skein_512_round_loop + mov rdi,[FP_+ctxPtr ] ;restore rdi --> context +endif + ; end of rounds + ;;;;;;;;;;;;;;;;; + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} + irp _Rn_,<0,1,2,3,4,5,6,7> + if (_Rn_ eq 0) + mov rbx,FIRST_MASK + endif + xor rX_512_&_Rn_,[FP_+Wcopy+8*(_Rn_)] ;feedforward XOR + mov [rdi+X_VARS+8*(_Rn_)],rX_512_&_Rn_ ;and store result + if (_Rn_ eq 6) + and rbx,[rdi+TWEAK+ 8] + endif + endm + Skein_Debug_Round 512,SKEIN_RND_FEED_FWD + + ; go back for more blocks, if needed + dec qword ptr [FP_+blkCnt] + jnz Skein_512_block_loop + mov [rdi+TWEAK + 8],rbx + + Reset_Stack Skein_512_Process_Block + ret +; + if _SKEIN_DEBUG +; call here with r08 = "round number" +Skein_Debug_Round_512: + push rdx ;save two regs for BLK_BITS-specific parms + push rcx + mov rcx,[rsp+24] ;get back original r08 (pushed on stack in macro call) + mov [FP_+X_stk],rcx ;and save it in X_stk + irp _Rn_,<1,2,3,4,5,6,7> ;save rest of X[] state on stack so debug routines can access it + mov [FP_+X_stk+8*(_Rn_)],rX_512_&_Rn_ + endm + mov rdx,[FP_+ctxPtr] ;ctx_hdr_ptr + mov rcx, 512 ;block size + jmp Skein_Debug_Round_Common + endif +; +Skein_512_Process_Block endp +; +ifdef SKEIN_CODE_SIZE + public Skein_512_Process_Block_CodeSize +Skein_512_Process_Block_CodeSize proc + mov rax,_ProcBytes_ + ret +Skein_512_Process_Block_CodeSize endp +; + public Skein_512_Unroll_Cnt +Skein_512_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_512/8 + mov rax,_UNROLL_CNT + else + xor rax,rax + endif + ret +Skein_512_Unroll_Cnt endp +endif +; +endif ; _USE_ASM_ and 
512 +; +;=================================== Skein1024 ============================================= +if _USE_ASM_ and 1024 + public Skein1024_Process_Block +; +; void Skein1024_Process_Block(Skein_1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; use details of permutation to make register assignments +; +r1K_x0 equ rdi +r1K_x1 equ rsi +r1K_x2 equ rbp +r1K_x3 equ rax +r1K_x4 equ rcx ;"shared" with X6, since X4/X6 alternate +r1K_x5 equ rbx +r1K_x6 equ rcx +r1K_x7 equ rdx +r1K_x8 equ r08 +r1K_x9 equ r09 +r1K_xA equ r10 +r1K_xB equ r11 +r1K_xC equ r12 +r1K_xD equ r13 +r1K_xE equ r14 +r1K_xF equ r15 +; +rIdx equ r1K_x0 ;index register for looping versions +rIdx_offs equ tmpStk_1024 +; +R1024_Mix macro w0,w1,_RN0_,_Rn1_,op1 +_w0 = 0&w0&h ;handle the hex conversion +_w1 = 0&w1&h +_II_ = ((_RN0_)/4)+1 ;injection count + ; + addReg r1K_x&w0 , r1K_x&w1 ;perform the MIX + RotL64 r1K_x&w1 , 1024,%((_RN0_) and 7),_Rn1_ + xor r1K_x&w1 , r1K_x&w0 + if ((_RN0_) and 3) eq 3 ;time to do key injection? 
+ if _SKEIN_DEBUG + mov [rsp+xDebug_1024+8*_w0],r1K_x&w0 ;save intermediate values for Debug_Round + mov [rsp+xDebug_1024+8*_w1],r1K_x&w1 ; (before inline key injection) + endif + if SKEIN_ASM_UNROLL and 1024 ;here to do fully unrolled key injection + add r1K_x&w0, [rsp+ksKey+ 8*((_II_+_w0) mod 17)] + add r1K_x&w1, [rsp+ksKey+ 8*((_II_+_w1) mod 17)] + if _w1 eq 13 ;tweak injection + add r1K_x&w1, [rsp+ksTwk+ 8*((_II_+0 ) mod 3)] + elseif _w0 eq 14 + add r1K_x&w0, [rsp+ksTwk+ 8*((_II_+1 ) mod 3)] + elseif _w1 eq 15 + add r1K_x&w1, _II_ ;(injection counter) + endif + else ;here to do looping key injection + if (_w0 eq 0) + mov [rsp+X_stk+8*_w0],r1K_x0 ;if so, store N0 so we can use reg as index + mov rIdx, [rsp+rIdx_offs] ;get the injection counter index into rIdx (N0) + else + add r1K_x&w0, [rsp+ksKey+8+8*rIdx+8*_w0] ;even key injection + endif + if _w1 eq 13 ;tweak injection + add r1K_x&w1, [rsp+ksTwk+8+8*rIdx+8*0 ] + elseif _w0 eq 14 + add r1K_x&w0, [rsp+ksTwk+8+8*rIdx+8*1 ] + elseif _w1 eq 15 + addReg r1K_x&w1, rIdx,,,1 ;(injection counter) + endif + add r1K_x&w1, [rsp+ksKey+8+8*rIdx+8*_w1] ;odd key injection + endif + endif + ; insert the op provided, if any + op1 +endm +;;;;;;;;;;;;;;;;; +; MACRO: one round for 1024-bit blocks +; +R1024_OneRound macro x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,xA,xB,xC,xD,xE,xF,_Rn_ + if (x0 ne 0) or ((x4 ne 4) and (x4 ne 6)) or (x4 ne (x6 xor 2)) + .err "faulty register assignment!" 
+ endif + R1024_Mix x0,x1,_Rn_,0 + R1024_Mix x2,x3,_Rn_,1 + R1024_Mix x4,x5,_Rn_,2, ;save x4 on stack (x4/x6 alternate) + R1024_Mix x8,x9,_Rn_,4, ;load x6 from stack + R1024_Mix xA,xB,_Rn_,5 + R1024_Mix xC,xD,_Rn_,6 + R1024_Mix x6,x7,_Rn_,3 + R1024_Mix xE,xF,_Rn_,7 + if _SKEIN_DEBUG + Skein_Debug_Round 1024,%(_Rn_+1) + endif +endm ;R1024_OneRound +;;;;;;;;;;;;;;;;; +; MACRO: four rounds for 1024-bit blocks +; +R1024_FourRounds macro _RR_ ;RR = base round number (0 mod 4) + ; should be here with r1K_x4 set properly, x6 stored on stack + R1024_OneRound 0,1,2,3,4,5,6,7,8,9,A,B,C,D,E,F,%((_RR_)+0) + R1024_OneRound 0,9,2,D,6,B,4,F,A,7,C,3,E,5,8,1,%((_RR_)+1) + R1024_Oneround 0,7,2,5,4,3,6,1,C,F,E,D,8,B,A,9,%((_RR_)+2) + R1024_Oneround 0,F,2,B,6,D,4,9,E,1,8,5,A,3,C,7,%((_RR_)+3) + if (SKEIN_ASM_UNROLL and 1024) eq 0 ;here with r1K_x0 == rIdx, X0 on stack + ;rotate the key schedule on the stack + mov [rsp+X_stk+ 8* 8],r1K_x8;free up a reg + mov r1K_x8,[rsp+ksKey+8*rIdx+8* 0] ;get key + mov [rsp+ksKey+8*rIdx+8*17],r1K_x8 ;rotate it (must do key first or tweak clobbers it!) 
+ mov r1K_x8,[rsp+ksTwk+8*rIdx+8* 0] ;get tweak + mov [rsp+ksTwk+8*rIdx+8* 3],r1K_x8 ;rotate it + mov r1K_x8,[rsp+X_stk+ 8* 8] ;get the reg back + inc rIdx ;bump the index + mov [rsp+rIdx_offs],rIdx ;save it + mov r1K_x0,[rsp+ksKey+8*rIdx] ;get the key schedule word for X0 + add r1K_x0,[rsp+X_stk+8*0] ;perform the X0 key injection + endif + ;show the result of the key injection + Skein_Debug_Round 1024,SKEIN_RND_KEY_INJECT +endm ;R1024_FourRounds +; +;;;;;;;;;;;;;;;; +; code +; +Skein1024_Process_Block proc frame +; + Setup_Stack 1024,ROUNDS_1024/8,NO_FRAME, + mov r09,[rdi+TWEAK+ 8] + jmp short Skein1024_block_loop + align 16 + ; main hash loop for Skein1024 +Skein1024_block_loop: + ; general register usage: + ; RSP = stack pointer + ; RAX..RDX,RSI,RDI= X1, X3..X7 (state words) + ; R08..R15 = X8..X15 (state words) + ; RBP = temp (used for X0 and X2) + ; + if (SKEIN_ASM_UNROLL and 1024) eq 0 + xor rax,rax ;init loop index on the stack + mov [rsp+rIdx_offs],rax + endif + mov r08,[rdi+TWEAK+ 0] + add r08,[FP_+bitAdd] ;computed updated tweak value T0 + mov r10,r09 + xor r10,r08 ;r08/r09/r10 = tweak schedule + mov [rdi+TWEAK+ 0],r08 ;save updated tweak value ctx->h.T[0] + mov [FP_+ksTwk+ 0],r08 + mov [FP_+ksTwk+ 8],r09 ;keep values in r08,r09 for initial tweak injection below + mov [FP_+ksTwk+16],r10 + if _SKEIN_DEBUG + mov [rdi+TWEAK+ 8],r09 ;save updated tweak value ctx->h.T[1] for Skein_Debug_Block + endif + mov rsi ,[FP_+blkPtr ] ;rsi --> input block + mov rax , KW_PARITY ;overall key schedule parity + + ; logic here assumes the set {rdi,rsi,rbp,rax} = r1K_x{0,1,2,3} + + irp _rN_,<0,1,2,3,4,6> ;process the "initial" words, using r14,r15 as temps + mov r14,[rdi+X_VARS+8*_rN_] ;get state word + mov r15,[rsi+ 8*_rN_] ;get msg word + xor rax,r14 ;update key schedule parity + mov [FP_+ksKey +8*_rN_],r14 ;save key schedule word on stack + mov [FP_+Wcopy +8*_rN_],r15 ;save local msg Wcopy + add r14,r15 ;do the initial key injection + mov [rsp+X_stk +8*_rN_],r14 ;save 
initial state var on stack + endm + ; now process the rest, using the "real" registers + ; (MUST do it in reverse order to inject tweaks r08/r09 first) + irp _rN_, +_rr_ = 0&_rN_&h + mov r1K_x&_rN_,[rdi+X_VARS+8*_rr_] ;get key schedule word from context + mov r1K_x4 ,[rsi+ 8*_rr_] ;get next input msg word + mov [rsp+ksKey +8*_rr_],r1K_x&_rN_ ;save key schedule on stack + xor rax , r1K_x&_rN_ ;accumulate key schedule parity + mov [FP_+Wcopy +8*_rr_],r1K_x4 ;save copy of msg word for feedforward + add r1K_x&_rN_, r1K_x4 ;do the initial key injection + if _rr_ eq 13 ;do the initial tweak injection + addReg r1K_x&_rN_,r08 ; (only in words 13/14) + elseif _rr_ eq 14 + addReg r1K_x&_rN_,r09 + endif + endm + mov [FP_+ksKey+8*WCNT],rax ;save key schedule parity +if _SKEIN_DEBUG + Skein_Debug_Block 1024 ;debug dump +endif + addReg rsi,8*WCNT ;bump the msg ptr + mov [FP_+blkPtr],rsi ;save bumped msg ptr + ; re-load words 0..4 [rbp,rsi,rdi,rax,rbx] from stack, enter the main loop + irp _rN_,<0,1,2,3,4> ;(no need to re-load x6) + mov r1K_x&_rN_,[rsp+X_stk+8*_rN_] ;re-load state and get ready to go! + endm +if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_KEY_INITIAL ;show state after initial key injection +endif + ; + ;;;;;;;;;;;;;;;;; + ; now the key schedule is computed. Start the rounds + ; +if SKEIN_ASM_UNROLL and 1024 +_UNROLL_CNT = ROUNDS_1024/8 +else +_UNROLL_CNT = SKEIN_UNROLL_1024 + if ((ROUNDS_1024/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL1024" + endif +Skein1024_round_loop: +endif +; +_Rbase_ = 0 +rept _UNROLL_CNT*2 ;implement the rounds, 4 at a time + R1024_FourRounds %(4*_Rbase_+00) +_Rbase_ = _Rbase_+1 +endm ;rept _UNROLL_CNT +; +if (SKEIN_ASM_UNROLL and 1024) eq 0 + cmp qword ptr [rsp+tmpStk_1024],2*(ROUNDS_1024/8) ;see if we are done + jb Skein1024_round_loop +endif + ; end of rounds + ;;;;;;;;;;;;;;;;; + ; + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..15} + mov [rsp+X_stk+8*7],r1K_x7 ;we need a register. 
x6 already on stack + mov r1K_x7,[rsp+ctxPtr] + + irp _rN_,<0,1,2,3,4,5,8,9,A,B,C,D,E,F> ;do all but x6,x7 + xor r1K_x&_rN_,[rsp +Wcopy +8*(0&_rN_&h)] ;feedforward XOR + mov [r1K_x7+X_VARS+8*(0&_rN_&h)],r1K_x&_rN_ ;save result into context + if (0&_rN_&h eq 9) + mov r09,FIRST_MASK + endif + if (0&_rN_&h eq 0eh) + and r09,[r1K_x7+TWEAK+ 8] + endif + endm + ; + mov rax,[rsp+X_stk +8*6] ;now process x6,x7 + mov rbx,[rsp+X_stk +8*7] + xor rax,[rsp+Wcopy +8*6] + xor rbx,[rsp+Wcopy +8*7] + mov [r1K_x7+X_VARS+8*6],rax + dec qword ptr [rsp+blkCnt] ;set zero flag iff done + mov [r1K_x7+X_VARS+8*7],rbx + + Skein_Debug_Round 1024,SKEIN_RND_FEED_FWD,, + ; go back for more blocks, if needed + mov rdi,[rsp+ctxPtr] ;don't muck with the flags here! + lea rbp,[rsp+FRAME_OFFS] + jnz Skein1024_block_loop + mov [r1K_x7+TWEAK+ 8],r09 + Reset_Stack Skein1024_Process_Block + ret +; +if _SKEIN_DEBUG +; call here with r08 = "round number" +Skein_Debug_Round_1024: +_SP_OFFS_ = 8*2 ;stack "offset" here: r08, return addr + SP_ equ ;useful shorthand below +; + irp _wN_,<1,2,3,5,7,9,A,B,C,D,E,F> ;save rest of X[] state on stack so debug routines can access it + mov [SP_+X_stk+8*(0&_wN_&h)],r1K_x&_wN_ + endm + ;figure out what to do with x0. On rounds R where R==0 mod 4, it's already on the stack + cmp r08,SKEIN_RND_SPECIAL ;special rounds always save + jae save_x0 + test r08,3 + jz save_x0_not +save_x0: + mov [SP_+X_stk+8*0],r1K_x0 +save_x0_not: + ;figure out the x4/x6 swapping state and save the correct one! 
+ cmp r08,SKEIN_RND_SPECIAL ;special rounds always do x4 + jae save_x4 + test r08,1 ;and even ones have r4 as well + jz save_x4 + mov [SP_+X_stk+8*6],r1K_x6 + jmp short debug_1024_go +save_x4: + mov [SP_+X_stk+8*4],r1K_x4 +debug_1024_go: + ;now all is saved in Xstk[] except for X8 + push rdx ;save two regs for BLK_BITS-specific parms + push rcx +_SP_OFFS_ = _SP_OFFS_ + 16 ;adjust stack offset accordingly + ; now stack offset is 32 to X_stk + mov rcx,[SP_ - 8] ;get back original r08 (pushed on stack in macro call) + mov [SP_+X_stk+8*8],rcx ;and save it in its rightful place in X_stk[8] + mov rdx,[SP_+ctxPtr] ;ctx_hdr_ptr + mov rcx, 1024 ;block size + jmp Skein_Debug_Round_Common +endif +; +Skein1024_Process_Block endp +; +ifdef SKEIN_CODE_SIZE + public Skein1024_Process_Block_CodeSize +Skein1024_Process_Block_CodeSize proc + mov rax,_ProcBytes_ + ret +Skein1024_Process_Block_CodeSize endp +; + public Skein1024_Unroll_Cnt +Skein1024_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_1024/8 + mov rax,_UNROLL_CNT + else + xor rax,rax + endif + ret +Skein1024_Unroll_Cnt endp +endif +; +endif ; _USE_ASM_ and 1024 +; +if _SKEIN_DEBUG +;---------------------------------------------------------------- +;local debug routine to set up for calls to: +; void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,int r,const u64b_t *X); +; +; here with r08 = round number +; rdx = ctx_hdr_ptr +; rcx = block size (256/512/1024) +; +Skein_Debug_Round_Common: +_SP_OFFS_ = 32 ;current stack "offset": r08, retAddr, rcx, rdx + irp _rr_, ;save the rest of the regs + push _rr_ +_SP_OFFS_ = _SP_OFFS_+8 + endm + if (_SP_OFFS_ and 0Fh) ; make sure stack is still 16-byte aligned here + .err "Debug_Round_Common: stack alignment" + endif + ; compute r09 = ptr to the X[] array on the stack + lea r09,[SP_+X_stk] ;adjust for reg pushes, return address + cmp r08,SKEIN_RND_FEED_FWD ;special handling for feedforward "round"? 
+ jnz _got_r09a + lea r09,[rdx+X_VARS] +_got_r09a: + if _USE_ASM_ and 1024 + ; special handling for 1024-bit case + ; (for rounds right before with key injection: + ; use xDebug_1024[] instead of X_stk[]) + cmp r08,SKEIN_RND_SPECIAL + jae _got_r09b ;must be a normal round + or r08,r08 + jz _got_r09b ;just before key injection + test r08,3 + jne _got_r09b + cmp rcx,1024 ;only 1024-bit(s) for now + jne _got_r09b + lea r09,[SP_+xDebug_1024] +_got_r09b: + endif + sub rsp, 8*4 ;make room for parms on stack + call Skein_Show_Round ;call external debug handler + add rsp, 8*4 ;discard parm space on the stack + + irp _rr_, ;restore regs + pop _rr_ +_SP_OFFS_ = _SP_OFFS_-8 + endm + if _SP_OFFS_ - 32 + .err "Debug_Round_Common: push/pop misalignment!" + endif + pop rcx + pop rdx + ret +endif +;---------------------------------------------------------------- + end diff --git a/Additional_Implementations/skein_block_x64.s b/Additional_Implementations/skein_block_x64.s new file mode 100644 index 000000000000..b2d0a83acbe9 --- /dev/null +++ b/Additional_Implementations/skein_block_x64.s @@ -0,0 +1,1328 @@ +# +#---------------------------------------------------------------- +# 64-bit x86 assembler code (gnu as) for Skein block functions +# +# Author: Doug Whiting, Hifn/Exar +# +# This code is released to the public domain. 
+#---------------------------------------------------------------- +# + .text + .altmacro + .psize 0,128 #list file has no page boundaries +# +_MASK_ALL_ = (256+512+1024) #all three algorithm bits +_MAX_FRAME_ = 240 +# +################# +.ifndef SKEIN_USE_ASM +_USE_ASM_ = _MASK_ALL_ +.else +_USE_ASM_ = SKEIN_USE_ASM +.endif +################# +.ifndef SKEIN_LOOP #configure loop unrolling +_SKEIN_LOOP = 2 #default is fully unrolled for 256/512, twice for 1024 +.else +_SKEIN_LOOP = SKEIN_LOOP + .irp _NN_,%_SKEIN_LOOP #only display loop unrolling if default changed on command line +.print "+++ SKEIN_LOOP = \_NN_" + .endr +.endif +# the unroll counts (0 --> fully unrolled) +SKEIN_UNROLL_256 = (_SKEIN_LOOP / 100) % 10 +SKEIN_UNROLL_512 = (_SKEIN_LOOP / 10) % 10 +SKEIN_UNROLL_1024 = (_SKEIN_LOOP ) % 10 +# +SKEIN_ASM_UNROLL = 0 + .irp _NN_,256,512,1024 + .if (SKEIN_UNROLL_\_NN_) == 0 +SKEIN_ASM_UNROLL = SKEIN_ASM_UNROLL + \_NN_ + .endif + .endr +################# +# +.ifndef SKEIN_ROUNDS +ROUNDS_256 = 72 +ROUNDS_512 = 72 +ROUNDS_1024 = 80 +.else +ROUNDS_256 = 8*((((SKEIN_ROUNDS / 100) + 5) % 10) + 5) +ROUNDS_512 = 8*((((SKEIN_ROUNDS / 10) + 5) % 10) + 5) +ROUNDS_1024 = 8*((((SKEIN_ROUNDS ) + 5) % 10) + 5) +# only display rounds if default size is changed on command line +.irp _NN_,256,512,1024 + .if _USE_ASM_ && \_NN_ + .irp _RR_,%(ROUNDS_\_NN_) + .if _NN_ < 1024 +.print "+++ SKEIN_ROUNDS_\_NN_ = \_RR_" + .else +.print "+++ SKEIN_ROUNDS_\_NN_ = \_RR_" + .endif + .endr + .endif +.endr +.endif +################# +# +.ifdef SKEIN_CODE_SIZE +_SKEIN_CODE_SIZE = (1) +.else +.ifdef SKEIN_PERF #use code size if SKEIN_PERF is defined +_SKEIN_CODE_SIZE = (1) +.else +_SKEIN_CODE_SIZE = (0) +.endif +.endif +# +################# +# +.ifndef SKEIN_DEBUG +_SKEIN_DEBUG = 0 +.else +_SKEIN_DEBUG = 1 +.endif +################# +# +# define offsets of fields in hash context structure +# +HASH_BITS = 0 #bits of hash output +BCNT = 8 + HASH_BITS #number of bytes in BUFFER[] +TWEAK = 8 + BCNT 
#tweak values[0..1] +X_VARS = 16 + TWEAK #chaining vars +# +#(Note: buffer[] in context structure is NOT needed here :-) +# +KW_PARITY = 0x1BD11BDAA9FC1A22 #overall parity of key schedule words +FIRST_MASK = ~ (1 << 6) +FIRST_MASK64= ~ (1 << 62) +# +# rotation constants for Skein +# +RC_256_0_0 = 14 +RC_256_0_1 = 16 + +RC_256_1_0 = 52 +RC_256_1_1 = 57 + +RC_256_2_0 = 23 +RC_256_2_1 = 40 + +RC_256_3_0 = 5 +RC_256_3_1 = 37 + +RC_256_4_0 = 25 +RC_256_4_1 = 33 + +RC_256_5_0 = 46 +RC_256_5_1 = 12 + +RC_256_6_0 = 58 +RC_256_6_1 = 22 + +RC_256_7_0 = 32 +RC_256_7_1 = 32 + +RC_512_0_0 = 46 +RC_512_0_1 = 36 +RC_512_0_2 = 19 +RC_512_0_3 = 37 + +RC_512_1_0 = 33 +RC_512_1_1 = 27 +RC_512_1_2 = 14 +RC_512_1_3 = 42 + +RC_512_2_0 = 17 +RC_512_2_1 = 49 +RC_512_2_2 = 36 +RC_512_2_3 = 39 + +RC_512_3_0 = 44 +RC_512_3_1 = 9 +RC_512_3_2 = 54 +RC_512_3_3 = 56 + +RC_512_4_0 = 39 +RC_512_4_1 = 30 +RC_512_4_2 = 34 +RC_512_4_3 = 24 + +RC_512_5_0 = 13 +RC_512_5_1 = 50 +RC_512_5_2 = 10 +RC_512_5_3 = 17 + +RC_512_6_0 = 25 +RC_512_6_1 = 29 +RC_512_6_2 = 39 +RC_512_6_3 = 43 + +RC_512_7_0 = 8 +RC_512_7_1 = 35 +RC_512_7_2 = 56 +RC_512_7_3 = 22 + +RC_1024_0_0 = 24 +RC_1024_0_1 = 13 +RC_1024_0_2 = 8 +RC_1024_0_3 = 47 +RC_1024_0_4 = 8 +RC_1024_0_5 = 17 +RC_1024_0_6 = 22 +RC_1024_0_7 = 37 + +RC_1024_1_0 = 38 +RC_1024_1_1 = 19 +RC_1024_1_2 = 10 +RC_1024_1_3 = 55 +RC_1024_1_4 = 49 +RC_1024_1_5 = 18 +RC_1024_1_6 = 23 +RC_1024_1_7 = 52 + +RC_1024_2_0 = 33 +RC_1024_2_1 = 4 +RC_1024_2_2 = 51 +RC_1024_2_3 = 13 +RC_1024_2_4 = 34 +RC_1024_2_5 = 41 +RC_1024_2_6 = 59 +RC_1024_2_7 = 17 + +RC_1024_3_0 = 5 +RC_1024_3_1 = 20 +RC_1024_3_2 = 48 +RC_1024_3_3 = 41 +RC_1024_3_4 = 47 +RC_1024_3_5 = 28 +RC_1024_3_6 = 16 +RC_1024_3_7 = 25 + +RC_1024_4_0 = 41 +RC_1024_4_1 = 9 +RC_1024_4_2 = 37 +RC_1024_4_3 = 31 +RC_1024_4_4 = 12 +RC_1024_4_5 = 47 +RC_1024_4_6 = 44 +RC_1024_4_7 = 30 + +RC_1024_5_0 = 16 +RC_1024_5_1 = 34 +RC_1024_5_2 = 56 +RC_1024_5_3 = 51 +RC_1024_5_4 = 4 +RC_1024_5_5 = 53 +RC_1024_5_6 = 42 +RC_1024_5_7 = 41 + 
+RC_1024_6_0 = 31 +RC_1024_6_1 = 44 +RC_1024_6_2 = 47 +RC_1024_6_3 = 46 +RC_1024_6_4 = 19 +RC_1024_6_5 = 42 +RC_1024_6_6 = 44 +RC_1024_6_7 = 25 + +RC_1024_7_0 = 9 +RC_1024_7_1 = 48 +RC_1024_7_2 = 35 +RC_1024_7_3 = 52 +RC_1024_7_4 = 23 +RC_1024_7_5 = 31 +RC_1024_7_6 = 37 +RC_1024_7_7 = 20 +# +# Input: reg +# Output: <<< RC_BlkSize_roundNum_mixNum, BlkSize=256/512/1024 +# +.macro RotL64 reg,BLK_SIZE,ROUND_NUM,MIX_NUM +_RCNT_ = RC_\BLK_SIZE&_\ROUND_NUM&_\MIX_NUM + .if _RCNT_ #is there anything to do? + rolq $_RCNT_,%\reg + .endif +.endm +# +#---------------------------------------------------------------- +# +# MACROS: define local vars and configure stack +# +#---------------------------------------------------------------- +# declare allocated space on the stack +.macro StackVar localName,localSize +\localName = _STK_OFFS_ +_STK_OFFS_ = _STK_OFFS_+(\localSize) +.endm #StackVar +# +#---------------------------------------------------------------- +# +# MACRO: Configure stack frame, allocate local vars +# +.macro Setup_Stack BLK_BITS,KS_CNT,debugCnt + WCNT = (\BLK_BITS)/64 +# +_PushCnt_ = 0 #save nonvolatile regs on stack + .irp _reg_,rbp,rbx,r12,r13,r14,r15 + pushq %\_reg_ +_PushCnt_ = _PushCnt_ + 1 #track count to keep alignment + .endr +# +_STK_OFFS_ = 0 #starting offset from rsp + #---- local variables #<-- rsp + StackVar X_stk ,8*(WCNT) #local context vars + StackVar ksTwk ,8*3 #key schedule: tweak words + StackVar ksKey ,8*(WCNT)+8 #key schedule: key words + .if (SKEIN_ASM_UNROLL && (\BLK_BITS)) == 0 + StackVar ksRot ,16*(\KS_CNT) #leave space for "rotation" to happen + .endif + StackVar Wcopy ,8*(WCNT) #copy of input block + .if _SKEIN_DEBUG + .if \debugCnt + 0 #temp location for debug X[] info + StackVar xDebug_\BLK_BITS ,8*(\debugCnt) + .endif + .endif + .if ((8*_PushCnt_ + _STK_OFFS_) % 8) == 0 + StackVar align16,8 #keep 16-byte aligned (adjust for retAddr?) 
+tmpStk_\BLK_BITS = align16 #use this + .endif + #---- saved caller parameters (from regs rdi, rsi, rdx, rcx) + StackVar ctxPtr ,8 #context ptr + StackVar blkPtr ,8 #pointer to block data + StackVar blkCnt ,8 #number of full blocks to process + StackVar bitAdd ,8 #bit count to add to tweak +LOCAL_SIZE = _STK_OFFS_ #size of "local" vars + #---- + StackVar savRegs,8*_PushCnt_ #saved registers + StackVar retAddr,8 #return address + #---- caller's stack frame (aligned mod 16) +# +# set up the stack frame pointer (rbp) +# +FRAME_OFFS = ksTwk + 128 #allow short (negative) offset to ksTwk, ksKey + .if FRAME_OFFS > _STK_OFFS_ #keep rbp in the "locals" range +FRAME_OFFS = _STK_OFFS_ + .endif +F_O = -FRAME_OFFS +# + #put some useful defines in the .lst file (for grep) +__STK_LCL_SIZE_\BLK_BITS = LOCAL_SIZE +__STK_TOT_SIZE_\BLK_BITS = _STK_OFFS_ +__STK_FRM_OFFS_\BLK_BITS = FRAME_OFFS +# +# Notes on stack frame setup: +# * the most frequently used variable is X_stk[], based at [rsp+0] +# * the next most used are the key schedule arrays, ksKey and ksTwk +# so rbp is "centered" there, allowing short offsets to the key +# schedule even in 1024-bit Skein case +# * the Wcopy variables are infrequently accessed, but they have long +# offsets from both rsp and rbp only in the 1024-bit case. +# * all other local vars and calling parameters can be accessed +# with short offsets, except in the 1024-bit case +# + subq $LOCAL_SIZE,%rsp #make room for the locals + leaq FRAME_OFFS(%rsp),%rbp #maximize use of short offsets + movq %rdi, ctxPtr+F_O(%rbp) #save caller's parameters on the stack + movq %rsi, blkPtr+F_O(%rbp) + movq %rdx, blkCnt+F_O(%rbp) + movq %rcx, bitAdd+F_O(%rbp) +# +.endm #Setup_Stack +# +#---------------------------------------------------------------- +# +.macro Reset_Stack + addq $LOCAL_SIZE,%rsp #get rid of locals (wipe??)
+ .irp _reg_,r15,r14,r13,r12,rbx,rbp + popq %\_reg_ #restore caller's regs +_PushCnt_ = _PushCnt_ - 1 + .endr + .if _PushCnt_ + .error "Mismatched push/pops?" + .endif +.endm # Reset_Stack +# +#---------------------------------------------------------------- +# macros to help debug internals +# +.if _SKEIN_DEBUG + .extern Skein_Show_Block #calls to C routines + .extern Skein_Show_Round +# +SKEIN_RND_SPECIAL = 1000 +SKEIN_RND_KEY_INITIAL = SKEIN_RND_SPECIAL+0 +SKEIN_RND_KEY_INJECT = SKEIN_RND_SPECIAL+1 +SKEIN_RND_FEED_FWD = SKEIN_RND_SPECIAL+2 +# +.macro Skein_Debug_Block BLK_BITS +# +#void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X, +# const u08b_t *blkPtr, const u64b_t *wPtr, +# const u64b_t *ksPtr,const u64b_t *tsPtr) +# +_NN_ = 0 + .irp _reg_,rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11 + pushq %\_reg_ #save all volatile regs on stack before the call +_NN_ = _NN_ + 1 + .endr + # get and push call parameters + movq $\BLK_BITS ,%rdi #bits + movq ctxPtr+F_O(%rbp),%rsi #h (pointer) + leaq X_VARS (%rsi),%rdx #X (pointer) + movq blkPtr+F_O(%rbp),%rcx #blkPtr + leaq Wcopy +F_O(%rbp),%r8 #wPtr + leaq ksKey +F_O(%rbp),%r9 #key pointer + leaq ksTwk +F_O(%rbp),%rax #tweak pointer + pushq %rax # (pass on the stack) + call Skein_Show_Block #call external debug handler + addq $8*1,%rsp #discard parameters on stack + .if (_NN_ % 2 ) == 0 #check stack alignment + .error "Stack misalignment problem in Skein_Debug_Block_\_BLK_BITS" + .endif + .irp _reg_,r11,r10,r9,r8,rdi,rsi,rdx,rcx,rax + popq %\_reg_ #restore regs +_NN_ = _NN_ - 1 + .endr + .if _NN_ + .error "Push/pop mismatch problem in Skein_Debug_Block_\_BLK_BITS" + .endif +.endm # Skein_Debug_Block +# +# the macro to "call" to debug a round +# +.macro Skein_Debug_Round BLK_BITS,R,RDI_OFFS,afterOp + # call the appropriate (local) debug "function" + pushq %rdx #save rdx, so we can use it for round "number" + .if (SKEIN_ASM_UNROLL && \BLK_BITS) || (\R >= SKEIN_RND_SPECIAL) + movq $\R,%rdx + .else #compute round
number using rdi +_rOffs_ = \RDI_OFFS + 0 + .if \BLK_BITS == 1024 + movq rIdx_offs+8(%rsp),%rdx #get rIdx off the stack (adjust for pushq rdx above) + leaq 1+(((\R)-1) && 3)+_rOffs_(,%rdx,4),%rdx + .else + leaq 1+(((\R)-1) && 3)+_rOffs_(,%rdi,4),%rdx + .endif + .endif + call Skein_Debug_Round_\BLK_BITS + popq %rdx #restore original rdx value +# + afterOp +.endm # Skein_Debug_Round +.else #------- _SKEIN_DEBUG (dummy macros if debug not enabled) +.macro Skein_Debug_Block BLK_BITS +.endm +# +.macro Skein_Debug_Round BLK_BITS,R,RDI_OFFS,afterOp +.endm +# +.endif # _SKEIN_DEBUG +# +#---------------------------------------------------------------- +# +.macro addReg dstReg,srcReg_A,srcReg_B,useAddOp,immOffs + .if \immOffs + 0 + leaq \immOffs(%\srcReg_A\srcReg_B,%\dstReg),%\dstReg + .elseif ((\useAddOp + 0) == 0) + .ifndef ASM_NO_LEA #lea seems to be faster on Core 2 Duo CPUs! + leaq (%\srcReg_A\srcReg_B,%\dstReg),%\dstReg + .else + addq %\srcReg_A\srcReg_B,%\dstReg + .endif + .else + addq %\srcReg_A\srcReg_B,%\dstReg + .endif +.endm + +# keep Intel-style ordering here, to match addReg +.macro xorReg dstReg,srcReg_A,srcReg_B + xorq %\srcReg_A\srcReg_B,%\dstReg +.endm +# +#---------------------------------------------------------------- +# +.macro C_label lName + \lName: #use both "genders" to work across linkage conventions +_\lName: + .global \lName + .global _\lName +.endm +# +#=================================== Skein_256 ============================================= +# +.if _USE_ASM_ & 256 +# +# void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd)# +# +################# +# +# code +# +C_label Skein_256_Process_Block + Setup_Stack 256,((ROUNDS_256/8)+1) + movq TWEAK+8(%rdi),%r14 + jmp Skein_256_block_loop + .p2align 4 + # main hash loop for Skein_256 +Skein_256_block_loop: + # + # general register usage: + # RAX..RDX = X0..X3 + # R08..R12 = ks[0..4] + # R13..R15 = ts[0..2] + # RSP, RBP = stack/frame pointers + # RDI =
round counter or context pointer + # RSI = temp + # + movq TWEAK+0(%rdi) ,%r13 + addq bitAdd+F_O(%rbp) ,%r13 #compute updated tweak value T0 + movq %r14 ,%r15 + xorq %r13 ,%r15 #now %r13..%r15 hold the tweak schedule (T0, T1, T0^T1) + + movq $KW_PARITY ,%r12 + movq X_VARS+ 0(%rdi),%r8 + movq X_VARS+ 8(%rdi),%r9 + movq X_VARS+16(%rdi),%r10 + movq X_VARS+24(%rdi),%r11 + movq %r13,TWEAK+0(%rdi) #save updated tweak value ctx->h.T[0] + xorq %r8 ,%r12 #start accumulating overall parity + + movq blkPtr +F_O(%rbp) ,%rsi #rsi --> input block + xorq %r9 ,%r12 + movq 0(%rsi) ,%rax #get X[0..3] + xorq %r10 ,%r12 + movq 8(%rsi) ,%rbx + xorq %r11 ,%r12 + movq 16(%rsi) ,%rcx + movq 24(%rsi) ,%rdx + + movq %rax,Wcopy+ 0+F_O(%rbp) #save copy of input block + movq %rbx,Wcopy+ 8+F_O(%rbp) + movq %rcx,Wcopy+16+F_O(%rbp) + movq %rdx,Wcopy+24+F_O(%rbp) + + addq %r8 ,%rax #initial key injection + addq %r9 ,%rbx + addq %r10,%rcx + addq %r11,%rdx + addq %r13,%rbx + addq %r14,%rcx + +.if _SKEIN_DEBUG + movq %r14,TWEAK+ 8(%rdi) #save updated tweak T[1] (start bit cleared?)
+ movq %r8 ,ksKey+ 0+F_O(%rbp) #save key schedule on stack for Skein_Debug_Block + movq %r9 ,ksKey+ 8+F_O(%rbp) + movq %r10,ksKey+16+F_O(%rbp) + movq %r11,ksKey+24+F_O(%rbp) + movq %r12,ksKey+32+F_O(%rbp) + + movq %r13,ksTwk+ 0+F_O(%rbp) + movq %r14,ksTwk+ 8+F_O(%rbp) + movq %r15,ksTwk+16+F_O(%rbp) + + movq %rax,X_stk + 0(%rsp) #save X[] on stack for Skein_Debug_Block + movq %rbx,X_stk + 8(%rsp) + movq %rcx,X_stk +16(%rsp) + movq %rdx,X_stk +24(%rsp) + + Skein_Debug_Block 256 #debug dump + Skein_Debug_Round 256,SKEIN_RND_KEY_INITIAL +.endif +# +.if ((SKEIN_ASM_UNROLL & 256) == 0) + movq %r8 ,ksKey+40+F_O(%rbp) #save key schedule on stack for looping code + movq %r9 ,ksKey+ 8+F_O(%rbp) + movq %r10,ksKey+16+F_O(%rbp) + movq %r11,ksKey+24+F_O(%rbp) + movq %r12,ksKey+32+F_O(%rbp) + + movq %r13,ksTwk+24+F_O(%rbp) + movq %r14,ksTwk+ 8+F_O(%rbp) + movq %r15,ksTwk+16+F_O(%rbp) +.endif + addq $WCNT*8,%rsi #skip the block + movq %rsi,blkPtr +F_O(%rbp) #update block pointer + # + # now the key schedule is computed. 
Start the rounds + # +.if SKEIN_ASM_UNROLL & 256 +_UNROLL_CNT = ROUNDS_256/8 +.else +_UNROLL_CNT = SKEIN_UNROLL_256 + .if ((ROUNDS_256/8) % _UNROLL_CNT) + .error "Invalid SKEIN_UNROLL_256" + .endif + xorq %rdi,%rdi #rdi = iteration count +Skein_256_round_loop: +.endif +_Rbase_ = 0 +.rept _UNROLL_CNT*2 + # all X and ks vars in regs # (ops to "rotate" ks vars, via mem, if not unrolled) + # round 4*_RBase_ + 0 + addReg rax, rbx + RotL64 rbx, 256,%((4*_Rbase_+0) % 8),0 + addReg rcx, rdx + .if (SKEIN_ASM_UNROLL & 256) == 0 + movq ksKey+8*1+F_O(%rbp,%rdi,8),%r8 + .endif + xorReg rbx, rax + RotL64 rdx, 256,%((4*_Rbase_+0) % 8),1 + xorReg rdx, rcx + .if SKEIN_ASM_UNROLL & 256 + .irp _r0_,%( 8+(_Rbase_+3) % 5) + .irp _r1_,%(13+(_Rbase_+2) % 3) + leaq (%r\_r0_,%r\_r1_),%rdi #precompute key injection value for %rcx + .endr + .endr + .endif + .if (SKEIN_ASM_UNROLL & 256) == 0 + movq ksTwk+8*1+F_O(%rbp,%rdi,8),%r13 + .endif + Skein_Debug_Round 256,%(4*_Rbase_+1) + + # round 4*_Rbase_ + 1 + addReg rax, rdx + RotL64 rdx, 256,%((4*_Rbase_+1) % 8),0 + xorReg rdx, rax + .if (SKEIN_ASM_UNROLL & 256) == 0 + movq ksKey+8*2+F_O(%rbp,%rdi,8),%r9 + .endif + addReg rcx, rbx + RotL64 rbx, 256,%((4*_Rbase_+1) % 8),1 + xorReg rbx, rcx + .if (SKEIN_ASM_UNROLL & 256) == 0 + movq ksKey+8*4+F_O(%rbp,%rdi,8),%r11 + .endif + Skein_Debug_Round 256,%(4*_Rbase_+2) + .if SKEIN_ASM_UNROLL & 256 + .irp _r0_,%( 8+(_Rbase_+2) % 5) + .irp _r1_,%(13+(_Rbase_+1) % 3) + leaq (%r\_r0_,%r\_r1_),%rsi #precompute key injection value for %rbx + .endr + .endr + .endif + # round 4*_Rbase_ + 2 + addReg rax, rbx + RotL64 rbx, 256,%((4*_Rbase_+2) % 8),0 + addReg rcx, rdx + .if (SKEIN_ASM_UNROLL & 256) == 0 + movq ksKey+8*3+F_O(%rbp,%rdi,8),%r10 + .endif + xorReg rbx, rax + RotL64 rdx, 256,%((4*_Rbase_+2) % 8),1 + xorReg rdx, rcx + .if (SKEIN_ASM_UNROLL & 256) == 0 + movq %r8,ksKey+8*6+F_O(%rbp,%rdi,8) #"rotate" the key + leaq 1(%r11,%rdi),%r11 #precompute key + tweak + .endif + Skein_Debug_Round 256,%(4*_Rbase_+3) + # 
round 4*_Rbase_ + 3 + addReg rax, rdx + RotL64 rdx, 256,%((4*_Rbase_+3) % 8),0 + addReg rcx, rbx + .if (SKEIN_ASM_UNROLL & 256) == 0 + addq ksTwk+8*2+F_O(%rbp,%rdi,8),%r10 #precompute key + tweak + movq %r13,ksTwk+8*4+F_O(%rbp,%rdi,8) #"rotate" the tweak + .endif + xorReg rdx, rax + RotL64 rbx, 256,%((4*_Rbase_+3) % 8),1 + xorReg rbx, rcx + Skein_Debug_Round 256,%(4*_Rbase_+4) + .if (SKEIN_ASM_UNROLL & 256) == 0 + addReg r9 ,r13 #precompute key+tweak + .endif + #inject key schedule words +_Rbase_ = _Rbase_+1 + .if SKEIN_ASM_UNROLL & 256 + addReg rax,r,%(8+((_Rbase_+0) % 5)) + addReg rbx,rsi + addReg rcx,rdi + addReg rdx,r,%(8+((_Rbase_+3) % 5)),,_Rbase_ + .else + incq %rdi + addReg rax,r8 + addReg rcx,r10 + addReg rbx,r9 + addReg rdx,r11 + .endif + Skein_Debug_Round 256,SKEIN_RND_KEY_INJECT +.endr #rept _UNROLL_CNT +# +.if (SKEIN_ASM_UNROLL & 256) == 0 + cmpq $2*(ROUNDS_256/8),%rdi + jb Skein_256_round_loop +.endif # (SKEIN_ASM_UNROLL & 256) == 0 + movq ctxPtr +F_O(%rbp),%rdi #restore rdi --> context + + #---------------------------- + # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..3} + movq $FIRST_MASK64 ,%r14 + xorq Wcopy + 0+F_O (%rbp),%rax + xorq Wcopy + 8+F_O (%rbp),%rbx + xorq Wcopy +16+F_O (%rbp),%rcx + xorq Wcopy +24+F_O (%rbp),%rdx + andq TWEAK + 8 (%rdi),%r14 + movq %rax,X_VARS+ 0(%rdi) #store final result + movq %rbx,X_VARS+ 8(%rdi) + movq %rcx,X_VARS+16(%rdi) + movq %rdx,X_VARS+24(%rdi) + + Skein_Debug_Round 256,SKEIN_RND_FEED_FWD + + # go back for more blocks, if needed + decq blkCnt+F_O(%rbp) + jnz Skein_256_block_loop + movq %r14,TWEAK + 8(%rdi) + Reset_Stack + ret +Skein_256_Process_Block_End: + + .if _SKEIN_DEBUG +Skein_Debug_Round_256: #here with rdx == round "number" from macro + pushq %rsi #save two regs for BLK_BITS-specific parms + pushq %rdi + movq 24(%rsp),%rdi #get back original rdx (pushed on stack in macro call) to rdi + movq %rax,X_stk+ 0+F_O(%rbp) #save X[] state on stack so debug routines can access it + movq %rbx,X_stk+ 8+F_O(%rbp) 
#(use FP_ since rsp has changed!) + movq %rcx,X_stk+16+F_O(%rbp) + movq %rdi,X_stk+24+F_O(%rbp) + + movq ctxPtr+F_O(%rbp),%rsi #ctx_hdr_ptr + movq $256,%rdi #now we are set for the call + jmp Skein_Debug_Round_Common + .endif +# +.if _SKEIN_CODE_SIZE +C_label Skein_256_Process_Block_CodeSize + movq $(Skein_256_Process_Block_End-Skein_256_Process_Block),%rax + ret +# +C_label Skein_256_Unroll_Cnt + .if _UNROLL_CNT <> ROUNDS_256/8 + movq $_UNROLL_CNT,%rax + .else + xorq %rax,%rax + .endif + ret +.endif +# +.endif #_USE_ASM_ & 256 +# +#=================================== Skein_512 ============================================= +# +.if _USE_ASM_ & 512 +# +# void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd) +# +# X[i] == %r[8+i] #register assignments for X[] values during rounds (i=0..7) +# +################# +# MACRO: one round for 512-bit blocks +# +.macro R_512_OneRound rn0,rn1,rn2,rn3,rn4,rn5,rn6,rn7,_Rn_,op1,op2,op3,op4 +# + addReg r\rn0, r\rn1 + RotL64 r\rn1, 512,%((_Rn_) % 8),0 + xorReg r\rn1, r\rn0 + op1 + addReg r\rn2, r\rn3 + RotL64 r\rn3, 512,%((_Rn_) % 8),1 + xorReg r\rn3, r\rn2 + op2 + addReg r\rn4, r\rn5 + RotL64 r\rn5, 512,%((_Rn_) % 8),2 + xorReg r\rn5, r\rn4 + op3 + addReg r\rn6, r\rn7 + RotL64 r\rn7, 512,%((_Rn_) % 8),3 + xorReg r\rn7, r\rn6 + op4 + Skein_Debug_Round 512,%(_Rn_+1),-4 +# +.endm #R_512_OneRound +# +################# +# MACRO: four rounds for 512-bit blocks +# +.macro R_512_FourRounds _RR_ #RR = base round number (0 mod 4) + .if (SKEIN_ASM_UNROLL && 512) + # here for fully unrolled case.
+ _II_ = ((_RR_)/4) + 1 #key injection counter + R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),,, + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),,, + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),,, + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),, + # inject the key schedule + addq ksKey+8*(((_II_)+0)%9)+F_O(%rbp),%r8 + addReg r11, rax + addq ksKey+8*(((_II_)+1)%9)+F_O(%rbp),%r9 + addReg r12, rbx + addq ksKey+8*(((_II_)+2)%9)+F_O(%rbp),%r10 + addReg r13, rcx + addReg r14, rdx + addReg r15, rsi,,,(_II_) + .else + # here for looping case #"rotate" key/tweak schedule (move up on stack) + incq %rdi #bump key injection counter + R_512_OneRound 8, 9,10,11,12,13,14,15,%((_RR_)+0),,, + R_512_OneRound 10, 9,12,15,14,13, 8,11,%((_RR_)+1),,, + R_512_OneRound 12, 9,14,11, 8,13,10,15,%((_RR_)+2),,, + R_512_OneRound 14, 9, 8,15,10,13,12,11,%((_RR_)+3),, + # inject the key schedule + addq ksKey+8*0+F_O(%rbp,%rdi,8),%r8 + addReg r11, rax + addReg r12, rbx + addq ksKey+8*1+F_O(%rbp,%rdi,8),%r9 + addReg r13, rcx + addReg r14, rdx + addq ksKey+8*2+F_O(%rbp,%rdi,8),%r10 + addReg r15, rsi + addReg r15, rdi #inject the round number + .endif + + #show the result of the key injection + Skein_Debug_Round 512,SKEIN_RND_KEY_INJECT +.endm #R_512_FourRounds +# +################# +# instantiated code +# +C_label Skein_512_Process_Block + Setup_Stack 512,ROUNDS_512/8 + movq TWEAK+ 8(%rdi),%rbx + jmp Skein_512_block_loop + .p2align 4 + # main hash loop for Skein_512 +Skein_512_block_loop: + # general register usage: + # RAX..RDX = temps for key schedule pre-loads + # R8 ..R15 = X0..X7 + # RSP, RBP = stack/frame pointers + # RDI = round counter or context pointer + # RSI = temp + # + movq TWEAK + 0(%rdi),%rax + addq bitAdd+F_O(%rbp),%rax #compute updated tweak value T0 + movq %rbx,%rcx + xorq %rax,%rcx #%rax/%rbx/%rcx = tweak schedule + movq %rax,TWEAK+ 0 (%rdi) #save updated tweak value ctx->h.T[0] + movq %rax,ksTwk+ 0+F_O(%rbp) + movq $KW_PARITY,%rdx + movq blkPtr
+F_O(%rbp),%rsi #%rsi --> input block + movq %rbx,ksTwk+ 8+F_O(%rbp) + movq %rcx,ksTwk+16+F_O(%rbp) + .irp _Rn_,8,9,10,11,12,13,14,15 + movq X_VARS+8*(_Rn_-8)(%rdi),%r\_Rn_ + xorq %r\_Rn_,%rdx #compute overall parity + movq %r\_Rn_,ksKey+8*(_Rn_-8)+F_O(%rbp) + .endr #load state into %r8 ..%r15, compute parity + movq %rdx,ksKey+8*(8)+F_O(%rbp)#save key schedule parity + + addReg r13,rax #precompute key injection for tweak + addReg r14, rbx +.if _SKEIN_DEBUG + movq %rbx,TWEAK+ 8(%rdi) #save updated tweak value ctx->h.T[1] for Skein_Debug_Block below +.endif + movq 0(%rsi),%rax #load input block + movq 8(%rsi),%rbx + movq 16(%rsi),%rcx + movq 24(%rsi),%rdx + addReg r8 , rax #do initial key injection + addReg r9 , rbx + movq %rax,Wcopy+ 0+F_O(%rbp) #keep local copy for feedforward + movq %rbx,Wcopy+ 8+F_O(%rbp) + addReg r10, rcx + addReg r11, rdx + movq %rcx,Wcopy+16+F_O(%rbp) + movq %rdx,Wcopy+24+F_O(%rbp) + + movq 32(%rsi),%rax + movq 40(%rsi),%rbx + movq 48(%rsi),%rcx + movq 56(%rsi),%rdx + addReg r12, rax + addReg r13, rbx + addReg r14, rcx + addReg r15, rdx + movq %rax,Wcopy+32+F_O(%rbp) + movq %rbx,Wcopy+40+F_O(%rbp) + movq %rcx,Wcopy+48+F_O(%rbp) + movq %rdx,Wcopy+56+F_O(%rbp) + +.if _SKEIN_DEBUG + .irp _Rn_,8,9,10,11,12,13,14,15 #save values on stack for debug output + movq %r\_Rn_,X_stk+8*(_Rn_-8)(%rsp) + .endr + + Skein_Debug_Block 512 #debug dump + Skein_Debug_Round 512,SKEIN_RND_KEY_INITIAL +.endif + addq $8*WCNT,%rsi #skip the block + movq %rsi,blkPtr+F_O(%rbp) #update block pointer + # + ################# + # now the key schedule is computed. 
Start the rounds + # +.if SKEIN_ASM_UNROLL & 512 +_UNROLL_CNT = ROUNDS_512/8 +.else +_UNROLL_CNT = SKEIN_UNROLL_512 + .if ((ROUNDS_512/8) % _UNROLL_CNT) + .error "Invalid SKEIN_UNROLL_512" + .endif + xorq %rdi,%rdi #rdi = round counter +Skein_512_round_loop: +.endif +# +_Rbase_ = 0 +.rept _UNROLL_CNT*2 + R_512_FourRounds %(4*_Rbase_+00) +_Rbase_ = _Rbase_+1 +.endr #rept _UNROLL_CNT*2 +# +.if (SKEIN_ASM_UNROLL & 512) == 0 + cmpq $2*(ROUNDS_512/8),%rdi + jb Skein_512_round_loop + movq ctxPtr +F_O(%rbp),%rdi #restore rdi --> context +.endif + # end of rounds + ################# + # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} + .irp _Rn_,8,9,10,11,12,13,14,15 + .if (_Rn_ == 8) + movq $FIRST_MASK64,%rbx + .endif + xorq Wcopy+8*(_Rn_-8)+F_O(%rbp),%r\_Rn_ #feedforward XOR + movq %r\_Rn_,X_VARS+8*(_Rn_-8)(%rdi) #and store result + .if (_Rn_ == 14) + andq TWEAK+ 8(%rdi),%rbx + .endif + .endr + Skein_Debug_Round 512,SKEIN_RND_FEED_FWD + + # go back for more blocks, if needed + decq blkCnt+F_O(%rbp) + jnz Skein_512_block_loop + movq %rbx,TWEAK + 8(%rdi) + + Reset_Stack + ret +Skein_512_Process_Block_End: +# + .if _SKEIN_DEBUG +# call here with rdx = "round number" +Skein_Debug_Round_512: + pushq %rsi #save two regs for BLK_BITS-specific parms + pushq %rdi + .irp _Rn_,8,9,10,11,12,13,14,15 #save X[] state on stack so debug routines can access it + movq %r\_Rn_,X_stk+8*(_Rn_-8)+F_O(%rbp) + .endr + movq ctxPtr+F_O(%rbp),%rsi #ctx_hdr_ptr + movq $512,%rdi #now we are set for the call + jmp Skein_Debug_Round_Common + .endif +# +.if _SKEIN_CODE_SIZE +C_label Skein_512_Process_Block_CodeSize + movq $(Skein_512_Process_Block_End-Skein_512_Process_Block),%rax + ret +# +C_label Skein_512_Unroll_Cnt + .if _UNROLL_CNT <> (ROUNDS_512/8) + movq $_UNROLL_CNT,%rax + .else + xorq %rax,%rax + .endif + ret +.endif +# +.endif # _USE_ASM_ & 512 +# +#=================================== Skein1024 ============================================= +.if _USE_ASM_ & 1024 +# +# void
Skein1024_Process_Block(Skein_1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd)# +# +################# +# use details of permutation to make register assignments +# +o1K_rdi = 0 #offsets in X[] associated with each register +o1K_rsi = 1 +o1K_rbp = 2 +o1K_rax = 3 +o1K_rcx = 4 #rcx is "shared" with X6, since X4/X6 alternate +o1K_rbx = 5 +o1K_rdx = 7 +o1K_r8 = 8 +o1K_r9 = 9 +o1K_r10 = 10 +o1K_r11 = 11 +o1K_r12 = 12 +o1K_r13 = 13 +o1K_r14 = 14 +o1K_r15 = 15 +# +rIdx_offs = tmpStk_1024 +# +.macro r1024_Mix w0,w1,reg0,reg1,_RN0_,_Rn1_,op1 + addReg \reg0 , \reg1 #perform the MIX + RotL64 \reg1 , 1024,%((_RN0_) % 8),_Rn1_ + xorReg \reg1 , \reg0 +.if ((_RN0_) && 3) == 3 #time to do key injection? + .if _SKEIN_DEBUG + movq %\reg0 , xDebug_1024+8*w0(%rsp) #save intermediate values for Debug_Round + movq %\reg1 , xDebug_1024+8*w1(%rsp) # (before inline key injection) + .endif +_II_ = ((_RN0_)/4)+1 #injection count + .if SKEIN_ASM_UNROLL && 1024 #here to do fully unrolled key injection + addq ksKey+ 8*((_II_+w0) % 17)(%rsp),%\reg0 + addq ksKey+ 8*((_II_+w1) % 17)(%rsp),%\reg1 + .if w1 == 13 #tweak injection + addq ksTwk+ 8*((_II_+ 0) % 3)(%rsp),%\reg1 + .elseif w0 == 14 + addq ksTwk+ 8*((_II_+ 1) % 3)(%rsp),%\reg0 + .elseif w1 == 15 + addq $_II_, %\reg1 #(injection counter) + .endif + .else #here to do looping key injection + .if (w0 == 0) + movq %rdi, X_stk+8*w0(%rsp) #if so, store X0 so we can use reg as index + movq rIdx_offs(%rsp),%rdi #get the injection counter index into rdi + .else + addq ksKey+8+8*w0(%rsp,%rdi,8),%\reg0 #even key injection + .endif + .if w1 == 13 #tweak injection + addq ksTwk+8+8* 0(%rsp,%rdi,8),%\reg1 + .elseif w0 == 14 + addq ksTwk+8+8* 1(%rsp,%rdi,8),%\reg0 + .elseif w1 == 15 + addReg \reg1,rdi,,,1 #(injection counter) + .endif + addq ksKey+8+8*w1(%rsp,%rdi,8),%\reg1 #odd key injection + .endif +.endif + # insert the op provided, if any + op1 +.endm +################# +# MACRO: four rounds for 1024-bit blocks +# +.macro
r1024_FourRounds _RR_ #RR = base round number (0 mod 4) + # should be here with X4 set properly, X6 stored on stack +_Rn_ = (_RR_) + 0 + r1024_Mix 0, 1,rdi,rsi,_Rn_,0 + r1024_Mix 2, 3,rbp,rax,_Rn_,1 + r1024_Mix 4, 5,rcx,rbx,_Rn_,2, #save X4 on stack (x4/x6 alternate) + r1024_Mix 8, 9,r8 ,r9 ,_Rn_,4, #load X6 from stack + r1024_Mix 10,11,r10,r11,_Rn_,5 + r1024_Mix 12,13,r12,r13,_Rn_,6 + r1024_Mix 6, 7,rcx,rdx,_Rn_,3 + r1024_Mix 14,15,r14,r15,_Rn_,7 + .if _SKEIN_DEBUG + Skein_Debug_Round 1024,%(_Rn_+1) + .endif +_Rn_ = (_RR_) + 1 + r1024_Mix 0, 9,rdi,r9 ,_Rn_,0 + r1024_Mix 2,13,rbp,r13,_Rn_,1 + r1024_Mix 6,11,rcx,r11,_Rn_,2, #save X6 on stack (x4/x6 alternate) + r1024_Mix 10, 7,r10,rdx,_Rn_,4, #load X4 from stack + r1024_Mix 12, 3,r12,rax,_Rn_,5 + r1024_Mix 14, 5,r14,rbx,_Rn_,6 + r1024_Mix 4,15,rcx,r15,_Rn_,3 + r1024_Mix 8, 1,r8 ,rsi,_Rn_,7 + .if _SKEIN_DEBUG + Skein_Debug_Round 1024,%(_Rn_+1) + .endif +_Rn_ = (_RR_) + 2 + r1024_Mix 0, 7,rdi,rdx,_Rn_,0 + r1024_Mix 2, 5,rbp,rbx,_Rn_,1 + r1024_Mix 4, 3,rcx,rax,_Rn_,2, #save X4 on stack (x4/x6 alternate) + r1024_Mix 12,15,r12,r15,_Rn_,4, #load X6 from stack + r1024_Mix 14,13,r14,r13,_Rn_,5 + r1024_Mix 8,11,r8 ,r11,_Rn_,6 + r1024_Mix 6, 1,rcx,rsi,_Rn_,3 + r1024_Mix 10, 9,r10,r9 ,_Rn_,7 + .if _SKEIN_DEBUG + Skein_Debug_Round 1024,%(_Rn_+1) + .endif +_Rn_ = (_RR_) + 3 + r1024_Mix 0,15,rdi,r15,_Rn_,0 + r1024_Mix 2,11,rbp,r11,_Rn_,1 + r1024_Mix 6,13,rcx,r13,_Rn_,2, #save X6 on stack (x4/x6 alternate) + r1024_Mix 14, 1,r14,rsi,_Rn_,4, #load X4 from stack + r1024_Mix 8, 5,r8 ,rbx,_Rn_,5 + r1024_Mix 10, 3,r10,rax,_Rn_,6 + r1024_Mix 4, 9,rcx,r9 ,_Rn_,3 + r1024_Mix 12, 7,r12,rdx,_Rn_,7 + .if _SKEIN_DEBUG + Skein_Debug_Round 1024,%(_Rn_+1) + .endif + + .if (SKEIN_ASM_UNROLL && 1024) == 0 #here with rdi == rIdx, X0 on stack + #"rotate" the key schedule on the stack +i8 = o1K_r8 +i0 = o1K_rdi + movq %r8 , X_stk+8*i8(%rsp) #free up a register (save it on the stack) + movq ksKey+8* 0(%rsp,%rdi,8),%r8 #get key word + movq %r8 , 
ksKey+8*17(%rsp,%rdi,8) #rotate key (must do key first or tweak clobbers it!) + movq ksTwk+8* 0(%rsp,%rdi,8),%r8 #get tweak word + movq %r8 , ksTwk+8* 3(%rsp,%rdi,8) #rotate tweak (onto the stack) + movq X_stk+8*i8(%rsp) ,%r8 #get the reg back + incq %rdi #bump the index + movq %rdi, rIdx_offs (%rsp) #save rdi again + movq ksKey+8*i0(%rsp,%rdi,8),%rdi #get the key schedule word for X0 back + addq X_stk+8*i0(%rsp) ,%rdi #perform the X0 key injection + .endif + #show the result of the key injection + Skein_Debug_Round 1024,SKEIN_RND_KEY_INJECT +.endm #r1024_FourRounds +# +################ +# code +# +C_label Skein1024_Process_Block +# + Setup_Stack 1024,ROUNDS_1024/8,WCNT + movq TWEAK+ 8(%rdi),%r9 + jmp Skein1024_block_loop + # main hash loop for Skein1024 + .p2align 4 +Skein1024_block_loop: + # general register usage: + # RSP = stack pointer + # RAX..RDX,RSI,RDI = X1, X3..X7 (state words) + # R8 ..R15 = X8..X15 (state words) + # RBP = temp (used for X0 and X2) + # + .if (SKEIN_ASM_UNROLL & 1024) == 0 + xorq %rax,%rax #init loop index on the stack + movq %rax,rIdx_offs(%rsp) + .endif + movq TWEAK+ 0(%rdi),%r8 + addq bitAdd+ F_O(%rbp),%r8 #compute updated tweak value T0 + movq %r9 ,%r10 + xorq %r8 ,%r10 #%r8/%r9/%r10 = tweak schedule + movq %r8 ,TWEAK+ 0(%rdi) #save updated tweak value ctx->h.T[0] + movq %r8 ,ksTwk+ 0+F_O(%rbp) + movq %r9 ,ksTwk+ 8+F_O(%rbp) #keep values in %r8 ,%r9 for initial tweak injection below + movq %r10,ksTwk+16+F_O(%rbp) + .if _SKEIN_DEBUG + movq %r9 ,TWEAK+ 8(%rdi) #save updated tweak value ctx->h.T[1] for Skein_Debug_Block + .endif + movq blkPtr +F_O(%rbp),%rsi # rsi --> input block + movq $KW_PARITY ,%rax #overall key schedule parity + + # the logic here assumes the set {rdi,rsi,rbp,rax} = X[0,1,2,3] + .irp _rN_,0,1,2,3,4,6 #process the "initial" words, using r14/r15 as temps + movq X_VARS+8*_rN_(%rdi),%r14 #get state word + movq 8*_rN_(%rsi),%r15 #get msg word + xorq %r14,%rax #update key schedule overall parity + movq %r14,ksKey
+8*_rN_+F_O(%rbp) #save key schedule word on stack + movq %r15,Wcopy +8*_rN_+F_O(%rbp) #save local msg Wcopy + addq %r15,%r14 #do the initial key injection + movq %r14,X_stk +8*_rN_ (%rsp) #save initial state var on stack + .endr + # now process the rest, using the "real" registers + # (MUST do it in reverse order to inject tweaks r8/r9 first) + .irp _rr_,r15,r14,r13,r12,r11,r10,r9,r8,rdx,rbx +_oo_ = o1K_\_rr_ #offset associated with the register + movq X_VARS+8*_oo_(%rdi),%\_rr_ #get key schedule word from context + movq 8*_oo_(%rsi),%rcx #get next input msg word + movq %\_rr_, ksKey +8*_oo_(%rsp) #save key schedule on stack + xorq %\_rr_, %rax #accumulate key schedule parity + movq %rcx,Wcopy+8*_oo_+F_O(%rbp) #save copy of msg word for feedforward + addq %rcx,%\_rr_ #do the initial key injection + .if _oo_ == 13 #do the initial tweak injection + addReg _rr_,r8 # (only in words 13/14) + .elseif _oo_ == 14 + addReg _rr_,r9 + .endif + .endr + movq %rax,ksKey+8*WCNT+F_O(%rbp) #save key schedule parity +.if _SKEIN_DEBUG + Skein_Debug_Block 1024 #initial debug dump +.endif + addq $8*WCNT,%rsi #bump the msg ptr + movq %rsi,blkPtr+F_O(%rbp) #save bumped msg ptr + # re-load words 0..4 from stack, enter the main loop + .irp _rr_,rdi,rsi,rbp,rax,rcx #(no need to re-load x6, already on stack) + movq X_stk+8*o1K_\_rr_(%rsp),%\_rr_ #re-load state and get ready to go! + .endr +.if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_KEY_INITIAL #show state after initial key injection +.endif + # + ################# + # now the key schedule is computed.
Start the rounds + # +.if SKEIN_ASM_UNROLL & 1024 +_UNROLL_CNT = ROUNDS_1024/8 +.else +_UNROLL_CNT = SKEIN_UNROLL_1024 + .if ((ROUNDS_1024/8) % _UNROLL_CNT) + .error "Invalid SKEIN_UNROLL_1024" + .endif +Skein1024_round_loop: +.endif +# +_Rbase_ = 0 +.rept _UNROLL_CNT*2 #implement the rounds, 4 at a time + r1024_FourRounds %(4*_Rbase_+00) +_Rbase_ = _Rbase_+1 +.endr #rept _UNROLL_CNT*2 +# +.if (SKEIN_ASM_UNROLL & 1024) == 0 + cmpq $2*(ROUNDS_1024/8),tmpStk_1024(%rsp) #see if we are done + jb Skein1024_round_loop +.endif + # end of rounds + ################# + # + # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..15} + movq %rdx,X_stk+8*o1K_rdx(%rsp) #we need a register. x6 already on stack + movq ctxPtr(%rsp),%rdx + + .irp _rr_,rdi,rsi,rbp,rax,rcx,rbx,r8,r9,r10,r11,r12,r13,r14,r15 #do all but x6,x7 +_oo_ = o1K_\_rr_ + xorq Wcopy +8*_oo_(%rsp),%\_rr_ #feedforward XOR + movq %\_rr_,X_VARS+8*_oo_(%rdx) #save result into context + .if (_oo_ == 9) + movq $FIRST_MASK64 ,%r9 + .endif + .if (_oo_ == 14) + andq TWEAK+ 8(%rdx),%r9 + .endif + .endr + # + movq X_stk +8*6(%rsp),%rax #now process x6,x7 (skipped in .irp above) + movq X_stk +8*7(%rsp),%rbx + xorq Wcopy +8*6(%rsp),%rax + xorq Wcopy +8*7(%rsp),%rbx + movq %rax,X_VARS+8*6(%rdx) + decq blkCnt(%rsp) #set zero flag iff done + movq %rbx,X_VARS+8*7(%rdx) + + Skein_Debug_Round 1024,SKEIN_RND_FEED_FWD,, + # go back for more blocks, if needed + movq ctxPtr(%rsp),%rdi #don't muck with the flags here! + lea FRAME_OFFS(%rsp),%rbp + jnz Skein1024_block_loop + movq %r9 ,TWEAK+ 8(%rdx) + Reset_Stack + ret +# +Skein1024_Process_Block_End: +# +.if _SKEIN_DEBUG +Skein_Debug_Round_1024: + # call here with rdx = "round number", +_SP_OFFS_ = 8*2 #stack "offset" here: rdx, return addr + # + #save rest of X[] state on stack so debug routines can access it + .irp _rr_,rsi,rbp,rax,rbx,r8,r9,r10,r11,r12,r13,r14,r15 + movq %\_rr_,X_stk+8*o1K_\_rr_+_SP_OFFS_(%rsp) + .endr + # Figure out what to do with x0 (rdi).
When rdx == 0 mod 4, it's already on stack + cmpq $SKEIN_RND_SPECIAL,%rdx #special rounds always save + jae save_x0 + testq $3,%rdx #otherwise only if rdx != 0 mod 4 + jz save_x0_not +save_x0: + movq %rdi,X_stk+8*o1K_rdi+_SP_OFFS_(%rsp) +save_x0_not: + #figure out the x4/x6 swapping state and save the correct one! + cmpq $SKEIN_RND_SPECIAL,%rdx #special rounds always do x4 + jae save_x4 + testq $1,%rdx #and even ones have r4 as well + jz save_x4 + movq %rcx,X_stk+8*6+_SP_OFFS_(%rsp) + jmp debug_1024_go +save_x4: + movq %rcx,X_stk+8*4+_SP_OFFS_(%rsp) +debug_1024_go: + #now all is saved in Xstk[] except for rdx + push %rsi #save two regs for BLK_BITS-specific parms + push %rdi +_SP_OFFS_ = _SP_OFFS_ + 16 #adjust stack offset accordingly (now 32) + + movq _SP_OFFS_-8(%rsp),%rsi #get back original %rdx (pushed on stack in macro call) + movq %rsi,X_stk+8*o1K_rdx+_SP_OFFS_(%rsp) #and save it in its rightful place in X_stk[] + + movq ctxPtr+_SP_OFFS_(%rsp),%rsi #rsi = ctx_hdr_ptr + movq $1024,%rdi #rdi = block size + jmp Skein_Debug_Round_Common +.endif +# +.if _SKEIN_CODE_SIZE +C_label Skein1024_Process_Block_CodeSize + movq $(Skein1024_Process_Block_End-Skein1024_Process_Block),%rax + ret +# +C_label Skein1024_Unroll_Cnt + .if _UNROLL_CNT <> (ROUNDS_1024/8) + movq $_UNROLL_CNT,%rax + .else + xorq %rax,%rax + .endif + ret +.endif +# +.endif # _USE_ASM_ and 1024 +# +.if _SKEIN_DEBUG +#---------------------------------------------------------------- +#local debug routine to set up for calls to: +# void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,int r,const u64b_t *X) +# [ rdi rsi rdx rcx] +# +# here with %rdx = round number +# %rsi = ctx_hdr_ptr +# %rdi = block size (256/512/1024) +# on stack: saved rdi, saved rsi, retAddr, saved rdx +# +Skein_Debug_Round_Common: +_SP_OFFS_ = 32 #account for four words on stack already + .irp _rr_,rax,rbx,rcx,rbp,r8,r9,r10,r11,r12,r13,r14,r15 #save the rest of the regs + pushq %\_rr_ +_SP_OFFS_ = _SP_OFFS_+8 + .endr + .if 
(_SP_OFFS_ % 16) # make sure stack is still 16-byte aligned here + .error "Debug_Round_Common: stack alignment" + .endif + # compute %rcx = ptr to the X[] array on the stack (final parameter to call) + leaq X_stk+_SP_OFFS_(%rsp),%rcx #adjust for reg pushes, return address + cmpq $SKEIN_RND_FEED_FWD,%rdx #special handling for feedforward "round"? + jnz _got_rcxA + leaq X_VARS(%rsi),%rcx +_got_rcxA: + .if _USE_ASM_ & 1024 + # special handling for 1024-bit case + # (for rounds right before with key injection: + # use xDebug_1024[] instead of X_stk[]) + cmpq $SKEIN_RND_SPECIAL,%rdx + jae _got_rcxB #must be a normal round + orq %rdx,%rdx + jz _got_rcxB #just before key injection + test $3,%rdx + jne _got_rcxB + cmp $1024,%rdi #only 1024-bit(s) for now + jne _got_rcxB + leaq xDebug_1024+_SP_OFFS_(%rsp),%rcx +_got_rcxB: + .endif + call Skein_Show_Round #call external debug handler + + .irp _rr_,r15,r14,r13,r12,r11,r10,r9,r8,rbp,rcx,rbx,rax #restore regs + popq %\_rr_ +_SP_OFFS_ = _SP_OFFS_-8 + .endr + .if _SP_OFFS_ - 32 + .error "Debug_Round_Common: push/pop misalignment!" + .endif + popq %rdi + popq %rsi + ret +.endif +#---------------------------------------------------------------- + .end diff --git a/Additional_Implementations/skein_block_x86.asm b/Additional_Implementations/skein_block_x86.asm new file mode 100644 index 000000000000..4679e991fe04 --- /dev/null +++ b/Additional_Implementations/skein_block_x86.asm @@ -0,0 +1,1180 @@ +; +;---------------------------------------------------------------- +; 32-bit x86 assembler code for Skein block functions +; +; Author: Doug Whiting, Hifn +; +; This code is released to the public domain. 
+;---------------------------------------------------------------- +; + .386p + .model flat + .code +; +_MASK_ALL_ equ (256+512+1024) ;all three algorithm bits +; +;;;;;;;;;;;;;;;;; +ifndef SKEIN_USE_ASM +_USE_ASM_ = _MASK_ALL_ +elseif SKEIN_USE_ASM and _MASK_ALL_ +_USE_ASM_ = SKEIN_USE_ASM +else +_USE_ASM_ = _MASK_ALL_ +endif +;;;;;;;;;;;;;;;;; +ifndef SKEIN_LOOP +_SKEIN_LOOP = 0 ;default is all fully unrolled +else +_SKEIN_LOOP = SKEIN_LOOP +endif +; the unroll counts (0 --> fully unrolled) +SKEIN_UNROLL_256 = (_SKEIN_LOOP / 100) mod 10 +SKEIN_UNROLL_512 = (_SKEIN_LOOP / 10) mod 10 +SKEIN_UNROLL_1024 = (_SKEIN_LOOP ) mod 10 +; +SKEIN_ASM_UNROLL = 0 + irp _NN_,<256,512,1024> + if (SKEIN_UNROLL_&_NN_) eq 0 +SKEIN_ASM_UNROLL = SKEIN_ASM_UNROLL + _NN_ + endif + endm +;;;;;;;;;;;;;;;;; +; +ifndef SKEIN_ROUNDS +ROUNDS_256 = 72 +ROUNDS_512 = 72 +ROUNDS_1024 = 80 +else +ROUNDS_256 = 8*((((SKEIN_ROUNDS / 100) + 5) mod 10) + 5) +ROUNDS_512 = 8*((((SKEIN_ROUNDS / 10) + 5) mod 10) + 5) +ROUNDS_1024 = 8*((((SKEIN_ROUNDS ) + 5) mod 10) + 5) +endif +irp _NN_,<256,512,1024> + if _USE_ASM_ and _NN_ + irp _RR_,<%(ROUNDS_&_NN_)> + if _NN_ eq 1024 +%out +++ SKEIN_ROUNDS_&_NN_ = _RR_ + else +%out +++ SKEIN_ROUNDS_&_NN_ = _RR_ + endif + endm + endif +endm +;;;;;;;;;;;;;;;;; +; +ifdef SKEIN_CODE_SIZE +_SKEIN_CODE_SIZE equ (1) +else +ifdef SKEIN_PERF ;use code size if SKEIN_PERF is defined +_SKEIN_CODE_SIZE equ (1) +endif +endif +; +;;;;;;;;;;;;;;;;; +; +ifndef SKEIN_DEBUG +_SKEIN_DEBUG = 0 +else +_SKEIN_DEBUG = 1 +endif +;;;;;;;;;;;;;;;;; +; +; define offsets of fields in hash context structure +; +HASH_BITS = 0 ;# bits of hash output +BCNT = 4 + HASH_BITS ;number of bytes in BUFFER[] +TWEAK = 4 + BCNT ;tweak values[0..1] +X_VARS = 16 + TWEAK ;chaining vars +; +;(Note: buffer[] in context structure is NOT needed here :-) +; +KW_PARITY_LO= 0A9FC1A22h ;overall parity of key schedule words (hi32/lo32) +KW_PARITY_HI= 01BD11BDAh ;overall parity of key schedule words (hi32/lo32) +FIRST_MASK 
= NOT (1 SHL 30) ;FIRST block flag bit +; +; rotation constants for Skein +; +RC_256_0_0 = 14 +RC_256_0_1 = 16 + +RC_256_1_0 = 52 +RC_256_1_1 = 57 + +RC_256_2_0 = 23 +RC_256_2_1 = 40 + +RC_256_3_0 = 5 +RC_256_3_1 = 37 + +RC_256_4_0 = 25 +RC_256_4_1 = 33 + +RC_256_5_0 = 46 +RC_256_5_1 = 12 + +RC_256_6_0 = 58 +RC_256_6_1 = 22 + +RC_256_7_0 = 32 +RC_256_7_1 = 32 + +RC_512_0_0 = 46 +RC_512_0_1 = 36 +RC_512_0_2 = 19 +RC_512_0_3 = 37 + +RC_512_1_0 = 33 +RC_512_1_1 = 27 +RC_512_1_2 = 14 +RC_512_1_3 = 42 + +RC_512_2_0 = 17 +RC_512_2_1 = 49 +RC_512_2_2 = 36 +RC_512_2_3 = 39 + +RC_512_3_0 = 44 +RC_512_3_1 = 9 +RC_512_3_2 = 54 +RC_512_3_3 = 56 + +RC_512_4_0 = 39 +RC_512_4_1 = 30 +RC_512_4_2 = 34 +RC_512_4_3 = 24 + +RC_512_5_0 = 13 +RC_512_5_1 = 50 +RC_512_5_2 = 10 +RC_512_5_3 = 17 + +RC_512_6_0 = 25 +RC_512_6_1 = 29 +RC_512_6_2 = 39 +RC_512_6_3 = 43 + +RC_512_7_0 = 8 +RC_512_7_1 = 35 +RC_512_7_2 = 56 +RC_512_7_3 = 22 + +RC_1024_0_0 = 24 +RC_1024_0_1 = 13 +RC_1024_0_2 = 8 +RC_1024_0_3 = 47 +RC_1024_0_4 = 8 +RC_1024_0_5 = 17 +RC_1024_0_6 = 22 +RC_1024_0_7 = 37 + +RC_1024_1_0 = 38 +RC_1024_1_1 = 19 +RC_1024_1_2 = 10 +RC_1024_1_3 = 55 +RC_1024_1_4 = 49 +RC_1024_1_5 = 18 +RC_1024_1_6 = 23 +RC_1024_1_7 = 52 + +RC_1024_2_0 = 33 +RC_1024_2_1 = 4 +RC_1024_2_2 = 51 +RC_1024_2_3 = 13 +RC_1024_2_4 = 34 +RC_1024_2_5 = 41 +RC_1024_2_6 = 59 +RC_1024_2_7 = 17 + +RC_1024_3_0 = 5 +RC_1024_3_1 = 20 +RC_1024_3_2 = 48 +RC_1024_3_3 = 41 +RC_1024_3_4 = 47 +RC_1024_3_5 = 28 +RC_1024_3_6 = 16 +RC_1024_3_7 = 25 + +RC_1024_4_0 = 41 +RC_1024_4_1 = 9 +RC_1024_4_2 = 37 +RC_1024_4_3 = 31 +RC_1024_4_4 = 12 +RC_1024_4_5 = 47 +RC_1024_4_6 = 44 +RC_1024_4_7 = 30 + +RC_1024_5_0 = 16 +RC_1024_5_1 = 34 +RC_1024_5_2 = 56 +RC_1024_5_3 = 51 +RC_1024_5_4 = 4 +RC_1024_5_5 = 53 +RC_1024_5_6 = 42 +RC_1024_5_7 = 41 + +RC_1024_6_0 = 31 +RC_1024_6_1 = 44 +RC_1024_6_2 = 47 +RC_1024_6_3 = 46 +RC_1024_6_4 = 19 +RC_1024_6_5 = 42 +RC_1024_6_6 = 44 +RC_1024_6_7 = 25 + +RC_1024_7_0 = 9 +RC_1024_7_1 = 48 +RC_1024_7_2 = 35 
+RC_1024_7_3 = 52 +RC_1024_7_4 = 23 +RC_1024_7_5 = 31 +RC_1024_7_6 = 37 +RC_1024_7_7 = 20 +; +; Input: rHi,rLo +; Output: <<< _RCNT_ +Rol64 macro rHi,rLo,tmp,_RCNT_ + if _RCNT_ ;is there anything to do? + if _RCNT_ lt 32 + mov tmp,rLo + shld rLo,rHi,_RCNT_ + shld rHi,tmp,_RCNT_ + elseif _RCNT_ gt 32 + mov tmp,rLo + shrd rLo,rHi,((64-_RCNT_) AND 63) + shrd rHi,tmp,((64-_RCNT_) AND 63) + else + xchg rHi,rLo ;special case for _RCNT_ == 32 + endif + endif +endm +; +; Input: rHi,rLo +; Output: <<< rName&&rNum, and tmp trashed; +RotL64 macro rHi,rLo,tmp,BLK_SIZE,ROUND_NUM,MIX_NUM +_RCNT_ = ( RC_&BLK_SIZE&_&ROUND_NUM&_&MIX_NUM AND 63 ) + Rol64 rHi,rLo,tmp,_RCNT_ +endm +; +;---------------------------------------------------------------- +; declare allocated space on the stack +StackVar macro localName,localSize +localName = _STK_OFFS_ +_STK_OFFS_ = _STK_OFFS_+(localSize) +endm ;StackVar +; +;---------------------------------------------------------------- +; +; MACRO: Configure stack frame, allocate local vars +; +Setup_Stack macro WCNT,KS_CNT +_STK_OFFS_ = 0 ;starting offset from esp + ;----- local variables ;<-- esp + StackVar X_stk ,8*(WCNT) ;local context vars + StackVar Wcopy ,8*(WCNT) ;copy of input block + StackVar ksTwk ,8*3 ;key schedule: tweak words + StackVar ksKey ,8*(WCNT)+8 ;key schedule: key words + if WCNT le 8 +FRAME_OFFS = _STK_OFFS_ ;<-- ebp + else +FRAME_OFFS = _STK_OFFS_-8*4 ;<-- ebp + endif + if (SKEIN_ASM_UNROLL and (WCNT*64)) eq 0 + StackVar ksRot ,16*(KS_CNT+0);leave space for "rotation" to happen + endif +LOCAL_SIZE = _STK_OFFS_ ;size of local vars + ;----- + StackVar savRegs,8*4 ;pushad data + StackVar retAddr,4 ;return address + ;----- caller parameters + StackVar ctxPtr ,4 ;context ptr + StackVar blkPtr ,4 ;pointer to block data + StackVar blkCnt ,4 ;number of full blocks to process + StackVar bitAdd ,4 ;bit count to add to tweak + ;----- caller's stack frame +; +; Notes on stack frame setup: +; * the most frequently used variable is X_stk[], 
based at [esp+0] +; * the next most used is the key schedule words +; so ebp is "centered" there, allowing short offsets to the key/tweak +; schedule even in 1024-bit Skein case +; * the Wcopy variables are infrequently accessed, but they have long +; offsets from both esp and ebp only in the 1024-bit case. +; * all other local vars and calling parameters can be accessed +; with short offsets, except in the 1024-bit case +; + pushad ;save all regs + sub esp,LOCAL_SIZE ;make room for the locals + lea ebp,[esp+FRAME_OFFS] ;maximize use of short offsets + mov edi,[FP_+ctxPtr ] ;edi --> context +; +endm ;Setup_Stack +; +; FP_ is the frame base as a text macro: ebp-FRAME_OFFS equals the esp value +; established by Setup_Stack, so [FP_+var] reaches a StackVar offset through ebp +; (and stays valid while extra words are pushed on the stack) +FP_ equ <ebp-FRAME_OFFS> ;keep as many short offsets as possible +; +;---------------------------------------------------------------- +; +Reset_Stack macro procStart + add esp,LOCAL_SIZE ;get rid of locals (wipe??) + popad ;restore all regs + + ;display code size in bytes to stdout + irp _BCNT_,<%($+1-procStart)> ;account for return opcode +if _BCNT_ ge 10000 ;(align it all pretty) +%out procStart code size = _BCNT_ bytes +elseif _BCNT_ ge 1000 +%out procStart code size = _BCNT_ bytes +else +%out procStart code size = _BCNT_ bytes +endif + endm ;irp _BCNT_ + +endm ; Reset_Stack +; +;---------------------------------------------------------------- +; macros to help debug internals +; +if _SKEIN_DEBUG + extrn _Skein_Show_Block:near ;calls to C routines + extrn _Skein_Show_Round:near +; +SKEIN_RND_SPECIAL = 1000 +SKEIN_RND_KEY_INITIAL = SKEIN_RND_SPECIAL+0 +SKEIN_RND_KEY_INJECT = SKEIN_RND_SPECIAL+1 +SKEIN_RND_FEED_FWD = SKEIN_RND_SPECIAL+2 +; +Skein_Debug_Block macro BLK_BITS +; +;void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X, +; const u08b_t *blkPtr, const u64b_t *wPtr, +; const u64b_t *ksPtr,const u64b_t *tsPtr); +; + pushad ;save all regs + lea eax,[FP_+ksTwk] + lea ebx,[FP_+ksKey] + lea ecx,[esp+32+Wcopy] + mov edx,[FP_+ctxPtr] ;ctx_hdr_ptr + lea edx,[edx+X_VARS] ;edx ==> ctx->X[] + push eax ;tsPtr + push ebx ;ksPtr + push ecx ;wPtr +
push dword ptr [FP_+blkPtr] ;blkPtr + push edx ;ctx->Xptr + push dword ptr [FP_+ctxPtr] ;ctx_hdr_ptr + mov eax,BLK_BITS + push eax ;bits + ifdef _MINGW_ + call _Skein_Show_Block-4 ;strange linkage?? + else + call _Skein_Show_Block + endif + add esp,7*4 ;discard parameter space on stack + popad ;restore regs +endm ;Skein_Debug_Block + +; +Skein_Debug_Round macro BLK_SIZE,R,saveRegs +; +;void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,int r,const u64b_t *X); +; +; saveRegs: when non-blank, eax..edx are first written to X_stk words 0 and 1 +; so that the dumped X[] array includes the values still held in registers +; + ifnb <saveRegs> + mov [esp+X_stk+ 0],eax ;save internal vars for debug dump + mov [esp+X_stk+ 4],ebx + mov [esp+X_stk+ 8],ecx + mov [esp+X_stk+12],edx + endif + pushad ;save all regs + if R ne SKEIN_RND_FEED_FWD + lea eax,[esp+32+X_stk] + else + mov eax,[FP_+ctxPtr] + add eax,X_VARS + endif + push eax ;Xptr + if (SKEIN_ASM_UNROLL and BLK_SIZE) or (R ge SKEIN_RND_SPECIAL) + mov eax,R + else + lea eax,[4*edi+1+(((R)-1) and 3)] ;compute round number using edi + endif + push eax ;round number + push dword ptr [FP_+ctxPtr] ;ctx_hdr_ptr + mov eax,BLK_SIZE + push eax ;bits + ifdef _MINGW_ + call _Skein_Show_Round-4 ;strange linkage??
+ else + call _Skein_Show_Round + endif + add esp,4*4 ;discard parameter space on stack + popad ;restore regs +endm ;Skein_Debug_Round +endif ;ifdef SKEIN_DEBUG +; +;---------------------------------------------------------------- +; +; MACRO: a mix step +; X[A] += X[C]; X[C] = rotl64(X[C],RC) ^ X[A], with X[A] in ebx:eax, X[C] in edx:ecx +; ld_A,ld_C: X_stk word indices to load first (blank = already in registers) +; st_A : X_stk index that receives X[A] (blank = result stays in ebx:eax only) +; st_C : X_stk index that receives X[C] (blank = store back to index ld_C) +; RotNum0 : round number (low 3 bits select the rotation row); RotNum1: mix index +; _debug_ : non-zero skips the X[C] store unless _SKEIN_DEBUG is set, in which +; case the word is stored and the round state is dumped +; esi is passed to the rotate as its scratch register +; +MixStep macro BLK_SIZE,ld_A,ld_C,st_A,st_C,RotNum0,RotNum1,_debug_ + ifnb <ld_A> + mov eax,[esp+X_stk+8*(ld_A)+0] + mov ebx,[esp+X_stk+8*(ld_A)+4] + endif + ifnb <ld_C> + mov ecx,[esp+X_stk+8*(ld_C)+0] + mov edx,[esp+X_stk+8*(ld_C)+4] + endif + add eax, ecx ;X[A] += X[C] + adc ebx, edx + ifnb <st_A> + mov [esp+X_stk+8*(st_A)+0],eax + mov [esp+X_stk+8*(st_A)+4],ebx + endif +__rNum0 = (RotNum0) AND 7 + RotL64 ecx, edx, esi,%(BLK_SIZE),%(__rNum0),%(RotNum1) ;X[C] <<<= RC_ + xor ecx, eax ;X[C] ^= X[A] + xor edx, ebx + if _SKEIN_DEBUG or (0 eq (_debug_ + 0)) + ifb <st_C> + mov [esp+X_stk+8*(ld_C)+0],ecx + mov [esp+X_stk+8*(ld_C)+4],edx + else + mov [esp+X_stk+8*(st_C)+0],ecx + mov [esp+X_stk+8*(st_C)+4],edx + endif + endif + if _SKEIN_DEBUG and (0 ne (_debug_ + 0)) + Skein_Debug_Round BLK_SIZE,%(RotNum0+1) + endif +endm ;MixStep +; +;;;;;;;;;;;;;;;;; +; +; MACRO: key schedule injection +; +ks_Inject macro BLK_SIZE,X_load,X_stor,rLo,rHi,rndBase,keyIdx,twkIdx,ROUND_ADD + ;are rLo,rHi values already loaded?
if not, load them now + ifnb <X_load> ;blank X_load --> word is already in rLo,rHi + mov rLo,[esp+X_stk +8*(X_load) ] + mov rHi,[esp+X_stk +8*(X_load)+4] + endif + + ;inject the 64-bit key schedule value (and maybe the tweak as well) +if SKEIN_ASM_UNROLL and BLK_SIZE +_kOffs_ = ((rndBase)+(keyIdx)) mod ((BLK_SIZE/64)+1) + add rLo,[FP_+ksKey+8*_kOffs_+ 0] + adc rHi,[FP_+ksKey+8*_kOffs_+ 4] + ifnb <twkIdx> ;tweak word only where a tweak index is given +_tOffs_ = ((rndBase)+(twkIdx)) mod 3 + add rLo,[FP_+ksTwk+8*_tOffs_+ 0] + adc rHi,[FP_+ksTwk+8*_tOffs_+ 4] + endif + ifnb <ROUND_ADD> ;round counter only where ROUND_ADD is given + add rLo,(ROUND_ADD) + adc rHi,0 + endif +else + add rLo,[FP_+ksKey+8*(keyIdx)+8*edi ] + adc rHi,[FP_+ksKey+8*(keyIdx)+8*edi+4] + ifnb <twkIdx> + add rLo,[FP_+ksTwk+8*(twkIdx)+8*edi ] + adc rHi,[FP_+ksTwk+8*(twkIdx)+8*edi+4] + endif + ifnb <ROUND_ADD> + add rLo,edi ;edi is the round number + adc rHi,0 + endif +endif + + ;do we need to store updated rLo,rHi values? if so, do it now + ifnb <X_stor> ;blank X_stor --> result stays in rLo,rHi only + mov [esp+X_stk +8*(X_stor) ],rLo + mov [esp+X_stk +8*(X_stor)+4],rHi + endif +endm ;ks_Inject +; +;---------------------------------------------------------------- +; MACRO: key schedule rotation +; copies ksKey[edi] to ksKey[edi+WCNT+1] and ksTwk[edi] to ksTwk[edi+3] +; (rLo,rHi are scratch), extending both schedules one word past index edi +; +ks_Rotate macro rLo,rHi,WCNT + mov rLo,[FP_+ksKey+8*edi+ 0] ;"rotate" the key schedule in memory + mov rHi,[FP_+ksKey+8*edi+ 4] + mov [FP_+ksKey+8*edi+8*(WCNT+1)+ 0],rLo + mov [FP_+ksKey+8*edi+8*(WCNT+1)+ 4],rHi + mov rLo,[FP_+ksTwk+8*edi+ 0] + mov rHi,[FP_+ksTwk+8*edi+ 4] + mov [FP_+ksTwk+8*edi+8*3+ 0],rLo + mov [FP_+ksTwk+8*edi+8*3+ 4],rHi +endm +; +;---------------------------------------------------------------- +; +if _USE_ASM_ and 256 + public _Skein_256_Process_Block +; +; void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; +; MACRO: two rounds +; +R_256_TwoRounds macro _RR_,ld_0 + ; here with edx:ecx = X[1] + ;--------- round _RR_ + MixStep 256,ld_0, ,0,1,((_RR_)+0),0 + MixStep 256, 2,3,2,3,((_RR_)+0),1,1 + + ; here with edx:ecx = X[3] + ;--------- round _RR_ + 1 + MixStep 256, 0, ,0,3,((_RR_)+1),0 + MixStep 256, 2,1,2,1,((_RR_)+1),1,1 + + ; here with edx:ecx
= X[1] +endm ;R_256_TwoRounds +; +;;;;;;;;;;;;;;;;; +; +; code +; +_Skein_256_Process_Block proc near + WCNT = 4 ;WCNT=4 for Skein-256 + Setup_Stack WCNT,(ROUNDS_256/8) + + ; main hash loop for Skein_256 +Skein_256_block_loop: + mov eax,[edi+TWEAK+ 0] ;ebx:eax = tweak word T0 + mov ebx,[edi+TWEAK+ 4] + mov ecx,[edi+TWEAK+ 8] ;edx:ecx = tweak word T1 + mov edx,[edi+TWEAK+12] + + add eax,[FP_+bitAdd ] ;bump T0 by the bitAdd parameter + adc ebx, 0 + mov [edi+TWEAK ],eax ;save updated tweak value T0 + mov [edi+TWEAK+ 4],ebx + + mov [FP_+ksTwk ],eax ;build the tweak schedule on the stack + mov [FP_+ksTwk+ 4],ebx + xor eax,ecx ;ebx:eax = T0 ^ T1 + xor ebx,edx + mov [FP_+ksTwk+ 8],ecx + mov [FP_+ksTwk+12],edx + mov [FP_+ksTwk+16],eax + mov [FP_+ksTwk+20],ebx + + mov eax,KW_PARITY_LO ;init parity accumulator + mov ebx,KW_PARITY_HI +; +_NN_ = 0 + rept WCNT ;copy in the chaining vars + mov ecx,[edi+X_VARS+_NN_ ] + mov edx,[edi+X_VARS+_NN_+ 4] + xor eax,ecx ;compute overall parity along the way + xor ebx,edx + mov [FP_+ksKey +_NN_ ],ecx + mov [FP_+ksKey +_NN_+ 4],edx +_NN_ = _NN_+8 + endm +; + mov [FP_+ksKey +_NN_ ],eax ;save overall parity at the end of the array + mov [FP_+ksKey +_NN_+ 4],ebx + + mov esi,[FP_+blkPtr ] ;esi --> input block +; +_NN_ = WCNT*8-16 ;work down from the end + rept WCNT/2 ;perform initial key injection + mov eax,[esi+_NN_ + 0] + mov ebx,[esi+_NN_ + 4] + mov ecx,[esi+_NN_ + 8] + mov edx,[esi+_NN_ +12] + mov [esp+_NN_+Wcopy + 0],eax + mov [esp+_NN_+Wcopy + 4],ebx + mov [esp+_NN_+Wcopy + 8],ecx + mov [esp+_NN_+Wcopy +12],edx + add eax,[FP_+_NN_+ksKey + 0] + adc ebx,[FP_+_NN_+ksKey + 4] + add ecx,[FP_+_NN_+ksKey + 8] + adc edx,[FP_+_NN_+ksKey +12] + if _NN_ eq (WCNT*8-16) ;inject the tweak words + add eax,[FP_+ ksTwk + 8]; (at the appropriate points) + adc ebx,[FP_+ ksTwk +12] + elseif _NN_ eq (WCNT*8-32) + add ecx,[FP_+ ksTwk + 0] + adc edx,[FP_+ ksTwk + 4] + endif + if _NN_ or _SKEIN_DEBUG + mov [esp+_NN_+X_stk + 0],eax + mov [esp+_NN_+X_stk + 4],ebx 
+ mov [esp+_NN_+X_stk + 8],ecx + mov [esp+_NN_+X_stk +12],edx + endif +_NN_ = _NN_ - 16 ;end at X[0], so regs are already loaded for first MIX! + endm +; +if _SKEIN_DEBUG ;debug dump of state at this point + Skein_Debug_Block WCNT*64 + Skein_Debug_Round WCNT*64,SKEIN_RND_KEY_INITIAL +endif + add esi, WCNT*8 ;skip the block + mov [FP_+blkPtr ],esi ;update block pointer + ; + ; now the key schedule is computed. Start the rounds + ; +if SKEIN_ASM_UNROLL and 256 +_UNROLL_CNT = ROUNDS_256/8 +else +_UNROLL_CNT = SKEIN_UNROLL_256 ;unroll count + if ((ROUNDS_256/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_256" + endif + xor edi,edi ;edi = iteration count +Skein_256_round_loop: +endif +_Rbase_ = 0 +rept _UNROLL_CNT*2 + ; here with X[0], X[1] already loaded into eax..edx + R_256_TwoRounds %(4*_Rbase_+00), + R_256_TwoRounds %(4*_Rbase_+02),0 + + ;inject key schedule + if _UNROLL_CNT ne (ROUNDS_256/8) + ks_Rotate eax,ebx,WCNT + inc edi ;edi = round number + endif +_Rbase_ = _Rbase_+1 + ks_Inject 256,3,3,eax,ebx,_Rbase_,3, ,_Rbase_ + ks_Inject 256,2,2,eax,ebx,_Rbase_,2,1 + ks_Inject 256, , ,ecx,edx,_Rbase_,1,0 + ks_Inject 256,0, ,eax,ebx,_Rbase_,0 + if _SKEIN_DEBUG + Skein_Debug_Round 256,SKEIN_RND_KEY_INJECT,saveRegs + endif +endm ;rept _UNROLL_CNT +; + if _UNROLL_CNT ne (ROUNDS_256/8) + cmp edi,2*(ROUNDS_256/8) + jb Skein_256_round_loop + mov edi,[FP_+ctxPtr ] ;restore edi --> context + endif + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..3} +_NN_ = 0 + rept WCNT/2 + if _NN_ ;eax..edx already loaded the first time + mov eax,[esp+X_stk + _NN_ + 0] + mov ebx,[esp+X_stk + _NN_ + 4] + mov ecx,[esp+X_stk + _NN_ + 8] + mov edx,[esp+X_stk + _NN_ +12] + endif + if _NN_ eq 0 + and dword ptr [edi +TWEAK +12],FIRST_MASK + endif + xor eax,[esp+Wcopy + _NN_ + 0] + xor ebx,[esp+Wcopy + _NN_ + 4] + xor ecx,[esp+Wcopy + _NN_ + 8] + xor edx,[esp+Wcopy + _NN_ +12] + mov [edi+X_VARS+ _NN_ + 0],eax + mov [edi+X_VARS+ _NN_ + 4],ebx + mov [edi+X_VARS+ _NN_ + 
8],ecx + mov [edi+X_VARS+ _NN_ +12],edx +_NN_ = _NN_+16 + endm +if _SKEIN_DEBUG + Skein_Debug_Round 256,SKEIN_RND_FEED_FWD +endif + ; go back for more blocks, if needed + dec dword ptr [FP_+blkCnt] + jnz Skein_256_block_loop + + Reset_Stack _Skein_256_Process_Block + ret +_Skein_256_Process_Block endp +; +ifdef _SKEIN_CODE_SIZE + public _Skein_256_Process_Block_CodeSize +_Skein_256_Process_Block_CodeSize proc + mov eax,_Skein_256_Process_Block_CodeSize - _Skein_256_Process_Block + ret +_Skein_256_Process_Block_CodeSize endp +; + public _Skein_256_Unroll_Cnt +_Skein_256_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_256/8 + mov eax,_UNROLL_CNT + else + xor eax,eax + endif + ret +_Skein_256_Unroll_Cnt endp +endif +endif ;_USE_ASM_ and 256 +; +;---------------------------------------------------------------- +; +if _USE_ASM_ and 512 + public _Skein_512_Process_Block +; +; void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; MACRO: four rounds +; +R_512_FourRounds macro _RR_,ld_0 + ; here with edx:ecx = X[1] + ;--------- round _RR_ + ; R512(0,1,2,3,4,5,6,7,R_0, 1); + MixStep 512, ld_0, ,0,1,((_RR_)+0),0 + MixStep 512, 2,3,2,3,((_RR_)+0),1 + MixStep 512, 4,5,4,5,((_RR_)+0),2 + MixStep 512, 6,7,6, ,((_RR_)+0),3,1 + + ; here with edx:ecx = X[7] + ; R512(2,1,4,7,6,5,0,3,R_1, 2); + MixStep 512, 4, ,4,7,((_RR_)+1),1 + MixStep 512, 6,5,6,5,((_RR_)+1),2 + MixStep 512, 0,3,0,3,((_RR_)+1),3 + MixStep 512, 2,1,2, ,((_RR_)+1),0,1 + + ; here with edx:ecx = X[1] + ; R512(4,1,6,3,0,5,2,7,R_2, 3); + MixStep 512, 4, ,4,1,((_RR_)+2),0 + MixStep 512, 6,3,6,3,((_RR_)+2),1 + MixStep 512, 0,5,0,5,((_RR_)+2),2 + MixStep 512, 2,7,2, ,((_RR_)+2),3,1 + + ; here with edx:ecx = X[7] + ; R512(6,1,0,7,2,5,4,3,R_3, 4); + MixStep 512, 0, ,0,7,((_RR_)+3),1 + MixStep 512, 2,5,2,5,((_RR_)+3),2 + MixStep 512, 4,3,4,3,((_RR_)+3),3 + MixStep 512, 6,1,6, ,((_RR_)+3),0,1 + +endm ;R_512_FourRounds +; +;;;;;;;;;;;;;;;;; +; code +; 
+_Skein_512_Process_Block proc near + WCNT = 8 ;WCNT=8 for Skein-512 + Setup_Stack WCNT,(ROUNDS_512/8) + + ; main hash loop for Skein_512 +Skein_512_block_loop: + mov eax,[edi+TWEAK+ 0] ;ebx:eax = tweak word T0 + mov ebx,[edi+TWEAK+ 4] + mov ecx,[edi+TWEAK+ 8] ;edx:ecx = tweak word T1 + mov edx,[edi+TWEAK+12] + + add eax,[FP_+bitAdd ] ;bump T0 by the bitAdd parameter + adc ebx, 0 + mov [edi+TWEAK ],eax ;save updated tweak value T0 + mov [edi+TWEAK+ 4],ebx + + mov [FP_+ksTwk ],eax ;build the tweak schedule on the stack + mov [FP_+ksTwk+ 4],ebx + xor eax,ecx ;ebx:eax = T0 ^ T1 + xor ebx,edx + mov [FP_+ksTwk+ 8],ecx + mov [FP_+ksTwk+12],edx + mov [FP_+ksTwk+16],eax + mov [FP_+ksTwk+20],ebx + + mov eax,KW_PARITY_LO ;init parity accumulator + mov ebx,KW_PARITY_HI +; +_NN_ = 0 + rept WCNT ;copy in the chaining vars + mov ecx,[edi+X_VARS+_NN_ ] + mov edx,[edi+X_VARS+_NN_+ 4] + xor eax,ecx ;compute overall parity along the way + xor ebx,edx + mov [FP_+ksKey +_NN_ ],ecx + mov [FP_+ksKey +_NN_+ 4],edx +_NN_ = _NN_+8 + endm +; + mov [FP_+ksKey +_NN_ ],eax ;save overall parity at the end of the array + mov [FP_+ksKey +_NN_+ 4],ebx + + mov esi,[FP_+blkPtr ] ;esi --> input block +; +_NN_ = WCNT*8-16 ;work down from the end + rept WCNT/2 ;perform initial key injection + mov eax,[esi+_NN_ + 0] + mov ebx,[esi+_NN_ + 4] + mov ecx,[esi+_NN_ + 8] + mov edx,[esi+_NN_ +12] + mov [esp+_NN_+Wcopy + 0],eax + mov [esp+_NN_+Wcopy + 4],ebx + mov [esp+_NN_+Wcopy + 8],ecx + mov [esp+_NN_+Wcopy +12],edx + add eax,[FP_+_NN_+ksKey + 0] + adc ebx,[FP_+_NN_+ksKey + 4] + add ecx,[FP_+_NN_+ksKey + 8] + adc edx,[FP_+_NN_+ksKey +12] + if _NN_ eq (WCNT*8-16) ;inject the tweak words + add eax,[FP_+ ksTwk + 8]; (at the appropriate points) + adc ebx,[FP_+ ksTwk +12] + elseif _NN_ eq (WCNT*8-32) + add ecx,[FP_+ ksTwk + 0] + adc edx,[FP_+ ksTwk + 4] + endif + if _NN_ or _SKEIN_DEBUG + mov [esp+_NN_+X_stk + 0],eax + mov [esp+_NN_+X_stk + 4],ebx + mov [esp+_NN_+X_stk + 8],ecx + mov [esp+_NN_+X_stk +12],edx + 
endif +_NN_ = _NN_ - 16 ;end at X[0], so regs are already loaded for first MIX! + endm +; +if _SKEIN_DEBUG ;debug dump of state at this point + Skein_Debug_Block WCNT*64 + Skein_Debug_Round WCNT*64,SKEIN_RND_KEY_INITIAL +endif + add esi, WCNT*8 ;skip the block + mov [FP_+blkPtr ],esi ;update block pointer + ; + ; now the key schedule is computed. Start the rounds + ; +if SKEIN_ASM_UNROLL and 512 +_UNROLL_CNT = ROUNDS_512/8 +else +_UNROLL_CNT = SKEIN_UNROLL_512 + if ((ROUNDS_512/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_512" + endif + xor edi,edi ;edi = round counter +Skein_512_round_loop: +endif +_Rbase_ = 0 +rept _UNROLL_CNT*2 + ; here with X[0], X[1] already loaded into eax..edx + R_512_FourRounds %(4*_Rbase_+00), + + ;inject odd key schedule words + if _UNROLL_CNT ne (ROUNDS_512/8) + ks_Rotate eax,ebx,WCNT + inc edi ;edi = round number + endif +_Rbase_ = _Rbase_+1 + ks_Inject 512,7,7,eax,ebx,_Rbase_,7, ,_Rbase_ + ks_Inject 512,6,6,eax,ebx,_Rbase_,6,1 + ks_Inject 512,5,5,eax,ebx,_Rbase_,5,0 + ks_Inject 512,4,4,eax,ebx,_Rbase_,4 + ks_Inject 512,3,3,eax,ebx,_Rbase_,3 + ks_Inject 512,2,2,eax,ebx,_Rbase_,2 + ks_Inject 512, , ,ecx,edx,_Rbase_,1 + ks_Inject 512,0, ,eax,ebx,_Rbase_,0 + if _SKEIN_DEBUG + Skein_Debug_Round 512,SKEIN_RND_KEY_INJECT ,saveRegs + endif +endm ;rept _UNROLL_CNT +; +if (SKEIN_ASM_UNROLL and 512) eq 0 + cmp edi,2*(ROUNDS_512/8) + jb Skein_512_round_loop + mov edi,[FP_+ctxPtr ] ;restore edi --> context +endif + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} +_NN_ = 0 + rept WCNT/2 + if _NN_ ;eax..edx already loaded the first time + mov eax,[esp+X_stk + _NN_ + 0] + mov ebx,[esp+X_stk + _NN_ + 4] + mov ecx,[esp+X_stk + _NN_ + 8] + mov edx,[esp+X_stk + _NN_ +12] + endif + if _NN_ eq 0 + and dword ptr [edi + TWEAK+12],FIRST_MASK + endif + xor eax,[esp+Wcopy + _NN_ + 0] + xor ebx,[esp+Wcopy + _NN_ + 4] + xor ecx,[esp+Wcopy + _NN_ + 8] + xor edx,[esp+Wcopy + _NN_ +12] + mov [edi+X_VARS+ _NN_ + 0],eax + mov 
[edi+X_VARS+ _NN_ + 4],ebx + mov [edi+X_VARS+ _NN_ + 8],ecx + mov [edi+X_VARS+ _NN_ +12],edx +_NN_ = _NN_+16 + endm +if _SKEIN_DEBUG + Skein_Debug_Round 512,SKEIN_RND_FEED_FWD +endif + ; go back for more blocks, if needed + dec dword ptr [FP_+blkCnt] + jnz Skein_512_block_loop + + Reset_Stack _Skein_512_Process_Block + ret +_Skein_512_Process_Block endp +; +ifdef _SKEIN_CODE_SIZE + public _Skein_512_Process_Block_CodeSize +_Skein_512_Process_Block_CodeSize proc + mov eax,_Skein_512_Process_Block_CodeSize - _Skein_512_Process_Block + ret +_Skein_512_Process_Block_CodeSize endp +; + public _Skein_512_Unroll_Cnt +_Skein_512_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_512/8 + mov eax,_UNROLL_CNT + else + xor eax,eax + endif + ret +_Skein_512_Unroll_Cnt endp +endif +; +endif ; _USE_ASM_ and 512 +; +;---------------------------------------------------------------- +; +if _USE_ASM_ and 1024 + public _Skein1024_Process_Block +; +; void Skein_1024_Process_Block(Skein_1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; MACRO: four rounds +; +R_1024_FourRounds macro _RR_,ld_0 + ; here with edx:ecx = X[1] + + ;--------- round _RR_ + MixStep 1024, ld_0, , 0, 1,((_RR_)+0),0 + MixStep 1024, 2, 3, 2, 3,((_RR_)+0),1 + MixStep 1024, 4, 5, 4, 5,((_RR_)+0),2 + MixStep 1024, 6, 7, 6, 7,((_RR_)+0),3 + MixStep 1024, 8, 9, 8, 9,((_RR_)+0),4 + MixStep 1024, 10,11,10,11,((_RR_)+0),5 + MixStep 1024, 12,13,12,13,((_RR_)+0),6 + MixStep 1024, 14,15,14, ,((_RR_)+0),7,1 + ; here with edx:ecx = X[15] + + ;--------- round _RR_+1 + MixStep 1024, 4, , 4,15,((_RR_)+1),3 + MixStep 1024, 0, 9, 0, 9,((_RR_)+1),0 + MixStep 1024, 2,13, 2,13,((_RR_)+1),1 + MixStep 1024, 6,11, 6,11,((_RR_)+1),2 + MixStep 1024, 10, 7,10, 7,((_RR_)+1),4 + MixStep 1024, 12, 3,12, 3,((_RR_)+1),5 + MixStep 1024, 14, 5,14, 5,((_RR_)+1),6 + MixStep 1024, 8, 1, 8, ,((_RR_)+1),7,1 + ; here with edx:ecx = X[1] + + ;--------- round _RR_+2 + MixStep 1024, 6, , 6, 1,((_RR_)+2),3 + MixStep 1024, 
0, 7, 0, 7,((_RR_)+2),0 + MixStep 1024, 2, 5, 2, 5,((_RR_)+2),1 + MixStep 1024, 4, 3, 4, 3,((_RR_)+2),2 + MixStep 1024, 12,15,12,15,((_RR_)+2),4 + MixStep 1024, 14,13,14,13,((_RR_)+2),5 + MixStep 1024, 8,11, 8,11,((_RR_)+2),6 + MixStep 1024, 10, 9,10, ,((_RR_)+2),7,1 + ; here with edx:ecx = X[9] + + ;--------- round _RR_+3 + MixStep 1024, 4, , 4, 9,((_RR_)+3),3 + MixStep 1024, 0,15, 0,15,((_RR_)+3),0 + MixStep 1024, 2,11, 2,11,((_RR_)+3),1 + MixStep 1024, 6,13, 6,13,((_RR_)+3),2 + MixStep 1024, 8, 5, 8, 5,((_RR_)+3),5 + MixStep 1024, 10, 3,10, 3,((_RR_)+3),6 + MixStep 1024, 12, 7,12, 7,((_RR_)+3),7 + MixStep 1024, 14, 1,14, ,((_RR_)+3),4,1 + + ; here with edx:ecx = X[1] +endm ;R_1024_FourRounds +; +;;;;;;;;;;;;;;;;; +; code +; +_Skein1024_Process_Block proc near +; + WCNT = 16 ;WCNT=16 for Skein-1024 + Setup_Stack WCNT,(ROUNDS_1024/8) + + ; main hash loop for Skein1024 +Skein1024_block_loop: + mov eax,[edi+TWEAK+ 0] ;ebx:eax = tweak word T0 + mov ebx,[edi+TWEAK+ 4] + mov ecx,[edi+TWEAK+ 8] ;edx:ecx = tweak word T1 + mov edx,[edi+TWEAK+12] + + add eax,[FP_+bitAdd ] ;bump T0 by the bitAdd parameter + adc ebx, 0 + mov [edi+TWEAK ],eax ;save updated tweak value T0 + mov [edi+TWEAK+ 4],ebx + + mov [FP_+ksTwk ],eax ;build the tweak schedule on the stack + mov [FP_+ksTwk+ 4],ebx + xor eax,ecx ;ebx:eax = T0 ^ T1 + xor ebx,edx + mov [FP_+ksTwk+ 8],ecx + mov [FP_+ksTwk+12],edx + mov [FP_+ksTwk+16],eax + mov [FP_+ksTwk+20],ebx + + mov eax,KW_PARITY_LO ;init parity accumulator + mov ebx,KW_PARITY_HI +EDI_BIAS equ 70h ;bias the edi offsets to make them short! 
+ add edi, EDI_BIAS +; CT_ undoes the bias: [CT_+offs] addresses ctx+offs while edi = ctx+EDI_BIAS +CT_ equ <edi-EDI_BIAS> +; +_NN_ = 0 + rept WCNT ;copy in the chaining vars + mov ecx,[CT_+X_VARS+_NN_ ] + mov edx,[CT_+X_VARS+_NN_+ 4] + xor eax,ecx ;compute overall parity along the way + xor ebx,edx + mov [FP_+ksKey +_NN_ ],ecx + mov [FP_+ksKey +_NN_+ 4],edx +_NN_ = _NN_+8 + endm +; + mov [FP_+ksKey +_NN_ ],eax ;save overall parity at the end of the array + mov [FP_+ksKey +_NN_+ 4],ebx + + mov esi,[FP_+blkPtr ] ;esi --> input block + lea edi,[esp+Wcopy] +; +_NN_ = WCNT*8-16 ;work down from the end + rept WCNT/2 ;perform initial key injection + mov eax,[esi+_NN_ + 0] + mov ebx,[esi+_NN_ + 4] + mov ecx,[esi+_NN_ + 8] + mov edx,[esi+_NN_ +12] + mov [edi+_NN_+ + 0],eax + mov [edi+_NN_+ + 4],ebx + mov [edi+_NN_+ + 8],ecx + mov [edi+_NN_+ +12],edx + add eax,[FP_+_NN_+ksKey + 0] + adc ebx,[FP_+_NN_+ksKey + 4] + add ecx,[FP_+_NN_+ksKey + 8] + adc edx,[FP_+_NN_+ksKey +12] + if _NN_ eq (WCNT*8-16) ;inject the tweak words + add eax,[FP_+ ksTwk + 8]; (at the appropriate points) + adc ebx,[FP_+ ksTwk +12] + elseif _NN_ eq (WCNT*8-32) + add ecx,[FP_+ ksTwk + 0] + adc edx,[FP_+ ksTwk + 4] + endif + if _NN_ or _SKEIN_DEBUG + mov [esp+_NN_+X_stk + 0],eax + mov [esp+_NN_+X_stk + 4],ebx + mov [esp+_NN_+X_stk + 8],ecx + mov [esp+_NN_+X_stk +12],edx + endif +_NN_ = _NN_ - 16 ;end at X[0], so regs are already loaded for first MIX! + endm +; +if _SKEIN_DEBUG ;debug dump of state at this point + Skein_Debug_Block WCNT*64 + Skein_Debug_Round WCNT*64,SKEIN_RND_KEY_INITIAL +endif + sub esi,-WCNT*8 ;skip the block (short immediate) + mov [FP_+blkPtr ],esi ;update block pointer + ; + ; now the key schedule is computed.
Start the rounds + ; +if SKEIN_ASM_UNROLL and 1024 +_UNROLL_CNT = ROUNDS_1024/8 +else +_UNROLL_CNT = SKEIN_UNROLL_1024 + if ((ROUNDS_1024/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_1024" + endif + xor edi,edi ;edi = round counter +Skein_1024_round_loop: +endif + +_Rbase_ = 0 +rept _UNROLL_CNT*2 + ; here with X[0], X[1] already loaded into eax..edx + R_1024_FourRounds %(4*_Rbase_+00), + + ;inject odd key schedule words + ;inject odd key schedule words + if _UNROLL_CNT ne (ROUNDS_1024/8) + ks_Rotate eax,ebx,WCNT + inc edi ;edi = round number + endif +_Rbase_ = _Rbase_+1 + ks_Inject 1024,15,15,eax,ebx,_Rbase_,15, ,_Rbase_ + ks_Inject 1024,14,14,eax,ebx,_Rbase_,14,1 + ks_Inject 1024,13,13,eax,ebx,_Rbase_,13,0 + irp _w,<12,11,10,9,8,7,6,5,4,3,2> + ks_Inject 1024,_w,_w,eax,ebx,_Rbase_,_w + endm + ks_Inject 1024, , ,ecx,edx,_Rbase_,1 + ks_Inject 1024, 0, ,eax,ebx,_Rbase_,0 + + if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_KEY_INJECT ,saveRegs + endif +endm ;rept _UNROLL_CNT +; +if (SKEIN_ASM_UNROLL and 1024) eq 0 + cmp edi,2*(ROUNDS_1024/8) + jb Skein_1024_round_loop +endif + mov edi,[FP_+ctxPtr ] ;restore edi --> context + add edi,EDI_BIAS ;and bias it for short offsets below + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..15} + lea esi,[esp+Wcopy] ;use short offsets below +_NN_ = 0 + rept WCNT/2 + if _NN_ ;eax..edx already loaded the first time + mov eax,[esp+X_stk + _NN_ + 0] + mov ebx,[esp+X_stk + _NN_ + 4] + mov ecx,[esp+X_stk + _NN_ + 8] + mov edx,[esp+X_stk + _NN_ +12] + endif + if _NN_ eq 0 + and dword ptr [CT_ + TWEAK+12],FIRST_MASK + endif + xor eax,[esi + _NN_ + 0] + xor ebx,[esi + _NN_ + 4] + xor ecx,[esi + _NN_ + 8] + xor edx,[esi + _NN_ +12] + mov [CT_+X_VARS+ _NN_ + 0],eax + mov [CT_+X_VARS+ _NN_ + 4],ebx + mov [CT_+X_VARS+ _NN_ + 8],ecx + mov [CT_+X_VARS+ _NN_ +12],edx +_NN_ = _NN_+16 + endm + sub edi,EDI_BIAS ;undo the bias for return + +if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_FEED_FWD +endif + ; go 
back for more blocks, if needed + dec dword ptr [FP_+blkCnt] + jnz Skein1024_block_loop + + Reset_Stack _Skein1024_Process_Block + ret +_Skein1024_Process_Block endp +; +ifdef _SKEIN_CODE_SIZE + public _Skein1024_Process_Block_CodeSize +_Skein1024_Process_Block_CodeSize proc + mov eax,_Skein1024_Process_Block_CodeSize - _Skein1024_Process_Block + ret +_Skein1024_Process_Block_CodeSize endp +; + public _Skein1024_Unroll_Cnt +_Skein1024_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_1024/8 + mov eax,_UNROLL_CNT + else + xor eax,eax + endif + ret +_Skein1024_Unroll_Cnt endp +endif +; +endif ; _USE_ASM_ and 1024 +;---------------------------------------------------------------- + end diff --git a/Additional_Implementations/skein_block_xmm32.asm b/Additional_Implementations/skein_block_xmm32.asm new file mode 100644 index 000000000000..96ef121cd49a --- /dev/null +++ b/Additional_Implementations/skein_block_xmm32.asm @@ -0,0 +1,1167 @@ +; +;---------------------------------------------------------------- +; 32-bit x86 assembler code for Skein block functions using XMM registers +; +; Author: Doug Whiting, Hifn +; +; This code is released to the public domain. 
+;----------------------------------------------------------------
+; assembler mode setup
+    .386p
+    .model flat
+    .code
+    .xmm                            ;turn on XMM instruction support
+;
+_MASK_ALL_ equ (1024+512+256)       ;one bit per algorithm: 256, 512, 1024
+;
+;;;;;;;;;;;;;;;;;
+; _USE_ASM_ = mask of block functions to assemble (default: all of them)
+_USE_ASM_ = _MASK_ALL_
+ifdef SKEIN_USE_ASM
+if SKEIN_USE_ASM and _MASK_ALL_     ;honor the override only if it names a known size
+_USE_ASM_ = SKEIN_USE_ASM
+endif
+endif
+;
+;;;;;;;;;;;;;;;;;
+ifdef SKEIN_LOOP
+_SKEIN_LOOP = SKEIN_LOOP
+else
+_SKEIN_LOOP = 0                     ;no override: everything fully unrolled
+endif
+;--------------
+; unroll counts = the three decimal digits of _SKEIN_LOOP (0 --> fully unrolled)
+SKEIN_UNROLL_1024 = (_SKEIN_LOOP      ) mod 10
+SKEIN_UNROLL_512  = (_SKEIN_LOOP /  10) mod 10
+SKEIN_UNROLL_256  = (_SKEIN_LOOP / 100) mod 10
+; SKEIN_ASM_UNROLL collects the bit of every algorithm that is fully unrolled
+SKEIN_ASM_UNROLL = 0
+    irp _ALG_,<256,512,1024>
+    if (SKEIN_UNROLL_&_ALG_) eq 0
+SKEIN_ASM_UNROLL = _ALG_ + SKEIN_ASM_UNROLL
+    endif
+    endm
+;
+;;;;;;;;;;;;;;;;;
+; round counts: decimal digit d of SKEIN_ROUNDS selects 8*(((d+5) mod 10)+5) rounds
+ifdef SKEIN_ROUNDS
+ROUNDS_1024 = 8*((((SKEIN_ROUNDS      ) + 5) mod 10) + 5)
+ROUNDS_512  = 8*((((SKEIN_ROUNDS /  10) + 5) mod 10) + 5)
+ROUNDS_256  = 8*((((SKEIN_ROUNDS / 100) + 5) mod 10) + 5)
+else
+ROUNDS_1024 = 80
+ROUNDS_512  = 72
+ROUNDS_256  = 72
+endif
+irp _NN_,<256,512,1024>
+  if _USE_ASM_ and _NN_
+    irp _RR_,<%(ROUNDS_&_NN_)>
+      if _NN_ eq 1024
+%out +++ SKEIN_ROUNDS_&_NN_ = _RR_
+      else
+%out +++ SKEIN_ROUNDS_&_NN_ = _RR_
+      endif
+    endm
+  endif
+endm
+;;;;;;;;;;;;;;;;;
+; _SKEIN_CODE_SIZE is defined when either SKEIN_CODE_SIZE or SKEIN_PERF is
+ifdef SKEIN_CODE_SIZE
+_SKEIN_CODE_SIZE equ (1)
+else
+ifdef SKEIN_PERF                    ;SKEIN_PERF implies the code-size entry points too
+_SKEIN_CODE_SIZE equ (1)
+endif
+endif
+;
+;;;;;;;;;;;;;;;;;
+; _SKEIN_DEBUG is always defined: 1 if SKEIN_DEBUG was supplied, else 0
+ifdef SKEIN_DEBUG
+_SKEIN_DEBUG = 1
+else
+_SKEIN_DEBUG = 0
+endif
+;;;;;;;;;;;;;;;;;
+;
+; byte offsets of the fields in the hash context structure
+;
+HASH_BITS = 0                       ;# bits of hash output
+BCNT      = HASH_BITS + 4           ;number of bytes in BUFFER[]
+TWEAK     = BCNT + 4                ;tweak values[0..1]
+X_VARS    = TWEAK + 16              ;chaining vars
+;
+;(the buffer[] field of the context structure is NOT needed by this code :-)
+;
+KW_PARITY_LO= 0A9FC1A22h            ;overall parity of key schedule words: low 32 bits
+KW_PARITY_HI= 01BD11BDAh            ;  ... and high 32 bits
+FIRST_MASK8 = NOT (1 SHL 6) ;FIRST block flag bit +; +; rotation constants for Skein +; +RC_256_0_0 = 14 +RC_256_0_1 = 16 + +RC_256_1_0 = 52 +RC_256_1_1 = 57 + +RC_256_2_0 = 23 +RC_256_2_1 = 40 + +RC_256_3_0 = 5 +RC_256_3_1 = 37 + +RC_256_4_0 = 25 +RC_256_4_1 = 33 + +RC_256_5_0 = 46 +RC_256_5_1 = 12 + +RC_256_6_0 = 58 +RC_256_6_1 = 22 + +RC_256_7_0 = 32 +RC_256_7_1 = 32 + +RC_512_0_0 = 46 +RC_512_0_1 = 36 +RC_512_0_2 = 19 +RC_512_0_3 = 37 + +RC_512_1_0 = 33 +RC_512_1_1 = 27 +RC_512_1_2 = 14 +RC_512_1_3 = 42 + +RC_512_2_0 = 17 +RC_512_2_1 = 49 +RC_512_2_2 = 36 +RC_512_2_3 = 39 + +RC_512_3_0 = 44 +RC_512_3_1 = 9 +RC_512_3_2 = 54 +RC_512_3_3 = 56 + +RC_512_4_0 = 39 +RC_512_4_1 = 30 +RC_512_4_2 = 34 +RC_512_4_3 = 24 + +RC_512_5_0 = 13 +RC_512_5_1 = 50 +RC_512_5_2 = 10 +RC_512_5_3 = 17 + +RC_512_6_0 = 25 +RC_512_6_1 = 29 +RC_512_6_2 = 39 +RC_512_6_3 = 43 + +RC_512_7_0 = 8 +RC_512_7_1 = 35 +RC_512_7_2 = 56 +RC_512_7_3 = 22 + +RC_1024_0_0 = 24 +RC_1024_0_1 = 13 +RC_1024_0_2 = 8 +RC_1024_0_3 = 47 +RC_1024_0_4 = 8 +RC_1024_0_5 = 17 +RC_1024_0_6 = 22 +RC_1024_0_7 = 37 + +RC_1024_1_0 = 38 +RC_1024_1_1 = 19 +RC_1024_1_2 = 10 +RC_1024_1_3 = 55 +RC_1024_1_4 = 49 +RC_1024_1_5 = 18 +RC_1024_1_6 = 23 +RC_1024_1_7 = 52 + +RC_1024_2_0 = 33 +RC_1024_2_1 = 4 +RC_1024_2_2 = 51 +RC_1024_2_3 = 13 +RC_1024_2_4 = 34 +RC_1024_2_5 = 41 +RC_1024_2_6 = 59 +RC_1024_2_7 = 17 + +RC_1024_3_0 = 5 +RC_1024_3_1 = 20 +RC_1024_3_2 = 48 +RC_1024_3_3 = 41 +RC_1024_3_4 = 47 +RC_1024_3_5 = 28 +RC_1024_3_6 = 16 +RC_1024_3_7 = 25 + +RC_1024_4_0 = 41 +RC_1024_4_1 = 9 +RC_1024_4_2 = 37 +RC_1024_4_3 = 31 +RC_1024_4_4 = 12 +RC_1024_4_5 = 47 +RC_1024_4_6 = 44 +RC_1024_4_7 = 30 + +RC_1024_5_0 = 16 +RC_1024_5_1 = 34 +RC_1024_5_2 = 56 +RC_1024_5_3 = 51 +RC_1024_5_4 = 4 +RC_1024_5_5 = 53 +RC_1024_5_6 = 42 +RC_1024_5_7 = 41 + +RC_1024_6_0 = 31 +RC_1024_6_1 = 44 +RC_1024_6_2 = 47 +RC_1024_6_3 = 46 +RC_1024_6_4 = 19 +RC_1024_6_5 = 42 +RC_1024_6_6 = 44 +RC_1024_6_7 = 25 + +RC_1024_7_0 = 9 +RC_1024_7_1 = 48 +RC_1024_7_2 = 
35 +RC_1024_7_3 = 52 +RC_1024_7_4 = 23 +RC_1024_7_5 = 31 +RC_1024_7_6 = 37 +RC_1024_7_7 = 20 +; +mov64 macro x0,x1 + movq x0,x1 +endm +; +;---------------------------------------------------------------- +; declare allocated space on the stack +StackVar macro localName,localSize +localName = _STK_OFFS_ +_STK_OFFS_ = _STK_OFFS_+(localSize) +endm ;StackVar +; +;---------------------------------------------------------------- +; +; MACRO: Configure stack frame, allocate local vars +; +Setup_Stack macro WCNT,RND_CNT +_STK_OFFS_ = 0 ;starting offset from esp, forced on 16-byte alignment + ;----- local variables ;<-- esp + StackVar X_stk , 8*(WCNT) ;local context vars + StackVar Wcopy , 8*(WCNT) ;copy of input block + StackVar ksTwk ,16*3 ;key schedule: tweak words + StackVar ksKey ,16*(WCNT)+16;key schedule: key words +FRAME_OFFS = ksTwk+128 ;<-- ebp + if (SKEIN_ASM_UNROLL and (WCNT*64)) eq 0 + StackVar ksRot,16*(RND_CNT/4);leave space for ks "rotation" to happen + endif +LOCAL_SIZE = _STK_OFFS_ ;size of local vars + ; + ;"restart" the stack defns, because we relocate esp to guarantee alignment + ; (i.e., these vars are NOT at fixed offsets from esp) +_STK_OFFS_ = 0 + ;----- + StackVar savRegs,8*4 ;pushad data + StackVar retAddr,4 ;return address + ;----- caller parameters + StackVar ctxPtr ,4 ;context ptr + StackVar blkPtr ,4 ;pointer to block data + StackVar blkCnt ,4 ;number of full blocks to process + StackVar bitAdd ,4 ;bit count to add to tweak + ;----- caller's stack frame +; +; Notes on stack frame setup: +; * the most used variable (except for Skein-256) is X_stk[], based at [esp+0] +; * the next most used is the key schedule words +; so ebp is "centered" there, allowing short offsets to the key/tweak +; schedule in 256/512-bit Skein cases, but not posible for Skein-1024 :-( +; * the Wcopy variables are infrequently accessed, and they have long +; offsets from both esp and ebp only in the 1024-bit case. 
+; * all other local vars and calling parameters can be accessed +; with short offsets, except in the 1024-bit case +; + pushad ;save all regs + mov ebx,esp ;keep ebx as pointer to caller parms + sub esp,LOCAL_SIZE ;make room for the locals + and esp,not 15 ;force alignment + mov edi,[ebx+ctxPtr ] ;edi --> Skein context + lea ebp,[esp+FRAME_OFFS] ;maximize use of short offsets from ebp + mov ecx,ptr32 [ebx+blkCnt] ;keep block cnt in ecx +; +endm ;Setup_Stack +; +FP_ equ ;keep as many short offsets as possible +SI_ equ ;keep as many short offsets as possible +ptr64 equ ;useful abbreviations +ptr32 equ +ptr08 equ +; +;---------------------------------------------------------------- +; +Reset_Stack macro procStart + mov esp,ebx ;get rid of locals (wipe??) + popad ;restore all regs + + ;display code size in bytes to stdout + irp _BCNT_,<%($+1-procStart)> ;account for return opcode +if _BCNT_ ge 10000 ;(align it all pretty) +%out procStart code size = _BCNT_ bytes +elseif _BCNT_ ge 1000 +%out procStart code size = _BCNT_ bytes +else +%out procStart code size = _BCNT_ bytes +endif + endm ;irp _BCNT_ + +endm ; Reset_Stack +; +;---------------------------------------------------------------- +; macros to help debug internals +; +if _SKEIN_DEBUG + extrn _Skein_Show_Block:near ;calls to C routines + extrn _Skein_Show_Round:near +; +SKEIN_RND_SPECIAL = 1000 +SKEIN_RND_KEY_INITIAL = SKEIN_RND_SPECIAL+0 +SKEIN_RND_KEY_INJECT = SKEIN_RND_SPECIAL+1 +SKEIN_RND_FEED_FWD = SKEIN_RND_SPECIAL+2 +; +Skein_Debug_Block macro BLK_BITS +; +;void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X, +; const u08b_t *blkPtr, const u64b_t *wPtr, +; const u64b_t *ksPtr,const u64b_t *tsPtr); +; + Put_XMM_&BLK_BITS + pushad ;save all regs + lea eax,[FP_+ksTwk+1] ;+1 = flag: "stride" size = 2 qwords + lea esi,[FP_+ksKey+1] + lea ecx,[esp+32+Wcopy] ;adjust offset by 32 for pushad + mov edx,[ebx+ctxPtr] ;ctx_hdr_ptr + lea edx,[edx+X_VARS] ;edx ==> cxt->X[] + push eax ;tsPtr + push 
esi ;ksPtr + push ecx ;wPtr + push ptr32 [ebx+blkPtr] ;blkPtr + push edx ;ctx->Xptr + push ptr32 [ebx+ctxPtr] ;ctx_hdr_ptr + mov eax,BLK_BITS + push eax ;bits + ifdef _MINGW_ + call _Skein_Show_Block-4 ;strange linkage?? + else + call _Skein_Show_Block + endif + add esp,7*4 ;discard parameter space on stack + popad ;restore regs +; + Get_XMM_&BLK_BITS +endm ;Skein_Debug_Block + +; +Skein_Debug_Round macro BLK_BITS,R,saveRegs +; +;void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,int r,const u64b_t *X); +; + ifnb + Put_XMM_&BLK_BITS + endif + pushad ;save all regs + if R ne SKEIN_RND_FEED_FWD + lea eax,[esp+32+X_stk] ;adjust offset by 32 for pushad + else + mov eax,[ebx+ctxPtr] + add eax,X_VARS + endif + push eax ;Xptr + if (SKEIN_ASM_UNROLL and BLK_BITS) or (R ge SKEIN_RND_SPECIAL) + mov eax,R + else + lea eax,[4*edx+1+(((R)-1) and 3)] ;compute round number using edx + endif + push eax ;round number + push ptr32 [ebx+ctxPtr] ;ctx_hdr_ptr + mov eax,BLK_BITS + push eax ;bits + ifdef _MINGW_ + call _Skein_Show_Round-4 ;strange linkage?? 
+ else + call _Skein_Show_Round + endif + add esp,4*4 ;discard parameter space on stack + popad ;restore regs + + ifnb + Get_XMM_&BLK_BITS ;save internal vars for debug dump + endif +endm ;Skein_Debug_Round +endif ;ifdef SKEIN_DEBUG +; +;---------------------------------------------------------------- +; useful macros +_ldX macro xn + ifnb + mov64 xmm&xn,ptr64 [esp+X_stk+8*xn] + endif +endm + +_stX macro xn + ifnb + mov64 ptr64 [esp+X_stk+8*xn],xmm&xn + endif +endm +; +;---------------------------------------------------------------- +; +if _USE_ASM_ and 256 + public _Skein_256_Process_Block +; +; void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; +; Skein-256 round macros +; +R_256_OneRound macro _RR_,x0,x1,x2,x3,t0,t1 + irp _qq_,<%((_RR_) and 7)> ;figure out which rotation constants to use + if x0 eq 0 +_RC0_ = RC_256_&_qq_&_0 +_RC1_ = RC_256_&_qq_&_1 + else +_RC0_ = RC_256_&_qq_&_1 +_RC1_ = RC_256_&_qq_&_0 + endif + endm +; + paddq xmm&x0,xmm&x1 + mov64 xmm&t0,xmm&x1 + psllq xmm&x1, _RC0_ + psrlq xmm&t0,64-_RC0_ + xorpd xmm&x1,xmm&x0 + xorpd xmm&x1,xmm&t0 +; + paddq xmm&x2,xmm&x3 + mov64 xmm&t1,xmm&x3 + psllq xmm&x3, _RC1_ + psrlq xmm&t1,64-_RC1_ + xorpd xmm&x3,xmm&x2 + xorpd xmm&x3,xmm&t1 + if _SKEIN_DEBUG + Skein_Debug_Round 256,%(_RR_+1),saveRegs + endif +endm ;R_256_OneRound +; +R_256_FourRounds macro _RN_ + R_256_OneRound (_RN_+0),0,1,2,3,4,5 + R_256_OneRound (_RN_+1),2,1,0,3,4,5 + + R_256_OneRound (_RN_+2),0,1,2,3,4,5 + R_256_OneRound (_RN_+3),2,1,0,3,4,5 + + ;inject key schedule + inc edx ;bump round number + movd xmm4,edx + if _UNROLL_CNT eq (ROUNDS_256/8) + ;fully unrolled version +_RK_ = ((_RN_)/4) ;key injection counter + paddq xmm0,[FP_+ksKey+16*((_RK_+1) mod 5)] + paddq xmm1,[FP_+ksKey+16*((_RK_+2) mod 5)] + paddq xmm2,[FP_+ksKey+16*((_RK_+3) mod 5)] + paddq xmm3,[FP_+ksKey+16*((_RK_+4) mod 5)] + paddq xmm1,[FP_+ksTwk+16*((_RK_+1) mod 3)] + paddq 
xmm2,[FP_+ksTwk+16*((_RK_+2) mod 3)] + paddq xmm3,xmm4 + else ;looping version + paddq xmm0,[SI_+ksKey+16*1] + paddq xmm1,[SI_+ksKey+16*2] + paddq xmm2,[SI_+ksKey+16*3] + paddq xmm3,[SI_+ksKey+16*4] + paddq xmm1,[SI_+ksTwk+16*1] + paddq xmm2,[SI_+ksTwk+16*2] + paddq xmm3,xmm4 +; + mov64 xmm4,;first, "rotate" key schedule on the stack + mov64 xmm5,; (for next time through) + mov64 ,xmm4 + mov64 ,xmm5 + add esi,16 ;bump rolling pointer + endif + if _SKEIN_DEBUG + Skein_Debug_Round 256,SKEIN_RND_KEY_INJECT,saveRegs + endif +endm ;R256_FourRounds +; +if _SKEIN_DEBUG ; macros for saving/restoring X_stk for debug routines +Put_XMM_256 equ +Get_XMM_256 equ + +_Put_XMM_256: + irp _NN_,<0,1,2,3> + mov64 ptr64 [esp+X_stk+4+_NN_*8],xmm&_NN_ + endm + ret +; +_Get_XMM_256: + irp _NN_,<0,1,2,3> + mov64 xmm&_NN_,ptr64 [esp+X_stk+4+_NN_*8] + endm + ret +endif +; +;;;;;;;;;;;;;;;;; +; +; code +; +_Skein_256_Process_Block proc near + WCNT = 4 ;WCNT=4 for Skein-256 + Setup_Stack WCNT,ROUNDS_256 + ; main hash loop for Skein_256 +Skein_256_block_loop: + movd xmm4,ptr32 [ebx+bitAdd] + mov64 xmm5,ptr64 [edi+TWEAK+0] + mov64 xmm6,ptr64 [edi+TWEAK+8] + paddq xmm5,xmm4 ;bump T0 by the bitAdd parameter + mov64 ptr64 [edi+TWEAK],xmm5 ;save updated tweak value T0 (for next time) + movapd xmm7,xmm6 + xorpd xmm7,xmm5 ;compute overall tweak parity + movdqa [FP_+ksTwk ],xmm5 ;save the expanded tweak schedule on the stack + movdqa [FP_+ksTwk+16],xmm6 + movdqa [FP_+ksTwk+32],xmm7 + + mov esi,[ebx+blkPtr] ;esi --> input block + mov eax,KW_PARITY_LO ;init key schedule parity accumulator + mov edx,KW_PARITY_HI + movd xmm4,eax + movd xmm0,edx + unpcklps xmm4,xmm0 ;pack two 32-bit words into xmm4 +; + irp _NN_,<0,1,2,3> ;copy in the chaining vars + mov64 xmm&_NN_,ptr64 [edi+X_VARS+8*_NN_] + xorpd xmm4,xmm&_NN_ ;update overall parity + movdqa [FP_+ksKey+16*_NN_],xmm&_NN_ + endm + movdqa [FP_+ksKey+16*WCNT],xmm4;save overall parity at the end of the array +; + paddq xmm1,xmm5 ;inject the initial tweak 
words + paddq xmm2,xmm6 +; + irp _NN_,<0,1,2,3> ;perform the initial key injection + mov64 xmm4,ptr64 [esi+8*_NN_] ;and save a copy of the input block on stack + mov64 ptr64 [esp+8*_NN_+Wcopy],xmm4 + paddq xmm&_NN_,xmm4 + endm +; +if _SKEIN_DEBUG ;debug dump of state at this point + Skein_Debug_Block 256 + Skein_Debug_Round 256,SKEIN_RND_KEY_INITIAL,saveRegs +endif + add esi, WCNT*8 ;skip to the next block + mov [ebx+blkPtr ],esi ;save the updated block pointer + ; + ; now the key schedule is computed. Start the rounds + ; + xor edx,edx ;edx = iteration count +if SKEIN_ASM_UNROLL and 256 +_UNROLL_CNT = ROUNDS_256/8 ;fully unrolled +else +_UNROLL_CNT = SKEIN_UNROLL_256 ;partial unroll count + if ((ROUNDS_256/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_256" ;sanity check + endif + mov esi,ebp ;use this as "rolling" pointer into ksTwk/ksKey +Skein_256_round_loop: ; (since there's no 16* scaled address mode) +endif +; +_Rbase_ = 0 +rept _UNROLL_CNT*2 ; here with X[0..3] in XMM0..XMM3 + R_256_FourRounds _Rbase_ +_Rbase_ = _Rbase_+4 +endm ;rept _UNROLL_CNT*2 +; + if _UNROLL_CNT ne (ROUNDS_256/8) + cmp edx,2*(ROUNDS_256/8) + jb Skein_256_round_loop + endif + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..3} + irp _NN_,<0,1,2,3> + mov64 xmm4,ptr64 [esp+Wcopy+8*_NN_] + xorpd xmm&_NN_,xmm4 + mov64 ptr64 [edi+X_VARS+8*_NN_],xmm&_NN_ + endm + and ptr08 [edi +TWEAK +15],FIRST_MASK8 +if _SKEIN_DEBUG + Skein_Debug_Round 256,SKEIN_RND_FEED_FWD,saveRegs +endif + ; go back for more blocks, if needed + dec ecx + jnz Skein_256_block_loop + + Reset_Stack _Skein_256_Process_Block + ret +; +_Skein_256_Process_Block endp +; +ifdef _SKEIN_CODE_SIZE + public _Skein_256_Process_Block_CodeSize +_Skein_256_Process_Block_CodeSize proc + mov eax,_Skein_256_Process_Block_CodeSize - _Skein_256_Process_Block + ret +_Skein_256_Process_Block_CodeSize endp +; + public _Skein_256_Unroll_Cnt +_Skein_256_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_256/8 + mov 
eax,_UNROLL_CNT + else + xor eax,eax + endif + ret +_Skein_256_Unroll_Cnt endp +endif +endif ;_USE_ASM_ and 256 +; +;---------------------------------------------------------------- +; +if _USE_ASM_ and 512 + public _Skein_512_Process_Block +; +; void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +;;;;;;;;;;;;;;;;; +; MACRO: one round +; +R_512_Round macro _RR_, a0,a1,Ra, b0,b1,Rb, c0,c1,Rc, d0,d1,Rd +irp _nr_,<%((_RR_) and 7)> +_Ra_ = RC_512_&_nr_&_&Ra +_Rb_ = RC_512_&_nr_&_&Rb +_Rc_ = RC_512_&_nr_&_&Rc +_Rd_ = RC_512_&_nr_&_&Rd +endm + paddq xmm&a0,xmm&a1 + _stX c0 + mov64 xmm&c0,xmm&a1 + psllq xmm&a1, _Ra_ + psrlq xmm&c0,64-_Ra_ + xorpd xmm&a1,xmm&c0 + xorpd xmm&a1,xmm&a0 + + paddq xmm&b0,xmm&b1 + _stX a0 + mov64 xmm&a0,xmm&b1 + psllq xmm&b1, _Rb_ + psrlq xmm&a0,64-_Rb_ + xorpd xmm&b1,xmm&b0 + _ldX c0 + xorpd xmm&b1,xmm&a0 + + paddq xmm&c0,xmm&c1 + mov64 xmm&a0,xmm&c1 + psllq xmm&c1, _Rc_ + psrlq xmm&a0,64-_Rc_ + xorpd xmm&c1,xmm&c0 + xorpd xmm&c1,xmm&a0 + + paddq xmm&d0,xmm&d1 + mov64 xmm&a0,xmm&d1 + psllq xmm&d1, _Rd_ + psrlq xmm&a0,64-_Rd_ + xorpd xmm&d1,xmm&a0 + _ldX a0 + xorpd xmm&d1,xmm&d0 + if _SKEIN_DEBUG + Skein_Debug_Round 512,%(_RR_+1),saveRegs + endif +endm +; +; MACRO: four rounds +R_512_FourRounds macro _RN_ + R_512_Round (_RN_) , 0,1,0, 2,3,1, 4,5,2, 6,7,3 + R_512_Round (_RN_)+1, 2,1,0, 4,7,1, 6,5,2, 0,3,3 + R_512_Round (_RN_)+2, 4,1,0, 6,3,1, 0,5,2, 2,7,3 + R_512_Round (_RN_)+3, 6,1,0, 0,7,1, 2,5,2, 4,3,3 + + ;inject key schedule + irp _NN_,<0,1,2,3,4,5,6,7> + if _UNROLL_CNT eq (ROUNDS_512/8) + paddq xmm&_NN_,[FP_+ksKey+16*((((_RN_)/4)+(_NN_)+1) mod 9)] + else + paddq xmm&_NN_,[SI_+ksKey+16*((_NN_)+1)] + endif + endm + _stX 0 ;free up a register + inc edx ;bump round counter + movd xmm0,edx ;inject the tweak + if _UNROLL_CNT eq (ROUNDS_512/8) + paddq xmm5,[FP_+ksTwk+16*(((_RN_)+1) mod 3)] + paddq xmm6,[FP_+ksTwk+16*(((_RN_)+2) mod 3)] + paddq xmm7,xmm0 + else ;looping version + paddq 
xmm5,[SI_+ksTwk+16*1] + paddq xmm6,[SI_+ksTwk+16*2] + paddq xmm7,xmm0 +; + mov64 xmm0,;first, "rotate" key schedule on the stack + mov64 ,xmm0 + mov64 xmm0,; (for next time through) + mov64 ,xmm0 + add esi,16 ;bump rolling pointer + endif + _ldX 0 ;restore X0 + if _SKEIN_DEBUG + Skein_Debug_Round 512,SKEIN_RND_KEY_INJECT,saveRegs + endif +endm ;R_512_FourRounds +;;;;;;;;;;;;;;;;; +if _SKEIN_DEBUG ; macros for saving/restoring X_stk for debug routines +Put_XMM_512 equ +Get_XMM_512 equ + +_Put_XMM_512: + irp _NN_,<0,1,2,3,4,5,6,7> + mov64 ptr64 [esp+X_stk+4+_NN_*8],xmm&_NN_ + endm + ret +; +_Get_XMM_512: + irp _NN_,<0,1,2,3,4,5,6,7> + mov64 xmm&_NN_,ptr64 [esp+X_stk+4+_NN_*8] + endm + ret +endif +; +;;;;;;;;;;;;;;;;; +; code +; +_Skein_512_Process_Block proc near + WCNT = 8 ;WCNT=8 for Skein-512 + Setup_Stack WCNT,ROUNDS_512 + ; main hash loop for Skein_512 +Skein_512_block_loop: + movd xmm0,ptr32 [ebx+bitAdd] + mov64 xmm1,ptr64 [edi+TWEAK+0] + mov64 xmm2,ptr64 [edi+TWEAK+8] + paddq xmm1,xmm0 ;bump T0 by the bitAdd parameter + mov64 ptr64 [edi+TWEAK],xmm1 ;save updated tweak value T0 (for next time) + mov64 xmm0,xmm2 + xorpd xmm0,xmm1 ;compute overall tweak parity + movdqa [FP_+ksTwk ],xmm1 ;save the expanded tweak schedule on the stack + movdqa [FP_+ksTwk+16*1],xmm2 + movdqa [FP_+ksTwk+16*2],xmm0 + + mov esi,[ebx+blkPtr] ;esi --> input block + mov eax,KW_PARITY_LO ;init key schedule parity accumulator + mov edx,KW_PARITY_HI + movd xmm0,eax + movd xmm7,edx + unpcklps xmm0,xmm7 ;pack two 32-bit words into xmm0 +; + irp _NN_,<7,6,5,4,3,2,1> ;copy in the chaining vars (skip #0 for now) + mov64 xmm&_NN_,ptr64 [edi+X_VARS+8*_NN_] + xorpd xmm0,xmm&_NN_ ;update overall parity + movdqa [FP_+ksKey+16*_NN_],xmm&_NN_ + if _NN_ eq 5 + paddq xmm5,xmm1 ;inject the initial tweak words + paddq xmm6,xmm2 ; (before they get trashed in xmm1/2) + endif + endm + mov64 xmm4,ptr64 [edi+X_VARS] ;handle #0 now + xorpd xmm0,xmm4 ;update overall parity + movdqa [FP_+ksKey+16* 0 ],xmm4;save the 
key value in slot #0 + movdqa [FP_+ksKey+16*WCNT],xmm0;save overall parity at the end of the array +; + mov64 xmm0,xmm4 + irp _NN_,<7,6,5, 4,3,2,1,0> ;perform the initial key injection (except #4) + mov64 xmm4,ptr64 [esi+ 8*_NN_];and save a copy of the input block on stack + mov64 ptr64 [esp+ 8*_NN_+Wcopy],xmm4 + paddq xmm&_NN_,xmm4 + endm + mov64 xmm4,ptr64 [esi+ 8*4] ;get input block word #4 + mov64 ptr64 [esp+ 8*4+Wcopy],xmm4 + paddq xmm4,[FP_+ksKey+16*4] ;inject the initial key +; +if _SKEIN_DEBUG ;debug dump of state at this point + Skein_Debug_Block 512 + Skein_Debug_Round 512,SKEIN_RND_KEY_INITIAL,saveRegs +endif + add esi, WCNT*8 ;skip to the next block + mov [ebx+blkPtr],esi ;save the updated block pointer + ; + ; now the key schedule is computed. Start the rounds + ; + xor edx,edx ;edx = round counter +if SKEIN_ASM_UNROLL and 512 +_UNROLL_CNT = ROUNDS_512/8 +else +_UNROLL_CNT = SKEIN_UNROLL_512 + if ((ROUNDS_512/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_512" + endif + mov esi,ebp ;use this as "rolling" pointer into ksTwk/ksKey +Skein_512_round_loop: ; (since there's no 16* scaled address mode) +endif +_Rbase_ = 0 +rept _UNROLL_CNT*2 + R_512_FourRounds _Rbase_ +_Rbase_ = _Rbase_+4 +endm ;rept _UNROLL_CNT +; +if (SKEIN_ASM_UNROLL and 512) eq 0 + cmp edx,2*(ROUNDS_512/8) + jb Skein_512_round_loop +endif + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7} + and ptr08 [edi +TWEAK +15],FIRST_MASK8 +irp _NN_,<0,2,4,6> ;do the aligned ones first + xorpd xmm&_NN_,[esp+Wcopy+8*_NN_] + mov64 ptr64 [edi+X_VARS+8*_NN_],xmm&_NN_ +endm +irp _NN_,<1,3,5,7> ;now we have some register space available + mov64 xmm0,ptr64 [esp+Wcopy+8*_NN_] + xorpd xmm&_NN_,xmm0 + mov64 ptr64 [edi+X_VARS+8*_NN_],xmm&_NN_ +endm +if _SKEIN_DEBUG + Skein_Debug_Round 512,SKEIN_RND_FEED_FWD +endif + ; go back for more blocks, if needed + dec ecx + jnz Skein_512_block_loop + + Reset_Stack _Skein_512_Process_Block + ret +_Skein_512_Process_Block endp +; +ifdef 
_SKEIN_CODE_SIZE + public _Skein_512_Process_Block_CodeSize +_Skein_512_Process_Block_CodeSize proc + mov eax,_Skein_512_Process_Block_CodeSize - _Skein_512_Process_Block + ret +_Skein_512_Process_Block_CodeSize endp +; + public _Skein_512_Unroll_Cnt +_Skein_512_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_512/8 + mov eax,_UNROLL_CNT + else + xor eax,eax + endif + ret +_Skein_512_Unroll_Cnt endp +endif +; +endif ; _USE_ASM_ and 512 +; +;---------------------------------------------------------------- +; +if _USE_ASM_ and 1024 + public _Skein1024_Process_Block +; +; void Skein_1024_Process_Block(Skein_1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd); +; +R_1024_REGS equ (5) ;keep this many block variables in registers +; +;;;;;;;;;;;;;;;; +if _SKEIN_DEBUG ; macros for saving/restoring X_stk for debug routines +Put_XMM_1024 equ +Get_XMM_1024 equ + +_Put_XMM_1024: +_NN_ = 0 + rept R_1024_REGS + irp _rr_,<%(_NN_)> + mov64 ptr64 [esp+X_stk+4+8*_NN_],xmm&_rr_ + endm +_NN_ = _NN_+1 + endm + ret +; +_Get_XMM_1024: +_NN_ = 0 + rept R_1024_REGS + irp _rr_,<%(_NN_)> + mov64 xmm&_rr_,ptr64 [esp+X_stk+4+8*_NN_] + endm +_NN_ = _NN_+1 + endm + ret +endif +; +;;;;;;;;;;;;;;;;; +; MACRO: one mix step +MixStep_1024 macro x0,x1,rotIdx0,rotIdx1,_debug_ +_r0_ = x0 ;default, if already loaded +_r1_ = x1 + ; load the regs (if necessary) + if (x0 ge R_1024_REGS) +_r0_ = 5 + mov64 xmm5,ptr64 [esp+X_stk+8*(x0)] + endif + if (x1 ge R_1024_REGS) +_r1_ = 6 + mov64 xmm6,ptr64 [esp+X_stk+8*(x1)] + endif + ; do the mix + irp _rx_,<%((rotIdx0) and 7)> +_Rc_ = RC_1024_&_rx_&_&rotIdx1 ;rotation constant + endm + irp _x0_,<%_r0_> + irp _x1_,<%_r1_> + paddq xmm&_x0_,xmm&_x1_ + mov64 xmm7 ,xmm&_x1_ + psllq xmm&_x1_, _Rc_ + psrlq xmm7 ,64-_Rc_ + xorpd xmm&_x1_,xmm&_x0_ + xorpd xmm&_x1_,xmm7 + endm + endm + ; save the regs (if necessary) + if (x0 ge R_1024_REGS) + mov64 ptr64 [esp+X_stk+8*(x0)],xmm5 + endif + if (x1 ge R_1024_REGS) + mov64 ptr64 [esp+X_stk+8*(x1)],xmm6 + endif + ; 
debug output + if _SKEIN_DEBUG and (0 ne (_debug_ + 0)) + Skein_Debug_Round 1024,%((RotIdx0)+1),saveRegs + endif +endm +;;;;;;;;;;;;;;;;; +; MACRO: four rounds +; +R_1024_FourRounds macro _RR_ + ;--------- round _RR_ + MixStep_1024 0, 1,%((_RR_)+0),0 + MixStep_1024 2, 3,%((_RR_)+0),1 + MixStep_1024 4, 5,%((_RR_)+0),2 + MixStep_1024 6, 7,%((_RR_)+0),3 + MixStep_1024 8, 9,%((_RR_)+0),4 + MixStep_1024 10,11,%((_RR_)+0),5 + MixStep_1024 12,13,%((_RR_)+0),6 + MixStep_1024 14,15,%((_RR_)+0),7,1 + ;--------- round _RR_+1 + MixStep_1024 0, 9,%((_RR_)+1),0 + MixStep_1024 2,13,%((_RR_)+1),1 + MixStep_1024 6,11,%((_RR_)+1),2 + MixStep_1024 4,15,%((_RR_)+1),3 + MixStep_1024 10, 7,%((_RR_)+1),4 + MixStep_1024 12, 3,%((_RR_)+1),5 + MixStep_1024 14, 5,%((_RR_)+1),6 + MixStep_1024 8, 1,%((_RR_)+1),7,1 + ;--------- round _RR_+2 + MixStep_1024 0, 7,%((_RR_)+2),0 + MixStep_1024 2, 5,%((_RR_)+2),1 + MixStep_1024 4, 3,%((_RR_)+2),2 + MixStep_1024 6, 1,%((_RR_)+2),3 + MixStep_1024 12,15,%((_RR_)+2),4 + MixStep_1024 14,13,%((_RR_)+2),5 + MixStep_1024 8,11,%((_RR_)+2),6 + MixStep_1024 10, 9,%((_RR_)+2),7,1 + ;--------- round _RR_+3 + MixStep_1024 0,15,%((_RR_)+3),0 + MixStep_1024 2,11,%((_RR_)+3),1 + MixStep_1024 6,13,%((_RR_)+3),2 + MixStep_1024 4, 9,%((_RR_)+3),3 + MixStep_1024 14, 1,%((_RR_)+3),4 + MixStep_1024 8, 5,%((_RR_)+3),5 + MixStep_1024 10, 3,%((_RR_)+3),6 + MixStep_1024 12, 7,%((_RR_)+3),7,1 + + inc edx ;edx = round number + movd xmm7,edx + ;inject the key +irp _NN_,<15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0> + if _UNROLL_CNT ne (ROUNDS_1024/8) + if _NN_ lt R_1024_REGS + paddq xmm&_NN_,ptr64 [SI_+ksKey+16*_NN_+16] + else + mov64 xmm6 ,ptr64 [esp+X_stk+ 8*_NN_] + if _NN_ eq 15 + paddq xmm6,xmm7 + elseif _NN_ eq 14 + paddq xmm6,ptr64 [SI_+ksTwk+16*2] + elseif _NN_ eq 13 + paddq xmm6,ptr64 [SI_+ksTwk+16*1] + endif + paddq xmm6 ,ptr64 [SI_+ksKey+16*_NN_+16] + mov64 ptr64 [esp+X_stk+ 8*_NN_],xmm6 + endif + else + if _NN_ lt R_1024_REGS + paddq xmm&_NN_,ptr64 
[FP_+ksKey+16*(((_Rbase_/4)+(_NN_)+1) mod 17)] + else + mov64 xmm6,ptr64 [esp+X_stk+ 8*_NN_] + paddq xmm6,ptr64 [FP_+ksKey+16*(((_Rbase_/4)+(_NN_)+1) mod 17)] + if _NN_ eq 15 + paddq xmm6,xmm7 + elseif _NN_ eq 14 + paddq xmm6,ptr64 [FP_+ksTwk+16*(((_Rbase_/4)+2) mod 3)] + elseif _NN_ eq 13 + paddq xmm6,ptr64 [FP_+ksTwk+16*(((_Rbase_/4)+1) mod 3)] + endif + mov64 ptr64 [esp+X_stk+ 8*_NN_],xmm6 + endif + endif +endm +if _UNROLL_CNT ne (ROUNDS_1024/8) ;rotate the key schedule on the stack + mov64 xmm6,ptr64 [SI_+ksKey] + mov64 xmm7,ptr64 [SI_+ksTwk] + mov64 ptr64 [SI_+ksKey+16*(WCNT+1)],xmm6 + mov64 ptr64 [SI_+ksTwk+16* 3 ],xmm7 + add esi,16 ;bump rolling pointer +endif +if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_KEY_INJECT ,saveRegs +endif +endm ;R_1024_FourRounds +;;;;;;;;;;;;;;;; +; code +; +_Skein1024_Process_Block proc near +; + WCNT = 16 ;WCNT=16 for Skein-1024 + Setup_Stack WCNT,ROUNDS_1024 + add edi,80h ;bias the edi ctxt offsets to keep them all short +ctx equ ;offset alias + ; main hash loop for Skein1024 +Skein1024_block_loop: + movd xmm0,ptr32 [ebx+bitAdd] + mov64 xmm1,ptr64 [ctx+TWEAK+0] + mov64 xmm2,ptr64 [ctx+TWEAK+8] + paddq xmm1,xmm0 ;bump T0 by the bitAdd parameter + mov64 ptr64 [ctx+TWEAK],xmm1 ;save updated tweak value T0 (for next time) + mov64 xmm0,xmm2 + xorpd xmm0,xmm1 ;compute overall tweak parity + movdqa [FP_+ksTwk ],xmm1 ;save the expanded tweak schedule on the stack + movdqa [FP_+ksTwk+16],xmm2 + movdqa [FP_+ksTwk+32],xmm0 + + mov esi,[ebx+blkPtr] ;esi --> input block + mov eax,KW_PARITY_LO ;init key schedule parity accumulator + mov edx,KW_PARITY_HI + movd xmm7,eax + movd xmm6,edx + unpcklps xmm7,xmm6 ;pack two 32-bit words into xmm7 +; + lea eax,[esp+80h] ;use short offsets for Wcopy, X_stk writes below +SP_ equ ;[eax+OFFS] mode is one byte shorter than [esp+OFFS] +irp _NN_,<15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0> + mov64 xmm6,ptr64 [ctx+X_VARS+8*_NN_] + xorpd xmm7,xmm6 ;update overall parity + movdqa [FP_+ksKey+16*_NN_],xmm6;save 
the key schedule on the stack + if _NN_ lt R_1024_REGS + _rr_ = _NN_ + else + _rr_ = R_1024_REGS + endif + irp _rn_,<%(_rr_)> + mov64 xmm&_rn_,ptr64 [esi+ 8*_NN_];save copy of the input block on stack + mov64 ptr64 [SP_+ Wcopy + 8*_NN_],xmm&_rn_ ;(for feedforward later) + paddq xmm&_rn_,xmm6 ;inject the key into the block + if _NN_ eq 13 + paddq xmm&_rn_,xmm1 ;inject the initial tweak words + elseif _NN_ eq 14 + paddq xmm&_rn_,xmm2 + endif + if _NN_ ge R_1024_REGS ;only save X[5..15] on stack, leave X[0..4] in regs + mov64 ptr64 [SP_+X_stk+8*_NN_],xmm&_rn_ + endif + endm +endm + movdqa [FP_+ksKey+16*WCNT],xmm7;save overall key parity at the end of the array +; +if _SKEIN_DEBUG ;debug dump of state at this point + Skein_Debug_Block 1024 + Skein_Debug_Round 1024,SKEIN_RND_KEY_INITIAL,saveRegs +endif + add esi, WCNT*8 ;skip to the next block + mov [ebx+blkPtr],esi ;save the updated block pointer + ; + ; now the key schedule is computed. Start the rounds + ; + xor edx,edx ;edx = round counter +if SKEIN_ASM_UNROLL and 1024 +_UNROLL_CNT = ROUNDS_1024/8 +else +_UNROLL_CNT = SKEIN_UNROLL_1024 + if ((ROUNDS_1024/8) mod _UNROLL_CNT) + .err "Invalid SKEIN_UNROLL_1024" + endif + mov esi,ebp ;use this as "rolling" pointer into ksTwk/ksKey +Skein_1024_round_loop: +endif +; +_Rbase_ = 0 +rept _UNROLL_CNT*2 + R_1024_FourRounds %_Rbase_ +_Rbase_ = _Rbase_+4 +endm ;rept _UNROLL_CNT +; +if (SKEIN_ASM_UNROLL and 1024) eq 0 + cmp edx,2*(ROUNDS_1024/8) + jb Skein_1024_round_loop +endif + and ptr08 [ctx +TWEAK +15],FIRST_MASK8 ;clear tweak bit for next time thru + ;---------------------------- + ; feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..15} + lea eax,[esp+80h] ;allow short offsets to X_stk and Wcopy +irp _NN_,<0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15> + if _NN_ lt R_1024_REGS + if _NN_ and 1 ;already in regs: no load needed + mov64 xmm7 ,ptr64 [SP_+ Wcopy + 8*_NN_] ;unaligned + xorpd xmm&_NN_,xmm7 + else + xorpd xmm&_NN_, [SP_+ Wcopy + 8*_NN_] ;aligned + endif + mov64 ptr64 [ctx+ 
X_vars+ 8*_NN_],xmm&_NN_ + else + mov64 xmm7,ptr64 [SP_+ X_stk + 8*_NN_] ;load X value from stack + if _NN_ and 1 + mov64 xmm6,ptr64 [SP_+ Wcopy + 8*_NN_] ;unaligned + xorpd xmm7,xmm6 + else + xorpd xmm7, [SP_+ Wcopy + 8*_NN_] ;aligned + endif + mov64 ptr64 [ctx+ X_vars+ 8*_NN_],xmm7 + endif +endm +if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_FEED_FWD ;no need to save regs on stack here +endif + ; go back for more blocks, if needed + dec ecx + jnz Skein1024_block_loop + + Reset_Stack _Skein1024_Process_Block + ret +_Skein1024_Process_Block endp +; +ifdef _SKEIN_CODE_SIZE + public _Skein1024_Process_Block_CodeSize +_Skein1024_Process_Block_CodeSize proc + mov eax,_Skein1024_Process_Block_CodeSize - _Skein1024_Process_Block + ret +_Skein1024_Process_Block_CodeSize endp +; + public _Skein1024_Unroll_Cnt +_Skein1024_Unroll_Cnt proc + if _UNROLL_CNT ne ROUNDS_1024/8 + mov eax,_UNROLL_CNT + else + xor eax,eax + endif + ret +_Skein1024_Unroll_Cnt endp +endif +; +endif ; _USE_ASM_ and 1024 +;---------------------------------------------------------------- + end diff --git a/Additional_Implementations/skein_block_xmm32.s b/Additional_Implementations/skein_block_xmm32.s new file mode 100644 index 000000000000..fa10bd2b98c1 --- /dev/null +++ b/Additional_Implementations/skein_block_xmm32.s @@ -0,0 +1,1110 @@ +# +#---------------------------------------------------------------- +# 32-bit x86 assembler code for Skein block functions using XMM registers +# +# Author: Doug Whiting, Hifn/Exar +# +# This code is released to the public domain. 
+#----------------------------------------------------------------
+# assembler setup
+    .text
+    .altmacro                       #use advanced macro features
+    .psize 0,128                    #list file has no page boundaries
+#
+_MASK_ALL_  = (256+512+1024)        #all three algorithm bits
+SAVE_REGS   = 1                     #NOTE(review): not used in this part of the file; presumably the debug-macro flag -- confirm
+#
+#################
+.ifndef SKEIN_USE_ASM
+_USE_ASM_ = _MASK_ALL_
+.elseif SKEIN_USE_ASM & _MASK_ALL_
+_USE_ASM_ = SKEIN_USE_ASM
+.else
+_USE_ASM_ = _MASK_ALL_
+.endif
+# _USE_ASM_ is now a bit mask (256/512/1024); an override with no valid bit falls back to all
+#################
+.ifndef SKEIN_LOOP
+_SKEIN_LOOP = 002                   #default is all fully unrolled, except Skein1024
+.else
+_SKEIN_LOOP = SKEIN_LOOP
+.endif
+#--------------
+# the unroll counts: one decimal digit of _SKEIN_LOOP each (0 --> fully unrolled)
+SKEIN_UNROLL_256  = (_SKEIN_LOOP / 100) % 10
+SKEIN_UNROLL_512  = (_SKEIN_LOOP /  10) % 10
+SKEIN_UNROLL_1024 = (_SKEIN_LOOP      ) % 10
+# SKEIN_ASM_UNROLL = sum of the algorithm bits whose unroll count is 0
+SKEIN_ASM_UNROLL  = 0
+  .irp _NN_,256,512,1024
+    .if (SKEIN_UNROLL_\_NN_) == 0
+SKEIN_ASM_UNROLL  = SKEIN_ASM_UNROLL + \_NN_
+    .endif
+  .endr
+#
+#################
+# round counts: decimal digit d of SKEIN_ROUNDS selects 8*(((d+5) % 10)+5) rounds (d=0 -> 80, d=9 -> 72)
+.ifndef SKEIN_ROUNDS
+ROUNDS_256  = 72
+ROUNDS_512  = 72
+ROUNDS_1024 = 80
+.else
+ROUNDS_256  = 8*((((SKEIN_ROUNDS / 100) + 5) % 10) + 5)
+ROUNDS_512  = 8*((((SKEIN_ROUNDS /  10) + 5) % 10) + 5)
+ROUNDS_1024 = 8*((((SKEIN_ROUNDS      ) + 5) % 10) + 5)
+.irp _NN_,256,512,1024
+  .if _USE_ASM_ & \_NN_             #bit test: announce only the algorithms selected in the mask
+    .irp _RR_,%(ROUNDS_\_NN_)
+      .if \_NN_ < 1024
+.print "+++ SKEIN_ROUNDS_\_NN_ = \_RR_"
+      .else
+.print "+++ SKEIN_ROUNDS_\_NN_ = \_RR_"
+      .endif
+    .endr
+  .endif
+.endr
+.endif
+#################
+# _SKEIN_CODE_SIZE is defined when either SKEIN_CODE_SIZE or SKEIN_PERF is
+.ifdef SKEIN_CODE_SIZE
+_SKEIN_CODE_SIZE = (1)
+.else
+.ifdef SKEIN_PERF                   #use code size if SKEIN_PERF is defined
+_SKEIN_CODE_SIZE = (1)
+.endif
+.endif
+#
+#################
+# _SKEIN_DEBUG is always defined: 1 if SKEIN_DEBUG was supplied, else 0
+.ifndef SKEIN_DEBUG
+_SKEIN_DEBUG = 0
+.else
+_SKEIN_DEBUG = 1
+.endif
+#################
+#
+# define offsets of fields in hash context structure
+#
+HASH_BITS   =  0                    #number of bits of hash output
+BCNT        =  4 + HASH_BITS        #number of bytes in BUFFER[]
+TWEAK       =  4 + BCNT             #tweak values[0..1]
+X_VARS      = 16 + TWEAK            #chaining vars
+#
+#(Note: buffer[] in context structure is NOT needed here :-)
+#
+KW_PARITY_LO= 0xA9FC1A22            #overall parity of key schedule words (hi32/lo32)
+KW_PARITY_HI= 0x1BD11BDA
+FIRST_MASK8 = ~ (1 << 6)            #FIRST block flag bit (byte-wide AND mask that clears bit 6)
+#
+# rotation constants for Skein, named RC_<block bits>_<i>_<j>
+# (presumably i = round number mod 8 and j = mix index within the round -- confirm against the round macros)
+RC_256_0_0 = 14
+RC_256_0_1 = 16
+
+RC_256_1_0 = 52
+RC_256_1_1 = 57
+
+RC_256_2_0 = 23
+RC_256_2_1 = 40
+
+RC_256_3_0 = 5
+RC_256_3_1 = 37
+
+RC_256_4_0 = 25
+RC_256_4_1 = 33
+
+RC_256_5_0 = 46
+RC_256_5_1 = 12
+
+RC_256_6_0 = 58
+RC_256_6_1 = 22
+
+RC_256_7_0 = 32
+RC_256_7_1 = 32
+
+RC_512_0_0 = 46
+RC_512_0_1 = 36
+RC_512_0_2 = 19
+RC_512_0_3 = 37
+
+RC_512_1_0 = 33
+RC_512_1_1 = 27
+RC_512_1_2 = 14
+RC_512_1_3 = 42
+
+RC_512_2_0 = 17
+RC_512_2_1 = 49
+RC_512_2_2 = 36
+RC_512_2_3 = 39
+
+RC_512_3_0 = 44
+RC_512_3_1 = 9
+RC_512_3_2 = 54
+RC_512_3_3 = 56
+
+RC_512_4_0 = 39
+RC_512_4_1 = 30
+RC_512_4_2 = 34
+RC_512_4_3 = 24
+
+RC_512_5_0 = 13
+RC_512_5_1 = 50
+RC_512_5_2 = 10
+RC_512_5_3 = 17
+
+RC_512_6_0 = 25
+RC_512_6_1 = 29
+RC_512_6_2 = 39
+RC_512_6_3 = 43
+
+RC_512_7_0 = 8
+RC_512_7_1 = 35
+RC_512_7_2 = 56
+RC_512_7_3 = 22
+
+RC_1024_0_0 = 24
+RC_1024_0_1 = 13
+RC_1024_0_2 = 8
+RC_1024_0_3 = 47
+RC_1024_0_4 = 8
+RC_1024_0_5 = 17
+RC_1024_0_6 = 22
+RC_1024_0_7 = 37
+
+RC_1024_1_0 = 38
+RC_1024_1_1 = 19
+RC_1024_1_2 = 10
+RC_1024_1_3 = 55
+RC_1024_1_4 = 49
+RC_1024_1_5 = 18
+RC_1024_1_6 = 23
+RC_1024_1_7 = 52
+
+RC_1024_2_0 = 33
+RC_1024_2_1 = 4
+RC_1024_2_2 = 51
+RC_1024_2_3 = 13
+RC_1024_2_4 = 34
+RC_1024_2_5 = 41
+RC_1024_2_6 = 59
+RC_1024_2_7 = 17
+
+RC_1024_3_0 = 5
+RC_1024_3_1 = 20
+RC_1024_3_2 = 48
+RC_1024_3_3 = 41
+RC_1024_3_4 = 47
+RC_1024_3_5 = 28
+RC_1024_3_6 = 16
+RC_1024_3_7 = 25
+
+RC_1024_4_0 = 41
+RC_1024_4_1 = 9
+RC_1024_4_2 = 37
+RC_1024_4_3 = 31
+RC_1024_4_4 = 12
+RC_1024_4_5 = 47
+RC_1024_4_6 = 44
+RC_1024_4_7 = 30
+
+RC_1024_5_0 = 16
+RC_1024_5_1 = 34
+RC_1024_5_2 = 56
+RC_1024_5_3 = 51
+RC_1024_5_4 = 4
+RC_1024_5_5 = 53
+RC_1024_5_6 = 42
+RC_1024_5_7 = 41
+
+RC_1024_6_0 = 31
+RC_1024_6_1 = 44
+RC_1024_6_2 = 47
+RC_1024_6_3 = 46
+RC_1024_6_4 = 19
+RC_1024_6_5 = 42
+RC_1024_6_6 = 44
+RC_1024_6_7 = 25
+
+RC_1024_7_0 = 9
+RC_1024_7_1 = 48
+RC_1024_7_2 = 35
+RC_1024_7_3 = 52
+RC_1024_7_4 = 23
+RC_1024_7_5 = 31
+RC_1024_7_6 = 37
+RC_1024_7_7 = 20
+#
+#----------------------------------------------------------------
+# declare allocated space on the stack
+#
+# StackVar name,size:
+#   defines the symbol "name" as the current value of _STK_OFFS_ and then
+#   advances _STK_OFFS_ by "size" bytes. No code is emitted.
+#
+.macro StackVar localName,localSize
+\localName = _STK_OFFS_
+_STK_OFFS_ = _STK_OFFS_+(\localSize)
+.endm #StackVar
+#
+#----------------------------------------------------------------
+#
+# MACRO: Configure stack frame, allocate local vars
+#
+# Arguments:
+#   first  = number of 64-bit words per block (sizes X_stk, Wcopy and ksKey)
+#   second = round count (sizes ksRot, which is only reserved when the
+#            conditional inside the macro selects the looping build)
+#
+# Symbols defined (two independent offset tables):
+#   X_stk, Wcopy, ksTwk, ksKey, [ksRot], LOCAL_SIZE  -- offsets from the
+#        re-aligned esp; FRAME_OFFS (alias F_O) = ksTwk+128 is the ebp bias
+#   savRegs, retAddr, ctxPtr, blkPtr, blkCnt, bitAdd -- offsets from ebx
+#
+# Register state after the emitted prologue:
+#   ebx = esp as it was immediately after pushal (base of the second table)
+#   esp = ebx - LOCAL_SIZE, rounded down to a 16-byte boundary
+#   ebp = esp + FRAME_OFFS
+#   edi = ctxPtr argument
+#   ecx = blkCnt argument
+#
+.macro Setup_Stack WCNT,RND_CNT
+_STK_OFFS_ = 0 #starting offset from esp, forced on 16-byte alignment
+    #----- local variables #<-- esp
+    StackVar X_stk , 8*(WCNT) #local context vars
+    StackVar Wcopy , 8*(WCNT) #copy of input block
+    StackVar ksTwk ,16*3 #key schedule: tweak words
+    StackVar ksKey ,16*(WCNT)+16#key schedule: key words
+FRAME_OFFS = ksTwk+128 #<-- ebp
+F_O = FRAME_OFFS #syntactic shorthand
+    .if (SKEIN_ASM_UNROLL && (WCNT*64)) == 0
+    StackVar ksRot,16*(RND_CNT/4)#leave space for ks "rotation" to happen
+    .endif
+LOCAL_SIZE = _STK_OFFS_ #size of local vars
+    #
+    #"restart" the stack defns, because we relocate esp to guarantee alignment
+    # (i.e., these vars are NOT at fixed offsets from esp)
+_STK_OFFS_ = 0
+    #-----
+    StackVar savRegs,8*4 #pushad data
+    StackVar retAddr,4 #return address
+    #----- caller parameters
+    StackVar ctxPtr ,4 #context ptr
+    StackVar blkPtr ,4 #pointer to block data
+    StackVar blkCnt ,4 #number of full blocks to process
+    StackVar bitAdd ,4 #bit count to add to tweak
+    #----- caller's stack frame
+#
+# Notes on stack frame setup:
+# * the most used variable (except for Skein-256) is X_stk[], based at [esp+0]
+# * the next most used is the key schedule words
+# so ebp is "centered" there, allowing short offsets to the key/tweak
+# schedule in 256/512-bit Skein cases, but not possible for Skein-1024 :-(
+# * the Wcopy variables are infrequently accessed, and they have long
+# offsets from both esp and ebp only in the 1024-bit case.
+# * all other local vars and calling parameters can be accessed
+# with short offsets, except in the 1024-bit case
+#
+    pushal #save all regs
+    movl %esp,%ebx #keep ebx as pointer to caller parms
+    subl $LOCAL_SIZE,%esp #make room for the locals
+    andl $~15,%esp #force alignment
+    movl ctxPtr(%ebx),%edi #edi --> Skein context
+    leal FRAME_OFFS(%esp),%ebp #maximize use of short offsets from ebp
+    movl blkCnt(%ebx),%ecx #keep block cnt in ecx
+.endm #Setup_Stack
+#
+#----------------------------------------------------------------
+#
+# MACRO: undo Setup_Stack
+#   reloads esp from ebx (the value captured right after pushal) and pops
+#   the saved registers. The macro argument is accepted but not referenced
+#   by the body. The local area is not cleared.
+#
+.macro Reset_Stack,procStart
+    movl %ebx,%esp #get rid of locals (wipe??)
+    popal #restore all regs
+.endm # Reset_Stack
+#
+#----------------------------------------------------------------
+# macros to help debug internals
+#
+.if _SKEIN_DEBUG
+    .extern _Skein_Show_Block #calls to C routines
+    .extern _Skein_Show_Round
+#
+# pseudo round numbers passed to _Skein_Show_Round for non-round events
+SKEIN_RND_SPECIAL = 1000
+SKEIN_RND_KEY_INITIAL = SKEIN_RND_SPECIAL+0
+SKEIN_RND_KEY_INJECT = SKEIN_RND_SPECIAL+1
+SKEIN_RND_FEED_FWD = SKEIN_RND_SPECIAL+2
+#
+# MACRO: dump the per-block state through the C routine _Skein_Show_Block
+#   The argument is the block size in bits; it selects the _Put_XMM_nnn /
+#   _Get_XMM_nnn helpers and is passed on as the "bits" parameter.
+#   Sequence: spill the XMM state, save all integer registers, push the
+#   seven arguments (last one first), call, drop the arguments, restore the
+#   integer registers and reload the XMM state.
+#
+.macro Skein_Debug_Block BLK_BITS
+#
+#void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,
+# const u08b_t *blkPtr, const u64b_t *wPtr,
+# const u64b_t *ksPtr,const u64b_t *tsPtr)#
+#
+    call _Put_XMM_\BLK_BITS
+    pushal #save all regs
+    leal ksTwk+1-F_O(%ebp),%eax #+1 = flag: "stride" size = 2 qwords
+    leal ksKey+1-F_O(%ebp),%esi
+    leal Wcopy+32(%esp),%ecx #adjust offset by 32 for pushad
+    movl ctxPtr(%ebx) ,%edx #ctx_hdr_ptr
+    leal X_VARS(%edx) ,%edx #edx ==> ctx->X[]
+    pushl %eax #tsPtr
+    pushl %esi #ksPtr
+    pushl %ecx #wPtr
+    pushl blkPtr(%ebx) #blkPtr
+    pushl %edx #ctx->Xptr
+    pushl ctxPtr(%ebx) #ctx_hdr_ptr
+    movl $\BLK_BITS,%eax
+    pushl %eax #bits
+    call _Skein_Show_Block
+    addl $7*4,%esp #discard parameter space on stack
+    popal #restore regs
+#
+    call _Get_XMM_\BLK_BITS
+.endm #Skein_Debug_Block
+
+#
+.macro Skein_Debug_Round BLK_BITS,R,saveRegs=0
+#
+#void Skein_Show_Round(uint_t bits,const 
Skein_Ctxt_Hdr_t *h,int r,const u64b_t *X)# +# + .if \saveRegs + call _Put_XMM_\BLK_BITS + .endif + pushal #save all regs + .if R <> SKEIN_RND_FEED_FWD + leal 32+X_stk(%esp),%eax #adjust offset by 32 for pushal + .else + movl ctxPtr(%ebx),%eax + addl $X_VARS,%eax + .endif + pushl %eax #Xptr + .if (SKEIN_ASM_UNROLL && \BLK_BITS) || (\R >= SKEIN_RND_SPECIAL) + movl $\R,%eax + .else #compute round number from edx, R + leal 1+(((\R)-1) && 3)(,%edx,4),%eax + .endif + pushl %eax #round number + pushl ctxPtr(%ebx) #ctx_hdr_ptr + movl $\BLK_BITS,%eax + pushl %eax #bits + call _Skein_Show_Round + addl $4*4,%esp #discard parameter space on stack + popal #restore regs + .if \saveRegs + call _Get_XMM_\BLK_BITS #save internal vars for debug dump + .endif +.endm #Skein_Debug_Round +.endif #ifdef SKEIN_DEBUG +# +#---------------------------------------------------------------- +# useful macros +.macro _ldX xn + movq X_stk+8*(\xn)(%esp),%xmm\xn +.endm + +.macro _stX xn + movq %xmm\xn,X_stk+8*(\xn)(%esp) +.endm +# +#---------------------------------------------------------------- +# +.macro C_label lName + \lName: #use both "genders" to work across linkage conventions +_\lName: + .global \lName + .global _\lName +.endm +# + +.if _USE_ASM_ & 256 +# +# void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd)# +# +################# +# +# Skein-256 round macros +# +.macro R_256_OneRound _RR_,x0,x1,x2,x3,t0,t1 + .irp _qq_,%((\_RR_) && 7) #figure out which rotation constants to use + .if \x0 == 0 +_RC0_ = RC_256_\_qq_&&_0 +_RC1_ = RC_256_\_qq_&&_1 + .else +_RC0_ = RC_256_\_qq_&&_1 +_RC1_ = RC_256_\_qq_&&_0 + .endif + .endr +# + paddq %xmm\x1,%xmm\x0 + movq %xmm\x1,%xmm\t0 + psllq $ _RC0_,%xmm\x1 + psrlq $64-_RC0_,%xmm\t0 + xorpd %xmm\x0,%xmm\x1 + xorpd %xmm\t0,%xmm\x1 +# + paddq %xmm\x3,%xmm\x2 + movq %xmm\x3,%xmm\t1 + psllq $ _RC1_,%xmm\x3 + psrlq $64-_RC1_,%xmm\t1 + xorpd %xmm\x2,%xmm\x3 + xorpd %xmm\t1,%xmm\x3 + .if _SKEIN_DEBUG + 
Skein_Debug_Round 256,%(\_RR_+1),SAVE_REGS + .endif +.endm #R_256_OneRound +# +.macro R_256_FourRounds _RN_ + R_256_OneRound %(_RN_+0),0,1,2,3,4,5 + R_256_OneRound (_RN_+1),2,1,0,3,4,5 + + R_256_OneRound (_RN_+2),0,1,2,3,4,5 + R_256_OneRound (_RN_+3),2,1,0,3,4,5 + + #inject key schedule + incl %edx #bump round number + movd %edx,%xmm4 + .if _UNROLL_CNT == (ROUNDS_256/8) + #fully unrolled version +_RK_ = ((_RN_)/4) #key injection counter + paddq ksKey+16*((_RK_+1) % 5)-F_O(%ebp),%xmm0 + paddq ksKey+16*((_RK_+2) % 5)-F_O(%ebp),%xmm1 + paddq ksKey+16*((_RK_+3) % 5)-F_O(%ebp),%xmm2 + paddq ksKey+16*((_RK_+4) % 5)-F_O(%ebp),%xmm3 + paddq ksTwk+16*((_RK_+1) % 3)-F_O(%ebp),%xmm1 + paddq ksTwk+16*((_RK_+2) % 3)-F_O(%ebp),%xmm2 + paddq %xmm4,%xmm3 + .else #looping version + paddq ksKey+16*1-F_O(%esi),%xmm0 + paddq ksKey+16*2-F_O(%esi),%xmm1 + paddq ksKey+16*3-F_O(%esi),%xmm2 + paddq ksKey+16*4-F_O(%esi),%xmm3 + paddq ksTwk+16*1-F_O(%esi),%xmm1 + paddq ksTwk+16*2-F_O(%esi),%xmm2 + paddq %xmm4,%xmm3 +# + movq ksKey-F_O(%esi),%xmm4 #first, "rotate" key schedule on the stack + movq ksTwk-F_O(%esi),%xmm5 # (for next time through) + movq %xmm4,ksKey+16*(WCNT+1)-F_O(%esi) + movq %xmm5,ksTwk+16*3-F_O(%esi) + addl $16,%esi #bump rolling pointer + .endif + .if _SKEIN_DEBUG + Skein_Debug_Round 256,SKEIN_RND_KEY_INJECT,SAVE_REGS + .endif +.endm #R256_FourRounds +# +.if _SKEIN_DEBUG # macros for saving/restoring X_stk for debug routines +_Put_XMM_256: + .irp _NN_,0,1,2,3 + movq %xmm\_NN_,X_stk+4+\_NN_*8(%esp) + .endr + ret +# +_Get_XMM_256: + .irp _NN_,0,1,2,3 + movq X_stk+4+_NN_*8(%esp),%xmm\_NN_ + .endr + ret +.endif +# +################# +# +# code +# +C_label Skein_256_Process_Block + WCNT = 4 #WCNT=4 for Skein-256 + Setup_Stack WCNT,ROUNDS_256 + # main hash loop for Skein_256 +Skein_256_block_loop: + movd bitAdd (%ebx),%xmm4 + movq TWEAK+0(%edi),%xmm5 + movq TWEAK+8(%edi),%xmm6 + paddq %xmm4 ,%xmm5 #bump T0 by the bitAdd parameter + movq %xmm5,TWEAK(%edi) #save updated tweak value 
T0 (for next time) + movapd %xmm6,%xmm7 + xorpd %xmm5,%xmm7 #compute overall tweak parity + movdqa %xmm5,ksTwk -F_O(%ebp)#save the expanded tweak schedule on the stack + movdqa %xmm6,ksTwk+16-F_O(%ebp) + movdqa %xmm7,ksTwk+32-F_O(%ebp) + + movl blkPtr(%ebx),%esi #esi --> input block + movl $KW_PARITY_LO,%eax #init key schedule parity accumulator + movl $KW_PARITY_HI,%edx + movd %eax ,%xmm4 + movd %edx ,%xmm0 + unpcklps %xmm0,%xmm4 #replicate parity dword to 64 bits +# + .irp _NN_,0,1,2,3 #copy in the chaining vars + movq X_VARS+8*\_NN_(%edi),%xmm\_NN_ + xorpd %xmm\_NN_,%xmm4 #update overall parity + movdqa %xmm\_NN_,ksKey+16*_NN_-F_O(%ebp) + .endr + movdqa %xmm4,ksKey+16*WCNT-F_O(%ebp)#save overall parity at the end of the array +# + paddq %xmm5,%xmm1 #inject the initial tweak words + paddq %xmm6,%xmm2 +# + .irp _NN_,0,1,2,3 #perform the initial key injection + movq 8*\_NN_(%esi),%xmm4#and save a copy of the input block on stack + movq %xmm4,8*\_NN_+Wcopy(%esp) + paddq %xmm4,%xmm\_NN_ #inject the key word + .endr +# +.if _SKEIN_DEBUG #debug dump of state at this point + Skein_Debug_Block 256 + Skein_Debug_Round 256,SKEIN_RND_KEY_INITIAL,SAVE_REGS +.endif + addl $WCNT*8,%esi #skip to the next block + movl %esi,blkPtr(%ebx) #save the updated block pointer + # + # now the key schedule is computed. 
Start the rounds + # + xorl %edx,%edx #edx = iteration count +.if SKEIN_ASM_UNROLL & 256 +_UNROLL_CNT = ROUNDS_256/8 #fully unrolled +.else +_UNROLL_CNT = SKEIN_UNROLL_256 #partial unroll count + .if ((ROUNDS_256/8) % _UNROLL_CNT) + .error "Invalid SKEIN_UNROLL_256" #sanity check + .endif + movl %ebp,%esi #use this as "rolling" pointer into ksTwk/ksKey +Skein_256_round_loop: # (since there's no 16* scaled address mode) +.endif +# +_Rbase_ = 0 +.rept _UNROLL_CNT*2 # here with X[0..3] in XMM0..XMM3 + R_256_FourRounds _Rbase_ +_Rbase_ = _Rbase_+4 +.endr #rept _UNROLL_CNT*2 +# + .if _UNROLL_CNT <> (ROUNDS_256/8) + cmpl $2*(ROUNDS_256/8),%edx + jb Skein_256_round_loop + .endif + #---------------------------- + # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..3} + .irp _NN_,0,1,2,3 + movq Wcopy+8*\_NN_(%esp),%xmm4 + xorpd %xmm4,%xmm\_NN_ + movq %xmm\_NN_,X_VARS+8*\_NN_(%edi) + .endr + andb $FIRST_MASK8,TWEAK +15(%edi) +.if _SKEIN_DEBUG + Skein_Debug_Round 256,SKEIN_RND_FEED_FWD,SAVE_REGS +.endif + # go back for more blocks, if needed + decl %ecx + jnz Skein_256_block_loop + Reset_Stack _Skein_256_Process_Block + ret +# +.ifdef _SKEIN_CODE_SIZE +C_label Skein_256_Process_Block_CodeSize + movl $_Skein_256_Process_Block_CodeSize - _Skein_256_Process_Block,%eax + ret +# +C_label Skein_256_Unroll_Cnt + .if _UNROLL_CNT <> ROUNDS_256/8 + movl $_UNROLL_CNT,%eax + .else + xorl %eax,%eax + .endif + ret +.endif +.endif #_USE_ASM_ & 256 +# +#---------------------------------------------------------------- +# +.if _USE_ASM_ & 512 +# +# void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t bitcntAdd)# +# +################# +# MACRO: one round +# +.macro R_512_Round _RR_, a0,a1,Ra, b0,b1,Rb, c0,c1,Rc, d0,d1,Rd + .irp _qq_,%((\_RR_) && 7) +_Ra_ = RC_512_\_qq_&&_\Ra +_Rb_ = RC_512_\_qq_&&_\Rb +_Rc_ = RC_512_\_qq_&&_\Rc +_Rd_ = RC_512_\_qq_&&_\Rd + .endr + paddq %xmm\a1 , %xmm\a0 + _stX c0 + movq %xmm\a1 , %xmm\c0 + psllq $ _Ra_ , %xmm\a1 + psrlq 
$64-_Ra_ , %xmm\c0 + xorpd %xmm\c0 , %xmm\a1 + xorpd %xmm\a0 , %xmm\a1 + + paddq %xmm\b1 , %xmm\b0 + _stX a0 + movq %xmm\b1 , %xmm\a0 + psllq $ _Rb_ , %xmm\b1 + psrlq $64-_Rb_ , %xmm\a0 + xorpd %xmm\b0 , %xmm\b1 + _ldX c0 + xorpd %xmm\a0 , %xmm\b1 + + paddq %xmm\c1 , %xmm\c0 + movq %xmm\c1 , %xmm\a0 + psllq $ _Rc_ , %xmm\c1 + psrlq $64-_Rc_ , %xmm\a0 + xorpd %xmm\c0 , %xmm\c1 + xorpd %xmm\a0 , %xmm\c1 + + paddq %xmm\d1 , %xmm\d0 + movq %xmm\d1 , %xmm\a0 + psllq $ _Rd_ , %xmm\d1 + psrlq $64-_Rd_ , %xmm\a0 + xorpd %xmm\a0 , %xmm\d1 + _ldX a0 + xorpd %xmm\d0 , %xmm\d1 + .if _SKEIN_DEBUG + Skein_Debug_Round 512,%(_RR_+1),SAVE_REGS + .endif +.endm +# +# MACRO: four rounds +.macro R_512_FourRounds _RN_ + R_512_Round %((_RN_) ), 0,1,0, 2,3,1, 4,5,2, 6,7,3 + R_512_Round %((_RN_)+1), 2,1,0, 4,7,1, 6,5,2, 0,3,3 + R_512_Round %((_RN_)+2), 4,1,0, 6,3,1, 0,5,2, 2,7,3 + R_512_Round %((_RN_)+3), 6,1,0, 0,7,1, 2,5,2, 4,3,3 + + #inject key schedule +.irp _NN_,0,1,2,3,4,5,6,7 + .if _UNROLL_CNT == (ROUNDS_512/8) + paddq ksKey+16*((((\_RN_)/4)+(\_NN_)+1)%9)-F_O(%ebp),%xmm\_NN_ + .else + paddq ksKey+16*((\_NN_)+1)-F_O(%esi),%xmm\_NN_ + .endif +.endr + _stX 0 #free up a register + incl %edx #bump round counter + movd %edx,%xmm0 #inject the tweak + .if _UNROLL_CNT == (ROUNDS_512/8) + paddq ksTwk+16*(((_RN_)+1) % 3)-F_O(%ebp),%xmm5 + paddq ksTwk+16*(((_RN_)+2) % 3)-F_O(%ebp),%xmm6 + paddq %xmm0 ,%xmm7 + .else #looping version + paddq ksTwk+16*1-F_O(%esi),%xmm5 + paddq ksTwk+16*2-F_O(%esi),%xmm6 + paddq %xmm0 ,%xmm7 + # "rotate" key schedule on the stack (for next time through) + movq ksKey -F_O(%esi),%xmm0 + movq %xmm0,ksKey+16*(WCNT+1)-F_O(%esi) + movq ksTwk -F_O(%esi),%xmm0 + movq %xmm0,ksTwk+16*3 -F_O(%esi) + addl $16,%esi #bump rolling pointer + .endif + _ldX 0 #restore X0 + .if _SKEIN_DEBUG + Skein_Debug_Round 512,SKEIN_RND_KEY_INJECT,SAVE_REGS + .endif +.endm #R_512_FourRounds +################# +.if _SKEIN_DEBUG # macros for saving/restoring X_stk for debug routines +_Put_XMM_512: 
+    .irp _NN_,0,1,2,3,4,5,6,7
+    movq %xmm\_NN_,X_stk+4+\_NN_*8(%esp)
+    .endr
+    ret
+#
+_Get_XMM_512:
+    .irp _NN_,0,1,2,3,4,5,6,7
+    movq X_stk+4+\_NN_*8(%esp),%xmm\_NN_
+    .endr
+    ret
+.endif
+#
+#################
+#
+# Skein_512_Process_Block: hash blkCnt consecutive 64-byte blocks.
+#
+# Stack arguments (cdecl, named by Setup_Stack): ctxPtr, blkPtr, blkCnt, bitAdd
+#
+# Register roles inside the block loop:
+#   edi       = context pointer
+#   ebx       = base for the caller parameters
+#   ecx       = remaining block count
+#   esi       = input block pointer; reused as the rolling key-schedule
+#               pointer in the looping (not fully unrolled) build
+#   edx       = key-injection counter (incremented by R_512_FourRounds)
+#   xmm0-xmm7 = state words X[0..7] (low 64 bits of each register)
+#
+# Per block: T0 += bitAdd; build the tweak/key schedule on the stack; inject
+# the first subkey; run the rounds; store X[i] ^ w[i] back into the context
+# and clear the FIRST flag bit of the tweak. All integer registers are
+# restored by Reset_Stack before returning.
+#
+C_label Skein_512_Process_Block
+    WCNT = 8 #WCNT=8 for Skein-512
+    Setup_Stack WCNT,ROUNDS_512
+    # main hash loop for Skein_512
+Skein_512_block_loop:
+    movd bitAdd(%ebx) ,%xmm0
+    movq TWEAK+0(%edi),%xmm1
+    movq TWEAK+8(%edi),%xmm2
+    paddq %xmm0,%xmm1 #bump T0 by the bitAdd parameter
+    movq %xmm1,TWEAK(%edi) #save updated tweak value T0 (for next time)
+    movq %xmm2,%xmm0
+    xorpd %xmm1,%xmm0 #compute overall tweak parity
+    movdqa %xmm1,ksTwk -F_O(%ebp)#save the expanded tweak schedule on the stack
+    movdqa %xmm2,ksTwk+16*1-F_O(%ebp)
+    movdqa %xmm0,ksTwk+16*2-F_O(%ebp)
+
+    movl blkPtr(%ebx),%esi #esi --> input block
+    movl $KW_PARITY_LO,%eax #init key schedule parity accumulator
+    movl $KW_PARITY_HI,%edx
+    movd %eax ,%xmm0
+    movd %edx ,%xmm7
+    unpcklps %xmm7,%xmm0 #replicate parity dword to 64 bits
+#
+    .irp _NN_,7,6,5,4,3,2,1 #copy in the chaining vars (skip #0 for now)
+    movq X_VARS+8*\_NN_(%edi),%xmm\_NN_
+    xorpd %xmm\_NN_,%xmm0 #update overall parity
+    movdqa %xmm\_NN_,ksKey+16*\_NN_-F_O(%ebp)
+    .if \_NN_ == 5
+    paddq %xmm1,%xmm5 #inject the initial tweak words
+    paddq %xmm2,%xmm6 # (before they get trashed in %xmm1/2)
+    .endif
+    .endr
+    movq X_VARS(%edi),%xmm4 #handle #0 now
+    xorpd %xmm4,%xmm0 #update overall parity
+    movdqa %xmm4,ksKey+16* 0 -F_O(%ebp) #save the key value in slot #0
+    movdqa %xmm0,ksKey+16*WCNT-F_O(%ebp) #save overall parity at the end of the array
+#
+    movq %xmm4,%xmm0
+    # %xmm4 is the scratch register for the block words in this loop, so word
+    # #4 cannot be injected here; it is loaded, saved and keyed right after.
+    .irp _NN_,7,6,5,3,2,1,0 #perform the initial key injection (except #4)
+    movq 8*\_NN_(%esi),%xmm4 #and save a copy of the input block on stack
+    movq %xmm4,8*\_NN_+Wcopy(%esp)
+    paddq %xmm4,%xmm\_NN_
+    .endr
+    movq 8*4(%esi),%xmm4 #get input block word #4
+    movq %xmm4,8*4+Wcopy(%esp)
+    paddq ksKey+16*4-F_O(%ebp),%xmm4#inject the initial key
+#
+.if _SKEIN_DEBUG #debug dump of state at this point
+    Skein_Debug_Block 512
+    Skein_Debug_Round 512,SKEIN_RND_KEY_INITIAL,SAVE_REGS
+.endif
+    addl $WCNT*8,%esi #skip to the next block
+    movl %esi,blkPtr(%ebx) #save the updated block pointer
+    #
+    # now the key schedule is computed. Start the rounds
+    #
+    xorl %edx,%edx #edx = round counter
+.if SKEIN_ASM_UNROLL & 512
+_UNROLL_CNT = ROUNDS_512/8
+.else
+_UNROLL_CNT = SKEIN_UNROLL_512
+    .if ((ROUNDS_512/8) % _UNROLL_CNT)
+    .error "Invalid SKEIN_UNROLL_512"
+    .endif
+    movl %ebp,%esi #use this as "rolling" pointer into ksTwk/ksKey
+Skein_512_round_loop: # (since there's no 16* scaled address mode)
+.endif
+_Rbase_ = 0
+.rept _UNROLL_CNT*2
+    R_512_FourRounds %_Rbase_
+_Rbase_ = _Rbase_+4
+.endr #rept _UNROLL_CNT*2
+#
+.if (SKEIN_ASM_UNROLL & 512) == 0
+    cmpl $2*(ROUNDS_512/8),%edx
+    jb Skein_512_round_loop
+.endif
+    #----------------------------
+    # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..7}
+    andb $FIRST_MASK8,TWEAK +15(%edi)
+.irp _NN_,0,2,4,6 #do the aligned ones first
+    xorpd Wcopy+8*\_NN_(%esp),%xmm\_NN_
+    movq %xmm\_NN_,X_VARS+8*_NN_(%edi)
+.endr
+.irp _NN_,1,3,5,7 #now we have some register space available
+    movq Wcopy+8*\_NN_(%esp),%xmm0
+    xorpd %xmm0,%xmm&\_NN_
+    movq %xmm&\_NN_,X_VARS+8*\_NN_(%edi)
+.endr
+.if _SKEIN_DEBUG
+    Skein_Debug_Round 512,SKEIN_RND_FEED_FWD
+.endif
+    # go back for more blocks, if needed
+    decl %ecx
+    jnz Skein_512_block_loop
+
+    Reset_Stack _Skein_512_Process_Block
+    ret
+#
+.ifdef _SKEIN_CODE_SIZE
+# returns (in eax) the byte distance from the start of the block function
+# to this label
+C_label Skein_512_Process_Block_CodeSize
+    movl $(_Skein_512_Process_Block_CodeSize - _Skein_512_Process_Block),%eax
+    ret
+#
+# returns (in eax) the partial unroll count, or 0 when fully unrolled
+C_label Skein_512_Unroll_Cnt
+    .if _UNROLL_CNT <> ROUNDS_512/8
+    movl $_UNROLL_CNT,%eax
+    .else
+    xorl %eax,%eax
+    .endif
+    ret
+.endif
+#
+.endif # _USE_ASM_ & 512
+#
+#----------------------------------------------------------------
+#
+.if _USE_ASM_ & 1024
+    .global _Skein1024_Process_Block
+#
+# void Skein_1024_Process_Block(Skein_1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t 
blkCnt,size_t bitcntAdd)#
+#
+R_1024_REGS = (5) #keep this many block variables in registers
+#
+################
+.if _SKEIN_DEBUG # macros for saving/restoring X_stk for debug routines
+# Both helpers are reached via "call", hence the +4 (return address) in the
+# X_stk offsets. Only the first R_1024_REGS words live in XMM registers.
+_Put_XMM_1024:
+_NN_ = 0
+    .rept R_1024_REGS
+    .irp _rr_,%(_NN_)
+    movq %xmm\_rr_,X_stk+4+8*_NN_(%esp)
+    .endr
+_NN_ = _NN_+1
+    .endr
+    ret
+#
+_Get_XMM_1024:
+_NN_ = 0
+    .rept R_1024_REGS
+    .irp _rr_,%(_NN_)
+    movq X_stk+4+8*_NN_(%esp),%xmm\_rr_
+    .endr
+_NN_ = _NN_+1
+    .endr
+    ret
+.endif
+#
+#################
+# MACRO: one mix step
+#
+# Arguments, in order:
+#   1,2 = indices of the two state words to mix. A word whose index is
+#         below R_1024_REGS is used in place in the XMM register of the same
+#         number; any other word is loaded from X_stk into xmm5 (first word)
+#         or xmm6 (second word) and written back afterwards.
+#   3,4 = select the rotation constant RC_1024_<r>_<j>, where r is derived
+#         from argument 3 (round number, reduced to the range 0..7) and j is
+#         argument 4.
+#   5   = optional, default 0. When nonzero in a _SKEIN_DEBUG build the
+#         state is dumped after the mix, labelled with round (arg 3)+1.
+#
+# Operation (xmm7 is scratch):
+#   first  += second
+#   second  = rotate_left(second, constant) ^ first
+#
+# Macro formal names are case-sensitive: the debug call must reference the
+# round argument with exactly the spelling used in the .macro line.
+#
+.macro MixStep_1024 x0,x1,rotIdx0,rotIdx1,_debug_=0
+_r0_ = \x0 #default, if already loaded
+_r1_ = \x1
+    # load the regs (if necessary)
+    .if (\x0 >= R_1024_REGS)
+_r0_ = 5
+    movq X_stk+8*(\x0)(%esp),%xmm5
+    .endif
+    .if (\x1 >= R_1024_REGS)
+_r1_ = 6
+    movq X_stk+8*(\x1)(%esp),%xmm6
+    .endif
+    # do the mix
+    .irp _rx_,%((rotIdx0) && 7)
+_Rc_ = RC_1024_\_rx_&&_\rotIdx1 #rotation constant
+    .endr
+    .irp _x0_,%_r0_
+    .irp _x1_,%_r1_
+    paddq %xmm\_x1_,%xmm\_x0_
+    movq %xmm\_x1_,%xmm7
+    psllq $ _Rc_ ,%xmm\_x1_
+    psrlq $64-_Rc_ ,%xmm7
+    xorpd %xmm\_x0_,%xmm\_x1_
+    xorpd %xmm7 ,%xmm\_x1_
+    .endr
+    .endr
+    # save the regs (if necessary)
+    .if (\x0 >= R_1024_REGS)
+    movq %xmm5,X_stk+8*(\x0)(%esp)
+    .endif
+    .if (\x1 >= R_1024_REGS)
+    movq %xmm6,X_stk+8*(\x1)(%esp)
+    .endif
+    # debug output
+    .if _SKEIN_DEBUG && (\_debug_)
+    Skein_Debug_Round 1024,%((\rotIdx0)+1),SAVE_REGS
+    .endif
+.endm
+#################
+# MACRO: four rounds
+#
+.macro R_1024_FourRounds _RR_
+    #--------- round _RR_
+    MixStep_1024 0, 1,%((\_RR_)+0),0
+    MixStep_1024 2, 3,%((\_RR_)+0),1
+    MixStep_1024 4, 5,%((\_RR_)+0),2
+    MixStep_1024 6, 7,%((\_RR_)+0),3
+    MixStep_1024 8, 9,%((\_RR_)+0),4
+    MixStep_1024 10,11,%((\_RR_)+0),5
+    MixStep_1024 12,13,%((\_RR_)+0),6
+    MixStep_1024 14,15,%((\_RR_)+0),7,1
+    #--------- round _RR_+1
+    MixStep_1024 0, 9,%((\_RR_)+1),0
+    MixStep_1024 2,13,%((\_RR_)+1),1
+    MixStep_1024 6,11,%((\_RR_)+1),2
+    MixStep_1024 4,15,%((\_RR_)+1),3
+    MixStep_1024 10, 
7,%((\_RR_)+1),4 + MixStep_1024 12, 3,%((\_RR_)+1),5 + MixStep_1024 14, 5,%((\_RR_)+1),6 + MixStep_1024 8, 1,%((\_RR_)+1),7,1 + #--------- round _RR_+2 + MixStep_1024 0, 7,%((\_RR_)+2),0 + MixStep_1024 2, 5,%((\_RR_)+2),1 + MixStep_1024 4, 3,%((\_RR_)+2),2 + MixStep_1024 6, 1,%((\_RR_)+2),3 + MixStep_1024 12,15,%((\_RR_)+2),4 + MixStep_1024 14,13,%((\_RR_)+2),5 + MixStep_1024 8,11,%((\_RR_)+2),6 + MixStep_1024 10, 9,%((\_RR_)+2),7,1 + #--------- round _RR_+3 + MixStep_1024 0,15,%((\_RR_)+3),0 + MixStep_1024 2,11,%((\_RR_)+3),1 + MixStep_1024 6,13,%((\_RR_)+3),2 + MixStep_1024 4, 9,%((\_RR_)+3),3 + MixStep_1024 14, 1,%((\_RR_)+3),4 + MixStep_1024 8, 5,%((\_RR_)+3),5 + MixStep_1024 10, 3,%((\_RR_)+3),6 + MixStep_1024 12, 7,%((\_RR_)+3),7,1 + + incl %edx #edx = round number + movd %edx,%xmm7 + + #inject the key +.irp _NN_,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 + .if _UNROLL_CNT <> (ROUNDS_1024/8) + .if \_NN_ < R_1024_REGS + paddq ksKey+16*\_NN_+16-F_O(%esi),%xmm&\_NN_ + .else + movq X_stk+ 8*\_NN_(%esp),%xmm6 + .if \_NN_ == 15 + paddq %xmm7,%xmm6 + .elseif \_NN_ == 14 + paddq ksTwk+16*2-F_O(%esi),%xmm6 + .elseif \_NN_ == 13 + paddq ksTwk+16*1-F_O(%esi),%xmm6 + .endif + paddq ksKey+16*\_NN_+16-F_O(%esi),%xmm6 + movq %xmm6,X_stk+ 8*\_NN_(%esp) + .endif + .else + .if \_NN_ < R_1024_REGS + paddq ksKey+16*(((_Rbase_/4)+(\_NN_)+1) % 17)-F_O(%ebp),%xmm&\_NN_ + .else + movq X_stk+ 8*\_NN_(%esp), %xmm6 + paddq ksKey+16*(((_Rbase_/4)+(\_NN_)+1) % 17)-F_O(%ebp),%xmm6 + .if \_NN_ == 15 + paddq %xmm7,%xmm6 + .elseif \_NN_ == 14 + paddq ksTwk+16*(((_Rbase_/4)+2) % 3)-F_O(%ebp),%xmm6 + .elseif \_NN_ == 13 + paddq ksTwk+16*(((_Rbase_/4)+1) % 3)-F_O(%ebp),%xmm6 + .endif + movq %xmm6,X_stk+ 8*\_NN_(%esp) + .endif + .endif +.endr + .if _UNROLL_CNT <> (ROUNDS_1024/8) #rotate the key schedule on the stack + movq ksKey-F_O(%esi), %xmm6 + movq ksTwk-F_O(%esi), %xmm7 + movq %xmm6,ksKey+16*(WCNT+1)-F_O(%esi) + movq %xmm7,ksTwk+16* 3 -F_O(%esi) + addl $16,%esi #bump rolling pointer + .endif + 
.if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_KEY_INJECT ,SAVE_REGS + .endif +.endm #R_1024_FourRounds +# +################ +# +C_label Skein1024_Process_Block +# + WCNT = 16 #WCNT=16 for Skein-1024 + Setup_Stack WCNT,ROUNDS_1024 + addl $0x80,%edi #bias the edi ctxt offsets to keep them all short + # main hash loop for Skein1024 +Skein1024_block_loop: + movd bitAdd(%ebx) ,%xmm0 + movq TWEAK+0-0x80(%edi),%xmm1 + movq TWEAK+8-0x80(%edi),%xmm2 + paddq %xmm0,%xmm1 #bump T0 by the bitAdd parameter + movq %xmm1,TWEAK-0x80(%edi) #save updated tweak value T0 (for next time) + movq %xmm2,%xmm0 + xorpd %xmm1,%xmm0 #compute overall tweak parity + movdqa %xmm1,ksTwk -F_O(%ebp)#save the expanded tweak schedule on the stack + movdqa %xmm2,ksTwk+16-F_O(%ebp) + movdqa %xmm0,ksTwk+32-F_O(%ebp) + + movl blkPtr(%ebx),%esi #esi --> input block + movl $KW_PARITY_LO,%eax #init key schedule parity accumulator + movl $KW_PARITY_HI,%edx + movd %eax ,%xmm7 + movd %edx ,%xmm6 + unpcklps %xmm6,%xmm7 #replicate parity dword to 64 bits +# + leal 0x80(%esp),%eax #use short offsets for Wcopy, X_stk writes below +.irp _NN_,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 + movq X_VARS+8*\_NN_-0x80(%edi),%xmm6 + xorpd %xmm6,%xmm7 #update overall parity + movdqa %xmm6,ksKey+16*\_NN_-F_O(%ebp) #save the key schedule on the stack + .if \_NN_ < R_1024_REGS + _rr_ = \_NN_ + .else + _rr_ = R_1024_REGS + .endif + .irp _rn_,%(_rr_) + movq 8*\_NN_(%esi),%xmm\_rn_ #save copy of the input block on stack + movq %xmm\_rn_,Wcopy+8*\_NN_-0x80(%eax) #(for feedforward later) + paddq %xmm6,%xmm\_rn_ #inject the key into the block + .if \_NN_ == 13 + paddq %xmm1,%xmm\_rn_ #inject the initial tweak words + .elseif \_NN_ == 14 + paddq %xmm2,%xmm\_rn_ + .endif + .if \_NN_ >= R_1024_REGS #only save X[5..15] on stack, leave X[0..4] in regs + movq %xmm\_rn_,X_stk+8*\_NN_-0x80(%eax) + .endif + .endr +.endr + movdqa %xmm7,ksKey+16*WCNT-F_O(%ebp) #save overall key parity at the end of the array +# +.if _SKEIN_DEBUG #debug dump of 
state at this point + Skein_Debug_Block 1024 + Skein_Debug_Round 1024,SKEIN_RND_KEY_INITIAL,SAVE_REGS +.endif + addl $WCNT*8,%esi #skip to the next block + movl %esi,blkPtr(%ebx) #save the updated block pointer + # + # now the key schedule is computed. Start the rounds + # + xorl %edx,%edx #edx = round counter +.if SKEIN_ASM_UNROLL & 1024 +_UNROLL_CNT = ROUNDS_1024/8 +.else +_UNROLL_CNT = SKEIN_UNROLL_1024 + .if ((ROUNDS_1024/8) % _UNROLL_CNT) + .error "Invalid SKEIN_UNROLL_1024" + .endif + movl %ebp,%esi #use this as "rolling" pointer into ksTwk/ksKey +Skein_1024_round_loop: +.endif +# +_Rbase_ = 0 +.rept _UNROLL_CNT*2 + R_1024_FourRounds %_Rbase_ +_Rbase_ = _Rbase_+4 +.endr #rept _UNROLL_CNT +# +.if (SKEIN_ASM_UNROLL & 1024) == 0 + cmp $2*(ROUNDS_1024/8),%edx + jb Skein_1024_round_loop +.endif + andb $FIRST_MASK8,TWEAK +15-0x80(%edi) #clear tweak bit for next time thru + #---------------------------- + # feedforward: ctx->X[i] = X[i] ^ w[i], {i=0..15} + leal 0x80(%esp),%eax #allow short offsets to X_stk and Wcopy +.irp _NN_,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .if \_NN_ < R_1024_REGS + .if \_NN_ && 1 #already in regs: no load needed + movq Wcopy+ 8*\_NN_-0x80(%eax),%xmm7 #unaligned + xorpd %xmm7,%xmm\_NN_ + .else + xorpd Wcopy+ 8*\_NN_-0x80(%eax),%xmm\_NN_ #aligned + .endif + movq %xmm\_NN_,X_VARS+8*\_NN_-0x80(%edi) + .else + movq X_stk+8*\_NN_-0x80(%eax),%xmm7 #load X value from stack + .if \_NN_ && 1 + movq Wcopy+8*\_NN_-0x80(%eax),%xmm6 #unaligned + xorpd %xmm6,%xmm7 + .else + xorpd Wcopy+8*\_NN_-0x80(%eax),%xmm7 #aligned + .endif + movq %xmm7,X_VARS+8*\_NN_-0x80(%edi) + .endif +.endr +.if _SKEIN_DEBUG + Skein_Debug_Round 1024,SKEIN_RND_FEED_FWD #no need to save regs on stack here +.endif + # go back for more blocks, if needed + decl %ecx + jnz Skein1024_block_loop + + Reset_Stack _Skein1024_Process_Block + ret +# +.ifdef _SKEIN_CODE_SIZE +C_label Skein1024_Process_Block_CodeSize + movl $(_Skein1024_Process_Block_CodeSize - _Skein1024_Process_Block),%eax + 
ret +# +C_label Skein1024_Unroll_Cnt + .if _UNROLL_CNT <> ROUNDS_1024/8 + movl $_UNROLL_CNT,%eax + .else + xorl %eax,%eax + .endif + ret +.endif +# +.endif # _USE_ASM_ & 1024 +#---------------------------------------------------------------- + .end diff --git a/Additional_Implementations/skein_perf_core2.txt b/Additional_Implementations/skein_perf_core2.txt new file mode 100644 index 000000000000..d8b795675c3d --- /dev/null +++ b/Additional_Implementations/skein_perf_core2.txt @@ -0,0 +1,1440 @@ + +Skein performance, in clks per byte, dtMin = 24 clks. + [compiled 14:27:59,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 3450.00 3450.00 | 8718.00 8718.00 | 41700.00 41730.00 | //: 32-bit, GCC_v3.42 [ C =...] + 2_ || 1719.00 1725.00 | 4323.00 4326.00 | 20835.00 20850.00 | //: 32-bit, GCC_v3.42 [ C =...] + 4_ || 861.00 861.00 | 2149.50 2151.00 | 10408.50 11277.00 | //: 32-bit, GCC_v3.42 [ C =...] + 8_ || 429.00 429.75 | 1074.75 1074.75 | 5204.25 5205.00 | //: 32-bit, GCC_v3.42 [ C =...] + 10_ || 343.80 344.40 | 865.80 866.40 | 4167.00 4167.60 | //: 32-bit, GCC_v3.42 [ C =...] + 16_ || 214.88 214.88 | 538.50 538.50 | 2603.25 2603.63 | //: 32-bit, GCC_v3.42 [ C =...] + 32_ || 107.06 115.88 | 269.25 269.25 | 1301.25 1301.25 | //: 32-bit, GCC_v3.42 [ C =...] + 64_ || 85.31 85.41 | 132.66 132.75 | 650.53 650.63 | //: 32-bit, GCC_v3.42 [ C =...] + 100_ || 82.20 88.86 | 126.78 126.78 | 416.46 416.46 | //: 32-bit, GCC_v3.42 [ C =...] + 128_ || 69.42 69.56 | 97.83 97.83 | 324.98 325.03 | //: 32-bit, GCC_v3.42 [ C =...] 
+ 256_ || 56.70 56.74 | 76.34 76.34 | 242.95 242.98 | //: 32-bit, GCC_v3.42 [ C =...] + 512_ || 53.06 53.12 | 65.50 65.53 | 200.66 200.67 | //: 32-bit, GCC_v3.42 [ C =...] + 1000_ || 52.33 52.42 | 61.66 61.69 | 183.89 183.92 | //: 32-bit, GCC_v3.42 [ C =...] + 1024_ || 51.15 51.23 | 60.07 60.08 | 179.52 179.55 | //: 32-bit, GCC_v3.42 [ C =...] + 2048_ || 50.20 50.30 | 57.36 57.42 | 168.86 168.97 | //: 32-bit, GCC_v3.42 [ C =...] + 4096_ || 49.71 49.77 | 56.00 56.01 | 163.65 166.96 | //: 32-bit, GCC_v3.42 [ C =...] + 8192_ || 49.48 50.94 | 55.33 57.07 | 169.60 184.62 | //: 32-bit, GCC_v3.42 [ C =...] + 10000_ || 53.64 53.70 | 60.89 60.99 | 186.13 186.98 | //: 32-bit, GCC_v3.42 [ C =...] + 16384_ || 53.48 53.80 | 60.35 60.70 | 164.26 167.23 | //: 32-bit, GCC_v3.42 [ C =...] + 32768_ || 53.47 53.50 | 60.22 60.37 | 164.15 182.33 | //: 32-bit, GCC_v3.42 [ C =...] + 100000_ || 53.85 100.93 | 60.43 61.36 | 164.25 169.78 | //: 32-bit, GCC_v3.42 [ C =...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [ C =...] + Block || 14464 bytes | 32544 bytes | 83024 bytes | //: 32-bit, GCC_v3.42 [ C =...] + +Skein performance, in clks per byte, dtMin = 24 clks. + [compiled 14:28:12,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2802.00 2814.00 | 5952.00 5952.00 | 30606.00 30606.00 | //: 32-bit, MSC_v9.00 [ C =...] 
+ 2_ || 1392.00 1395.00 | 2976.00 2979.00 | 15309.00 15309.00 | //: 32-bit, MSC_v9.00 [ C =...] + 4_ || 696.00 697.50 | 1486.50 1486.50 | 7653.00 7654.50 | //: 32-bit, MSC_v9.00 [ C =...] + 8_ || 347.25 348.00 | 741.75 742.50 | 3825.75 3827.25 | //: 32-bit, MSC_v9.00 [ C =...] + 10_ || 278.40 278.40 | 593.40 593.40 | 3063.00 3063.00 | //: 32-bit, MSC_v9.00 [ C =...] + 16_ || 174.38 174.38 | 370.50 370.50 | 1913.25 1913.25 | //: 32-bit, MSC_v9.00 [ C =...] + 32_ || 86.25 86.25 | 186.00 186.75 | 957.00 957.19 | //: 32-bit, MSC_v9.00 [ C =...] + 64_ || 62.91 62.91 | 92.91 92.91 | 478.50 478.50 | //: 32-bit, MSC_v9.00 [ C =...] + 100_ || 65.52 65.58 | 88.02 88.08 | 306.30 306.30 | //: 32-bit, MSC_v9.00 [ C =...] + 128_ || 50.72 50.72 | 68.53 68.58 | 238.64 238.88 | //: 32-bit, MSC_v9.00 [ C =...] + 256_ || 44.88 45.05 | 56.11 56.13 | 178.17 178.24 | //: 32-bit, MSC_v9.00 [ C =...] + 512_ || 41.79 41.86 | 49.79 49.91 | 147.39 147.47 | //: 32-bit, MSC_v9.00 [ C =...] + 1000_ || 41.26 41.41 | 47.96 47.96 | 135.28 135.29 | //: 32-bit, MSC_v9.00 [ C =...] + 1024_ || 40.40 40.40 | 46.79 46.81 | 132.05 132.08 | //: 32-bit, MSC_v9.00 [ C =...] + 2048_ || 39.62 39.62 | 45.23 45.23 | 124.39 124.40 | //: 32-bit, MSC_v9.00 [ C =...] + 4096_ || 38.98 38.99 | 44.34 44.44 | 120.58 120.60 | //: 32-bit, MSC_v9.00 [ C =...] + 8192_ || 38.83 38.87 | 44.06 47.57 | 118.65 119.36 | //: 32-bit, MSC_v9.00 [ C =...] + 10000_ || 38.86 39.08 | 44.13 44.21 | 119.88 120.11 | //: 32-bit, MSC_v9.00 [ C =...] + 16384_ || 38.74 39.03 | 43.76 44.01 | 108.36 117.94 | //: 32-bit, MSC_v9.00 [ C =...] + 32768_ || 36.77 38.19 | 41.28 41.57 | 105.50 114.79 | //: 32-bit, MSC_v9.00 [ C =...] + 100000_ || 38.85 39.09 | 43.56 43.77 | 105.79 114.18 | //: 32-bit, MSC_v9.00 [ C =...] 
+=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =...] + Block || 10192 bytes | 22960 bytes | 53072 bytes | //: 32-bit, MSC_v9.00 [ C =...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:28:29,Oct 7 2008 by 'MSC_v6.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 8688.00 8712.00 | 26466.00 26472.00 | 61638.00 61680.00 | //: 32-bit, MSC_v6.00 [ C =...] + 2_ || 4347.00 4362.00 | 13293.00 13302.00 | 30036.00 30372.00 | //: 32-bit, MSC_v6.00 [ C =...] + 4_ || 2184.00 2199.00 | 6457.50 6508.50 | 15267.00 15285.00 | //: 32-bit, MSC_v6.00 [ C =...] + 8_ || 1093.50 1098.75 | 3227.25 3227.25 | 7398.75 7467.75 | //: 32-bit, MSC_v6.00 [ C =...] + 10_ || 873.60 878.40 | 2405.40 2574.00 | 5661.00 5668.20 | //: 32-bit, MSC_v6.00 [ C =...] + 16_ || 522.00 524.25 | 1455.00 1455.38 | 3459.38 3489.38 | //: 32-bit, MSC_v6.00 [ C =...] + 32_ || 260.06 261.00 | 727.69 732.56 | 1727.44 1728.00 | //: 32-bit, MSC_v6.00 [ C =...] + 64_ || 186.66 186.94 | 362.16 362.25 | 848.25 856.97 | //: 32-bit, MSC_v6.00 [ C =...] + 100_ || 194.10 194.10 | 344.52 344.58 | 542.22 545.28 | //: 32-bit, MSC_v6.00 [ C =...] + 128_ || 151.27 151.36 | 266.06 268.59 | 426.23 426.33 | //: 32-bit, MSC_v6.00 [ C =...] + 256_ || 137.67 137.91 | 219.66 219.68 | 314.74 317.74 | //: 32-bit, MSC_v6.00 [ C =...] + 512_ || 130.21 130.22 | 195.96 218.33 | 263.45 266.96 | //: 32-bit, MSC_v6.00 [ C =...] 
+ 1000_ || 129.17 129.60 | 183.96 185.04 | 237.88 240.37 | //: 32-bit, MSC_v6.00 [ C =...] + 1024_ || 126.35 126.53 | 178.10 178.44 | 251.47 256.21 | //: 32-bit, MSC_v6.00 [ C =...] + 2048_ || 133.77 133.81 | 190.95 191.16 | 240.21 242.87 | //: 32-bit, MSC_v6.00 [ C =...] + 4096_ || 116.36 124.81 | 169.39 178.51 | 220.31 222.41 | //: 32-bit, MSC_v6.00 [ C =...] + 8192_ || 123.60 125.60 | 171.36 174.54 | 215.79 233.44 | //: 32-bit, MSC_v6.00 [ C =...] + 10000_ || 124.42 127.19 | 183.83 188.08 | 236.37 238.04 | //: 32-bit, MSC_v6.00 [ C =...] + 16384_ || 133.15 133.46 | 172.38 183.65 | 222.17 232.39 | //: 32-bit, MSC_v6.00 [ C =...] + 32768_ || 126.67 128.66 | 180.67 186.04 | 225.01 227.24 | //: 32-bit, MSC_v6.00 [ C =...] + 100000_ || 123.83 125.06 | 172.26 186.33 | 205.87 224.72 | //: 32-bit, MSC_v6.00 [ C =...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1486 bytes | 1348 bytes | 1445 bytes | //: 32-bit, MSC_v6.00 [ C =...] + Block || 14094 bytes | 35580 bytes | 69258 bytes | //: 32-bit, MSC_v6.00 [ C =...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:28:44,Oct 7 2008 by 'MSC_v4.20', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 5028.00 5058.00 | 9948.00 10044.00 | 35466.00 35520.00 | //: 32-bit, MSC_v4.20 [ C =...] + 2_ || 2508.00 2511.00 | 5070.00 5076.00 | 18090.00 18132.00 | //: 32-bit, MSC_v4.20 [ C =...] + 4_ || 1255.50 1255.50 | 2523.00 2523.00 | 9063.00 9063.00 | //: 32-bit, MSC_v4.20 [ C =...] 
+ 8_ || 627.75 627.75 | 1261.50 1261.50 | 4536.00 4536.00 | //: 32-bit, MSC_v4.20 [ C =...] + 10_ || 502.20 502.20 | 1013.40 1014.60 | 3685.80 3685.80 | //: 32-bit, MSC_v4.20 [ C =...] + 16_ || 313.88 313.88 | 624.75 632.63 | 2284.88 2287.88 | //: 32-bit, MSC_v4.20 [ C =...] + 32_ || 155.25 155.25 | 312.38 312.38 | 1143.75 1143.75 | //: 32-bit, MSC_v4.20 [ C =...] + 64_ || 114.56 114.66 | 155.72 155.72 | 569.91 569.91 | //: 32-bit, MSC_v4.20 [ C =...] + 100_ || 120.66 120.78 | 148.92 148.98 | 363.60 363.66 | //: 32-bit, MSC_v4.20 [ C =...] + 128_ || 93.84 93.89 | 116.58 116.63 | 284.58 284.58 | //: 32-bit, MSC_v4.20 [ C =...] + 256_ || 83.46 83.48 | 95.20 95.20 | 213.77 213.77 | //: 32-bit, MSC_v4.20 [ C =...] + 512_ || 78.18 78.19 | 85.08 85.09 | 177.38 177.38 | //: 32-bit, MSC_v4.20 [ C =...] + 1000_ || 77.42 77.42 | 81.88 81.88 | 161.92 161.92 | //: 32-bit, MSC_v4.20 [ C =...] + 1024_ || 75.54 75.55 | 79.53 79.53 | 158.23 158.23 | //: 32-bit, MSC_v4.20 [ C =...] + 2048_ || 74.22 74.23 | 77.37 77.38 | 148.59 149.28 | //: 32-bit, MSC_v4.20 [ C =...] + 4096_ || 73.56 73.57 | 76.51 76.51 | 142.82 145.02 | //: 32-bit, MSC_v4.20 [ C =...] + 8192_ || 73.23 73.24 | 73.69 74.43 | 143.02 143.30 | //: 32-bit, MSC_v4.20 [ C =...] + 10000_ || 73.30 73.36 | 73.75 75.53 | 144.04 144.80 | //: 32-bit, MSC_v4.20 [ C =...] + 16384_ || 69.35 73.11 | 71.79 73.01 | 131.31 140.35 | //: 32-bit, MSC_v4.20 [ C =...] + 32768_ || 73.05 73.36 | 72.43 74.54 | 126.01 139.95 | //: 32-bit, MSC_v4.20 [ C =...] + 100000_ || 69.28 70.12 | 66.33 67.35 | 129.68 136.97 | //: 32-bit, MSC_v4.20 [ C =...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1152 bytes | 1024 bytes | 1088 bytes | //: 32-bit, MSC_v4.20 [ C =...] + Block || 11968 bytes | 23776 bytes | 55360 bytes | //: 32-bit, MSC_v4.20 [ C =...] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:28:57,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 780.00 786.00 | 1110.00 1110.00 | 3288.00 3318.00 | //: 64-bit, MSC_v9.00 [ C =...] + 2_ || 402.00 402.00 | 549.00 552.00 | 1659.00 1659.00 | //: 64-bit, MSC_v9.00 [ C =...] + 4_ || 199.50 201.00 | 274.50 276.00 | 829.50 829.50 | //: 64-bit, MSC_v9.00 [ C =...] + 8_ || 96.75 97.50 | 134.25 135.00 | 414.75 414.75 | //: 64-bit, MSC_v9.00 [ C =...] + 10_ || 78.60 79.80 | 109.80 109.80 | 331.20 331.80 | //: 64-bit, MSC_v9.00 [ C =...] + 16_ || 48.38 48.38 | 67.13 67.13 | 224.25 224.63 | //: 64-bit, MSC_v9.00 [ C =...] + 32_ || 26.63 26.81 | 36.38 36.38 | 112.31 112.31 | //: 64-bit, MSC_v9.00 [ C =...] + 64_ || 17.06 17.06 | 16.78 16.78 | 51.66 51.75 | //: 64-bit, MSC_v9.00 [ C =...] + 100_ || 16.74 16.80 | 15.54 15.54 | 33.30 33.30 | //: 64-bit, MSC_v9.00 [ C =...] + 128_ || 12.98 13.08 | 11.95 12.00 | 25.78 25.83 | //: 64-bit, MSC_v9.00 [ C =...] + 256_ || 10.99 10.99 | 9.19 9.21 | 19.03 19.03 | //: 64-bit, MSC_v9.00 [ C =...] + 512_ || 10.14 10.18 | 7.84 7.85 | 15.60 15.60 | //: 64-bit, MSC_v9.00 [ C =...] + 1000_ || 9.88 10.67 | 7.38 7.38 | 14.16 14.17 | //: 64-bit, MSC_v9.00 [ C =...] + 1024_ || 9.60 9.64 | 7.18 7.18 | 13.74 13.74 | //: 64-bit, MSC_v9.00 [ C =...] + 2048_ || 9.35 9.38 | 6.83 6.83 | 12.84 12.84 | //: 64-bit, MSC_v9.00 [ C =...] + 4096_ || 9.28 9.28 | 6.69 6.70 | 12.40 12.40 | //: 64-bit, MSC_v9.00 [ C =...] + 8192_ || 9.18 9.21 | 6.58 6.59 | 12.28 12.28 | //: 64-bit, MSC_v9.00 [ C =...] + 10000_ || 9.21 9.22 | 6.60 6.60 | 12.27 12.39 | //: 64-bit, MSC_v9.00 [ C =...] 
+ 16384_ || 9.19 9.20 | 6.53 6.55 | 12.12 12.12 | //: 64-bit, MSC_v9.00 [ C =...] + 32768_ || 9.16 9.17 | 6.51 6.55 | 12.08 12.53 | //: 64-bit, MSC_v9.00 [ C =...] + 100000_ || 9.98 10.01 | 7.04 7.08 | 12.36 13.14 | //: 64-bit, MSC_v9.00 [ C =...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =...] + Block || 2272 bytes | 4944 bytes | 15264 bytes | //: 64-bit, MSC_v9.00 [ C =...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:28:59,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 6204.00 6252.00 | 11058.00 11124.00 | 25662.00 25788.00 | //: 32-bit, BCC_v5.51 [ C =...] + 2_ || 3048.00 3060.00 | 5469.00 5481.00 | 12576.00 12672.00 | //: 32-bit, BCC_v5.51 [ C =...] + 4_ || 1515.00 1521.00 | 2731.50 2733.00 | 6303.00 6348.00 | //: 32-bit, BCC_v5.51 [ C =...] + 8_ || 756.75 760.50 | 1364.25 1367.25 | 3147.75 3162.75 | //: 32-bit, BCC_v5.51 [ C =...] + 10_ || 605.40 607.80 | 1092.60 1095.00 | 2541.60 2545.80 | //: 32-bit, BCC_v5.51 [ C =...] + 16_ || 379.50 380.62 | 682.88 683.25 | 1584.38 1590.00 | //: 32-bit, BCC_v5.51 [ C =...] + 32_ || 187.88 188.62 | 340.69 341.06 | 794.81 797.62 | //: 32-bit, BCC_v5.51 [ C =...] + 64_ || 138.19 138.28 | 169.69 169.78 | 420.75 420.84 | //: 32-bit, BCC_v5.51 [ C =...] + 100_ || 145.02 145.08 | 160.80 160.86 | 269.16 269.52 | //: 32-bit, BCC_v5.51 [ C =...] 
+ 128_ || 112.92 112.92 | 125.39 125.44 | 210.00 210.23 | //: 32-bit, BCC_v5.51 [ C =...] + 256_ || 100.27 100.29 | 103.08 103.08 | 156.33 156.42 | //: 32-bit, BCC_v5.51 [ C =...] + 512_ || 93.98 94.00 | 91.90 91.91 | 129.40 129.41 | //: 32-bit, BCC_v5.51 [ C =...] + 1000_ || 93.02 93.02 | 88.44 88.44 | 118.61 118.69 | //: 32-bit, BCC_v5.51 [ C =...] + 1024_ || 90.80 90.81 | 86.34 86.34 | 115.78 115.86 | //: 32-bit, BCC_v5.51 [ C =...] + 2048_ || 89.22 89.22 | 77.11 83.54 | 95.12 95.16 | //: 32-bit, BCC_v5.51 [ C =...] + 4096_ || 81.62 81.62 | 75.83 75.83 | 92.17 100.45 | //: 32-bit, BCC_v5.51 [ C =...] + 8192_ || 81.80 88.11 | 75.18 78.19 | 90.69 92.09 | //: 32-bit, BCC_v5.51 [ C =...] + 10000_ || 81.32 84.70 | 76.65 78.80 | 92.85 94.82 | //: 32-bit, BCC_v5.51 [ C =...] + 16384_ || 83.13 83.59 | 76.92 77.00 | 92.05 93.27 | //: 32-bit, BCC_v5.51 [ C =...] + 32768_ || 83.07 84.01 | 76.76 77.91 | 92.12 94.08 | //: 32-bit, BCC_v5.51 [ C =...] + 100000_ || 83.48 84.08 | 77.08 78.59 | 93.38 102.21 | //: 32-bit, BCC_v5.51 [ C =...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [ C =...] + Block || 10732 bytes | 20964 bytes | 45988 bytes | //: 32-bit, BCC_v5.51 [ C =...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:29:07,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2592.00 2604.00 | 4848.00 4854.00 | 22278.00 22284.00 | //: 32-bit, BCC_v5.51 [asm=...] 
+ 2_ || 1287.00 1293.00 | 2430.00 2430.00 | 11139.00 11139.00 | //: 32-bit, BCC_v5.51 [asm=...] + 4_ || 637.50 639.00 | 1213.50 1213.50 | 5565.00 5566.50 | //: 32-bit, BCC_v5.51 [asm=...] + 8_ || 318.75 319.50 | 606.75 606.75 | 2782.50 2783.25 | //: 32-bit, BCC_v5.51 [asm=...] + 10_ || 255.60 255.60 | 486.00 486.60 | 2228.40 2228.40 | //: 32-bit, BCC_v5.51 [asm=...] + 16_ || 159.75 159.75 | 301.88 302.25 | 1391.25 1391.62 | //: 32-bit, BCC_v5.51 [asm=...] + 32_ || 78.75 78.75 | 151.31 151.31 | 695.44 695.62 | //: 32-bit, BCC_v5.51 [asm=...] + 64_ || 55.69 57.28 | 74.81 74.91 | 347.81 347.81 | //: 32-bit, BCC_v5.51 [asm=...] + 100_ || 57.42 57.48 | 69.84 69.90 | 222.60 222.60 | //: 32-bit, BCC_v5.51 [asm=...] + 128_ || 44.53 44.58 | 54.38 54.38 | 173.67 173.67 | //: 32-bit, BCC_v5.51 [asm=...] + 256_ || 38.55 38.55 | 43.99 44.02 | 129.05 129.05 | //: 32-bit, BCC_v5.51 [asm=...] + 512_ || 35.60 35.60 | 38.66 38.67 | 106.62 106.62 | //: 32-bit, BCC_v5.51 [asm=...] + 1000_ || 34.89 34.89 | 37.18 37.18 | 97.72 97.72 | //: 32-bit, BCC_v5.51 [asm=...] + 1024_ || 34.23 34.98 | 35.85 35.86 | 95.40 95.40 | //: 32-bit, BCC_v5.51 [asm=...] + 2048_ || 33.86 33.86 | 34.66 34.66 | 89.79 89.80 | //: 32-bit, BCC_v5.51 [asm=...] + 4096_ || 33.22 33.59 | 33.92 34.26 | 86.99 86.99 | //: 32-bit, BCC_v5.51 [asm=...] + 8192_ || 33.11 33.11 | 33.80 33.92 | 74.64 77.45 | //: 32-bit, BCC_v5.51 [asm=...] + 10000_ || 31.46 33.40 | 31.80 32.58 | 78.61 78.66 | //: 32-bit, BCC_v5.51 [asm=...] + 16384_ || 31.49 32.78 | 32.16 33.51 | 76.07 76.13 | //: 32-bit, BCC_v5.51 [asm=...] + 32768_ || 32.01 32.58 | 32.74 33.18 | 75.73 76.02 | //: 32-bit, BCC_v5.51 [asm=...] + 100000_ || 32.23 32.45 | 33.33 61.75 | 84.30 85.34 | //: 32-bit, BCC_v5.51 [asm=...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [asm=...] 
+ Block || 7588 bytes | 16636 bytes | 38262 bytes | //: 32-bit, BCC_v5.51 [asm=...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:29:12,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2484.00 2490.00 | 4830.00 4836.00 | 22182.00 22188.00 | //: 32-bit, MSC_v9.00 [asm=...] + 2_ || 1254.00 1254.00 | 2415.00 2415.00 | 11091.00 11091.00 | //: 32-bit, MSC_v9.00 [asm=...] + 4_ || 627.00 627.00 | 1207.50 1207.50 | 5545.50 5545.50 | //: 32-bit, MSC_v9.00 [asm=...] + 8_ || 313.50 313.50 | 603.00 603.75 | 2390.25 2478.00 | //: 32-bit, MSC_v9.00 [asm=...] + 10_ || 250.20 252.00 | 485.40 488.40 | 1936.80 1959.00 | //: 32-bit, MSC_v9.00 [asm=...] + 16_ || 156.00 156.75 | 301.50 301.50 | 1386.00 1386.00 | //: 32-bit, MSC_v9.00 [asm=...] + 32_ || 77.81 77.81 | 150.94 151.31 | 692.81 692.81 | //: 32-bit, MSC_v9.00 [asm=...] + 64_ || 56.34 56.34 | 74.81 74.81 | 343.78 346.41 | //: 32-bit, MSC_v9.00 [asm=...] + 100_ || 58.62 58.68 | 70.74 70.80 | 221.76 221.76 | //: 32-bit, MSC_v9.00 [asm=...] + 128_ || 45.47 45.47 | 55.08 55.08 | 168.94 173.02 | //: 32-bit, MSC_v9.00 [asm=...] + 256_ || 40.10 40.10 | 44.95 44.95 | 128.88 128.88 | //: 32-bit, MSC_v9.00 [asm=...] + 512_ || 37.49 37.55 | 39.94 39.94 | 92.99 92.99 | //: 32-bit, MSC_v9.00 [asm=...] + 1000_ || 34.12 34.16 | 35.44 35.44 | 85.27 85.31 | //: 32-bit, MSC_v9.00 [asm=...] + 1024_ || 33.30 33.30 | 34.58 34.59 | 83.24 83.25 | //: 32-bit, MSC_v9.00 [asm=...] + 2048_ || 32.70 32.70 | 36.20 36.20 | 89.82 89.82 | //: 32-bit, MSC_v9.00 [asm=...] 
+ 4096_ || 35.09 35.09 | 35.50 35.57 | 87.04 87.05 | //: 32-bit, MSC_v9.00 [asm=...] + 8192_ || 34.83 35.38 | 35.12 35.64 | 76.07 84.71 | //: 32-bit, MSC_v9.00 [asm=...] + 10000_ || 34.78 34.98 | 35.36 35.36 | 86.31 86.35 | //: 32-bit, MSC_v9.00 [asm=...] + 16384_ || 34.76 34.80 | 35.07 35.36 | 80.55 85.21 | //: 32-bit, MSC_v9.00 [asm=...] + 32768_ || 32.88 33.17 | 33.06 33.37 | 75.87 76.15 | //: 32-bit, MSC_v9.00 [asm=...] + 100000_ || 32.96 33.40 | 33.29 33.60 | 75.79 76.81 | //: 32-bit, MSC_v9.00 [asm=...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=...] + Block || 7588 bytes | 16636 bytes | 38262 bytes | //: 32-bit, MSC_v9.00 [asm=...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:29:17,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2490.00 2496.00 | 4824.00 4836.00 | 22332.00 22356.00 | //: 32-bit, GCC_v3.42 [asm=...] + 2_ || 1251.00 1260.00 | 2412.00 2415.00 | 11157.00 11166.00 | //: 32-bit, GCC_v3.42 [asm=...] + 4_ || 621.00 622.50 | 1204.50 1204.50 | 5571.00 5572.50 | //: 32-bit, GCC_v3.42 [asm=...] + 8_ || 310.50 311.25 | 602.25 602.25 | 2785.50 2786.25 | //: 32-bit, GCC_v3.42 [asm=...] + 10_ || 249.00 249.60 | 482.40 482.40 | 2233.20 2233.80 | //: 32-bit, GCC_v3.42 [asm=...] + 16_ || 155.25 155.63 | 300.75 301.50 | 1393.88 1393.88 | //: 32-bit, GCC_v3.42 [asm=...] 
+ 32_ || 76.50 77.06 | 151.31 151.31 | 696.38 696.56 | //: 32-bit, GCC_v3.42 [asm=...] + 64_ || 55.78 56.06 | 75.19 75.19 | 348.19 348.19 | //: 32-bit, GCC_v3.42 [asm=...] + 100_ || 58.32 58.44 | 70.80 70.80 | 222.96 222.96 | //: 32-bit, GCC_v3.42 [asm=...] + 128_ || 45.14 45.52 | 55.08 55.13 | 173.72 173.77 | //: 32-bit, GCC_v3.42 [asm=...] + 256_ || 40.03 40.13 | 44.91 44.93 | 129.33 129.33 | //: 32-bit, GCC_v3.42 [asm=...] + 512_ || 37.38 37.50 | 39.77 39.79 | 106.58 106.66 | //: 32-bit, GCC_v3.42 [asm=...] + 1000_ || 36.94 37.03 | 38.19 38.19 | 97.66 97.69 | //: 32-bit, GCC_v3.42 [asm=...] + 1024_ || 35.75 36.13 | 37.24 37.24 | 95.29 95.32 | //: 32-bit, GCC_v3.42 [asm=...] + 2048_ || 35.36 35.44 | 35.94 35.94 | 88.77 89.67 | //: 32-bit, GCC_v3.42 [asm=...] + 4096_ || 35.02 35.02 | 35.31 35.38 | 77.07 86.35 | //: 32-bit, GCC_v3.42 [asm=...] + 8192_ || 32.18 32.20 | 32.30 32.31 | 74.72 77.04 | //: 32-bit, GCC_v3.42 [asm=...] + 10000_ || 32.28 32.34 | 32.41 32.43 | 78.36 78.77 | //: 32-bit, GCC_v3.42 [asm=...] + 16384_ || 32.16 33.29 | 32.20 33.16 | 76.16 78.39 | //: 32-bit, GCC_v3.42 [asm=...] + 32768_ || 33.20 33.70 | 33.22 33.33 | 75.84 76.10 | //: 32-bit, GCC_v3.42 [asm=...] + 100000_ || 33.26 33.96 | 33.11 33.41 | 75.75 76.29 | //: 32-bit, GCC_v3.42 [asm=...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [asm=...] + Block || 7588 bytes | 16636 bytes | 38262 bytes | //: 32-bit, GCC_v3.42 [asm=...] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:29:22,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 672.00 672.00 | 1068.00 1068.00 | 1920.00 1926.00 | //: 64-bit, MSC_v9.00 [asm=...] + 2_ || 336.00 336.00 | 534.00 534.00 | 963.00 963.00 | //: 64-bit, MSC_v9.00 [asm=...] + 4_ || 166.50 168.00 | 267.00 267.00 | 481.50 483.00 | //: 64-bit, MSC_v9.00 [asm=...] + 8_ || 81.00 81.00 | 130.50 131.25 | 240.00 240.75 | //: 64-bit, MSC_v9.00 [asm=...] + 10_ || 64.80 65.40 | 107.40 108.00 | 192.00 192.60 | //: 64-bit, MSC_v9.00 [asm=...] + 16_ || 40.13 40.13 | 65.63 65.63 | 120.00 120.00 | //: 64-bit, MSC_v9.00 [asm=...] + 32_ || 20.06 20.06 | 32.81 32.81 | 59.63 59.81 | //: 64-bit, MSC_v9.00 [asm=...] + 64_ || 14.25 14.34 | 16.31 16.31 | 32.44 32.44 | //: 64-bit, MSC_v9.00 [asm=...] + 100_ || 15.54 15.60 | 16.20 16.26 | 21.06 21.06 | //: 64-bit, MSC_v9.00 [asm=...] + 128_ || 11.81 11.86 | 11.44 11.48 | 14.86 14.86 | //: 64-bit, MSC_v9.00 [asm=...] + 256_ || 9.28 9.28 | 8.81 8.81 | 10.83 10.83 | //: 64-bit, MSC_v9.00 [asm=...] + 512_ || 8.43 8.43 | 7.46 7.46 | 8.66 8.66 | //: 64-bit, MSC_v9.00 [asm=...] + 1000_ || 8.18 8.18 | 6.97 6.97 | 7.77 7.78 | //: 64-bit, MSC_v9.00 [asm=...] + 1024_ || 7.98 8.50 | 6.81 7.38 | 7.58 7.58 | //: 64-bit, MSC_v9.00 [asm=...] + 2048_ || 7.75 7.75 | 6.47 6.47 | 7.05 7.05 | //: 64-bit, MSC_v9.00 [asm=...] + 4096_ || 7.65 7.65 | 6.30 6.30 | 6.78 6.78 | //: 64-bit, MSC_v9.00 [asm=...] + 8192_ || 7.59 7.59 | 6.21 6.21 | 6.64 6.64 | //: 64-bit, MSC_v9.00 [asm=...] + 10000_ || 7.59 7.59 | 6.23 6.23 | 6.69 6.69 | //: 64-bit, MSC_v9.00 [asm=...] 
+ 16384_ || 7.57 7.57 | 6.17 6.17 | 6.57 6.57 | //: 64-bit, MSC_v9.00 [asm=...] + 32768_ || 7.55 7.56 | 6.15 6.15 | 6.53 6.55 | //: 64-bit, MSC_v9.00 [asm=...] + 100000_ || 7.55 7.71 | 6.14 6.38 | 6.56 6.86 | //: 64-bit, MSC_v9.00 [asm=...] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=...] + Block || 2323 bytes | 4733 bytes | 11817 bytes | //: 64-bit, MSC_v9.00 [asm=...] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:29:24,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 4728.00 4728.00 | 8352.00 8352.00 | 20034.00 20040.00 | //: 32-bit, GCC_v3.42 [ C =111] + 2_ || 2370.00 2370.00 | 4179.00 4179.00 | 9261.00 9264.00 | //: 32-bit, GCC_v3.42 [ C =111] + 4_ || 1092.00 1096.50 | 1924.50 1926.00 | 4624.50 4624.50 | //: 32-bit, GCC_v3.42 [ C =111] + 8_ || 544.50 545.25 | 1040.25 1047.75 | 2312.25 2313.00 | //: 32-bit, GCC_v3.42 [ C =111] + 10_ || 436.20 436.80 | 768.60 769.20 | 1852.20 1852.20 | //: 32-bit, GCC_v3.42 [ C =111] + 16_ || 272.63 273.00 | 480.38 519.38 | 1156.88 1157.25 | //: 32-bit, GCC_v3.42 [ C =111] + 32_ || 135.94 135.94 | 240.56 240.75 | 579.00 579.00 | //: 32-bit, GCC_v3.42 [ C =111] + 64_ || 100.88 101.53 | 129.75 129.84 | 289.59 289.69 | //: 32-bit, GCC_v3.42 [ C =111] + 100_ || 106.44 106.44 | 113.94 114.18 | 185.46 200.94 | //: 32-bit, GCC_v3.42 [ C =111] + 128_ || 83.06 83.06 | 89.11 89.11 | 144.61 144.61 | //: 32-bit, GCC_v3.42 [ 
C =111] + 256_ || 73.83 79.99 | 73.34 79.45 | 107.55 107.55 | //: 32-bit, GCC_v3.42 [ C =111] + 512_ || 69.16 69.18 | 65.32 65.39 | 88.89 88.92 | //: 32-bit, GCC_v3.42 [ C =111] + 1000_ || 68.45 68.45 | 62.84 62.92 | 81.36 81.38 | //: 32-bit, GCC_v3.42 [ C =111] + 1024_ || 66.83 66.86 | 61.34 61.34 | 79.42 79.43 | //: 32-bit, GCC_v3.42 [ C =111] + 2048_ || 65.67 65.73 | 59.33 59.33 | 74.70 74.71 | //: 32-bit, GCC_v3.42 [ C =111] + 4096_ || 65.08 65.15 | 58.33 58.33 | 72.33 72.34 | //: 32-bit, GCC_v3.42 [ C =111] + 8192_ || 65.76 70.08 | 62.66 62.66 | 77.08 77.15 | //: 32-bit, GCC_v3.42 [ C =111] + 10000_ || 70.01 70.33 | 62.84 62.92 | 77.70 77.70 | //: 32-bit, GCC_v3.42 [ C =111] + 16384_ || 69.93 70.32 | 62.63 62.71 | 72.64 72.73 | //: 32-bit, GCC_v3.42 [ C =111] + 32768_ || 69.31 69.90 | 58.90 59.54 | 73.37 76.24 | //: 32-bit, GCC_v3.42 [ C =111] + 100000_ || 67.54 70.40 | 59.09 59.39 | 72.65 73.26 | //: 32-bit, GCC_v3.42 [ C =111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [ C =111] + Block || 2928 bytes | 5568 bytes | 11712 bytes | //: 32-bit, GCC_v3.42 [ C =111] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:29:31,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2952.00 2958.00 | 6030.00 6036.00 | 13668.00 13674.00 | //: 32-bit, MSC_v9.00 [ C =111] + 2_ || 1476.00 1476.00 | 3015.00 3015.00 | 6831.00 6834.00 | //: 32-bit, MSC_v9.00 [ C =111] + 4_ || 738.00 739.50 | 1507.50 1507.50 | 3415.50 3415.50 | //: 32-bit, MSC_v9.00 [ C =111] + 8_ || 369.00 369.75 | 751.50 751.50 | 1707.00 1707.00 | //: 32-bit, MSC_v9.00 [ C =111] + 10_ || 295.80 295.80 | 603.00 603.60 | 1366.80 1366.80 | //: 32-bit, MSC_v9.00 [ C =111] + 16_ || 184.88 185.25 | 376.50 376.50 | 855.38 855.38 | //: 32-bit, MSC_v9.00 [ C =111] + 32_ || 91.31 91.50 | 188.44 188.63 | 427.50 427.50 | //: 32-bit, MSC_v9.00 [ C =111] + 64_ || 66.56 66.66 | 93.84 93.84 | 213.56 213.66 | //: 32-bit, MSC_v9.00 [ C =111] + 100_ || 69.96 70.02 | 88.98 89.04 | 136.92 137.52 | //: 32-bit, MSC_v9.00 [ C =111] + 128_ || 54.14 54.23 | 69.52 69.75 | 106.69 106.88 | //: 32-bit, MSC_v9.00 [ C =111] + 256_ || 47.70 47.77 | 57.12 57.19 | 79.24 79.29 | //: 32-bit, MSC_v9.00 [ C =111] + 512_ || 44.46 44.54 | 50.75 50.81 | 65.52 65.55 | //: 32-bit, MSC_v9.00 [ C =111] + 1000_ || 43.90 43.96 | 48.78 48.85 | 60.08 60.11 | //: 32-bit, MSC_v9.00 [ C =111] + 1024_ || 42.83 42.87 | 47.44 47.65 | 58.49 58.51 | //: 32-bit, MSC_v9.00 [ C =111] + 2048_ || 42.17 42.17 | 45.83 45.83 | 55.01 55.16 | //: 32-bit, MSC_v9.00 [ C =111] + 4096_ || 41.76 41.76 | 45.02 45.03 | 53.27 53.44 | //: 32-bit, MSC_v9.00 [ C =111] + 8192_ || 38.35 41.55 | 41.20 41.26 | 48.54 51.37 | //: 32-bit, MSC_v9.00 [ C =111] + 10000_ || 41.53 41.59 | 44.76 44.80 | 
53.01 53.01 | //: 32-bit, MSC_v9.00 [ C =111] + 16384_ || 41.38 41.69 | 44.43 44.47 | 52.00 52.07 | //: 32-bit, MSC_v9.00 [ C =111] + 32768_ || 41.36 41.38 | 44.43 44.44 | 51.86 52.07 | //: 32-bit, MSC_v9.00 [ C =111] + 100000_ || 41.32 41.60 | 44.52 44.62 | 51.75 51.92 | //: 32-bit, MSC_v9.00 [ C =111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =111] + Block || 1712 bytes | 3664 bytes | 7200 bytes | //: 32-bit, MSC_v9.00 [ C =111] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:29:36,Oct 7 2008 by 'MSC_v6.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 9150.00 9156.00 | 16794.00 16836.00 | 36456.00 37386.00 | //: 32-bit, MSC_v6.00 [ C =111] + 2_ || 4335.00 4335.00 | 8244.00 8352.00 | 18156.00 18246.00 | //: 32-bit, MSC_v6.00 [ C =111] + 4_ || 2167.50 2167.50 | 4117.50 4201.50 | 9031.50 9060.00 | //: 32-bit, MSC_v6.00 [ C =111] + 8_ || 1083.00 1083.75 | 2122.50 2125.50 | 4515.00 4611.00 | //: 32-bit, MSC_v6.00 [ C =111] + 10_ || 874.80 874.80 | 1683.60 1695.60 | 3621.00 3705.00 | //: 32-bit, MSC_v6.00 [ C =111] + 16_ || 541.50 541.88 | 1041.38 1042.50 | 2268.38 2274.00 | //: 32-bit, MSC_v6.00 [ C =111] + 32_ || 271.88 272.25 | 515.63 526.13 | 1133.81 1139.06 | //: 32-bit, MSC_v6.00 [ C =111] + 64_ || 201.00 201.09 | 259.59 263.72 | 567.47 569.34 | //: 32-bit, MSC_v6.00 [ C =111] + 100_ || 211.92 211.98 | 250.32 251.10 | 363.06 363.60 | //: 32-bit, MSC_v6.00 [ C =111] + 
128_ || 166.78 167.11 | 196.73 198.28 | 283.45 284.20 | //: 32-bit, MSC_v6.00 [ C =111] + 256_ || 147.94 147.94 | 160.57 160.71 | 212.18 212.72 | //: 32-bit, MSC_v6.00 [ C =111] + 512_ || 139.32 139.37 | 143.68 143.70 | 175.95 176.36 | //: 32-bit, MSC_v6.00 [ C =111] + 1000_ || 138.17 138.18 | 140.23 140.80 | 168.46 168.46 | //: 32-bit, MSC_v6.00 [ C =111] + 1024_ || 134.92 134.92 | 135.90 136.72 | 164.48 164.48 | //: 32-bit, MSC_v6.00 [ C =111] + 2048_ || 132.76 132.76 | 132.19 132.25 | 154.34 155.67 | //: 32-bit, MSC_v6.00 [ C =111] + 4096_ || 131.66 131.74 | 132.76 133.34 | 149.64 150.49 | //: 32-bit, MSC_v6.00 [ C =111] + 8192_ || 131.21 135.88 | 120.29 124.46 | 142.16 147.73 | //: 32-bit, MSC_v6.00 [ C =111] + 10000_ || 124.18 125.11 | 123.38 125.46 | 139.12 140.88 | //: 32-bit, MSC_v6.00 [ C =111] + 16384_ || 124.27 130.94 | 122.04 127.55 | 137.91 146.32 | //: 32-bit, MSC_v6.00 [ C =111] + 32768_ || 123.57 128.76 | 120.72 121.97 | 138.10 140.89 | //: 32-bit, MSC_v6.00 [ C =111] + 100000_ || 123.30 129.21 | 123.83 125.61 | 145.19 145.40 | //: 32-bit, MSC_v6.00 [ C =111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1486 bytes | 1348 bytes | 1445 bytes | //: 32-bit, MSC_v6.00 [ C =111] + Block || 2435 bytes | 5119 bytes | 8894 bytes | //: 32-bit, MSC_v6.00 [ C =111] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:29:48,Oct 7 2008 by 'MSC_v4.20', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 5418.00 5430.00 | 9450.00 9462.00 | 20436.00 20472.00 | //: 32-bit, MSC_v4.20 [ C =111] + 2_ || 2709.00 2721.00 | 4725.00 4731.00 | 10212.00 10245.00 | //: 32-bit, MSC_v4.20 [ C =111] + 4_ || 1351.50 1354.50 | 2359.50 2361.00 | 5097.00 5107.50 | //: 32-bit, MSC_v4.20 [ C =111] + 8_ || 675.00 678.75 | 1179.75 1179.75 | 2549.25 2552.25 | //: 32-bit, MSC_v4.20 [ C =111] + 10_ || 540.60 546.60 | 943.20 944.40 | 2041.20 2041.80 | //: 32-bit, MSC_v4.20 [ C =111] + 16_ || 337.88 338.25 | 589.50 589.50 | 1273.88 1275.38 | //: 32-bit, MSC_v4.20 [ C =111] + 32_ || 167.81 167.81 | 294.94 295.13 | 636.75 637.13 | //: 32-bit, MSC_v4.20 [ C =111] + 64_ || 124.41 124.41 | 147.19 147.84 | 318.28 318.47 | //: 32-bit, MSC_v4.20 [ C =111] + 100_ || 131.46 131.52 | 140.10 140.28 | 203.76 203.94 | //: 32-bit, MSC_v4.20 [ C =111] + 128_ || 102.42 102.47 | 109.22 109.41 | 159.05 159.38 | //: 32-bit, MSC_v4.20 [ C =111] + 256_ || 91.10 91.27 | 90.59 90.59 | 118.73 118.78 | //: 32-bit, MSC_v4.20 [ C =111] + 512_ || 85.43 85.43 | 80.78 80.79 | 98.43 98.48 | //: 32-bit, MSC_v4.20 [ C =111] + 1000_ || 84.56 84.56 | 77.74 77.75 | 90.24 90.28 | //: 32-bit, MSC_v4.20 [ C =111] + 1024_ || 82.55 82.55 | 75.83 75.83 | 88.15 88.19 | //: 32-bit, MSC_v4.20 [ C =111] + 2048_ || 81.07 81.07 | 73.35 73.36 | 83.00 83.02 | //: 32-bit, MSC_v4.20 [ C =111] + 4096_ || 80.34 80.36 | 72.12 72.13 | 80.42 80.44 | //: 32-bit, MSC_v4.20 [ C =111] + 8192_ || 79.97 80.54 | 71.56 71.64 | 79.11 79.62 | //: 32-bit, MSC_v4.20 [ C =111] + 10000_ || 75.11 
80.03 | 66.25 69.37 | 73.59 74.99 | //: 32-bit, MSC_v4.20 [ C =111] + 16384_ || 75.57 80.04 | 67.66 71.51 | 74.32 74.42 | //: 32-bit, MSC_v4.20 [ C =111] + 32768_ || 75.61 80.15 | 67.03 67.84 | 74.04 78.41 | //: 32-bit, MSC_v4.20 [ C =111] + 100000_ || 77.96 80.31 | 67.58 67.84 | 74.31 74.73 | //: 32-bit, MSC_v4.20 [ C =111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1152 bytes | 1024 bytes | 1088 bytes | //: 32-bit, MSC_v4.20 [ C =111] + Block || 2064 bytes | 3840 bytes | 7616 bytes | //: 32-bit, MSC_v4.20 [ C =111] + +Skein performance, in clks per byte, dtMin = 24 clks. + [compiled 14:29:54,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 780.00 786.00 | 1422.00 1434.00 | 3810.00 3816.00 | //: 64-bit, MSC_v9.00 [ C =111] + 2_ || 384.00 390.00 | 705.00 708.00 | 1902.00 1902.00 | //: 64-bit, MSC_v9.00 [ C =111] + 4_ || 193.50 193.50 | 355.50 355.50 | 951.00 952.50 | //: 64-bit, MSC_v9.00 [ C =111] + 8_ || 93.75 93.75 | 171.00 171.75 | 474.75 475.50 | //: 64-bit, MSC_v9.00 [ C =111] + 10_ || 75.60 76.20 | 140.40 140.40 | 380.40 381.00 | //: 64-bit, MSC_v9.00 [ C =111] + 16_ || 51.38 51.38 | 93.00 93.00 | 257.25 257.63 | //: 64-bit, MSC_v9.00 [ C =111] + 32_ || 25.31 25.31 | 46.50 46.50 | 118.69 118.69 | //: 64-bit, MSC_v9.00 [ C =111] + 64_ || 16.69 16.69 | 21.38 21.38 | 59.53 59.53 | //: 64-bit, MSC_v9.00 [ C =111] + 100_ || 17.16 17.22 | 20.52 21.00 | 38.22 38.28 | //: 64-bit, MSC_v9.00 [ C =111] + 128_ || 13.27 13.27 | 15.80 15.80 
| 29.63 29.67 | //: 64-bit, MSC_v9.00 [ C =111] + 256_ || 11.16 11.18 | 12.61 12.73 | 22.10 22.10 | //: 64-bit, MSC_v9.00 [ C =111] + 512_ || 10.05 10.07 | 11.00 11.07 | 18.18 19.68 | //: 64-bit, MSC_v9.00 [ C =111] + 1000_ || 9.69 9.69 | 10.42 10.42 | 16.51 16.51 | //: 64-bit, MSC_v9.00 [ C =111] + 1024_ || 9.44 9.44 | 10.18 10.18 | 16.11 16.12 | //: 64-bit, MSC_v9.00 [ C =111] + 2048_ || 9.21 9.21 | 9.62 9.62 | 15.06 15.06 | //: 64-bit, MSC_v9.00 [ C =111] + 4096_ || 9.10 9.10 | 9.36 9.37 | 14.55 14.55 | //: 64-bit, MSC_v9.00 [ C =111] + 8192_ || 8.97 8.97 | 9.20 9.21 | 14.48 14.66 | //: 64-bit, MSC_v9.00 [ C =111] + 10000_ || 8.97 8.97 | 9.38 9.38 | 14.38 14.40 | //: 64-bit, MSC_v9.00 [ C =111] + 16384_ || 8.95 9.01 | 9.26 9.26 | 14.16 14.29 | //: 64-bit, MSC_v9.00 [ C =111] + 32768_ || 8.90 9.24 | 9.18 9.18 | 14.46 14.75 | //: 64-bit, MSC_v9.00 [ C =111] + 100000_ || 9.18 9.71 | 9.35 9.49 | 14.79 14.99 | //: 64-bit, MSC_v9.00 [ C =111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =111] + Block || 704 bytes | 1456 bytes | 2976 bytes | //: 64-bit, MSC_v9.00 [ C =111] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:29:57,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 6420.00 6420.00 | 11040.00 11040.00 | 23358.00 23364.00 | //: 32-bit, BCC_v5.51 [ C =111] + 2_ || 3210.00 3210.00 | 5517.00 5520.00 | 11679.00 11682.00 | //: 32-bit, BCC_v5.51 [ C =111] + 4_ || 1605.00 1605.00 | 2758.50 2758.50 | 5832.00 5833.50 | //: 32-bit, BCC_v5.51 [ C =111] + 8_ || 802.50 802.50 | 1379.25 1379.25 | 2916.00 2916.75 | //: 32-bit, BCC_v5.51 [ C =111] + 10_ || 642.00 642.00 | 1103.40 1103.40 | 2335.80 2335.80 | //: 32-bit, BCC_v5.51 [ C =111] + 16_ || 400.88 401.25 | 689.25 689.62 | 1458.00 1458.00 | //: 32-bit, BCC_v5.51 [ C =111] + 32_ || 199.50 199.50 | 344.44 344.44 | 729.00 729.00 | //: 32-bit, BCC_v5.51 [ C =111] + 64_ || 146.06 146.25 | 171.66 172.50 | 364.41 364.50 | //: 32-bit, BCC_v5.51 [ C =111] + 100_ || 152.28 152.28 | 162.78 162.78 | 233.16 233.16 | //: 32-bit, BCC_v5.51 [ C =111] + 128_ || 118.69 118.69 | 126.89 126.89 | 181.88 181.88 | //: 32-bit, BCC_v5.51 [ C =111] + 256_ || 104.62 104.62 | 104.48 104.48 | 135.30 135.33 | //: 32-bit, BCC_v5.51 [ C =111] + 512_ || 97.50 97.50 | 93.13 93.14 | 112.00 112.00 | //: 32-bit, BCC_v5.51 [ C =111] + 1000_ || 96.26 96.26 | 89.53 89.54 | 102.70 102.71 | //: 32-bit, BCC_v5.51 [ C =111] + 1024_ || 93.91 93.91 | 87.40 87.40 | 100.27 100.27 | //: 32-bit, BCC_v5.51 [ C =111] + 2048_ || 92.14 92.14 | 84.56 84.56 | 94.38 94.39 | //: 32-bit, BCC_v5.51 [ C =111] + 4096_ || 91.28 91.28 | 76.72 83.12 | 84.42 86.14 | //: 32-bit, BCC_v5.51 [ C =111] + 8192_ || 83.85 86.88 | 76.06 80.17 | 83.06 87.27 | //: 32-bit, BCC_v5.51 [ C =111] + 
10000_ || 83.92 87.25 | 76.30 83.56 | 86.42 87.19 | //: 32-bit, BCC_v5.51 [ C =111] + 16384_ || 85.71 87.12 | 77.78 77.82 | 84.43 84.51 | //: 32-bit, BCC_v5.51 [ C =111] + 32768_ || 85.60 86.59 | 77.64 78.17 | 84.32 84.94 | //: 32-bit, BCC_v5.51 [ C =111] + 100000_ || 86.18 87.75 | 78.03 79.63 | 84.77 88.79 | //: 32-bit, BCC_v5.51 [ C =111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [ C =111] + Block || 1888 bytes | 3028 bytes | 5864 bytes | //: 32-bit, BCC_v5.51 [ C =111] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:30:04,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2664.00 2664.00 | 4998.00 4998.00 | 10704.00 10704.00 | //: 32-bit, BCC_v5.51 [asm=111] + 2_ || 1338.00 1338.00 | 2505.00 2508.00 | 5352.00 5352.00 | //: 32-bit, BCC_v5.51 [asm=111] + 4_ || 669.00 669.00 | 1246.50 1246.50 | 2668.50 2670.00 | //: 32-bit, BCC_v5.51 [asm=111] + 8_ || 334.50 334.50 | 623.25 623.25 | 1334.25 1334.25 | //: 32-bit, BCC_v5.51 [asm=111] + 10_ || 266.40 266.40 | 501.00 501.00 | 1058.40 1058.40 | //: 32-bit, BCC_v5.51 [asm=111] + 16_ || 166.50 166.50 | 312.75 321.00 | 628.50 629.25 | //: 32-bit, BCC_v5.51 [asm=111] + 32_ || 79.88 79.88 | 147.75 147.75 | 312.19 312.38 | //: 32-bit, BCC_v5.51 [asm=111] + 64_ || 56.53 56.53 | 73.22 73.22 | 156.09 156.09 | //: 32-bit, BCC_v5.51 [asm=111] + 100_ || 58.08 58.08 | 68.52 74.10 | 99.36 107.52 | //: 32-bit, BCC_v5.51 
[asm=111] + 128_ || 45.19 45.23 | 53.20 53.20 | 77.81 77.81 | //: 32-bit, BCC_v5.51 [asm=111] + 256_ || 39.26 39.28 | 43.24 43.24 | 57.52 62.32 | //: 32-bit, BCC_v5.51 [asm=111] + 512_ || 36.13 36.13 | 37.76 37.77 | 47.17 47.24 | //: 32-bit, BCC_v5.51 [asm=111] + 1000_ || 35.51 35.71 | 36.22 36.23 | 42.92 43.04 | //: 32-bit, BCC_v5.51 [asm=111] + 1024_ || 34.51 34.51 | 34.78 35.12 | 42.05 42.05 | //: 32-bit, BCC_v5.51 [asm=111] + 2048_ || 33.69 33.70 | 33.82 33.83 | 38.84 39.04 | //: 32-bit, BCC_v5.51 [asm=111] + 4096_ || 32.01 33.99 | 33.64 33.64 | 37.82 37.97 | //: 32-bit, BCC_v5.51 [asm=111] + 8192_ || 31.77 32.58 | 32.80 33.00 | 36.98 37.59 | //: 32-bit, BCC_v5.51 [asm=111] + 10000_ || 33.75 33.75 | 33.13 33.25 | 37.32 37.86 | //: 32-bit, BCC_v5.51 [asm=111] + 16384_ || 31.90 36.52 | 35.86 35.90 | 37.26 40.33 | //: 32-bit, BCC_v5.51 [asm=111] + 32768_ || 34.29 34.47 | 33.87 34.03 | 37.77 38.04 | //: 32-bit, BCC_v5.51 [asm=111] + 100000_ || 33.20 34.48 | 33.75 33.91 | 37.98 38.23 | //: 32-bit, BCC_v5.51 [asm=111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [asm=111] + Block || 1276 bytes | 2532 bytes | 4983 bytes | //: 32-bit, BCC_v5.51 [asm=111] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:30:08,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2580.00 2598.00 | 4842.00 4848.00 | 10578.00 10602.00 | //: 32-bit, MSC_v9.00 [asm=111] + 2_ || 1299.00 1302.00 | 2445.00 2445.00 | 5277.00 5283.00 | //: 32-bit, MSC_v9.00 [asm=111] + 4_ || 648.00 648.00 | 1213.50 1215.00 | 2644.50 2649.00 | //: 32-bit, MSC_v9.00 [asm=111] + 8_ || 324.00 324.75 | 610.50 610.50 | 1322.25 1323.00 | //: 32-bit, MSC_v9.00 [asm=111] + 10_ || 259.80 259.80 | 484.20 484.20 | 1059.60 1060.20 | //: 32-bit, MSC_v9.00 [asm=111] + 16_ || 162.00 162.38 | 302.63 302.63 | 660.38 662.63 | //: 32-bit, MSC_v9.00 [asm=111] + 32_ || 80.81 81.00 | 141.56 141.56 | 308.63 308.63 | //: 32-bit, MSC_v9.00 [asm=111] + 64_ || 54.38 54.47 | 70.41 70.41 | 154.41 154.59 | //: 32-bit, MSC_v9.00 [asm=111] + 100_ || 57.18 57.24 | 66.42 66.48 | 98.40 98.46 | //: 32-bit, MSC_v9.00 [asm=111] + 128_ || 48.28 48.28 | 51.75 51.75 | 76.97 77.02 | //: 32-bit, MSC_v9.00 [asm=111] + 256_ || 39.05 39.05 | 42.45 42.47 | 56.95 56.95 | //: 32-bit, MSC_v9.00 [asm=111] + 512_ || 36.09 36.11 | 37.65 37.66 | 47.05 47.06 | //: 32-bit, MSC_v9.00 [asm=111] + 1000_ || 35.56 35.59 | 35.96 35.96 | 42.79 42.80 | //: 32-bit, MSC_v9.00 [asm=111] + 1024_ || 34.62 34.63 | 35.28 35.28 | 41.47 41.47 | //: 32-bit, MSC_v9.00 [asm=111] + 2048_ || 33.91 33.91 | 34.00 34.08 | 39.33 39.33 | //: 32-bit, MSC_v9.00 [asm=111] + 4096_ || 33.38 33.66 | 33.49 33.49 | 38.04 38.23 | //: 32-bit, MSC_v9.00 [asm=111] + 8192_ || 33.15 33.23 | 32.76 33.07 | 37.21 37.22 | //: 32-bit, MSC_v9.00 [asm=111] + 10000_ || 33.69 36.50 | 33.29 33.42 | 37.98 
41.34 | //: 32-bit, MSC_v9.00 [asm=111] + 16384_ || 33.07 35.17 | 33.08 34.97 | 37.10 38.12 | //: 32-bit, MSC_v9.00 [asm=111] + 32768_ || 34.35 34.53 | 33.80 34.05 | 38.21 40.46 | //: 32-bit, MSC_v9.00 [asm=111] + 100000_ || 33.96 34.57 | 33.93 35.69 | 38.04 38.20 | //: 32-bit, MSC_v9.00 [asm=111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=111] + Block || 1276 bytes | 2532 bytes | 4983 bytes | //: 32-bit, MSC_v9.00 [asm=111] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:30:13,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2514.00 2514.00 | 4836.00 4836.00 | 10392.00 10398.00 | //: 32-bit, GCC_v3.42 [asm=111] + 2_ || 1254.00 1260.00 | 2409.00 2412.00 | 5181.00 5184.00 | //: 32-bit, GCC_v3.42 [asm=111] + 4_ || 628.50 628.50 | 1204.50 1204.50 | 2596.50 2598.00 | //: 32-bit, GCC_v3.42 [asm=111] + 8_ || 312.75 312.75 | 602.25 603.00 | 1298.25 1299.00 | //: 32-bit, GCC_v3.42 [asm=111] + 10_ || 250.80 251.40 | 482.40 483.00 | 1035.00 1035.60 | //: 32-bit, GCC_v3.42 [asm=111] + 16_ || 157.50 157.50 | 302.25 302.63 | 652.50 652.50 | //: 32-bit, GCC_v3.42 [asm=111] + 32_ || 78.19 78.38 | 151.88 152.06 | 326.81 326.81 | //: 32-bit, GCC_v3.42 [asm=111] + 64_ || 57.09 57.19 | 75.47 75.47 | 163.31 163.41 | //: 32-bit, GCC_v3.42 [asm=111] + 100_ || 60.06 60.06 | 71.22 71.28 | 104.58 104.58 | //: 32-bit, GCC_v3.42 [asm=111] + 128_ || 46.83 46.88 | 55.45 55.50 | 
81.33 81.38 | //: 32-bit, GCC_v3.42 [asm=111] + 256_ || 41.32 41.34 | 45.47 45.49 | 59.91 59.91 | //: 32-bit, GCC_v3.42 [asm=111] + 512_ || 38.51 38.52 | 40.16 40.16 | 49.49 49.49 | //: 32-bit, GCC_v3.42 [asm=111] + 1000_ || 37.92 37.93 | 38.60 38.60 | 45.40 45.41 | //: 32-bit, GCC_v3.42 [asm=111] + 1024_ || 37.08 37.08 | 37.93 38.33 | 45.24 45.25 | //: 32-bit, GCC_v3.42 [asm=111] + 2048_ || 36.55 36.56 | 36.88 36.88 | 42.42 42.49 | //: 32-bit, GCC_v3.42 [asm=111] + 4096_ || 35.77 35.77 | 33.56 37.02 | 37.73 39.65 | //: 32-bit, GCC_v3.42 [asm=111] + 8192_ || 32.68 34.17 | 33.13 33.19 | 38.41 40.47 | //: 32-bit, GCC_v3.42 [asm=111] + 10000_ || 35.92 36.59 | 35.00 36.14 | 37.65 39.24 | //: 32-bit, GCC_v3.42 [asm=111] + 16384_ || 33.37 34.20 | 32.77 33.93 | 36.86 37.94 | //: 32-bit, GCC_v3.42 [asm=111] + 32768_ || 34.22 34.41 | 33.82 34.06 | 37.39 37.74 | //: 32-bit, GCC_v3.42 [asm=111] + 100000_ || 34.23 34.34 | 33.81 34.20 | 37.34 37.86 | //: 32-bit, GCC_v3.42 [asm=111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [asm=111] + Block || 1276 bytes | 2532 bytes | 4983 bytes | //: 32-bit, GCC_v3.42 [asm=111] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:30:17,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 678.00 678.00 | 1098.00 1098.00 | 2034.00 2040.00 | //: 64-bit, MSC_v9.00 [asm=111] + 2_ || 339.00 339.00 | 546.00 546.00 | 1017.00 1020.00 | //: 64-bit, MSC_v9.00 [asm=111] + 4_ || 168.00 169.50 | 273.00 273.00 | 510.00 511.50 | //: 64-bit, MSC_v9.00 [asm=111] + 8_ || 81.75 82.50 | 134.25 134.25 | 254.25 255.00 | //: 64-bit, MSC_v9.00 [asm=111] + 10_ || 66.60 66.60 | 109.80 109.80 | 204.00 204.00 | //: 64-bit, MSC_v9.00 [asm=111] + 16_ || 40.88 40.88 | 66.75 67.13 | 127.50 127.50 | //: 64-bit, MSC_v9.00 [asm=111] + 32_ || 20.25 20.44 | 33.56 33.56 | 63.56 63.56 | //: 64-bit, MSC_v9.00 [asm=111] + 64_ || 14.91 15.00 | 16.50 16.50 | 31.69 31.69 | //: 64-bit, MSC_v9.00 [asm=111] + 100_ || 15.48 16.68 | 16.98 16.98 | 22.38 22.38 | //: 64-bit, MSC_v9.00 [asm=111] + 128_ || 12.80 12.80 | 12.94 12.94 | 15.84 15.89 | //: 64-bit, MSC_v9.00 [asm=111] + 256_ || 9.84 9.84 | 9.33 9.33 | 11.60 11.63 | //: 64-bit, MSC_v9.00 [asm=111] + 512_ || 8.75 8.79 | 8.53 8.57 | 9.36 9.38 | //: 64-bit, MSC_v9.00 [asm=111] + 1000_ || 8.45 8.45 | 7.93 7.93 | 8.39 8.39 | //: 64-bit, MSC_v9.00 [asm=111] + 1024_ || 8.25 8.25 | 7.14 7.14 | 8.19 8.19 | //: 64-bit, MSC_v9.00 [asm=111] + 2048_ || 8.00 8.00 | 6.77 7.33 | 7.58 7.58 | //: 64-bit, MSC_v9.00 [asm=111] + 4096_ || 7.88 7.88 | 6.58 6.58 | 7.29 7.29 | //: 64-bit, MSC_v9.00 [asm=111] + 8192_ || 7.81 7.81 | 6.49 6.49 | 7.13 7.15 | //: 64-bit, MSC_v9.00 [asm=111] + 10000_ || 7.81 7.81 | 6.50 6.50 | 7.18 7.18 | //: 64-bit, MSC_v9.00 [asm=111] + 16384_ || 7.79 7.79 | 6.42 6.42 | 
7.04 7.04 | //: 64-bit, MSC_v9.00 [asm=111] + 32768_ || 7.77 7.77 | 6.40 6.40 | 7.03 7.03 | //: 64-bit, MSC_v9.00 [asm=111] + 100000_ || 8.08 8.09 | 6.40 6.71 | 6.98 7.21 | //: 64-bit, MSC_v9.00 [asm=111] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=111] + Block || 664 bytes | 1074 bytes | 2221 bytes | //: 64-bit, MSC_v9.00 [asm=111] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:30:19,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 4272.00 4296.00 | 7974.00 7980.00 | 17484.00 17496.00 | //: 32-bit, GCC_v3.42 [ C =332] + 2_ || 2139.00 2154.00 | 3981.00 3996.00 | 8736.00 8754.00 | //: 32-bit, GCC_v3.42 [ C =332] + 4_ || 1069.50 1071.00 | 1995.00 2002.50 | 4377.00 4378.50 | //: 32-bit, GCC_v3.42 [ C =332] + 8_ || 536.25 538.50 | 998.25 1000.50 | 2183.25 2186.25 | //: 32-bit, GCC_v3.42 [ C =332] + 10_ || 429.00 430.20 | 798.60 807.60 | 1749.60 1752.00 | //: 32-bit, GCC_v3.42 [ C =332] + 16_ || 267.75 270.00 | 498.00 499.88 | 1092.00 1093.13 | //: 32-bit, GCC_v3.42 [ C =332] + 32_ || 132.75 133.50 | 249.19 249.75 | 546.38 547.50 | //: 32-bit, GCC_v3.42 [ C =332] + 64_ || 98.44 99.00 | 123.94 124.03 | 272.25 272.34 | //: 32-bit, GCC_v3.42 [ C =332] + 100_ || 103.08 103.08 | 117.96 117.96 | 174.24 174.42 | //: 32-bit, GCC_v3.42 [ C =332] + 128_ || 80.72 121.13 | 92.34 133.22 | 137.06 137.39 | //: 32-bit, GCC_v3.42 [ C =332] + 256_ || 71.91 72.21 | 75.84 
76.01 | 101.93 102.09 | //: 32-bit, GCC_v3.42 [ C =332] + 512_ || 67.50 67.59 | 67.62 67.75 | 83.95 84.47 | //: 32-bit, GCC_v3.42 [ C =332] + 1000_ || 66.71 67.00 | 64.95 65.28 | 77.12 77.20 | //: 32-bit, GCC_v3.42 [ C =332] + 1024_ || 64.89 64.96 | 63.19 63.23 | 74.67 74.67 | //: 32-bit, GCC_v3.42 [ C =332] + 2048_ || 63.35 63.36 | 61.13 61.14 | 70.19 70.19 | //: 32-bit, GCC_v3.42 [ C =332] + 4096_ || 62.80 62.80 | 60.11 60.12 | 62.58 62.65 | //: 32-bit, GCC_v3.42 [ C =332] + 8192_ || 57.83 59.01 | 55.02 60.12 | 66.75 66.86 | //: 32-bit, GCC_v3.42 [ C =332] + 10000_ || 62.69 62.87 | 59.76 59.87 | 67.20 67.63 | //: 32-bit, GCC_v3.42 [ C =332] + 16384_ || 62.50 62.75 | 55.96 59.35 | 62.39 63.28 | //: 32-bit, GCC_v3.42 [ C =332] + 32768_ || 58.66 59.69 | 56.17 56.62 | 61.97 63.07 | //: 32-bit, GCC_v3.42 [ C =332] + 100000_ || 59.31 59.99 | 56.30 57.46 | 62.79 63.27 | //: 32-bit, GCC_v3.42 [ C =332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [ C =332] + Block || 6640 bytes | 13040 bytes | 18448 bytes | //: 32-bit, GCC_v3.42 [ C =332] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:30:25,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2988.00 2994.00 | 6240.00 6246.00 | 13794.00 13800.00 | //: 32-bit, MSC_v9.00 [ C =332] + 2_ || 1488.00 1503.00 | 3120.00 3126.00 | 6900.00 6903.00 | //: 32-bit, MSC_v9.00 [ C =332] + 4_ || 744.00 751.50 | 1560.00 1560.00 | 3445.50 3447.00 | //: 32-bit, MSC_v9.00 [ C =332] + 8_ || 372.00 372.75 | 777.75 779.25 | 1723.50 1723.50 | //: 32-bit, MSC_v9.00 [ C =332] + 10_ || 297.60 299.40 | 623.40 624.00 | 1379.40 1380.00 | //: 32-bit, MSC_v9.00 [ C =332] + 16_ || 186.38 186.38 | 389.25 389.63 | 861.75 861.75 | //: 32-bit, MSC_v9.00 [ C =332] + 32_ || 92.44 92.44 | 195.38 195.56 | 431.25 431.44 | //: 32-bit, MSC_v9.00 [ C =332] + 64_ || 67.59 67.78 | 97.03 97.13 | 215.53 215.63 | //: 32-bit, MSC_v9.00 [ C =332] + 100_ || 70.26 70.32 | 91.92 91.92 | 138.00 138.06 | //: 32-bit, MSC_v9.00 [ C =332] + 128_ || 54.98 55.08 | 71.44 71.48 | 107.58 107.58 | //: 32-bit, MSC_v9.00 [ C =332] + 256_ || 48.68 48.70 | 58.57 58.57 | 79.83 79.83 | //: 32-bit, MSC_v9.00 [ C =332] + 512_ || 45.43 45.46 | 52.22 52.23 | 65.98 66.01 | //: 32-bit, MSC_v9.00 [ C =332] + 1000_ || 44.88 44.89 | 50.20 50.20 | 60.44 60.45 | //: 32-bit, MSC_v9.00 [ C =332] + 1024_ || 43.81 43.81 | 48.98 48.99 | 59.00 59.00 | //: 32-bit, MSC_v9.00 [ C =332] + 2048_ || 43.00 43.00 | 47.36 47.37 | 55.50 55.50 | //: 32-bit, MSC_v9.00 [ C =332] + 4096_ || 42.59 42.59 | 46.56 46.57 | 53.75 53.75 | //: 32-bit, MSC_v9.00 [ C =332] + 8192_ || 42.38 42.39 | 46.16 46.16 | 52.87 52.87 | //: 32-bit, MSC_v9.00 [ C =332] + 10000_ || 42.42 42.42 | 46.30 46.31 | 
53.29 53.31 | //: 32-bit, MSC_v9.00 [ C =332] + 16384_ || 42.28 42.60 | 45.96 46.75 | 52.45 52.52 | //: 32-bit, MSC_v9.00 [ C =332] + 32768_ || 42.25 42.36 | 45.84 45.85 | 52.30 52.32 | //: 32-bit, MSC_v9.00 [ C =332] + 100000_ || 42.21 42.50 | 43.60 45.77 | 49.55 50.03 | //: 32-bit, MSC_v9.00 [ C =332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =332] + Block || 4560 bytes | 9232 bytes | 12560 bytes | //: 32-bit, MSC_v9.00 [ C =332] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:30:31,Oct 7 2008 by 'MSC_v6.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 9054.00 9060.00 | 17406.00 17514.00 | 36888.00 37032.00 | //: 32-bit, MSC_v6.00 [ C =332] + 2_ || 4341.00 4341.00 | 9129.00 9168.00 | 18273.00 18423.00 | //: 32-bit, MSC_v6.00 [ C =332] + 4_ || 2169.00 2170.50 | 4590.00 4636.50 | 9240.00 9334.50 | //: 32-bit, MSC_v6.00 [ C =332] + 8_ || 1083.75 1084.50 | 2223.00 2243.25 | 4581.75 4663.50 | //: 32-bit, MSC_v6.00 [ C =332] + 10_ || 867.00 867.60 | 1776.60 1790.40 | 3648.00 3672.00 | //: 32-bit, MSC_v6.00 [ C =332] + 16_ || 541.13 541.50 | 1044.75 1051.50 | 2274.38 2278.50 | //: 32-bit, MSC_v6.00 [ C =332] + 32_ || 271.13 271.88 | 567.56 573.75 | 1139.25 1140.00 | //: 32-bit, MSC_v6.00 [ C =332] + 64_ || 201.09 201.09 | 270.84 272.81 | 569.72 571.59 | //: 32-bit, MSC_v6.00 [ C =332] + 100_ || 212.70 213.24 | 261.12 262.02 | 365.16 365.28 | //: 32-bit, MSC_v6.00 [ C =332] + 
128_ || 166.08 166.45 | 204.84 205.41 | 284.48 288.80 | //: 32-bit, MSC_v6.00 [ C =332] + 256_ || 148.69 149.34 | 169.59 169.95 | 221.65 221.79 | //: 32-bit, MSC_v6.00 [ C =332] + 512_ || 140.47 140.53 | 148.24 148.48 | 179.11 179.11 | //: 32-bit, MSC_v6.00 [ C =332] + 1000_ || 139.66 139.66 | 139.37 139.82 | 163.58 165.01 | //: 32-bit, MSC_v6.00 [ C =332] + 1024_ || 136.26 136.27 | 141.22 141.49 | 157.43 158.56 | //: 32-bit, MSC_v6.00 [ C =332] + 2048_ || 134.25 134.25 | 135.90 137.12 | 151.73 152.42 | //: 32-bit, MSC_v6.00 [ C =332] + 4096_ || 133.89 134.06 | 131.19 134.61 | 147.72 150.40 | //: 32-bit, MSC_v6.00 [ C =332] + 8192_ || 132.87 134.43 | 134.17 135.04 | 143.82 148.00 | //: 32-bit, MSC_v6.00 [ C =332] + 10000_ || 133.42 134.41 | 124.60 130.22 | 137.58 138.21 | //: 32-bit, MSC_v6.00 [ C =332] + 16384_ || 131.73 132.54 | 121.35 122.08 | 135.51 139.44 | //: 32-bit, MSC_v6.00 [ C =332] + 32768_ || 124.97 134.41 | 128.74 129.78 | 142.57 143.53 | //: 32-bit, MSC_v6.00 [ C =332] + 100000_ || 126.77 134.34 | 126.62 129.40 | 135.08 139.54 | //: 32-bit, MSC_v6.00 [ C =332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1486 bytes | 1348 bytes | 1445 bytes | //: 32-bit, MSC_v6.00 [ C =332] + Block || 6038 bytes | 13395 bytes | 15975 bytes | //: 32-bit, MSC_v6.00 [ C =332] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:30:42,Oct 7 2008 by 'MSC_v4.20', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 5436.00 5436.00 | 9474.00 9474.00 | 20430.00 20442.00 | //: 32-bit, MSC_v4.20 [ C =332] + 2_ || 2715.00 2718.00 | 4731.00 4734.00 | 10215.00 10218.00 | //: 32-bit, MSC_v4.20 [ C =332] + 4_ || 1359.00 1360.50 | 2364.00 2364.00 | 5098.50 5103.00 | //: 32-bit, MSC_v4.20 [ C =332] + 8_ || 680.25 680.25 | 1182.00 1182.75 | 2549.25 2550.00 | //: 32-bit, MSC_v4.20 [ C =332] + 10_ || 538.80 543.60 | 945.60 946.20 | 2042.40 2043.00 | //: 32-bit, MSC_v4.20 [ C =332] + 16_ || 339.38 339.75 | 590.63 591.00 | 1274.63 1275.00 | //: 32-bit, MSC_v4.20 [ C =332] + 32_ || 166.69 166.69 | 295.31 295.50 | 637.13 637.50 | //: 32-bit, MSC_v4.20 [ C =332] + 64_ || 123.47 123.66 | 147.28 147.38 | 318.56 318.75 | //: 32-bit, MSC_v4.20 [ C =332] + 100_ || 130.62 130.74 | 140.28 140.76 | 203.94 204.00 | //: 32-bit, MSC_v4.20 [ C =332] + 128_ || 101.44 101.67 | 109.31 109.45 | 159.14 159.33 | //: 32-bit, MSC_v4.20 [ C =332] + 256_ || 90.70 90.75 | 90.52 90.56 | 118.66 118.83 | //: 32-bit, MSC_v4.20 [ C =332] + 512_ || 85.00 85.03 | 80.81 80.87 | 98.46 98.46 | //: 32-bit, MSC_v4.20 [ C =332] + 1000_ || 84.00 84.01 | 77.78 77.78 | 90.31 90.32 | //: 32-bit, MSC_v4.20 [ C =332] + 1024_ || 81.99 82.13 | 75.93 75.93 | 88.18 88.18 | //: 32-bit, MSC_v4.20 [ C =332] + 2048_ || 80.66 80.68 | 73.43 73.43 | 83.02 83.02 | //: 32-bit, MSC_v4.20 [ C =332] + 4096_ || 79.98 80.38 | 72.23 72.27 | 80.45 80.45 | //: 32-bit, MSC_v4.20 [ C =332] + 8192_ || 79.63 80.15 | 71.66 71.73 | 79.15 79.22 | //: 32-bit, MSC_v4.20 [ C =332] + 10000_ || 79.65 
80.07 | 71.85 72.33 | 79.79 79.82 | //: 32-bit, MSC_v4.20 [ C =332] + 16384_ || 79.66 79.71 | 71.40 71.41 | 78.77 78.85 | //: 32-bit, MSC_v4.20 [ C =332] + 32768_ || 75.39 79.68 | 67.83 71.25 | 78.23 78.50 | //: 32-bit, MSC_v4.20 [ C =332] + 100000_ || 75.49 77.32 | 67.60 67.87 | 74.33 75.55 | //: 32-bit, MSC_v4.20 [ C =332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1152 bytes | 1024 bytes | 1088 bytes | //: 32-bit, MSC_v4.20 [ C =332] + Block || 4736 bytes | 8976 bytes | 12896 bytes | //: 32-bit, MSC_v4.20 [ C =332] + +Skein performance, in clks per byte, dtMin = 24 clks. + [compiled 14:30:49,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 780.00 798.00 | 1920.00 1920.00 | 3732.00 3732.00 | //: 64-bit, MSC_v9.00 [ C =332] + 2_ || 387.00 387.00 | 951.00 951.00 | 1866.00 1869.00 | //: 64-bit, MSC_v9.00 [ C =332] + 4_ || 199.50 199.50 | 477.00 480.00 | 936.00 936.00 | //: 64-bit, MSC_v9.00 [ C =332] + 8_ || 95.25 96.00 | 231.75 235.50 | 467.25 468.00 | //: 64-bit, MSC_v9.00 [ C =332] + 10_ || 76.80 76.80 | 189.00 191.40 | 402.60 402.60 | //: 64-bit, MSC_v9.00 [ C =332] + 16_ || 51.75 51.75 | 127.13 128.63 | 234.75 254.25 | //: 64-bit, MSC_v9.00 [ C =332] + 32_ || 23.63 23.63 | 58.13 58.31 | 115.69 115.69 | //: 64-bit, MSC_v9.00 [ C =332] + 64_ || 16.69 16.88 | 28.88 28.97 | 58.31 58.31 | //: 64-bit, MSC_v9.00 [ C =332] + 100_ || 17.10 17.16 | 27.66 27.90 | 37.62 37.62 | //: 64-bit, MSC_v9.00 [ C =332] + 128_ || 12.98 13.13 | 21.14 
21.47 | 29.16 29.16 | //: 64-bit, MSC_v9.00 [ C =332] + 256_ || 11.27 11.30 | 17.04 17.18 | 21.66 21.73 | //: 64-bit, MSC_v9.00 [ C =332] + 512_ || 10.20 10.20 | 16.21 16.21 | 17.79 17.82 | //: 64-bit, MSC_v9.00 [ C =332] + 1000_ || 9.98 10.12 | 14.23 14.25 | 16.13 16.13 | //: 64-bit, MSC_v9.00 [ C =332] + 1024_ || 9.73 10.54 | 13.88 13.89 | 15.73 15.73 | //: 64-bit, MSC_v9.00 [ C =332] + 2048_ || 9.48 9.48 | 13.51 13.51 | 14.70 14.70 | //: 64-bit, MSC_v9.00 [ C =332] + 4096_ || 9.35 9.36 | 13.21 13.22 | 14.16 14.16 | //: 64-bit, MSC_v9.00 [ C =332] + 8192_ || 9.25 9.25 | 13.08 13.08 | 13.93 13.93 | //: 64-bit, MSC_v9.00 [ C =332] + 10000_ || 9.27 9.28 | 12.89 12.99 | 13.98 13.98 | //: 64-bit, MSC_v9.00 [ C =332] + 16384_ || 9.26 9.28 | 12.77 12.89 | 13.74 13.74 | //: 64-bit, MSC_v9.00 [ C =332] + 32768_ || 9.23 9.25 | 12.83 13.09 | 13.77 14.27 | //: 64-bit, MSC_v9.00 [ C =332] + 100000_ || 9.32 9.56 | 13.12 13.19 | 14.15 14.23 | //: 64-bit, MSC_v9.00 [ C =332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =332] + Block || 1200 bytes | 2928 bytes | 5008 bytes | //: 64-bit, MSC_v9.00 [ C =332] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:30:52,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 5958.00 5958.00 | 10182.00 10188.00 | 21522.00 21522.00 | //: 32-bit, BCC_v5.51 [ C =332] + 2_ || 3228.00 3228.00 | 5091.00 5091.00 | 10761.00 10761.00 | //: 32-bit, BCC_v5.51 [ C =332] + 4_ || 1491.00 1491.00 | 2544.00 2544.00 | 5374.50 5821.50 | //: 32-bit, BCC_v5.51 [ C =332] + 8_ || 744.75 745.50 | 1272.00 1272.00 | 2686.50 2686.50 | //: 32-bit, BCC_v5.51 [ C =332] + 10_ || 595.80 595.80 | 1017.60 1102.80 | 2151.60 2152.20 | //: 32-bit, BCC_v5.51 [ C =332] + 16_ || 372.00 372.38 | 636.00 636.00 | 1343.25 1343.62 | //: 32-bit, BCC_v5.51 [ C =332] + 32_ || 184.69 184.69 | 317.62 317.81 | 671.62 671.62 | //: 32-bit, BCC_v5.51 [ C =332] + 64_ || 135.56 135.56 | 171.56 171.56 | 335.72 335.81 | //: 32-bit, BCC_v5.51 [ C =332] + 100_ || 141.90 141.90 | 150.18 150.18 | 232.68 232.68 | //: 32-bit, BCC_v5.51 [ C =332] + 128_ || 119.67 119.67 | 126.84 126.84 | 181.50 181.55 | //: 32-bit, BCC_v5.51 [ C =332] + 256_ || 106.15 106.15 | 104.25 104.27 | 135.00 135.02 | //: 32-bit, BCC_v5.51 [ C =332] + 512_ || 99.42 99.46 | 93.00 93.01 | 111.75 111.76 | //: 32-bit, BCC_v5.51 [ C =332] + 1000_ || 98.08 98.08 | 89.47 89.47 | 102.47 102.49 | //: 32-bit, BCC_v5.51 [ C =332] + 1024_ || 95.70 95.75 | 87.33 87.33 | 100.03 100.04 | //: 32-bit, BCC_v5.51 [ C =332] + 2048_ || 86.60 86.77 | 78.01 78.01 | 86.94 86.94 | //: 32-bit, BCC_v5.51 [ C =332] + 4096_ || 85.95 85.96 | 76.71 76.71 | 84.25 84.25 | //: 32-bit, BCC_v5.51 [ C =332] + 8192_ || 85.56 92.63 | 76.07 82.40 | 89.80 90.33 | //: 32-bit, BCC_v5.51 [ C =332] + 
10000_ || 86.68 90.16 | 79.43 81.88 | 89.21 90.98 | //: 32-bit, BCC_v5.51 [ C =332] + 16384_ || 87.25 92.78 | 82.30 82.31 | 89.34 89.41 | //: 32-bit, BCC_v5.51 [ C =332] + 32768_ || 92.38 92.56 | 81.90 82.46 | 89.01 89.03 | //: 32-bit, BCC_v5.51 [ C =332] + 100000_ || 88.00 88.57 | 78.54 81.88 | 84.53 88.34 | //: 32-bit, BCC_v5.51 [ C =332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [ C =332] + Block || 4340 bytes | 7660 bytes | 10408 bytes | //: 32-bit, BCC_v5.51 [ C =332] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:31:00,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2784.00 2784.00 | 5094.00 5100.00 | 10800.00 10806.00 | //: 32-bit, BCC_v5.51 [asm=332] + 2_ || 1374.00 1377.00 | 2538.00 2538.00 | 5370.00 5373.00 | //: 32-bit, BCC_v5.51 [asm=332] + 4_ || 687.00 687.00 | 1267.50 1267.50 | 2695.50 2697.00 | //: 32-bit, BCC_v5.51 [asm=332] + 8_ || 341.25 341.25 | 633.00 633.75 | 1348.50 1348.50 | //: 32-bit, BCC_v5.51 [asm=332] + 10_ || 278.40 282.00 | 512.40 513.00 | 1089.00 1089.00 | //: 32-bit, BCC_v5.51 [asm=332] + 16_ || 172.50 172.50 | 320.25 320.25 | 678.00 679.12 | //: 32-bit, BCC_v5.51 [asm=332] + 32_ || 85.88 85.88 | 159.38 159.56 | 339.75 339.75 | //: 32-bit, BCC_v5.51 [asm=332] + 64_ || 61.03 61.12 | 79.31 79.41 | 169.78 169.78 | //: 32-bit, BCC_v5.51 [asm=332] + 100_ || 62.40 62.46 | 73.92 73.98 | 108.48 108.72 | //: 32-bit, BCC_v5.51 
[asm=332] + 128_ || 48.28 48.28 | 57.47 57.47 | 84.14 84.14 | //: 32-bit, BCC_v5.51 [asm=332] + 256_ || 42.00 42.02 | 46.03 46.31 | 61.90 61.90 | //: 32-bit, BCC_v5.51 [asm=332] + 512_ || 37.96 39.39 | 41.17 41.18 | 51.16 51.33 | //: 32-bit, BCC_v5.51 [asm=332] + 1000_ || 37.45 37.74 | 38.66 62.12 | 45.85 69.53 | //: 32-bit, BCC_v5.51 [asm=332] + 1024_ || 36.34 58.89 | 37.42 60.09 | 44.31 44.62 | //: 32-bit, BCC_v5.51 [asm=332] + 2048_ || 35.77 36.18 | 36.06 36.34 | 42.06 42.07 | //: 32-bit, BCC_v5.51 [asm=332] + 4096_ || 35.04 35.44 | 35.33 35.33 | 40.48 48.07 | //: 32-bit, BCC_v5.51 [asm=332] + 8192_ || 34.80 43.64 | 35.91 35.92 | 40.46 40.66 | //: 32-bit, BCC_v5.51 [asm=332] + 10000_ || 35.05 36.40 | 35.54 37.36 | 41.01 54.16 | //: 32-bit, BCC_v5.51 [asm=332] + 16384_ || 34.92 36.14 | 35.74 40.79 | 40.28 43.83 | //: 32-bit, BCC_v5.51 [asm=332] + 32768_ || 35.39 38.29 | 35.19 37.31 | 39.88 40.94 | //: 32-bit, BCC_v5.51 [asm=332] + 100000_ || 36.40 38.36 | 35.18 37.16 | 40.05 40.36 | //: 32-bit, BCC_v5.51 [asm=332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [asm=332] + Block || 3060 bytes | 6300 bytes | 8835 bytes | //: 32-bit, BCC_v5.51 [asm=332] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:31:04,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2598.00 2604.00 | 4866.00 4878.00 | 10614.00 10632.00 | //: 32-bit, MSC_v9.00 [asm=332] + 2_ || 1290.00 1299.00 | 2451.00 2460.00 | 5331.00 5334.00 | //: 32-bit, MSC_v9.00 [asm=332] + 4_ || 648.00 649.50 | 1222.50 1222.50 | 2647.50 2656.50 | //: 32-bit, MSC_v9.00 [asm=332] + 8_ || 326.25 327.75 | 612.00 614.25 | 1330.50 1332.75 | //: 32-bit, MSC_v9.00 [asm=332] + 10_ || 260.40 261.00 | 490.20 490.20 | 1067.40 1067.40 | //: 32-bit, MSC_v9.00 [asm=332] + 16_ || 162.38 162.38 | 306.00 306.00 | 661.50 661.88 | //: 32-bit, MSC_v9.00 [asm=332] + 32_ || 80.81 80.81 | 153.94 153.94 | 333.75 333.94 | //: 32-bit, MSC_v9.00 [asm=332] + 64_ || 58.78 58.78 | 76.13 76.41 | 166.88 166.88 | //: 32-bit, MSC_v9.00 [asm=332] + 100_ || 60.78 60.78 | 72.00 72.00 | 106.86 106.92 | //: 32-bit, MSC_v9.00 [asm=332] + 128_ || 47.58 47.63 | 55.92 55.92 | 83.16 83.20 | //: 32-bit, MSC_v9.00 [asm=332] + 256_ || 42.05 42.05 | 45.75 45.75 | 61.59 61.64 | //: 32-bit, MSC_v9.00 [asm=332] + 512_ || 39.18 39.19 | 40.82 41.02 | 50.93 51.02 | //: 32-bit, MSC_v9.00 [asm=332] + 1000_ || 38.38 38.42 | 39.17 39.19 | 46.49 46.61 | //: 32-bit, MSC_v9.00 [asm=332] + 1024_ || 37.38 37.78 | 38.02 60.78 | 67.69 68.44 | //: 32-bit, MSC_v9.00 [asm=332] + 2048_ || 36.80 48.25 | 36.66 48.20 | 42.67 42.81 | //: 32-bit, MSC_v9.00 [asm=332] + 4096_ || 36.57 36.59 | 36.25 36.26 | 41.31 41.40 | //: 32-bit, MSC_v9.00 [asm=332] + 8192_ || 36.21 36.30 | 35.84 38.76 | 40.68 40.71 | //: 32-bit, MSC_v9.00 [asm=332] + 10000_ || 40.98 47.69 | 35.81 35.86 | 40.96 
43.93 | //: 32-bit, MSC_v9.00 [asm=332] + 16384_ || 36.27 38.04 | 35.77 43.58 | 40.33 43.27 | //: 32-bit, MSC_v9.00 [asm=332] + 32768_ || 36.04 41.09 | 35.57 35.89 | 40.17 40.36 | //: 32-bit, MSC_v9.00 [asm=332] + 100000_ || 34.46 36.34 | 34.07 37.16 | 39.60 43.18 | //: 32-bit, MSC_v9.00 [asm=332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=332] + Block || 3060 bytes | 6300 bytes | 8835 bytes | //: 32-bit, MSC_v9.00 [asm=332] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:31:10,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2550.00 2568.00 | 4896.00 4902.00 | 10662.00 10728.00 | //: 32-bit, GCC_v3.42 [asm=332] + 2_ || 1275.00 1290.00 | 2445.00 2457.00 | 5355.00 5379.00 | //: 32-bit, GCC_v3.42 [asm=332] + 4_ || 640.50 645.00 | 1224.00 1225.50 | 2655.00 2668.50 | //: 32-bit, GCC_v3.42 [asm=332] + 8_ || 318.75 320.25 | 609.75 610.50 | 1328.25 1332.75 | //: 32-bit, GCC_v3.42 [asm=332] + 10_ || 254.40 257.40 | 488.40 490.20 | 1070.40 1074.60 | //: 32-bit, GCC_v3.42 [asm=332] + 16_ || 161.25 162.00 | 306.38 307.13 | 669.00 671.25 | //: 32-bit, GCC_v3.42 [asm=332] + 32_ || 79.88 80.06 | 153.56 153.75 | 333.00 335.63 | //: 32-bit, GCC_v3.42 [asm=332] + 64_ || 58.50 58.69 | 76.50 76.59 | 166.69 167.34 | //: 32-bit, GCC_v3.42 [asm=332] + 100_ || 60.78 61.02 | 72.36 73.08 | 107.04 107.58 | //: 32-bit, GCC_v3.42 [asm=332] + 128_ || 47.39 47.44 | 56.06 56.30 | 
83.44 83.63 | //: 32-bit, GCC_v3.42 [asm=332] + 256_ || 41.79 41.88 | 46.03 46.10 | 61.71 61.92 | //: 32-bit, GCC_v3.42 [asm=332] + 512_ || 39.20 39.33 | 40.96 41.10 | 51.23 51.30 | //: 32-bit, GCC_v3.42 [asm=332] + 1000_ || 38.40 38.57 | 39.23 39.26 | 46.83 47.06 | //: 32-bit, GCC_v3.42 [asm=332] + 1024_ || 37.53 37.72 | 38.27 38.33 | 45.78 46.00 | //: 32-bit, GCC_v3.42 [asm=332] + 2048_ || 36.94 37.00 | 37.03 37.15 | 43.10 56.43 | //: 32-bit, GCC_v3.42 [asm=332] + 4096_ || 41.78 53.31 | 36.01 40.53 | 40.97 41.13 | //: 32-bit, GCC_v3.42 [asm=332] + 8192_ || 35.90 36.04 | 35.84 48.31 | 40.53 40.55 | //: 32-bit, GCC_v3.42 [asm=332] + 10000_ || 36.42 36.48 | 35.85 46.01 | 40.60 40.74 | //: 32-bit, GCC_v3.42 [asm=332] + 16384_ || 36.20 39.37 | 35.61 38.72 | 40.15 41.33 | //: 32-bit, GCC_v3.42 [asm=332] + 32768_ || 36.47 40.53 | 35.81 39.15 | 40.13 41.96 | //: 32-bit, GCC_v3.42 [asm=332] + 100000_ || 36.70 43.77 | 35.89 37.72 | 40.20 44.66 | //: 32-bit, GCC_v3.42 [asm=332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [asm=332] + Block || 3060 bytes | 6300 bytes | 8835 bytes | //: 32-bit, GCC_v3.42 [asm=332] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:31:14,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 684.00 690.00 | 1104.00 1104.00 | 2028.00 2034.00 | //: 64-bit, MSC_v9.00 [asm=332] + 2_ || 339.00 339.00 | 549.00 549.00 | 1014.00 1017.00 | //: 64-bit, MSC_v9.00 [asm=332] + 4_ || 168.00 169.50 | 276.00 276.00 | 507.00 508.50 | //: 64-bit, MSC_v9.00 [asm=332] + 8_ || 81.75 81.75 | 135.00 146.25 | 273.75 273.75 | //: 64-bit, MSC_v9.00 [asm=332] + 10_ || 70.80 70.80 | 120.00 120.00 | 219.00 219.00 | //: 64-bit, MSC_v9.00 [asm=332] + 16_ || 44.25 44.25 | 74.25 74.25 | 126.00 126.38 | //: 64-bit, MSC_v9.00 [asm=332] + 32_ || 20.06 20.25 | 33.75 33.75 | 63.00 63.00 | //: 64-bit, MSC_v9.00 [asm=332] + 64_ || 14.53 14.53 | 16.69 16.97 | 34.13 34.13 | //: 64-bit, MSC_v9.00 [asm=332] + 100_ || 15.72 15.72 | 16.74 16.74 | 22.20 22.20 | //: 64-bit, MSC_v9.00 [asm=332] + 128_ || 11.06 11.11 | 11.77 11.81 | 15.70 15.70 | //: 64-bit, MSC_v9.00 [asm=332] + 256_ || 9.52 9.52 | 9.05 9.07 | 12.38 12.40 | //: 64-bit, MSC_v9.00 [asm=332] + 512_ || 9.35 9.35 | 7.72 7.72 | 9.26 9.26 | //: 64-bit, MSC_v9.00 [asm=332] + 1000_ || 8.42 8.42 | 7.22 7.22 | 8.30 8.30 | //: 64-bit, MSC_v9.00 [asm=332] + 1024_ || 8.19 8.87 | 7.62 7.63 | 8.12 8.12 | //: 64-bit, MSC_v9.00 [asm=332] + 2048_ || 7.97 7.97 | 7.25 7.38 | 7.52 8.15 | //: 64-bit, MSC_v9.00 [asm=332] + 4096_ || 7.86 7.88 | 6.54 7.09 | 7.84 11.52 | //: 64-bit, MSC_v9.00 [asm=332] + 8192_ || 8.49 11.80 | 9.78 10.72 | 7.05 10.38 | //: 64-bit, MSC_v9.00 [asm=332] + 10000_ || 7.85 8.51 | 6.58 6.58 | 7.11 7.12 | //: 64-bit, MSC_v9.00 [asm=332] + 16384_ || 7.86 7.88 | 6.41 6.41 
| 7.00 7.01 | //: 64-bit, MSC_v9.00 [asm=332] + 32768_ || 7.89 9.85 | 6.50 7.00 | 6.94 6.97 | //: 64-bit, MSC_v9.00 [asm=332] + 100000_ || 7.80 9.43 | 6.90 7.71 | 7.18 8.48 | //: 64-bit, MSC_v9.00 [asm=332] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=332] + Block || 1288 bytes | 2182 bytes | 3449 bytes | //: 64-bit, MSC_v9.00 [asm=332] + +Skein performance, in clks per byte, dtMin = 24 clks. + [compiled 14:31:16,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 3954.00 3966.00 | 7350.00 7350.00 | 40698.00 40704.00 | //: 32-bit, GCC_v3.42 [ C =335] + 2_ || 1977.00 1977.00 | 3678.00 3678.00 | 22035.00 24258.00 | //: 32-bit, GCC_v3.42 [ C =335] + 4_ || 1072.50 1072.50 | 1837.50 1839.00 | 10161.00 11007.00 | //: 32-bit, GCC_v3.42 [ C =335] + 8_ || 495.00 495.75 | 993.75 999.75 | 5456.25 8527.50 | //: 32-bit, GCC_v3.42 [ C =335] + 10_ || 429.60 430.80 | 730.80 801.60 | 4380.00 5951.40 | //: 32-bit, GCC_v3.42 [ C =335] + 16_ || 249.00 269.25 | 499.13 502.13 | 2741.63 4381.13 | //: 32-bit, GCC_v3.42 [ C =335] + 32_ || 133.31 135.19 | 249.19 251.25 | 1369.69 2140.69 | //: 32-bit, GCC_v3.42 [ C =335] + 64_ || 90.84 99.00 | 114.47 123.94 | 635.06 635.16 | //: 32-bit, GCC_v3.42 [ C =335] + 100_ || 95.28 103.56 | 108.90 109.44 | 406.50 580.20 | //: 32-bit, GCC_v3.42 [ C =335] + 128_ || 74.44 80.44 | 85.50 91.88 | 317.25 317.30 | //: 32-bit, GCC_v3.42 [ C =335] + 256_ || 66.00 71.48 | 
69.70 69.80 | 237.12 237.12 | //: 32-bit, GCC_v3.42 [ C =335] + 512_ || 66.96 66.98 | 67.36 67.39 | 224.53 224.58 | //: 32-bit, GCC_v3.42 [ C =335] + 1000_ || 66.20 66.21 | 64.66 64.73 | 205.97 206.02 | //: 32-bit, GCC_v3.42 [ C =335] + 1024_ || 64.61 87.60 | 63.19 63.20 | 175.51 194.46 | //: 32-bit, GCC_v3.42 [ C =335] + 2048_ || 58.87 66.35 | 56.44 60.15 | 165.23 193.47 | //: 32-bit, GCC_v3.42 [ C =335] + 4096_ || 58.09 71.92 | 55.50 55.51 | 168.27 197.84 | //: 32-bit, GCC_v3.42 [ C =335] + 8192_ || 57.83 61.97 | 55.02 64.00 | 173.49 203.78 | //: 32-bit, GCC_v3.42 [ C =335] + 10000_ || 62.69 63.71 | 59.52 62.18 | 176.01 194.46 | //: 32-bit, GCC_v3.42 [ C =335] + 16384_ || 62.78 65.42 | 59.37 63.71 | 182.36 201.21 | //: 32-bit, GCC_v3.42 [ C =335] + 32768_ || 62.48 70.41 | 59.61 63.00 | 184.14 189.59 | //: 32-bit, GCC_v3.42 [ C =335] + 100000_ || 61.14 68.82 | 59.72 62.87 | 190.22 202.45 | //: 32-bit, GCC_v3.42 [ C =335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [ C =335] + Block || 6640 bytes | 13040 bytes | 41968 bytes | //: 32-bit, GCC_v3.42 [ C =335] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:31:27,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2994.00 2994.00 | 6240.00 6240.00 | 14598.00 14604.00 | //: 32-bit, MSC_v9.00 [ C =335] + 2_ || 1488.00 1494.00 | 3123.00 3126.00 | 7308.00 7311.00 | //: 32-bit, MSC_v9.00 [ C =335] + 4_ || 744.00 745.50 | 1558.50 1558.50 | 3646.50 3648.00 | //: 32-bit, MSC_v9.00 [ C =335] + 8_ || 372.00 372.00 | 779.25 780.00 | 1827.75 1827.75 | //: 32-bit, MSC_v9.00 [ C =335] + 10_ || 300.60 301.20 | 624.00 624.60 | 1459.20 1461.00 | //: 32-bit, MSC_v9.00 [ C =335] + 16_ || 187.13 187.88 | 389.63 389.63 | 913.50 913.88 | //: 32-bit, MSC_v9.00 [ C =335] + 32_ || 92.63 93.19 | 195.38 195.56 | 456.56 456.56 | //: 32-bit, MSC_v9.00 [ C =335] + 64_ || 67.69 67.78 | 97.03 97.13 | 228.66 228.75 | //: 32-bit, MSC_v9.00 [ C =335] + 100_ || 70.62 70.68 | 91.86 91.92 | 146.10 146.16 | //: 32-bit, MSC_v9.00 [ C =335] + 128_ || 54.84 55.27 | 71.48 71.48 | 112.88 112.92 | //: 32-bit, MSC_v9.00 [ C =335] + 256_ || 48.49 48.54 | 58.48 58.50 | 83.37 83.48 | //: 32-bit, MSC_v9.00 [ C =335] + 512_ || 45.42 45.42 | 52.07 52.23 | 68.57 68.60 | //: 32-bit, MSC_v9.00 [ C =335] + 1000_ || 44.65 44.65 | 50.20 50.20 | 62.74 62.76 | //: 32-bit, MSC_v9.00 [ C =335] + 1024_ || 43.80 43.80 | 48.98 48.99 | 61.13 61.14 | //: 32-bit, MSC_v9.00 [ C =335] + 2048_ || 43.00 43.00 | 47.36 47.37 | 57.45 57.47 | //: 32-bit, MSC_v9.00 [ C =335] + 4096_ || 42.33 42.34 | 46.57 46.57 | 55.59 55.60 | //: 32-bit, MSC_v9.00 [ C =335] + 8192_ || 42.21 42.25 | 46.16 46.17 | 54.66 54.67 | //: 32-bit, MSC_v9.00 [ C =335] + 10000_ || 42.16 42.42 | 46.31 46.73 | 
55.11 55.13 | //: 32-bit, MSC_v9.00 [ C =335] + 16384_ || 42.28 42.29 | 46.21 46.24 | 54.20 54.24 | //: 32-bit, MSC_v9.00 [ C =335] + 32768_ || 42.35 42.36 | 45.95 46.10 | 50.90 51.80 | //: 32-bit, MSC_v9.00 [ C =335] + 100000_ || 40.09 40.55 | 45.76 45.97 | 51.00 53.08 | //: 32-bit, MSC_v9.00 [ C =335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [ C =335] + Block || 4560 bytes | 9232 bytes | 29280 bytes | //: 32-bit, MSC_v9.00 [ C =335] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:31:35,Oct 7 2008 by 'MSC_v6.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 8670.00 8682.00 | 17376.00 17436.00 | 55890.00 56922.00 | //: 32-bit, MSC_v6.00 [ C =335] + 2_ || 4344.00 4347.00 | 9315.00 9414.00 | 27747.00 27771.00 | //: 32-bit, MSC_v6.00 [ C =335] + 4_ || 2164.50 2164.50 | 4500.00 4522.50 | 13807.50 13896.00 | //: 32-bit, MSC_v6.00 [ C =335] + 8_ || 1082.25 1088.25 | 2235.00 2245.50 | 6796.50 6931.50 | //: 32-bit, MSC_v6.00 [ C =335] + 10_ || 866.40 871.80 | 1800.60 1811.40 | 5465.40 5560.80 | //: 32-bit, MSC_v6.00 [ C =335] + 16_ || 548.25 548.25 | 1124.25 1129.88 | 3447.00 3447.75 | //: 32-bit, MSC_v6.00 [ C =335] + 32_ || 272.25 272.63 | 560.44 563.81 | 1723.50 1723.88 | //: 32-bit, MSC_v6.00 [ C =335] + 64_ || 202.59 203.44 | 282.47 283.50 | 837.00 840.09 | //: 32-bit, MSC_v6.00 [ C =335] + 100_ || 214.38 215.88 | 269.28 269.82 | 532.74 532.86 | //: 32-bit, MSC_v6.00 [ C =335] 
+ 128_ || 167.63 167.67 | 208.64 210.70 | 418.03 421.73 | //: 32-bit, MSC_v6.00 [ C =335] + 256_ || 149.41 150.16 | 173.23 173.79 | 317.27 320.23 | //: 32-bit, MSC_v6.00 [ C =335] + 512_ || 147.41 147.48 | 148.73 148.88 | 265.04 265.10 | //: 32-bit, MSC_v6.00 [ C =335] + 1000_ || 140.53 140.53 | 144.00 144.20 | 244.22 250.33 | //: 32-bit, MSC_v6.00 [ C =335] + 1024_ || 141.71 141.73 | 142.42 142.59 | 235.18 235.96 | //: 32-bit, MSC_v6.00 [ C =335] + 2048_ || 135.36 135.38 | 137.08 137.16 | 220.27 221.11 | //: 32-bit, MSC_v6.00 [ C =335] + 4096_ || 124.41 128.93 | 123.98 126.91 | 200.32 204.77 | //: 32-bit, MSC_v6.00 [ C =335] + 8192_ || 124.35 126.84 | 124.37 130.27 | 204.17 219.92 | //: 32-bit, MSC_v6.00 [ C =335] + 10000_ || 126.45 133.37 | 133.76 134.30 | 217.48 218.27 | //: 32-bit, MSC_v6.00 [ C =335] + 16384_ || 133.14 135.72 | 128.15 128.86 | 191.67 194.97 | //: 32-bit, MSC_v6.00 [ C =335] + 32768_ || 129.20 133.13 | 127.94 129.85 | 202.98 210.08 | //: 32-bit, MSC_v6.00 [ C =335] + 100000_ || 130.83 133.01 | 121.08 129.21 | 192.14 200.80 | //: 32-bit, MSC_v6.00 [ C =335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1486 bytes | 1348 bytes | 1445 bytes | //: 32-bit, MSC_v6.00 [ C =335] + Block || 6038 bytes | 13395 bytes | 37221 bytes | //: 32-bit, MSC_v6.00 [ C =335] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:31:48,Oct 7 2008 by 'MSC_v4.20', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 5430.00 5436.00 | 9474.00 9474.00 | 21330.00 21366.00 | //: 32-bit, MSC_v4.20 [ C =335] + 2_ || 2706.00 2724.00 | 4731.00 4734.00 | 10662.00 10683.00 | //: 32-bit, MSC_v4.20 [ C =335] + 4_ || 1359.00 1359.00 | 2364.00 2365.50 | 5322.00 5332.50 | //: 32-bit, MSC_v4.20 [ C =335] + 8_ || 675.00 679.50 | 1182.00 1182.75 | 2661.75 2666.25 | //: 32-bit, MSC_v4.20 [ C =335] + 10_ || 540.00 543.60 | 945.60 945.60 | 2132.40 2136.00 | //: 32-bit, MSC_v4.20 [ C =335] + 16_ || 337.50 339.75 | 590.63 591.00 | 1330.50 1332.75 | //: 32-bit, MSC_v4.20 [ C =335] + 32_ || 166.69 167.25 | 295.31 295.50 | 665.25 666.38 | //: 32-bit, MSC_v4.20 [ C =335] + 64_ || 123.66 123.84 | 147.19 147.28 | 332.63 333.19 | //: 32-bit, MSC_v4.20 [ C =335] + 100_ || 130.62 130.98 | 140.28 140.76 | 212.88 213.24 | //: 32-bit, MSC_v4.20 [ C =335] + 128_ || 101.44 101.72 | 109.31 109.45 | 166.08 166.41 | //: 32-bit, MSC_v4.20 [ C =335] + 256_ || 90.54 90.73 | 90.54 90.56 | 124.99 125.11 | //: 32-bit, MSC_v4.20 [ C =335] + 512_ || 84.93 85.02 | 80.95 80.95 | 101.98 101.98 | //: 32-bit, MSC_v4.20 [ C =335] + 1000_ || 84.00 84.02 | 77.78 77.79 | 93.61 93.62 | //: 32-bit, MSC_v4.20 [ C =335] + 1024_ || 81.96 82.10 | 75.93 75.93 | 91.37 91.39 | //: 32-bit, MSC_v4.20 [ C =335] + 2048_ || 80.68 80.69 | 73.49 73.49 | 85.58 85.59 | //: 32-bit, MSC_v4.20 [ C =335] + 4096_ || 79.98 80.00 | 72.23 72.24 | 82.21 82.57 | //: 32-bit, MSC_v4.20 [ C =335] + 8192_ || 79.62 80.01 | 71.61 72.15 | 80.57 81.37 | //: 32-bit, MSC_v4.20 [ C =335] + 10000_ || 79.72 
80.04 | 71.86 71.92 | 81.67 81.67 | //: 32-bit, MSC_v4.20 [ C =335] + 16384_ || 79.47 79.72 | 67.22 67.80 | 76.81 77.22 | //: 32-bit, MSC_v4.20 [ C =335] + 32768_ || 75.20 79.32 | 67.07 68.60 | 75.91 78.02 | //: 32-bit, MSC_v4.20 [ C =335] + 100000_ || 75.38 75.82 | 67.48 69.43 | 74.87 77.52 | //: 32-bit, MSC_v4.20 [ C =335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1152 bytes | 1024 bytes | 1088 bytes | //: 32-bit, MSC_v4.20 [ C =335] + Block || 4736 bytes | 8976 bytes | 28880 bytes | //: 32-bit, MSC_v4.20 [ C =335] + +Skein performance, in clks per byte, dtMin = 24 clks. + [compiled 14:31:56,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 780.00 798.00 | 1890.00 1920.00 | 3498.00 3498.00 | //: 64-bit, MSC_v9.00 [ C =335] + 2_ || 387.00 387.00 | 951.00 969.00 | 1752.00 1752.00 | //: 64-bit, MSC_v9.00 [ C =335] + 4_ || 201.00 201.00 | 477.00 478.50 | 877.50 877.50 | //: 64-bit, MSC_v9.00 [ C =335] + 8_ || 95.25 96.00 | 234.75 237.00 | 440.25 441.00 | //: 64-bit, MSC_v9.00 [ C =335] + 10_ || 77.40 78.00 | 190.80 195.00 | 350.40 379.20 | //: 64-bit, MSC_v9.00 [ C =335] + 16_ || 52.13 52.13 | 126.75 127.13 | 237.00 237.00 | //: 64-bit, MSC_v9.00 [ C =335] + 32_ || 23.81 25.69 | 58.69 59.25 | 109.88 109.88 | //: 64-bit, MSC_v9.00 [ C =335] + 64_ || 16.97 16.97 | 29.34 29.72 | 54.75 54.75 | //: 64-bit, MSC_v9.00 [ C =335] + 100_ || 17.10 17.10 | 27.72 28.08 | 35.28 35.28 | //: 64-bit, MSC_v9.00 [ C =335] + 128_ || 13.03 13.03 | 21.19 
21.52 | 27.33 27.33 | //: 64-bit, MSC_v9.00 [ C =335] + 256_ || 11.20 11.23 | 17.02 17.16 | 20.23 20.25 | //: 64-bit, MSC_v9.00 [ C =335] + 512_ || 10.21 10.22 | 14.95 16.15 | 16.56 17.94 | //: 64-bit, MSC_v9.00 [ C =335] + 1000_ || 9.95 10.00 | 14.23 14.24 | 15.09 15.10 | //: 64-bit, MSC_v9.00 [ C =335] + 1024_ || 9.71 10.50 | 13.91 13.91 | 14.68 14.68 | //: 64-bit, MSC_v9.00 [ C =335] + 2048_ || 9.48 9.73 | 13.43 13.51 | 13.73 13.75 | //: 64-bit, MSC_v9.00 [ C =335] + 4096_ || 9.36 9.36 | 13.21 13.22 | 13.28 13.28 | //: 64-bit, MSC_v9.00 [ C =335] + 8192_ || 9.28 9.31 | 12.83 12.94 | 12.97 14.48 | //: 64-bit, MSC_v9.00 [ C =335] + 10000_ || 9.30 10.06 | 12.94 14.10 | 13.07 14.36 | //: 64-bit, MSC_v9.00 [ C =335] + 16384_ || 9.25 9.27 | 12.98 13.01 | 12.83 12.83 | //: 64-bit, MSC_v9.00 [ C =335] + 32768_ || 9.22 9.24 | 12.81 12.91 | 12.90 12.90 | //: 64-bit, MSC_v9.00 [ C =335] + 100000_ || 9.33 9.58 | 13.94 13.95 | 13.24 13.92 | //: 64-bit, MSC_v9.00 [ C =335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [ C =335] + Block || 1200 bytes | 2928 bytes | 10880 bytes | //: 64-bit, MSC_v9.00 [ C =335] + +Skein performance, in clks per byte, dtMin = 36 clks. 
+ [compiled 14:32:00,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 5964.00 6462.00 | 10182.00 10188.00 | 21516.00 21516.00 | //: 32-bit, BCC_v5.51 [ C =335] + 2_ || 2979.00 2982.00 | 5091.00 5091.00 | 10758.00 10761.00 | //: 32-bit, BCC_v5.51 [ C =335] + 4_ || 1489.50 1489.50 | 2545.50 2757.00 | 5374.50 5374.50 | //: 32-bit, BCC_v5.51 [ C =335] + 8_ || 744.75 745.50 | 1272.00 1272.00 | 2687.25 2687.25 | //: 32-bit, BCC_v5.51 [ C =335] + 10_ || 595.20 595.20 | 1017.60 1017.60 | 2151.60 2152.20 | //: 32-bit, BCC_v5.51 [ C =335] + 16_ || 372.38 372.38 | 636.00 636.00 | 1343.25 1343.62 | //: 32-bit, BCC_v5.51 [ C =335] + 32_ || 200.25 200.25 | 317.62 317.81 | 671.44 672.00 | //: 32-bit, BCC_v5.51 [ C =335] + 64_ || 135.56 135.56 | 158.34 158.34 | 335.72 335.72 | //: 32-bit, BCC_v5.51 [ C =335] + 100_ || 141.78 141.84 | 150.18 150.18 | 214.74 214.74 | //: 32-bit, BCC_v5.51 [ C =335] + 128_ || 110.44 110.44 | 117.05 117.09 | 167.53 167.53 | //: 32-bit, BCC_v5.51 [ C =335] + 256_ || 106.15 106.15 | 104.25 104.27 | 135.00 135.02 | //: 32-bit, BCC_v5.51 [ C =335] + 512_ || 99.46 99.50 | 93.00 93.01 | 111.62 111.63 | //: 32-bit, BCC_v5.51 [ C =335] + 1000_ || 98.07 98.17 | 89.48 89.48 | 102.39 102.39 | //: 32-bit, BCC_v5.51 [ C =335] + 1024_ || 95.84 95.84 | 87.35 87.35 | 99.96 99.96 | //: 32-bit, BCC_v5.51 [ C =335] + 2048_ || 93.84 94.01 | 84.51 84.52 | 94.10 94.10 | //: 32-bit, BCC_v5.51 [ C =335] + 4096_ || 93.12 93.13 | 83.10 83.10 | 84.16 91.18 | //: 32-bit, BCC_v5.51 [ C =335] + 8192_ || 92.65 93.16 | 82.39 82.91 | 89.71 89.79 | //: 32-bit, BCC_v5.51 [ C =335] + 10000_ 
|| 92.75 93.17 | 77.57 78.30 | 83.49 85.13 | //: 32-bit, BCC_v5.51 [ C =335] + 16384_ || 87.43 88.16 | 77.83 79.58 | 84.22 84.89 | //: 32-bit, BCC_v5.51 [ C =335] + 32768_ || 87.33 88.36 | 77.63 78.37 | 84.49 86.56 | //: 32-bit, BCC_v5.51 [ C =335] + 100000_ || 87.96 89.42 | 77.90 78.17 | 84.30 85.04 | //: 32-bit, BCC_v5.51 [ C =335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [ C =335] + Block || 4340 bytes | 7660 bytes | 24192 bytes | //: 32-bit, BCC_v5.51 [ C =335] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:32:08,Oct 7 2008 by 'BCC_v5.51', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2718.00 2718.00 | 5076.00 5082.00 | 10746.00 10752.00 | //: 32-bit, BCC_v5.51 [asm=335] + 2_ || 1359.00 1362.00 | 2499.00 2499.00 | 5373.00 5376.00 | //: 32-bit, BCC_v5.51 [asm=335] + 4_ || 679.50 681.00 | 1245.00 1251.00 | 2673.00 2674.50 | //: 32-bit, BCC_v5.51 [asm=335] + 8_ || 340.50 340.50 | 622.50 625.50 | 1335.75 1336.50 | //: 32-bit, BCC_v5.51 [asm=335] + 10_ || 269.40 269.40 | 499.20 499.80 | 1075.80 1075.80 | //: 32-bit, BCC_v5.51 [asm=335] + 16_ || 170.62 170.62 | 310.88 311.25 | 667.50 667.88 | //: 32-bit, BCC_v5.51 [asm=335] + 32_ || 82.88 83.06 | 156.00 156.00 | 334.31 334.31 | //: 32-bit, BCC_v5.51 [asm=335] + 64_ || 59.25 59.34 | 77.25 77.34 | 166.50 167.53 | //: 32-bit, BCC_v5.51 [asm=335] + 100_ || 60.48 60.54 | 72.30 72.36 | 107.16 107.22 | //: 32-bit, BCC_v5.51 
[asm=335] + 128_ || 47.02 47.02 | 56.30 56.30 | 83.53 84.66 | //: 32-bit, BCC_v5.51 [asm=335] + 256_ || 42.21 42.23 | 46.59 46.62 | 62.53 62.53 | //: 32-bit, BCC_v5.51 [asm=335] + 512_ || 39.22 39.23 | 41.00 41.00 | 51.18 51.18 | //: 32-bit, BCC_v5.51 [asm=335] + 1000_ || 38.02 38.03 | 39.37 39.37 | 46.66 46.67 | //: 32-bit, BCC_v5.51 [asm=335] + 1024_ || 34.83 34.83 | 35.55 35.56 | 42.06 42.12 | //: 32-bit, BCC_v5.51 [asm=335] + 2048_ || 33.65 33.65 | 34.07 34.07 | 39.49 39.49 | //: 32-bit, BCC_v5.51 [asm=335] + 4096_ || 33.34 33.40 | 33.47 33.68 | 38.18 38.18 | //: 32-bit, BCC_v5.51 [asm=335] + 8192_ || 32.90 33.36 | 32.87 33.00 | 37.36 38.25 | //: 32-bit, BCC_v5.51 [asm=335] + 10000_ || 32.68 33.54 | 33.04 33.37 | 37.54 39.47 | //: 32-bit, BCC_v5.51 [asm=335] + 16384_ || 31.89 36.04 | 35.74 35.85 | 40.30 40.32 | //: 32-bit, BCC_v5.51 [asm=335] + 32768_ || 35.05 36.16 | 35.79 35.94 | 40.28 40.31 | //: 32-bit, BCC_v5.51 [asm=335] + 100000_ || 34.30 35.27 | 33.66 34.18 | 38.25 39.54 | //: 32-bit, BCC_v5.51 [asm=335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 996 bytes | 1000 bytes | 1068 bytes | //: 32-bit, BCC_v5.51 [asm=335] + Block || 3060 bytes | 6300 bytes | 20391 bytes | //: 32-bit, BCC_v5.51 [asm=335] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:32:11,Oct 7 2008 by 'MSC_v9.00', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2586.00 2592.00 | 4896.00 4902.00 | 10668.00 10668.00 | //: 32-bit, MSC_v9.00 [asm=335] + 2_ || 1311.00 1317.00 | 2448.00 2451.00 | 5340.00 5343.00 | //: 32-bit, MSC_v9.00 [asm=335] + 4_ || 652.50 654.00 | 1224.00 1227.00 | 2665.50 2665.50 | //: 32-bit, MSC_v9.00 [asm=335] + 8_ || 327.00 328.50 | 613.50 614.25 | 1332.75 1332.75 | //: 32-bit, MSC_v9.00 [asm=335] + 10_ || 263.40 263.40 | 489.60 489.60 | 1069.20 1069.80 | //: 32-bit, MSC_v9.00 [asm=335] + 16_ || 163.88 163.88 | 306.00 306.38 | 666.38 666.75 | //: 32-bit, MSC_v9.00 [asm=335] + 32_ || 81.00 81.00 | 154.13 154.31 | 334.31 334.50 | //: 32-bit, MSC_v9.00 [asm=335] + 64_ || 58.88 58.97 | 76.41 76.59 | 167.16 167.25 | //: 32-bit, MSC_v9.00 [asm=335] + 100_ || 61.08 61.14 | 72.30 72.36 | 107.04 107.10 | //: 32-bit, MSC_v9.00 [asm=335] + 128_ || 47.81 47.86 | 56.16 56.20 | 83.34 83.34 | //: 32-bit, MSC_v9.00 [asm=335] + 256_ || 42.14 42.16 | 45.89 45.89 | 61.64 61.76 | //: 32-bit, MSC_v9.00 [asm=335] + 512_ || 36.23 36.23 | 37.66 37.66 | 46.98 46.99 | //: 32-bit, MSC_v9.00 [asm=335] + 1000_ || 35.57 35.57 | 36.11 36.12 | 43.07 43.12 | //: 32-bit, MSC_v9.00 [asm=335] + 1024_ || 34.85 34.85 | 35.24 35.24 | 42.05 42.06 | //: 32-bit, MSC_v9.00 [asm=335] + 2048_ || 34.17 34.17 | 33.88 34.06 | 39.46 39.53 | //: 32-bit, MSC_v9.00 [asm=335] + 4096_ || 33.74 33.85 | 33.38 33.46 | 38.32 38.32 | //: 32-bit, MSC_v9.00 [asm=335] + 8192_ || 33.65 33.67 | 33.17 34.20 | 37.70 37.71 | //: 32-bit, MSC_v9.00 [asm=335] + 10000_ || 33.68 34.51 | 33.29 36.32 | 37.91 
39.80 | //: 32-bit, MSC_v9.00 [asm=335] + 16384_ || 33.26 35.05 | 32.98 35.06 | 37.34 39.35 | //: 32-bit, MSC_v9.00 [asm=335] + 32768_ || 36.34 36.38 | 35.62 35.76 | 40.21 41.08 | //: 32-bit, MSC_v9.00 [asm=335] + 100000_ || 36.32 36.43 | 35.91 35.98 | 38.02 38.19 | //: 32-bit, MSC_v9.00 [asm=335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 864 bytes | 704 bytes | 720 bytes | //: 32-bit, MSC_v9.00 [asm=335] + Block || 3060 bytes | 6300 bytes | 20391 bytes | //: 32-bit, MSC_v9.00 [asm=335] + +Skein performance, in clks per byte, dtMin = 36 clks. + [compiled 14:32:16,Oct 7 2008 by 'GCC_v3.42', 32-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 2562.00 2562.00 | 4866.00 4866.00 | 10698.00 10722.00 | //: 32-bit, GCC_v3.42 [asm=335] + 2_ || 1269.00 1275.00 | 2436.00 2439.00 | 5343.00 5355.00 | //: 32-bit, GCC_v3.42 [asm=335] + 4_ || 645.00 645.00 | 1222.50 1224.00 | 2667.00 2676.00 | //: 32-bit, GCC_v3.42 [asm=335] + 8_ || 322.50 323.25 | 610.50 610.50 | 1332.75 1338.00 | //: 32-bit, GCC_v3.42 [asm=335] + 10_ || 254.40 255.60 | 486.60 487.20 | 1070.40 1072.20 | //: 32-bit, GCC_v3.42 [asm=335] + 16_ || 161.63 162.00 | 306.00 306.00 | 668.25 669.38 | //: 32-bit, GCC_v3.42 [asm=335] + 32_ || 73.69 73.69 | 141.56 141.94 | 307.31 307.50 | //: 32-bit, GCC_v3.42 [asm=335] + 64_ || 54.28 54.38 | 70.59 70.59 | 153.66 153.75 | //: 32-bit, GCC_v3.42 [asm=335] + 100_ || 56.16 56.40 | 66.66 66.66 | 98.40 107.22 | //: 32-bit, GCC_v3.42 [asm=335] + 128_ || 47.67 47.81 | 56.16 56.20 | 
83.25 83.25 | //: 32-bit, GCC_v3.42 [asm=335] + 256_ || 41.72 41.86 | 45.84 45.87 | 61.48 61.52 | //: 32-bit, GCC_v3.42 [asm=335] + 512_ || 38.66 38.68 | 40.70 40.70 | 50.68 50.71 | //: 32-bit, GCC_v3.42 [asm=335] + 1000_ || 38.09 38.57 | 38.98 38.99 | 46.42 46.43 | //: 32-bit, GCC_v3.42 [asm=335] + 1024_ || 37.16 37.17 | 38.10 38.10 | 45.29 45.30 | //: 32-bit, GCC_v3.42 [asm=335] + 2048_ || 36.50 36.78 | 36.76 36.76 | 42.45 42.54 | //: 32-bit, GCC_v3.42 [asm=335] + 4096_ || 36.23 36.52 | 36.06 36.06 | 41.19 41.21 | //: 32-bit, GCC_v3.42 [asm=335] + 8192_ || 33.71 35.10 | 33.02 33.08 | 37.40 37.47 | //: 32-bit, GCC_v3.42 [asm=335] + 10000_ || 33.41 33.76 | 33.24 33.24 | 37.68 38.45 | //: 32-bit, GCC_v3.42 [asm=335] + 16384_ || 33.63 34.72 | 33.12 35.81 | 40.09 40.41 | //: 32-bit, GCC_v3.42 [asm=335] + 32768_ || 33.96 34.18 | 33.53 33.86 | 37.88 38.29 | //: 32-bit, GCC_v3.42 [asm=335] + 100000_ || 35.61 36.38 | 34.24 35.18 | 37.98 38.64 | //: 32-bit, GCC_v3.42 [asm=335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 1568 bytes | 1264 bytes | 1472 bytes | //: 32-bit, GCC_v3.42 [asm=335] + Block || 3060 bytes | 6300 bytes | 20391 bytes | //: 32-bit, GCC_v3.42 [asm=335] + +Skein performance, in clks per byte, dtMin = 24 clks. 
+ [compiled 14:32:20,Oct 7 2008 by 'MSC_v9.00', 64-bit] + ================================================================= + || Skein block size | + ||--------------------------------------------------------------| + Message || 256 bits | 512 bits | 1024 bits | + Length ||====================|====================|====================| + (bytes) || min median | min median | min median | +=========||====================|====================|====================| + 1_ || 684.00 690.00 | 1104.00 1104.00 | 2022.00 2022.00 | //: 64-bit, MSC_v9.00 [asm=335] + 2_ || 339.00 342.00 | 549.00 549.00 | 1011.00 1014.00 | //: 64-bit, MSC_v9.00 [asm=335] + 4_ || 168.00 169.50 | 277.50 277.50 | 505.50 505.50 | //: 64-bit, MSC_v9.00 [asm=335] + 8_ || 81.00 81.75 | 135.00 135.00 | 252.00 252.00 | //: 64-bit, MSC_v9.00 [asm=335] + 10_ || 65.40 65.40 | 109.80 109.80 | 201.60 202.20 | //: 64-bit, MSC_v9.00 [asm=335] + 16_ || 40.88 40.88 | 67.13 67.50 | 126.00 126.00 | //: 64-bit, MSC_v9.00 [asm=335] + 32_ || 20.06 20.25 | 33.56 33.75 | 62.81 63.00 | //: 64-bit, MSC_v9.00 [asm=335] + 64_ || 14.53 14.63 | 18.19 18.28 | 33.84 33.94 | //: 64-bit, MSC_v9.00 [asm=335] + 100_ || 15.78 15.78 | 16.80 16.80 | 22.02 22.08 | //: 64-bit, MSC_v9.00 [asm=335] + 128_ || 11.11 11.11 | 11.77 11.77 | 15.61 15.66 | //: 64-bit, MSC_v9.00 [asm=335] + 256_ || 9.52 9.52 | 9.07 9.09 | 11.41 11.41 | //: 64-bit, MSC_v9.00 [asm=335] + 512_ || 8.63 8.64 | 7.72 7.72 | 9.15 9.16 | //: 64-bit, MSC_v9.00 [asm=335] + 1000_ || 8.41 8.42 | 7.21 7.22 | 8.24 8.26 | //: 64-bit, MSC_v9.00 [asm=335] + 1024_ || 8.89 8.90 | 7.62 7.63 | 8.08 8.08 | //: 64-bit, MSC_v9.00 [asm=335] + 2048_ || 8.00 8.00 | 6.69 7.25 | 7.50 7.50 | //: 64-bit, MSC_v9.00 [asm=335] + 4096_ || 7.89 7.89 | 6.52 6.52 | 7.22 7.22 | //: 64-bit, MSC_v9.00 [asm=335] + 8192_ || 7.84 7.84 | 6.44 6.44 | 7.07 7.07 | //: 64-bit, MSC_v9.00 [asm=335] + 10000_ || 7.84 7.84 | 6.45 6.50 | 7.12 7.12 | //: 64-bit, MSC_v9.00 [asm=335] + 16384_ || 7.82 7.82 | 6.40 6.40 | 
6.99 7.01 | //: 64-bit, MSC_v9.00 [asm=335] + 32768_ || 7.79 7.80 | 6.37 6.37 | 6.96 6.96 | //: 64-bit, MSC_v9.00 [asm=335] + 100000_ || 8.11 8.11 | 6.49 6.74 | 6.95 7.26 | //: 64-bit, MSC_v9.00 [asm=335] +=========||====================|====================|====================| +Code Size|| | | | +=========||====================|====================|====================| + API || 992 bytes | 1312 bytes | 864 bytes | //: 64-bit, MSC_v9.00 [asm=335] + Block || 1288 bytes | 2182 bytes | 7133 bytes | //: 64-bit, MSC_v9.00 [asm=335] diff --git a/Additional_Implementations/skein_rot_search2.c b/Additional_Implementations/skein_rot_search2.c new file mode 100644 index 000000000000..a47f5c81d3e3 --- /dev/null +++ b/Additional_Implementations/skein_rot_search2.c @@ -0,0 +1,2538 @@ +/*********************************************************************** +** +** Generate Skein rotation constant candidate sets and test them. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include "brg_types.h" /* get Brian Gladman's platform-specific definitions */ + +#define uint unsigned int +#define u08b uint_8t +#define u32b uint_32t +#define u64b uint_64t + +/* Threefish algorithm parameters */ +#ifndef BITS_PER_WORD +#define BITS_PER_WORD (64) /* number of bits in each word of a Threefish block */ +#endif + +#define ROUNDS_PER_CYCLE (8) /* when do we inject keys and start reusing rotation constants? 
*/ +#define MAX_BITS_PER_BLK (1024) + +#define MAX_WORDS_PER_BLK (MAX_BITS_PER_BLK/BITS_PER_WORD) +#define MAX_ROTS_PER_CYCLE (MAX_WORDS_PER_BLK*(ROUNDS_PER_CYCLE/2)) + +/* default search parameters for different block sizes */ +#define DEFAULT_GEN_CNT_4 (5500) +#define DEFAULT_ROUND_CNT_4 ( 8) +#define MIN_HW_OR_4 (50) +#define MAX_SAT_ROUNDS_4 ( 9) + +#define DEFAULT_GEN_CNT_8 (1600) +#define DEFAULT_ROUND_CNT_8 ( 8) +#define MIN_HW_OR_8 (36) +#define MAX_SAT_ROUNDS_8 (10) + +#define DEFAULT_GEN_CNT_16 (400) /* the 1024-bit search is slower, so search for fewer iterations :-( */ +#define DEFAULT_ROUND_CNT_16 ( 9) +#define MIN_HW_OR_16 (40) +#define MAX_SAT_ROUNDS_16 (11) + +#define MAX_ROT_VER_CNT ( 4) +#define MAX_ROT_VER_MASK ((1 << MAX_ROT_VER_CNT ) - 1) + +#define MAX_POP_CNT (1024) /* size of population */ +#define MIN_POP_CNT ( 32) +#define DEFAULT_POP_CNT (MAX_POP_CNT) + +#define ID_RECALC_BIT_NUM (16) +#define TWIDDLE_CNT_BIT0 (17) +#define TWIDDLE_CNT_MASK ((1 << TWIDDLE_CNT_BIT0 ) - 1) +#define ID_RECALC_BIT ( 1 << ID_RECALC_BIT_NUM ) +#define ID_NUM_MASK ((1 << ID_RECALC_BIT_NUM ) - 1) + +#if BITS_PER_WORD == 64 +typedef u64b Word; +#elif BITS_PER_WORD == 32 +typedef u32b Word; +#else +#error "Invalid BITS_PER_WORD" +#endif + +/* tstFlag bits */ +#define TST_FLG_SHOW (1u << 0) +#define TST_FLG_SHOW_HIST (1u << 1) +#define TST_FLG_VERBOSE (1u << 2) +#define TST_FLG_STDERR (1u << 3) +#define TST_FLG_QUICK_EXIT (1u << 4) +#define TST_FLG_USE_ABS (1u << 5) +#define TST_FLG_KEEP_MIN_HW (1u << 6) +#define TST_FLG_WEIGHT_REP (1u << 7) +#define TST_FLG_CHECK_ONE (1u << 8) +#define TST_FLG_DO_RAND (1u << 9) + +/* parameters for ShowSearchRec */ +#define SHOW_ROTS_FINAL (4) +#define SHOW_ROTS_H (3) +#define SHOW_ROTS_PRELIM (2) +#define SHOW_ROTS (1) +#define SHOW_NONE (0) + +typedef struct { Word x[MAX_WORDS_PER_BLK]; } Block; + +typedef void cycle_func(Word *b, const u08b *rotates, int rounds); + +typedef struct /* record for dealing with rotation searches */ 
+ { + u08b rotList[MAX_ROTS_PER_CYCLE]; /* rotation constants */ + uint CRC; /* CRC of rotates[] -- use as a quick "ID" */ + uint ID; /* (get_rotation index) + (TwiddleCnt << TWIDDLE_CNT_BIT0) */ + uint parentCRC; /* CRC of the parent (allows us to track genealogy a bit) */ + uint rWorst; /* "worst" min bit-to-bit differential */ + u08b hw_OR[MAX_ROT_VER_CNT]; /* min hamming weights (over all words), using OR */ + } rSearchRec; + +typedef struct /* pass a bunch of parameters to RunSearch */ + { + uint tstFlags; + uint rounds; + uint minHW_or; + uint minOffs; + uint diffBits; + uint genCntMax; + uint sampleCnt; + uint maxSatRnds; + uint seed0; + uint rotVerMask; + uint popCnt; + uint runHours; /* 0 ==> never */ + uint dupRotMask; /* zero --> allow dup rots within the same round */ + uint regradeCnt; /* default = 3 */ + u64b goodRotCntMask; /* which rotation values are ok? */ + } testParms; + +/* globals */ +cycle_func *fwd_cycle = NULL; +cycle_func *rev_cycle = NULL; +cycle_func *fwd_cycle_or = NULL; /* slow but steady */ +cycle_func *rev_cycle_or = NULL; +cycle_func *fwd_cycle_or_rN = NULL; /* optimized for the current # rounds (for speed) */ +cycle_func *rev_cycle_or_rN = NULL; +const char *rotFileName = NULL; /* read from file instead of generate random? 
*/ +uint bitsPerBlock = 0; /* default is to process all block sizes */ +uint rotsPerCycle; +uint wordsPerBlock; + +/* macro "functions" */ +#define RotCnt_Bad(rotCnt) (((t.goodRotCntMask >> (rotCnt)) & 1) == 0) +#define left_rot(a,N) (((a) << (N)) | ((a) >> (BITS_PER_WORD - (N)))) +#define right_rot(a,N) (((a) >> (N)) | ((a) << (BITS_PER_WORD - (N)))) +#define DUP_64(w32) ((w32) | (((u64b) (w32)) << 32)) + +/********************** use RC4 to generate test data ******************/ +/* Note: this works identically on all platforms (big/little-endian) */ +static struct + { + uint I,J; /* RC4 vars */ + u08b state[256]; + } prng; + +void RandBytes(void *dst,uint byteCnt) + { + u08b a,b; + u08b *d = (u08b *) dst; + + for (;byteCnt;byteCnt--,d++) /* run RC4 */ + { + prng.I = (prng.I+1) & 0xFF; + a = prng.state[prng.I]; + prng.J = (prng.J+a) & 0xFF; + b = prng.state[prng.J]; + prng.state[prng.I] = b; + prng.state[prng.J] = a; + *d = prng.state[(a+b) & 0xFF]; + } + } + +/* get a pseudo-random 8-bit integer */ +uint Rand08(void) + { + u08b b; + RandBytes(&b,1); + return (uint) b; + } + +/* get a pseudo-random 32-bit integer in a portable way */ +uint Rand32(void) + { + uint i,n; + u08b tmp[sizeof(uint)]; + + RandBytes(tmp,sizeof(tmp)); + + for (i=n=0;i> (8*i)); + + /* initialize the permutation */ + for (i=0;i<256;i++) + prng.state[i]=(u08b) i; + + /* now run the RC4 key schedule */ + for (i=j=0;i<256;i++) + { + j = (j + prng.state[i] + tmp[i%8]) & 0xFF; + tmp[256] = prng.state[i]; + prng.state[i] = prng.state[j]; + prng.state[j] = tmp[256]; + } + prng.I = prng.J = 0; /* init I,J variables for RC4 */ + + /* discard some initial RC4 keystream before returning */ + RandBytes(tmp,sizeof(tmp)); + } + +/* implementations of Skein round functions for various block sizes */ +void fwd_cycle_16(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds -=8) + { + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 0]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 
3], rotates[ 1]); b[ 3] ^= b[ 2]; + b[ 4] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[ 2]); b[ 5] ^= b[ 4]; + b[ 6] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[ 3]); b[ 7] ^= b[ 6]; + b[ 8] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[ 4]); b[ 9] ^= b[ 8]; + b[10] += b[11]; b[11] = left_rot(b[11], rotates[ 5]); b[11] ^= b[10]; + b[12] += b[13]; b[13] = left_rot(b[13], rotates[ 6]); b[13] ^= b[12]; + b[14] += b[15]; b[15] = left_rot(b[15], rotates[ 7]); b[15] ^= b[14]; + if (rounds == 1) break; + + b[ 0] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[ 8]); b[ 9] ^= b[ 0]; + b[ 2] += b[13]; b[13] = left_rot(b[13], rotates[ 9]); b[13] ^= b[ 2]; + b[ 6] += b[11]; b[11] = left_rot(b[11], rotates[10]); b[11] ^= b[ 6]; + b[ 4] += b[15]; b[15] = left_rot(b[15], rotates[11]); b[15] ^= b[ 4]; + b[10] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[12]); b[ 7] ^= b[10]; + b[12] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[13]); b[ 3] ^= b[12]; + b[14] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[14]); b[ 5] ^= b[14]; + b[ 8] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[15]); b[ 1] ^= b[ 8]; + if (rounds == 2) break; + + b[ 0] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[16]); b[ 7] ^= b[ 0]; + b[ 2] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[17]); b[ 5] ^= b[ 2]; + b[ 4] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[18]); b[ 3] ^= b[ 4]; + b[ 6] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[19]); b[ 1] ^= b[ 6]; + b[12] += b[15]; b[15] = left_rot(b[15], rotates[20]); b[15] ^= b[12]; + b[14] += b[13]; b[13] = left_rot(b[13], rotates[21]); b[13] ^= b[14]; + b[ 8] += b[11]; b[11] = left_rot(b[11], rotates[22]); b[11] ^= b[ 8]; + b[10] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[23]); b[ 9] ^= b[10]; + if (rounds == 3) break; + + b[ 0] += b[15]; b[15] = left_rot(b[15], rotates[24]); b[15] ^= b[ 0]; + b[ 2] += b[11]; b[11] = left_rot(b[11], rotates[25]); b[11] ^= b[ 2]; + b[ 6] += b[13]; b[13] = left_rot(b[13], rotates[26]); b[13] ^= b[ 6]; + b[ 4] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[27]); b[ 9] ^= b[ 4]; + b[14] 
+= b[ 1]; b[ 1] = left_rot(b[ 1], rotates[28]); b[ 1] ^= b[14]; + b[ 8] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[29]); b[ 5] ^= b[ 8]; + b[10] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[30]); b[ 3] ^= b[10]; + b[12] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[31]); b[ 7] ^= b[12]; + if (rounds == 4) break; + + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[32]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[33]); b[ 3] ^= b[ 2]; + b[ 4] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[34]); b[ 5] ^= b[ 4]; + b[ 6] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[35]); b[ 7] ^= b[ 6]; + b[ 8] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[36]); b[ 9] ^= b[ 8]; + b[10] += b[11]; b[11] = left_rot(b[11], rotates[37]); b[11] ^= b[10]; + b[12] += b[13]; b[13] = left_rot(b[13], rotates[38]); b[13] ^= b[12]; + b[14] += b[15]; b[15] = left_rot(b[15], rotates[39]); b[15] ^= b[14]; + if (rounds == 5) break; + + b[ 0] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[40]); b[ 9] ^= b[ 0]; + b[ 2] += b[13]; b[13] = left_rot(b[13], rotates[41]); b[13] ^= b[ 2]; + b[ 6] += b[11]; b[11] = left_rot(b[11], rotates[42]); b[11] ^= b[ 6]; + b[ 4] += b[15]; b[15] = left_rot(b[15], rotates[43]); b[15] ^= b[ 4]; + b[10] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[44]); b[ 7] ^= b[10]; + b[12] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[45]); b[ 3] ^= b[12]; + b[14] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[46]); b[ 5] ^= b[14]; + b[ 8] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[47]); b[ 1] ^= b[ 8]; + if (rounds == 6) break; + + b[ 0] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[48]); b[ 7] ^= b[ 0]; + b[ 2] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[49]); b[ 5] ^= b[ 2]; + b[ 4] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[50]); b[ 3] ^= b[ 4]; + b[ 6] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[51]); b[ 1] ^= b[ 6]; + b[12] += b[15]; b[15] = left_rot(b[15], rotates[52]); b[15] ^= b[12]; + b[14] += b[13]; b[13] = left_rot(b[13], rotates[53]); b[13] ^= b[14]; + b[ 8] += b[11]; b[11] = left_rot(b[11], 
rotates[54]); b[11] ^= b[ 8]; + b[10] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[55]); b[ 9] ^= b[10]; + if (rounds == 7) break; + + b[ 0] += b[15]; b[15] = left_rot(b[15], rotates[56]); b[15] ^= b[ 0]; + b[ 2] += b[11]; b[11] = left_rot(b[11], rotates[57]); b[11] ^= b[ 2]; + b[ 6] += b[13]; b[13] = left_rot(b[13], rotates[58]); b[13] ^= b[ 6]; + b[ 4] += b[ 9]; b[ 9] = left_rot(b[ 9], rotates[59]); b[ 9] ^= b[ 4]; + b[14] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[60]); b[ 1] ^= b[14]; + b[ 8] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[61]); b[ 5] ^= b[ 8]; + b[10] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[62]); b[ 3] ^= b[10]; + b[12] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[63]); b[ 7] ^= b[12]; + } + } + +void fwd_cycle_8(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds -=8) + { + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 0]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 1]); b[ 3] ^= b[ 2]; + b[ 4] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[ 2]); b[ 5] ^= b[ 4]; + b[ 6] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[ 3]); b[ 7] ^= b[ 6]; + if (rounds == 1) break; + + b[ 2] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 4]); b[ 1] ^= b[ 2]; + b[ 4] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[ 5]); b[ 7] ^= b[ 4]; + b[ 6] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[ 6]); b[ 5] ^= b[ 6]; + b[ 0] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 7]); b[ 3] ^= b[ 0]; + if (rounds == 2) break; + + b[ 4] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 8]); b[ 1] ^= b[ 4]; + b[ 6] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 9]); b[ 3] ^= b[ 6]; + b[ 0] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[10]); b[ 5] ^= b[ 0]; + b[ 2] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[11]); b[ 7] ^= b[ 2]; + if (rounds == 3) break; + + b[ 6] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[12]); b[ 1] ^= b[ 6]; + b[ 0] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[13]); b[ 7] ^= b[ 0]; + b[ 2] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[14]); b[ 5] ^= b[ 2]; + b[ 4] += b[ 3]; b[ 
3] = left_rot(b[ 3], rotates[15]); b[ 3] ^= b[ 4]; + if (rounds == 4) break; + + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[16]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[17]); b[ 3] ^= b[ 2]; + b[ 4] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[18]); b[ 5] ^= b[ 4]; + b[ 6] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[19]); b[ 7] ^= b[ 6]; + if (rounds == 5) break; + + b[ 2] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[20]); b[ 1] ^= b[ 2]; + b[ 4] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[21]); b[ 7] ^= b[ 4]; + b[ 6] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[22]); b[ 5] ^= b[ 6]; + b[ 0] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[23]); b[ 3] ^= b[ 0]; + if (rounds == 6) break; + + b[ 4] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[24]); b[ 1] ^= b[ 4]; + b[ 6] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[25]); b[ 3] ^= b[ 6]; + b[ 0] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[26]); b[ 5] ^= b[ 0]; + b[ 2] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[27]); b[ 7] ^= b[ 2]; + if (rounds == 7) break; + + b[ 6] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[28]); b[ 1] ^= b[ 6]; + b[ 0] += b[ 7]; b[ 7] = left_rot(b[ 7], rotates[29]); b[ 7] ^= b[ 0]; + b[ 2] += b[ 5]; b[ 5] = left_rot(b[ 5], rotates[30]); b[ 5] ^= b[ 2]; + b[ 4] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[31]); b[ 3] ^= b[ 4]; + } + } + +void fwd_cycle_4(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds -=8) + { + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 0]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 1]); b[ 3] ^= b[ 2]; + if (rounds == 1) break; + + b[ 0] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 2]); b[ 3] ^= b[ 0]; + b[ 2] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 3]); b[ 1] ^= b[ 2]; + if (rounds == 2) break; + + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 4]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 5]); b[ 3] ^= b[ 2]; + if (rounds == 3) break; + + b[ 0] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 6]); b[ 
3] ^= b[ 0]; + b[ 2] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 7]); b[ 1] ^= b[ 2]; + if (rounds == 4) break; + + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[ 8]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[ 9]); b[ 3] ^= b[ 2]; + if (rounds == 5) break; + + b[ 0] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[10]); b[ 3] ^= b[ 0]; + b[ 2] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[11]); b[ 1] ^= b[ 2]; + if (rounds == 6) break; + + b[ 0] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[12]); b[ 1] ^= b[ 0]; + b[ 2] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[13]); b[ 3] ^= b[ 2]; + if (rounds == 7) break; + + b[ 0] += b[ 3]; b[ 3] = left_rot(b[ 3], rotates[14]); b[ 3] ^= b[ 0]; + b[ 2] += b[ 1]; b[ 1] = left_rot(b[ 1], rotates[15]); b[ 1] ^= b[ 2]; + } + } + +/* reverse versions of the cipher */ +void rev_cycle_16(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds = (rounds-1) & ~7) + { + switch (rounds & 7) + { + case 0: + b[ 7] ^= b[12]; b[ 7] = right_rot(b[ 7], rotates[63]); b[12] -= b[ 7]; + b[ 3] ^= b[10]; b[ 3] = right_rot(b[ 3], rotates[62]); b[10] -= b[ 3]; + b[ 5] ^= b[ 8]; b[ 5] = right_rot(b[ 5], rotates[61]); b[ 8] -= b[ 5]; + b[ 1] ^= b[14]; b[ 1] = right_rot(b[ 1], rotates[60]); b[14] -= b[ 1]; + b[ 9] ^= b[ 4]; b[ 9] = right_rot(b[ 9], rotates[59]); b[ 4] -= b[ 9]; + b[13] ^= b[ 6]; b[13] = right_rot(b[13], rotates[58]); b[ 6] -= b[13]; + b[11] ^= b[ 2]; b[11] = right_rot(b[11], rotates[57]); b[ 2] -= b[11]; + b[15] ^= b[ 0]; b[15] = right_rot(b[15], rotates[56]); b[ 0] -= b[15]; + case 7: + b[ 9] ^= b[10]; b[ 9] = right_rot(b[ 9], rotates[55]); b[10] -= b[ 9]; + b[11] ^= b[ 8]; b[11] = right_rot(b[11], rotates[54]); b[ 8] -= b[11]; + b[13] ^= b[14]; b[13] = right_rot(b[13], rotates[53]); b[14] -= b[13]; + b[15] ^= b[12]; b[15] = right_rot(b[15], rotates[52]); b[12] -= b[15]; + b[ 1] ^= b[ 6]; b[ 1] = right_rot(b[ 1], rotates[51]); b[ 6] -= b[ 1]; + b[ 3] ^= b[ 4]; b[ 3] = right_rot(b[ 3], rotates[50]); b[ 4] -= b[ 
3]; + b[ 5] ^= b[ 2]; b[ 5] = right_rot(b[ 5], rotates[49]); b[ 2] -= b[ 5]; + b[ 7] ^= b[ 0]; b[ 7] = right_rot(b[ 7], rotates[48]); b[ 0] -= b[ 7]; + case 6: + b[ 1] ^= b[ 8]; b[ 1] = right_rot(b[ 1], rotates[47]); b[ 8] -= b[ 1]; + b[ 5] ^= b[14]; b[ 5] = right_rot(b[ 5], rotates[46]); b[14] -= b[ 5]; + b[ 3] ^= b[12]; b[ 3] = right_rot(b[ 3], rotates[45]); b[12] -= b[ 3]; + b[ 7] ^= b[10]; b[ 7] = right_rot(b[ 7], rotates[44]); b[10] -= b[ 7]; + b[15] ^= b[ 4]; b[15] = right_rot(b[15], rotates[43]); b[ 4] -= b[15]; + b[11] ^= b[ 6]; b[11] = right_rot(b[11], rotates[42]); b[ 6] -= b[11]; + b[13] ^= b[ 2]; b[13] = right_rot(b[13], rotates[41]); b[ 2] -= b[13]; + b[ 9] ^= b[ 0]; b[ 9] = right_rot(b[ 9], rotates[40]); b[ 0] -= b[ 9]; + case 5: + b[15] ^= b[14]; b[15] = right_rot(b[15], rotates[39]); b[14] -= b[15]; + b[13] ^= b[12]; b[13] = right_rot(b[13], rotates[38]); b[12] -= b[13]; + b[11] ^= b[10]; b[11] = right_rot(b[11], rotates[37]); b[10] -= b[11]; + b[ 9] ^= b[ 8]; b[ 9] = right_rot(b[ 9], rotates[36]); b[ 8] -= b[ 9]; + b[ 7] ^= b[ 6]; b[ 7] = right_rot(b[ 7], rotates[35]); b[ 6] -= b[ 7]; + b[ 5] ^= b[ 4]; b[ 5] = right_rot(b[ 5], rotates[34]); b[ 4] -= b[ 5]; + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[33]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[32]); b[ 0] -= b[ 1]; + case 4: + b[ 7] ^= b[12]; b[ 7] = right_rot(b[ 7], rotates[31]); b[12] -= b[ 7]; + b[ 3] ^= b[10]; b[ 3] = right_rot(b[ 3], rotates[30]); b[10] -= b[ 3]; + b[ 5] ^= b[ 8]; b[ 5] = right_rot(b[ 5], rotates[29]); b[ 8] -= b[ 5]; + b[ 1] ^= b[14]; b[ 1] = right_rot(b[ 1], rotates[28]); b[14] -= b[ 1]; + b[ 9] ^= b[ 4]; b[ 9] = right_rot(b[ 9], rotates[27]); b[ 4] -= b[ 9]; + b[13] ^= b[ 6]; b[13] = right_rot(b[13], rotates[26]); b[ 6] -= b[13]; + b[11] ^= b[ 2]; b[11] = right_rot(b[11], rotates[25]); b[ 2] -= b[11]; + b[15] ^= b[ 0]; b[15] = right_rot(b[15], rotates[24]); b[ 0] -= b[15]; + case 3: + b[ 9] ^= b[10]; b[ 9] = right_rot(b[ 9], rotates[23]); 
b[10] -= b[ 9]; + b[11] ^= b[ 8]; b[11] = right_rot(b[11], rotates[22]); b[ 8] -= b[11]; + b[13] ^= b[14]; b[13] = right_rot(b[13], rotates[21]); b[14] -= b[13]; + b[15] ^= b[12]; b[15] = right_rot(b[15], rotates[20]); b[12] -= b[15]; + b[ 1] ^= b[ 6]; b[ 1] = right_rot(b[ 1], rotates[19]); b[ 6] -= b[ 1]; + b[ 3] ^= b[ 4]; b[ 3] = right_rot(b[ 3], rotates[18]); b[ 4] -= b[ 3]; + b[ 5] ^= b[ 2]; b[ 5] = right_rot(b[ 5], rotates[17]); b[ 2] -= b[ 5]; + b[ 7] ^= b[ 0]; b[ 7] = right_rot(b[ 7], rotates[16]); b[ 0] -= b[ 7]; + case 2: + b[ 1] ^= b[ 8]; b[ 1] = right_rot(b[ 1], rotates[15]); b[ 8] -= b[ 1]; + b[ 5] ^= b[14]; b[ 5] = right_rot(b[ 5], rotates[14]); b[14] -= b[ 5]; + b[ 3] ^= b[12]; b[ 3] = right_rot(b[ 3], rotates[13]); b[12] -= b[ 3]; + b[ 7] ^= b[10]; b[ 7] = right_rot(b[ 7], rotates[12]); b[10] -= b[ 7]; + b[15] ^= b[ 4]; b[15] = right_rot(b[15], rotates[11]); b[ 4] -= b[15]; + b[11] ^= b[ 6]; b[11] = right_rot(b[11], rotates[10]); b[ 6] -= b[11]; + b[13] ^= b[ 2]; b[13] = right_rot(b[13], rotates[ 9]); b[ 2] -= b[13]; + b[ 9] ^= b[ 0]; b[ 9] = right_rot(b[ 9], rotates[ 8]); b[ 0] -= b[ 9]; + case 1: + b[15] ^= b[14]; b[15] = right_rot(b[15], rotates[ 7]); b[14] -= b[15]; + b[13] ^= b[12]; b[13] = right_rot(b[13], rotates[ 6]); b[12] -= b[13]; + b[11] ^= b[10]; b[11] = right_rot(b[11], rotates[ 5]); b[10] -= b[11]; + b[ 9] ^= b[ 8]; b[ 9] = right_rot(b[ 9], rotates[ 4]); b[ 8] -= b[ 9]; + b[ 7] ^= b[ 6]; b[ 7] = right_rot(b[ 7], rotates[ 3]); b[ 6] -= b[ 7]; + b[ 5] ^= b[ 4]; b[ 5] = right_rot(b[ 5], rotates[ 2]); b[ 4] -= b[ 5]; + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[ 1]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[ 0]); b[ 0] -= b[ 1]; + } + + } + } + +void rev_cycle_8(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds = (rounds-1) & ~7) + { + switch (rounds & 7) + { + case 0: + b[ 3] ^= b[ 4]; b[ 3] = right_rot(b[ 3], rotates[31]); b[ 4] -= b[ 3]; + b[ 5] ^= b[ 2]; b[ 5] = right_rot(b[ 5], 
rotates[30]); b[ 2] -= b[ 5]; + b[ 7] ^= b[ 0]; b[ 7] = right_rot(b[ 7], rotates[29]); b[ 0] -= b[ 7]; + b[ 1] ^= b[ 6]; b[ 1] = right_rot(b[ 1], rotates[28]); b[ 6] -= b[ 1]; + case 7: + b[ 7] ^= b[ 2]; b[ 7] = right_rot(b[ 7], rotates[27]); b[ 2] -= b[ 7]; + b[ 5] ^= b[ 0]; b[ 5] = right_rot(b[ 5], rotates[26]); b[ 0] -= b[ 5]; + b[ 3] ^= b[ 6]; b[ 3] = right_rot(b[ 3], rotates[25]); b[ 6] -= b[ 3]; + b[ 1] ^= b[ 4]; b[ 1] = right_rot(b[ 1], rotates[24]); b[ 4] -= b[ 1]; + case 6: + b[ 3] ^= b[ 0]; b[ 3] = right_rot(b[ 3], rotates[23]); b[ 0] -= b[ 3]; + b[ 5] ^= b[ 6]; b[ 5] = right_rot(b[ 5], rotates[22]); b[ 6] -= b[ 5]; + b[ 7] ^= b[ 4]; b[ 7] = right_rot(b[ 7], rotates[21]); b[ 4] -= b[ 7]; + b[ 1] ^= b[ 2]; b[ 1] = right_rot(b[ 1], rotates[20]); b[ 2] -= b[ 1]; + case 5: + b[ 7] ^= b[ 6]; b[ 7] = right_rot(b[ 7], rotates[19]); b[ 6] -= b[ 7]; + b[ 5] ^= b[ 4]; b[ 5] = right_rot(b[ 5], rotates[18]); b[ 4] -= b[ 5]; + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[17]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[16]); b[ 0] -= b[ 1]; + case 4: + b[ 3] ^= b[ 4]; b[ 3] = right_rot(b[ 3], rotates[15]); b[ 4] -= b[ 3]; + b[ 5] ^= b[ 2]; b[ 5] = right_rot(b[ 5], rotates[14]); b[ 2] -= b[ 5]; + b[ 7] ^= b[ 0]; b[ 7] = right_rot(b[ 7], rotates[13]); b[ 0] -= b[ 7]; + b[ 1] ^= b[ 6]; b[ 1] = right_rot(b[ 1], rotates[12]); b[ 6] -= b[ 1]; + case 3: + b[ 7] ^= b[ 2]; b[ 7] = right_rot(b[ 7], rotates[11]); b[ 2] -= b[ 7]; + b[ 5] ^= b[ 0]; b[ 5] = right_rot(b[ 5], rotates[10]); b[ 0] -= b[ 5]; + b[ 3] ^= b[ 6]; b[ 3] = right_rot(b[ 3], rotates[ 9]); b[ 6] -= b[ 3]; + b[ 1] ^= b[ 4]; b[ 1] = right_rot(b[ 1], rotates[ 8]); b[ 4] -= b[ 1]; + case 2: + b[ 3] ^= b[ 0]; b[ 3] = right_rot(b[ 3], rotates[ 7]); b[ 0] -= b[ 3]; + b[ 5] ^= b[ 6]; b[ 5] = right_rot(b[ 5], rotates[ 6]); b[ 6] -= b[ 5]; + b[ 7] ^= b[ 4]; b[ 7] = right_rot(b[ 7], rotates[ 5]); b[ 4] -= b[ 7]; + b[ 1] ^= b[ 2]; b[ 1] = right_rot(b[ 1], rotates[ 4]); b[ 2] -= b[ 1]; + case 1: + 
b[ 7] ^= b[ 6]; b[ 7] = right_rot(b[ 7], rotates[ 3]); b[ 6] -= b[ 7]; + b[ 5] ^= b[ 4]; b[ 5] = right_rot(b[ 5], rotates[ 2]); b[ 4] -= b[ 5]; + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[ 1]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[ 0]); b[ 0] -= b[ 1]; + } + } + } + +void rev_cycle_4(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds = (rounds-1) & ~7) + { + switch (rounds & 7) + { + case 0: + b[ 1] ^= b[ 2]; b[ 1] = right_rot(b[ 1], rotates[15]); b[ 2] -= b[ 1]; + b[ 3] ^= b[ 0]; b[ 3] = right_rot(b[ 3], rotates[14]); b[ 0] -= b[ 3]; + case 7: + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[13]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[12]); b[ 0] -= b[ 1]; + case 6: + b[ 1] ^= b[ 2]; b[ 1] = right_rot(b[ 1], rotates[11]); b[ 2] -= b[ 1]; + b[ 3] ^= b[ 0]; b[ 3] = right_rot(b[ 3], rotates[10]); b[ 0] -= b[ 3]; + case 5: + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[ 9]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[ 8]); b[ 0] -= b[ 1]; + case 4: + b[ 1] ^= b[ 2]; b[ 1] = right_rot(b[ 1], rotates[ 7]); b[ 2] -= b[ 1]; + b[ 3] ^= b[ 0]; b[ 3] = right_rot(b[ 3], rotates[ 6]); b[ 0] -= b[ 3]; + case 3: + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[ 5]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[ 4]); b[ 0] -= b[ 1]; + case 2: + b[ 1] ^= b[ 2]; b[ 1] = right_rot(b[ 1], rotates[ 3]); b[ 2] -= b[ 1]; + b[ 3] ^= b[ 0]; b[ 3] = right_rot(b[ 3], rotates[ 2]); b[ 0] -= b[ 3]; + case 1: + b[ 3] ^= b[ 2]; b[ 3] = right_rot(b[ 3], rotates[ 1]); b[ 2] -= b[ 3]; + b[ 1] ^= b[ 0]; b[ 1] = right_rot(b[ 1], rotates[ 0]); b[ 0] -= b[ 1]; + } + } + } + +#ifdef TEST_OR /* enable this to simplify testing, since OR is not invertible */ +#define AddOp(I,J) b[I] += b[J] +#define SubOp(I,J) b[I] -= b[J] +#define XorOp(I,J) b[I] ^= b[J] +#else /* this is the "real" OR version */ +#define AddOp(I,J) b[I] |= b[J] +#define SubOp(I,J) b[I] |= b[J] 
+#define XorOp(I,J) b[I] |= b[J] +#endif + +/* "OR" versions of the cipher: replace ADD, XOR with OR */ +void fwd_cycle_16_or(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds -=8) + { + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[ 2]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[ 3]); XorOp( 7, 6); + AddOp( 8, 9); b[ 9] = left_rot(b[ 9], rotates[ 4]); XorOp( 9, 8); + AddOp(10,11); b[11] = left_rot(b[11], rotates[ 5]); XorOp(11,10); + AddOp(12,13); b[13] = left_rot(b[13], rotates[ 6]); XorOp(13,12); + AddOp(14,15); b[15] = left_rot(b[15], rotates[ 7]); XorOp(15,14); + if (rounds == 1) break; + + AddOp( 0, 9); b[ 9] = left_rot(b[ 9], rotates[ 8]); XorOp( 9, 0); + AddOp( 2,13); b[13] = left_rot(b[13], rotates[ 9]); XorOp(13, 2); + AddOp( 6,11); b[11] = left_rot(b[11], rotates[10]); XorOp(11, 6); + AddOp( 4,15); b[15] = left_rot(b[15], rotates[11]); XorOp(15, 4); + AddOp(10, 7); b[ 7] = left_rot(b[ 7], rotates[12]); XorOp( 7,10); + AddOp(12, 3); b[ 3] = left_rot(b[ 3], rotates[13]); XorOp( 3,12); + AddOp(14, 5); b[ 5] = left_rot(b[ 5], rotates[14]); XorOp( 5,14); + AddOp( 8, 1); b[ 1] = left_rot(b[ 1], rotates[15]); XorOp( 1, 8); + if (rounds == 2) break; + + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[16]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[17]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[18]); XorOp( 3, 4); + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[19]); XorOp( 1, 6); + AddOp(12,15); b[15] = left_rot(b[15], rotates[20]); XorOp(15,12); + AddOp(14,13); b[13] = left_rot(b[13], rotates[21]); XorOp(13,14); + AddOp( 8,11); b[11] = left_rot(b[11], rotates[22]); XorOp(11, 8); + AddOp(10, 9); b[ 9] = left_rot(b[ 9], rotates[23]); XorOp( 9,10); + if (rounds == 3) break; + + AddOp( 0,15); b[15] = left_rot(b[15], rotates[24]); XorOp(15, 0); + AddOp( 
2,11); b[11] = left_rot(b[11], rotates[25]); XorOp(11, 2); + AddOp( 6,13); b[13] = left_rot(b[13], rotates[26]); XorOp(13, 6); + AddOp( 4, 9); b[ 9] = left_rot(b[ 9], rotates[27]); XorOp( 9, 4); + AddOp(14, 1); b[ 1] = left_rot(b[ 1], rotates[28]); XorOp( 1,14); + AddOp( 8, 5); b[ 5] = left_rot(b[ 5], rotates[29]); XorOp( 5, 8); + AddOp(10, 3); b[ 3] = left_rot(b[ 3], rotates[30]); XorOp( 3,10); + AddOp(12, 7); b[ 7] = left_rot(b[ 7], rotates[31]); XorOp( 7,12); + if (rounds == 4) break; + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[32]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[33]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[34]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[35]); XorOp( 7, 6); + AddOp( 8, 9); b[ 9] = left_rot(b[ 9], rotates[36]); XorOp( 9, 8); + AddOp(10,11); b[11] = left_rot(b[11], rotates[37]); XorOp(11,10); + AddOp(12,13); b[13] = left_rot(b[13], rotates[38]); XorOp(13,12); + AddOp(14,15); b[15] = left_rot(b[15], rotates[39]); XorOp(15,14); + if (rounds == 5) break; + + AddOp( 0, 9); b[ 9] = left_rot(b[ 9], rotates[40]); XorOp( 9, 0); + AddOp( 2,13); b[13] = left_rot(b[13], rotates[41]); XorOp(13, 2); + AddOp( 6,11); b[11] = left_rot(b[11], rotates[42]); XorOp(11, 6); + AddOp( 4,15); b[15] = left_rot(b[15], rotates[43]); XorOp(15, 4); + AddOp(10, 7); b[ 7] = left_rot(b[ 7], rotates[44]); XorOp( 7,10); + AddOp(12, 3); b[ 3] = left_rot(b[ 3], rotates[45]); XorOp( 3,12); + AddOp(14, 5); b[ 5] = left_rot(b[ 5], rotates[46]); XorOp( 5,14); + AddOp( 8, 1); b[ 1] = left_rot(b[ 1], rotates[47]); XorOp( 1, 8); + if (rounds == 6) break; + + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[48]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[49]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[50]); XorOp( 3, 4); + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[51]); XorOp( 1, 6); + AddOp(12,15); b[15] = left_rot(b[15], rotates[52]); XorOp(15,12); + AddOp(14,13); 
b[13] = left_rot(b[13], rotates[53]); XorOp(13,14); + AddOp( 8,11); b[11] = left_rot(b[11], rotates[54]); XorOp(11, 8); + AddOp(10, 9); b[ 9] = left_rot(b[ 9], rotates[55]); XorOp( 9,10); + if (rounds == 7) break; + + AddOp( 0,15); b[15] = left_rot(b[15], rotates[56]); XorOp(15, 0); + AddOp( 2,11); b[11] = left_rot(b[11], rotates[57]); XorOp(11, 2); + AddOp( 6,13); b[13] = left_rot(b[13], rotates[58]); XorOp(13, 6); + AddOp( 4, 9); b[ 9] = left_rot(b[ 9], rotates[59]); XorOp( 9, 4); + AddOp(14, 1); b[ 1] = left_rot(b[ 1], rotates[60]); XorOp( 1,14); + AddOp( 8, 5); b[ 5] = left_rot(b[ 5], rotates[61]); XorOp( 5, 8); + AddOp(10, 3); b[ 3] = left_rot(b[ 3], rotates[62]); XorOp( 3,10); + AddOp(12, 7); b[ 7] = left_rot(b[ 7], rotates[63]); XorOp( 7,12); + } + } + +void fwd_cycle_8_or(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds -=8) + { + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[ 2]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[ 3]); XorOp( 7, 6); + if (rounds == 1) break; + + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[ 4]); XorOp( 1, 2); + AddOp( 4, 7); b[ 7] = left_rot(b[ 7], rotates[ 5]); XorOp( 7, 4); + AddOp( 6, 5); b[ 5] = left_rot(b[ 5], rotates[ 6]); XorOp( 5, 6); + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[ 7]); XorOp( 3, 0); + if (rounds == 2) break; + + AddOp( 4, 1); b[ 1] = left_rot(b[ 1], rotates[ 8]); XorOp( 1, 4); + AddOp( 6, 3); b[ 3] = left_rot(b[ 3], rotates[ 9]); XorOp( 3, 6); + AddOp( 0, 5); b[ 5] = left_rot(b[ 5], rotates[10]); XorOp( 5, 0); + AddOp( 2, 7); b[ 7] = left_rot(b[ 7], rotates[11]); XorOp( 7, 2); + if (rounds == 3) break; + + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[12]); XorOp( 1, 6); + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[13]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[14]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = 
left_rot(b[ 3], rotates[15]); XorOp( 3, 4); + if (rounds == 4) break; + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[16]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[17]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[18]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[19]); XorOp( 7, 6); + if (rounds == 5) break; + + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[20]); XorOp( 1, 2); + AddOp( 4, 7); b[ 7] = left_rot(b[ 7], rotates[21]); XorOp( 7, 4); + AddOp( 6, 5); b[ 5] = left_rot(b[ 5], rotates[22]); XorOp( 5, 6); + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[23]); XorOp( 3, 0); + if (rounds == 6) break; + + AddOp( 4, 1); b[ 1] = left_rot(b[ 1], rotates[24]); XorOp( 1, 4); + AddOp( 6, 3); b[ 3] = left_rot(b[ 3], rotates[25]); XorOp( 3, 6); + AddOp( 0, 5); b[ 5] = left_rot(b[ 5], rotates[26]); XorOp( 5, 0); + AddOp( 2, 7); b[ 7] = left_rot(b[ 7], rotates[27]); XorOp( 7, 2); + if (rounds == 7) break; + + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[28]); XorOp( 1, 6); + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[29]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[30]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[31]); XorOp( 3, 4); + } + } + +void fwd_cycle_4_or(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds -=8) + { + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + if (rounds == 1) break; + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[ 2]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[ 3]); XorOp( 1, 2); + if (rounds == 2) break; + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 4]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 5]); XorOp( 3, 2); + if (rounds == 3) break; + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[ 6]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[ 7]); XorOp( 1, 2); + if (rounds == 
4) break; + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 8]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 9]); XorOp( 3, 2); + if (rounds == 5) break; + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[10]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[11]); XorOp( 1, 2); + if (rounds == 6) break; + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[12]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[13]); XorOp( 3, 2); + if (rounds == 7) break; + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[14]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[15]); XorOp( 1, 2); + } + } + +/* reverse versions of the cipher, using OR */ +void rev_cycle_16_or(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds = (rounds-1) & ~7) + { + switch (rounds & 7) + { + case 0: + XorOp( 7,12); b[ 7] = right_rot(b[ 7], rotates[63]); SubOp(12, 7); + XorOp( 3,10); b[ 3] = right_rot(b[ 3], rotates[62]); SubOp(10, 3); + XorOp( 5, 8); b[ 5] = right_rot(b[ 5], rotates[61]); SubOp( 8, 5); + XorOp( 1,14); b[ 1] = right_rot(b[ 1], rotates[60]); SubOp(14, 1); + XorOp( 9, 4); b[ 9] = right_rot(b[ 9], rotates[59]); SubOp( 4, 9); + XorOp(13, 6); b[13] = right_rot(b[13], rotates[58]); SubOp( 6,13); + XorOp(11, 2); b[11] = right_rot(b[11], rotates[57]); SubOp( 2,11); + XorOp(15, 0); b[15] = right_rot(b[15], rotates[56]); SubOp( 0,15); + case 7: + XorOp( 9,10); b[ 9] = right_rot(b[ 9], rotates[55]); SubOp(10, 9); + XorOp(11, 8); b[11] = right_rot(b[11], rotates[54]); SubOp( 8,11); + XorOp(13,14); b[13] = right_rot(b[13], rotates[53]); SubOp(14,13); + XorOp(15,12); b[15] = right_rot(b[15], rotates[52]); SubOp(12,15); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[51]); SubOp( 6, 1); + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[50]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[49]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[48]); SubOp( 0, 7); + case 6: + XorOp( 1, 8); b[ 1] = 
right_rot(b[ 1], rotates[47]); SubOp( 8, 1); + XorOp( 5,14); b[ 5] = right_rot(b[ 5], rotates[46]); SubOp(14, 5); + XorOp( 3,12); b[ 3] = right_rot(b[ 3], rotates[45]); SubOp(12, 3); + XorOp( 7,10); b[ 7] = right_rot(b[ 7], rotates[44]); SubOp(10, 7); + XorOp(15, 4); b[15] = right_rot(b[15], rotates[43]); SubOp( 4,15); + XorOp(11, 6); b[11] = right_rot(b[11], rotates[42]); SubOp( 6,11); + XorOp(13, 2); b[13] = right_rot(b[13], rotates[41]); SubOp( 2,13); + XorOp( 9, 0); b[ 9] = right_rot(b[ 9], rotates[40]); SubOp( 0, 9); + case 5: + XorOp(15,14); b[15] = right_rot(b[15], rotates[39]); SubOp(14,15); + XorOp(13,12); b[13] = right_rot(b[13], rotates[38]); SubOp(12,13); + XorOp(11,10); b[11] = right_rot(b[11], rotates[37]); SubOp(10,11); + XorOp( 9, 8); b[ 9] = right_rot(b[ 9], rotates[36]); SubOp( 8, 9); + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[35]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[34]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[33]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[32]); SubOp( 0, 1); + case 4: + XorOp( 7,12); b[ 7] = right_rot(b[ 7], rotates[31]); SubOp(12, 7); + XorOp( 3,10); b[ 3] = right_rot(b[ 3], rotates[30]); SubOp(10, 3); + XorOp( 5, 8); b[ 5] = right_rot(b[ 5], rotates[29]); SubOp( 8, 5); + XorOp( 1,14); b[ 1] = right_rot(b[ 1], rotates[28]); SubOp(14, 1); + XorOp( 9, 4); b[ 9] = right_rot(b[ 9], rotates[27]); SubOp( 4, 9); + XorOp(13, 6); b[13] = right_rot(b[13], rotates[26]); SubOp( 6,13); + XorOp(11, 2); b[11] = right_rot(b[11], rotates[25]); SubOp( 2,11); + XorOp(15, 0); b[15] = right_rot(b[15], rotates[24]); SubOp( 0,15); + case 3: + XorOp( 9,10); b[ 9] = right_rot(b[ 9], rotates[23]); SubOp(10, 9); + XorOp(11, 8); b[11] = right_rot(b[11], rotates[22]); SubOp( 8,11); + XorOp(13,14); b[13] = right_rot(b[13], rotates[21]); SubOp(14,13); + XorOp(15,12); b[15] = right_rot(b[15], rotates[20]); SubOp(12,15); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[19]); SubOp( 
6, 1); + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[18]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[17]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[16]); SubOp( 0, 7); + case 2: + XorOp( 1, 8); b[ 1] = right_rot(b[ 1], rotates[15]); SubOp( 8, 1); + XorOp( 5,14); b[ 5] = right_rot(b[ 5], rotates[14]); SubOp(14, 5); + XorOp( 3,12); b[ 3] = right_rot(b[ 3], rotates[13]); SubOp(12, 3); + XorOp( 7,10); b[ 7] = right_rot(b[ 7], rotates[12]); SubOp(10, 7); + XorOp(15, 4); b[15] = right_rot(b[15], rotates[11]); SubOp( 4,15); + XorOp(11, 6); b[11] = right_rot(b[11], rotates[10]); SubOp( 6,11); + XorOp(13, 2); b[13] = right_rot(b[13], rotates[ 9]); SubOp( 2,13); + XorOp( 9, 0); b[ 9] = right_rot(b[ 9], rotates[ 8]); SubOp( 0, 9); + case 1: + XorOp(15,14); b[15] = right_rot(b[15], rotates[ 7]); SubOp(14,15); + XorOp(13,12); b[13] = right_rot(b[13], rotates[ 6]); SubOp(12,13); + XorOp(11,10); b[11] = right_rot(b[11], rotates[ 5]); SubOp(10,11); + XorOp( 9, 8); b[ 9] = right_rot(b[ 9], rotates[ 4]); SubOp( 8, 9); + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[ 3]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[ 2]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + } + + } + } + +void rev_cycle_8_or(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds = (rounds-1) & ~7) + { + switch (rounds & 7) + { + case 0: + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[31]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[30]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[29]); SubOp( 0, 7); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[28]); SubOp( 6, 1); + case 7: + XorOp( 7, 2); b[ 7] = right_rot(b[ 7], rotates[27]); SubOp( 2, 7); + XorOp( 5, 0); b[ 5] = right_rot(b[ 5], rotates[26]); SubOp( 0, 5); + XorOp( 3, 6); b[ 3] = right_rot(b[ 3], rotates[25]); SubOp( 6, 3); 
+ XorOp( 1, 4); b[ 1] = right_rot(b[ 1], rotates[24]); SubOp( 4, 1); + case 6: + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[23]); SubOp( 0, 3); + XorOp( 5, 6); b[ 5] = right_rot(b[ 5], rotates[22]); SubOp( 6, 5); + XorOp( 7, 4); b[ 7] = right_rot(b[ 7], rotates[21]); SubOp( 4, 7); + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[20]); SubOp( 2, 1); + case 5: + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[19]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[18]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[17]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[16]); SubOp( 0, 1); + case 4: + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[15]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[14]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[13]); SubOp( 0, 7); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[12]); SubOp( 6, 1); + case 3: + XorOp( 7, 2); b[ 7] = right_rot(b[ 7], rotates[11]); SubOp( 2, 7); + XorOp( 5, 0); b[ 5] = right_rot(b[ 5], rotates[10]); SubOp( 0, 5); + XorOp( 3, 6); b[ 3] = right_rot(b[ 3], rotates[ 9]); SubOp( 6, 3); + XorOp( 1, 4); b[ 1] = right_rot(b[ 1], rotates[ 8]); SubOp( 4, 1); + case 2: + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[ 7]); SubOp( 0, 3); + XorOp( 5, 6); b[ 5] = right_rot(b[ 5], rotates[ 6]); SubOp( 6, 5); + XorOp( 7, 4); b[ 7] = right_rot(b[ 7], rotates[ 5]); SubOp( 4, 7); + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[ 4]); SubOp( 2, 1); + case 1: + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[ 3]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[ 2]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + } + } + } + +void rev_cycle_4_or(Word *b, const u08b *rotates, int rounds) + { + for (;rounds > 0;rounds = (rounds-1) & ~7) + { + switch (rounds & 7) + { + case 0: + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], 
rotates[15]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[14]); SubOp( 0, 3); + case 7: + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[13]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[12]); SubOp( 0, 1); + case 6: + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[11]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[10]); SubOp( 0, 3); + case 5: + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 9]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 8]); SubOp( 0, 1); + case 4: + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[ 7]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[ 6]); SubOp( 0, 3); + case 3: + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 5]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 4]); SubOp( 0, 1); + case 2: + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[ 3]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[ 2]); SubOp( 0, 3); + case 1: + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + } + } + } + +/* optimized versions for default round counts */ +#if defined(__BORLANDC__) +#pragma argsused +#elif defined(_MSC_VER) +#pragma warning(disable:4100) +#endif +void fwd_cycle_16_or_r9(Word *b, const u08b *rotates, int rounds) + { + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[ 2]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[ 3]); XorOp( 7, 6); + AddOp( 8, 9); b[ 9] = left_rot(b[ 9], rotates[ 4]); XorOp( 9, 8); + AddOp(10,11); b[11] = left_rot(b[11], rotates[ 5]); XorOp(11,10); + AddOp(12,13); b[13] = left_rot(b[13], rotates[ 6]); XorOp(13,12); + AddOp(14,15); b[15] = left_rot(b[15], rotates[ 7]); XorOp(15,14); + + AddOp( 0, 9); b[ 9] = left_rot(b[ 9], rotates[ 8]); XorOp( 9, 0); + 
AddOp( 2,13); b[13] = left_rot(b[13], rotates[ 9]); XorOp(13, 2); + AddOp( 6,11); b[11] = left_rot(b[11], rotates[10]); XorOp(11, 6); + AddOp( 4,15); b[15] = left_rot(b[15], rotates[11]); XorOp(15, 4); + AddOp(10, 7); b[ 7] = left_rot(b[ 7], rotates[12]); XorOp( 7,10); + AddOp(12, 3); b[ 3] = left_rot(b[ 3], rotates[13]); XorOp( 3,12); + AddOp(14, 5); b[ 5] = left_rot(b[ 5], rotates[14]); XorOp( 5,14); + AddOp( 8, 1); b[ 1] = left_rot(b[ 1], rotates[15]); XorOp( 1, 8); + + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[16]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[17]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[18]); XorOp( 3, 4); + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[19]); XorOp( 1, 6); + AddOp(12,15); b[15] = left_rot(b[15], rotates[20]); XorOp(15,12); + AddOp(14,13); b[13] = left_rot(b[13], rotates[21]); XorOp(13,14); + AddOp( 8,11); b[11] = left_rot(b[11], rotates[22]); XorOp(11, 8); + AddOp(10, 9); b[ 9] = left_rot(b[ 9], rotates[23]); XorOp( 9,10); + + AddOp( 0,15); b[15] = left_rot(b[15], rotates[24]); XorOp(15, 0); + AddOp( 2,11); b[11] = left_rot(b[11], rotates[25]); XorOp(11, 2); + AddOp( 6,13); b[13] = left_rot(b[13], rotates[26]); XorOp(13, 6); + AddOp( 4, 9); b[ 9] = left_rot(b[ 9], rotates[27]); XorOp( 9, 4); + AddOp(14, 1); b[ 1] = left_rot(b[ 1], rotates[28]); XorOp( 1,14); + AddOp( 8, 5); b[ 5] = left_rot(b[ 5], rotates[29]); XorOp( 5, 8); + AddOp(10, 3); b[ 3] = left_rot(b[ 3], rotates[30]); XorOp( 3,10); + AddOp(12, 7); b[ 7] = left_rot(b[ 7], rotates[31]); XorOp( 7,12); + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[32]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[33]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[34]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[35]); XorOp( 7, 6); + AddOp( 8, 9); b[ 9] = left_rot(b[ 9], rotates[36]); XorOp( 9, 8); + AddOp(10,11); b[11] = left_rot(b[11], rotates[37]); XorOp(11,10); + AddOp(12,13); b[13] = 
left_rot(b[13], rotates[38]); XorOp(13,12); + AddOp(14,15); b[15] = left_rot(b[15], rotates[39]); XorOp(15,14); + + AddOp( 0, 9); b[ 9] = left_rot(b[ 9], rotates[40]); XorOp( 9, 0); + AddOp( 2,13); b[13] = left_rot(b[13], rotates[41]); XorOp(13, 2); + AddOp( 6,11); b[11] = left_rot(b[11], rotates[42]); XorOp(11, 6); + AddOp( 4,15); b[15] = left_rot(b[15], rotates[43]); XorOp(15, 4); + AddOp(10, 7); b[ 7] = left_rot(b[ 7], rotates[44]); XorOp( 7,10); + AddOp(12, 3); b[ 3] = left_rot(b[ 3], rotates[45]); XorOp( 3,12); + AddOp(14, 5); b[ 5] = left_rot(b[ 5], rotates[46]); XorOp( 5,14); + AddOp( 8, 1); b[ 1] = left_rot(b[ 1], rotates[47]); XorOp( 1, 8); + + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[48]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[49]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[50]); XorOp( 3, 4); + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[51]); XorOp( 1, 6); + AddOp(12,15); b[15] = left_rot(b[15], rotates[52]); XorOp(15,12); + AddOp(14,13); b[13] = left_rot(b[13], rotates[53]); XorOp(13,14); + AddOp( 8,11); b[11] = left_rot(b[11], rotates[54]); XorOp(11, 8); + AddOp(10, 9); b[ 9] = left_rot(b[ 9], rotates[55]); XorOp( 9,10); + + AddOp( 0,15); b[15] = left_rot(b[15], rotates[56]); XorOp(15, 0); + AddOp( 2,11); b[11] = left_rot(b[11], rotates[57]); XorOp(11, 2); + AddOp( 6,13); b[13] = left_rot(b[13], rotates[58]); XorOp(13, 6); + AddOp( 4, 9); b[ 9] = left_rot(b[ 9], rotates[59]); XorOp( 9, 4); + AddOp(14, 1); b[ 1] = left_rot(b[ 1], rotates[60]); XorOp( 1,14); + AddOp( 8, 5); b[ 5] = left_rot(b[ 5], rotates[61]); XorOp( 5, 8); + AddOp(10, 3); b[ 3] = left_rot(b[ 3], rotates[62]); XorOp( 3,10); + AddOp(12, 7); b[ 7] = left_rot(b[ 7], rotates[63]); XorOp( 7,12); + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[ 2]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], 
rotates[ 3]); XorOp( 7, 6); + AddOp( 8, 9); b[ 9] = left_rot(b[ 9], rotates[ 4]); XorOp( 9, 8); + AddOp(10,11); b[11] = left_rot(b[11], rotates[ 5]); XorOp(11,10); + AddOp(12,13); b[13] = left_rot(b[13], rotates[ 6]); XorOp(13,12); + AddOp(14,15); b[15] = left_rot(b[15], rotates[ 7]); XorOp(15,14); + } + +#if defined(__BORLANDC__) +#pragma argsused +#endif +void fwd_cycle_8_or_r8(Word *b, const u08b *rotates, int rounds) + { + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[ 2]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[ 3]); XorOp( 7, 6); + + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[ 4]); XorOp( 1, 2); + AddOp( 4, 7); b[ 7] = left_rot(b[ 7], rotates[ 5]); XorOp( 7, 4); + AddOp( 6, 5); b[ 5] = left_rot(b[ 5], rotates[ 6]); XorOp( 5, 6); + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[ 7]); XorOp( 3, 0); + + AddOp( 4, 1); b[ 1] = left_rot(b[ 1], rotates[ 8]); XorOp( 1, 4); + AddOp( 6, 3); b[ 3] = left_rot(b[ 3], rotates[ 9]); XorOp( 3, 6); + AddOp( 0, 5); b[ 5] = left_rot(b[ 5], rotates[10]); XorOp( 5, 0); + AddOp( 2, 7); b[ 7] = left_rot(b[ 7], rotates[11]); XorOp( 7, 2); + + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[12]); XorOp( 1, 6); + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[13]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[14]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[15]); XorOp( 3, 4); + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[16]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[17]); XorOp( 3, 2); + AddOp( 4, 5); b[ 5] = left_rot(b[ 5], rotates[18]); XorOp( 5, 4); + AddOp( 6, 7); b[ 7] = left_rot(b[ 7], rotates[19]); XorOp( 7, 6); + + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[20]); XorOp( 1, 2); + AddOp( 4, 7); b[ 7] = left_rot(b[ 7], rotates[21]); XorOp( 7, 4); + AddOp( 6, 5); b[ 5] = left_rot(b[ 5], rotates[22]); XorOp( 5, 
6); + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[23]); XorOp( 3, 0); + + AddOp( 4, 1); b[ 1] = left_rot(b[ 1], rotates[24]); XorOp( 1, 4); + AddOp( 6, 3); b[ 3] = left_rot(b[ 3], rotates[25]); XorOp( 3, 6); + AddOp( 0, 5); b[ 5] = left_rot(b[ 5], rotates[26]); XorOp( 5, 0); + AddOp( 2, 7); b[ 7] = left_rot(b[ 7], rotates[27]); XorOp( 7, 2); + + AddOp( 6, 1); b[ 1] = left_rot(b[ 1], rotates[28]); XorOp( 1, 6); + AddOp( 0, 7); b[ 7] = left_rot(b[ 7], rotates[29]); XorOp( 7, 0); + AddOp( 2, 5); b[ 5] = left_rot(b[ 5], rotates[30]); XorOp( 5, 2); + AddOp( 4, 3); b[ 3] = left_rot(b[ 3], rotates[31]); XorOp( 3, 4); + } + +#ifdef __BORLANDC__ +#pragma argsused +#endif +void fwd_cycle_4_or_r8(Word *b, const u08b *rotates, int rounds) + { + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 0]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 1]); XorOp( 3, 2); + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[ 2]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[ 3]); XorOp( 1, 2); + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 4]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 5]); XorOp( 3, 2); + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[ 6]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[ 7]); XorOp( 1, 2); + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[ 8]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[ 9]); XorOp( 3, 2); + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[10]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[11]); XorOp( 1, 2); + + AddOp( 0, 1); b[ 1] = left_rot(b[ 1], rotates[12]); XorOp( 1, 0); + AddOp( 2, 3); b[ 3] = left_rot(b[ 3], rotates[13]); XorOp( 3, 2); + + AddOp( 0, 3); b[ 3] = left_rot(b[ 3], rotates[14]); XorOp( 3, 0); + AddOp( 2, 1); b[ 1] = left_rot(b[ 1], rotates[15]); XorOp( 1, 2); + } + +/* reverse versions of the cipher, using OR, for fixed round numbers */ +#ifdef __BORLANDC__ +#pragma argsused +#endif +void 
rev_cycle_16_or_r9(Word *b, const u08b *rotates, int rounds) + { + XorOp(15,14); b[15] = right_rot(b[15], rotates[ 7]); SubOp(14,15); + XorOp(13,12); b[13] = right_rot(b[13], rotates[ 6]); SubOp(12,13); + XorOp(11,10); b[11] = right_rot(b[11], rotates[ 5]); SubOp(10,11); + XorOp( 9, 8); b[ 9] = right_rot(b[ 9], rotates[ 4]); SubOp( 8, 9); + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[ 3]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[ 2]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + + XorOp( 7,12); b[ 7] = right_rot(b[ 7], rotates[63]); SubOp(12, 7); + XorOp( 3,10); b[ 3] = right_rot(b[ 3], rotates[62]); SubOp(10, 3); + XorOp( 5, 8); b[ 5] = right_rot(b[ 5], rotates[61]); SubOp( 8, 5); + XorOp( 1,14); b[ 1] = right_rot(b[ 1], rotates[60]); SubOp(14, 1); + XorOp( 9, 4); b[ 9] = right_rot(b[ 9], rotates[59]); SubOp( 4, 9); + XorOp(13, 6); b[13] = right_rot(b[13], rotates[58]); SubOp( 6,13); + XorOp(11, 2); b[11] = right_rot(b[11], rotates[57]); SubOp( 2,11); + XorOp(15, 0); b[15] = right_rot(b[15], rotates[56]); SubOp( 0,15); + + XorOp( 9,10); b[ 9] = right_rot(b[ 9], rotates[55]); SubOp(10, 9); + XorOp(11, 8); b[11] = right_rot(b[11], rotates[54]); SubOp( 8,11); + XorOp(13,14); b[13] = right_rot(b[13], rotates[53]); SubOp(14,13); + XorOp(15,12); b[15] = right_rot(b[15], rotates[52]); SubOp(12,15); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[51]); SubOp( 6, 1); + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[50]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[49]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[48]); SubOp( 0, 7); + + XorOp( 1, 8); b[ 1] = right_rot(b[ 1], rotates[47]); SubOp( 8, 1); + XorOp( 5,14); b[ 5] = right_rot(b[ 5], rotates[46]); SubOp(14, 5); + XorOp( 3,12); b[ 3] = right_rot(b[ 3], rotates[45]); SubOp(12, 3); + XorOp( 7,10); b[ 7] = right_rot(b[ 7], rotates[44]); SubOp(10, 
7); + XorOp(15, 4); b[15] = right_rot(b[15], rotates[43]); SubOp( 4,15); + XorOp(11, 6); b[11] = right_rot(b[11], rotates[42]); SubOp( 6,11); + XorOp(13, 2); b[13] = right_rot(b[13], rotates[41]); SubOp( 2,13); + XorOp( 9, 0); b[ 9] = right_rot(b[ 9], rotates[40]); SubOp( 0, 9); + + XorOp(15,14); b[15] = right_rot(b[15], rotates[39]); SubOp(14,15); + XorOp(13,12); b[13] = right_rot(b[13], rotates[38]); SubOp(12,13); + XorOp(11,10); b[11] = right_rot(b[11], rotates[37]); SubOp(10,11); + XorOp( 9, 8); b[ 9] = right_rot(b[ 9], rotates[36]); SubOp( 8, 9); + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[35]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[34]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[33]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[32]); SubOp( 0, 1); + + XorOp( 7,12); b[ 7] = right_rot(b[ 7], rotates[31]); SubOp(12, 7); + XorOp( 3,10); b[ 3] = right_rot(b[ 3], rotates[30]); SubOp(10, 3); + XorOp( 5, 8); b[ 5] = right_rot(b[ 5], rotates[29]); SubOp( 8, 5); + XorOp( 1,14); b[ 1] = right_rot(b[ 1], rotates[28]); SubOp(14, 1); + XorOp( 9, 4); b[ 9] = right_rot(b[ 9], rotates[27]); SubOp( 4, 9); + XorOp(13, 6); b[13] = right_rot(b[13], rotates[26]); SubOp( 6,13); + XorOp(11, 2); b[11] = right_rot(b[11], rotates[25]); SubOp( 2,11); + XorOp(15, 0); b[15] = right_rot(b[15], rotates[24]); SubOp( 0,15); + + XorOp( 9,10); b[ 9] = right_rot(b[ 9], rotates[23]); SubOp(10, 9); + XorOp(11, 8); b[11] = right_rot(b[11], rotates[22]); SubOp( 8,11); + XorOp(13,14); b[13] = right_rot(b[13], rotates[21]); SubOp(14,13); + XorOp(15,12); b[15] = right_rot(b[15], rotates[20]); SubOp(12,15); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[19]); SubOp( 6, 1); + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[18]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[17]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[16]); SubOp( 0, 7); + + XorOp( 1, 8); b[ 1] = right_rot(b[ 1], rotates[15]); 
SubOp( 8, 1); + XorOp( 5,14); b[ 5] = right_rot(b[ 5], rotates[14]); SubOp(14, 5); + XorOp( 3,12); b[ 3] = right_rot(b[ 3], rotates[13]); SubOp(12, 3); + XorOp( 7,10); b[ 7] = right_rot(b[ 7], rotates[12]); SubOp(10, 7); + XorOp(15, 4); b[15] = right_rot(b[15], rotates[11]); SubOp( 4,15); + XorOp(11, 6); b[11] = right_rot(b[11], rotates[10]); SubOp( 6,11); + XorOp(13, 2); b[13] = right_rot(b[13], rotates[ 9]); SubOp( 2,13); + XorOp( 9, 0); b[ 9] = right_rot(b[ 9], rotates[ 8]); SubOp( 0, 9); + + XorOp(15,14); b[15] = right_rot(b[15], rotates[ 7]); SubOp(14,15); + XorOp(13,12); b[13] = right_rot(b[13], rotates[ 6]); SubOp(12,13); + XorOp(11,10); b[11] = right_rot(b[11], rotates[ 5]); SubOp(10,11); + XorOp( 9, 8); b[ 9] = right_rot(b[ 9], rotates[ 4]); SubOp( 8, 9); + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[ 3]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[ 2]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + } + +#ifdef __BORLANDC__ +#pragma argsused +#endif +void rev_cycle_8_or_r8(Word *b, const u08b *rotates, int rounds) + { + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[31]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[30]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[29]); SubOp( 0, 7); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[28]); SubOp( 6, 1); + + XorOp( 7, 2); b[ 7] = right_rot(b[ 7], rotates[27]); SubOp( 2, 7); + XorOp( 5, 0); b[ 5] = right_rot(b[ 5], rotates[26]); SubOp( 0, 5); + XorOp( 3, 6); b[ 3] = right_rot(b[ 3], rotates[25]); SubOp( 6, 3); + XorOp( 1, 4); b[ 1] = right_rot(b[ 1], rotates[24]); SubOp( 4, 1); + + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[23]); SubOp( 0, 3); + XorOp( 5, 6); b[ 5] = right_rot(b[ 5], rotates[22]); SubOp( 6, 5); + XorOp( 7, 4); b[ 7] = right_rot(b[ 7], rotates[21]); SubOp( 4, 7); + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[20]); SubOp( 
2, 1); + + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[19]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[18]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[17]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[16]); SubOp( 0, 1); + + XorOp( 3, 4); b[ 3] = right_rot(b[ 3], rotates[15]); SubOp( 4, 3); + XorOp( 5, 2); b[ 5] = right_rot(b[ 5], rotates[14]); SubOp( 2, 5); + XorOp( 7, 0); b[ 7] = right_rot(b[ 7], rotates[13]); SubOp( 0, 7); + XorOp( 1, 6); b[ 1] = right_rot(b[ 1], rotates[12]); SubOp( 6, 1); + + XorOp( 7, 2); b[ 7] = right_rot(b[ 7], rotates[11]); SubOp( 2, 7); + XorOp( 5, 0); b[ 5] = right_rot(b[ 5], rotates[10]); SubOp( 0, 5); + XorOp( 3, 6); b[ 3] = right_rot(b[ 3], rotates[ 9]); SubOp( 6, 3); + XorOp( 1, 4); b[ 1] = right_rot(b[ 1], rotates[ 8]); SubOp( 4, 1); + + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[ 7]); SubOp( 0, 3); + XorOp( 5, 6); b[ 5] = right_rot(b[ 5], rotates[ 6]); SubOp( 6, 5); + XorOp( 7, 4); b[ 7] = right_rot(b[ 7], rotates[ 5]); SubOp( 4, 7); + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[ 4]); SubOp( 2, 1); + + XorOp( 7, 6); b[ 7] = right_rot(b[ 7], rotates[ 3]); SubOp( 6, 7); + XorOp( 5, 4); b[ 5] = right_rot(b[ 5], rotates[ 2]); SubOp( 4, 5); + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + } + +#ifdef __BORLANDC__ +#pragma argsused +#endif +void rev_cycle_4_or_r8(Word *b, const u08b *rotates, int rounds) + { + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[15]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[14]); SubOp( 0, 3); + + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[13]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[12]); SubOp( 0, 1); + + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[11]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[10]); SubOp( 0, 3); + + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 9]); 
SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 8]); SubOp( 0, 1); + + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[ 7]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[ 6]); SubOp( 0, 3); + + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 5]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 4]); SubOp( 0, 1); + + XorOp( 1, 2); b[ 1] = right_rot(b[ 1], rotates[ 3]); SubOp( 2, 1); + XorOp( 3, 0); b[ 3] = right_rot(b[ 3], rotates[ 2]); SubOp( 0, 3); + + XorOp( 3, 2); b[ 3] = right_rot(b[ 3], rotates[ 1]); SubOp( 2, 3); + XorOp( 1, 0); b[ 1] = right_rot(b[ 1], rotates[ 0]); SubOp( 0, 1); + } + + +/* test that fwd and rev ciphers are truly inverses */ +void InverseChecks(void) + { + uint i,j,k,wCnt,tstCnt; + int r,rN; + Block pt,ct,xt; + u08b rots[MAX_ROTS_PER_CYCLE]; + uint TEST_CNT = (sizeof(size_t) == 8) ? 64 : 8; + + cycle_func *fwd; + cycle_func *rev; + cycle_func *fwd_or; + cycle_func *fwd_or_rN; +#ifdef TEST_OR + cycle_func *rev_or; + cycle_func *rev_or_rN; +#endif + + Rand_Init(0); + for (wCnt=4;wCnt<=MAX_WORDS_PER_BLK;wCnt *= 2) + { + switch (wCnt) + { + case 4: fwd = fwd_cycle_4 ; rev = rev_cycle_4 ; + fwd_or = fwd_cycle_4_or ; fwd_or_rN = fwd_cycle_4_or_r8 ; break; + case 8: fwd = fwd_cycle_8 ; rev = rev_cycle_8 ; + fwd_or = fwd_cycle_8_or ; fwd_or_rN = fwd_cycle_8_or_r8 ; break; + default: fwd = fwd_cycle_16 ; rev = rev_cycle_16 ; + fwd_or = fwd_cycle_16_or ; fwd_or_rN = fwd_cycle_16_or_r9 ; break; + } +#ifdef TEST_OR + switch (wCnt) + { + case 4: rev_or_rN = rev_cycle_4_or_r8 ; rev_or = rev_cycle_4_or ; break; + case 8: rev_or_rN = rev_cycle_8_or_r8 ; rev_or = rev_cycle_8_or ; break; + default: rev_or_rN = rev_cycle_16_or_r9 ; rev_or = rev_cycle_16_or ; break; + } +#endif + for (tstCnt=0;tstCnt> 1) & DUP_64(0x55555555)); + x = (x & DUP_64(0x33333333)) + ((x >> 2) & DUP_64(0x33333333)); + x = (x & DUP_64(0x0F0F0F0F)) + ((x >> 4) & DUP_64(0x0F0F0F0F)); + x = (x & DUP_64(0x00FF00FF)) + ((x >> 8) & 
DUP_64(0x00FF00FF)); + x = (x & DUP_64(0x0000FFFF)) + ((x >>16) & DUP_64(0x0000FFFF)); + x = (x & DUP_64(0x000000FF)) + ((x >>32) & DUP_64(0x000000FF)); +#else + x = (x & 0x55555555) + ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x & 0x0F0F0F0F) + ((x >> 4) & 0x0F0F0F0F); + x = (x & 0x00FF00FF) + ((x >> 8) & 0x00FF00FF); + x = (x & 0x0000FFFF) + ((x >>16) & 0x000000FF); +#endif + return (uint) x; + } + + +/* use the CRC value as quick ID to help identify/verify rotation sets */ +void Set_CRC(rSearchRec *r) + { +#define CRC_FDBK ((0x04C11DB7u >> 1) ^ 0x80000000u) /* CRC-32-IEEE-802.3 (from Wikipedia) */ + uint i,h=~0u; + + for (i=0;irotList[i]; + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + h = (h & 1) ? (h >> 1) ^ CRC_FDBK : (h >> 1); + } + r->CRC = h; + } + +/* qsort routine for search records: keep in descending order */ +int Compare_SearchRec_Descending(const void *aPtr,const void *bPtr) + { + uint wA = ((const rSearchRec *) aPtr)->rWorst; + uint wB = ((const rSearchRec *) bPtr)->rWorst; + + if (wA < wB) + return +1; + if (wA > wB) + return -1; + else + { /* equal metric. 
Sort by ID number */ + wA = ((const rSearchRec *) aPtr)->ID; + wB = ((const rSearchRec *) bPtr)->ID; + if (wA < wB) + return -1; + if (wA > wB) + return +1; + return 0; + } + } + +const char *ASCII_TimeDate(void) + { + time_t t; + time(&t); + return ctime(&t); + } + +/* test the rotation set for minimum hamming weight >= minHW */ +/* [try to do it fast: rely on rotational symmetry using OR, */ +/* and do an early exit if hamming weight is too low] */ +int Cycle_Min_HW(uint rounds, const u08b *rotList,uint minHW,uint verMask) + { + uint i,j,v,hw,hMin; + u08b rots[MAX_ROTS_PER_CYCLE]; + Block b; + + hMin = BITS_PER_WORD; + for (v=0;v= rotsPerCycle/2) ? i - rotsPerCycle/2 : i + rotsPerCycle/2]; + } + } + else + memcpy(rots,rotList,rotsPerCycle*sizeof(rots[0])); + for (i=0;i hw) + return 0; /* stop if this isn't good enough */ + if (hMin > hw) /* else keep track of min */ + hMin = hw; + } + } + } + return hMin; + } + +/* compute/set the minimum hamming weight of the rotation set */ +/* [more thorough check than Cycle_Min_HW] */ +uint Set_Min_hw_OR(rSearchRec *r,uint verMask,uint rounds) + { + uint i,j,v,hw,hwMin; + u08b rots[MAX_ROTS_PER_CYCLE]; + Block b; + + Set_CRC(r); + hwMin = BITS_PER_WORD; + for (v=0;vhw_OR[v] = BITS_PER_WORD; + if ((verMask & (1 << v)) == 0) + continue; + if (v & 1) + { /* do it on the "half-cycle" */ + for (i=0;irotList[(i >= rotsPerCycle/2) ? 
i - rotsPerCycle/2 : i + rotsPerCycle/2]; + } + } + else + memcpy(rots,r->rotList,rotsPerCycle*sizeof(rots[0])); + for (i=0;i hw) + hwMin = hw; + if (r->hw_OR[v] > (u08b) hw) + r->hw_OR[v] = (u08b) hw; + } + } + } + return hwMin; + } + +/* show how the Hamming weight varies as a function of # rounds */ +void Show_HW_rounds(const u08b *rotates) + { + uint i,r,minHW,hw[4]; + + for (r=4;r<12;r++) + { + minHW = bitsPerBlock; + for (i=0;i<4;i++) + { + hw[i]=Cycle_Min_HW(r,rotates,0,1 << i); + if (minHW > hw[i]) + minHW = hw[i]; + } + printf("%2d rounds: minHW = %2d [",r,minHW); + for (i=0;i<4;i++) /* show the different "versions" */ + printf(" %2d",hw[i]); + printf(" ]\n"); + } + } + +/* read rotations value from file */ +const u08b *get_rotation_file(const char *rfName) + { + enum { MAX_LINE = 512 }; + char line[MAX_LINE+4]; + uint i,rotVal; + uint rotShow=0; + static FILE *rf=NULL; + static u08b rotates[MAX_ROTS_PER_CYCLE]; + static uint rotCnt =0; +/**** sample format: ++++++++++++++ Preliminary results: sampleCnt = 1024, block = 256 bits +rMin = 0.425. #079C[*21] [CRC=D89E7C72. hw_OR=62. cnt= 1024. blkSize= 256] + 46 52 + 21 38 + 13 13 + 20 27 + 14 40 + 43 26 + 35 29 + 19 63 +rMin = 0.425. #0646[*17] [CRC=527174F3. hw_OR=61. cnt= 1024. blkSize= 256] + 26 24 + 50 48 + 40 25 + 36 55 + 10 20 + 10 16 + 60 55 + 18 7 +... 
+****/ + if (rfName[0] == '+') + { + rfName++; + rotShow = 1; + } + if (rf == NULL) + { + rf = fopen(rfName,"rt"); + if (rf == NULL) + { + printf("Unable to open rotation file '%s'",rfName); + exit(2); + } + rotCnt=0; + for (;;) /* skip to "preliminary results" section */ + { + line[0]=0; + if (fgets(line,sizeof(line)-4,rf) == NULL || line[0] == 0) + { + fclose(rf); /* eof --> stop */ + rf = NULL; + return NULL; + } + /* check for the header */ + if (line[0] != '+' || line[1] != '+' || line[2] != '+' || + strstr(line,"reliminary results:") == NULL) + continue; + /* now check for the correct block size */ + for (i=strlen(line);i;i--) /* start at eol and look backwards */ + if (line[i-1] == '=') /* check for '=' sign for block size */ + break; + if (i > 0 && sscanf(line+i,"%u bits",&i) == 1 && i == bitsPerBlock) + break; + } + } + /* now at the rMin line */ + line[0]=0; + if (fgets(line,sizeof(line)-4,rf) == NULL || line[0] == 0 || strncmp(line,"rMin =",6)) + { + fclose(rf); + rf = NULL; + return NULL; + } + + /* now read in all the rotation values */ + for (i=0;i= bitsPerBlock) + { /* Invalid rotation value */ + fclose(rf); + rf = NULL; + return NULL; + } + rotates[i] = (u08b) rotVal; + } + if (fgets(line,sizeof(line)-4,rf) == NULL) /* skip eol */ + { + fclose(rf); + rf = NULL; + } + if (rotShow) + { /* show the hamming weight profile */ + printf("\n:::::::::::\n"); + printf("Rot #%02d [%4d-bit blocks] read from file '%s':\n",rotCnt,bitsPerBlock,rfName); + for (i=0;irWorst = 0; + r->parentCRC = ~0u; + + if (rotFileName) /* get from search results file? */ + { + const u08b *rf = get_rotation_file(rotFileName); + if (rf) + { + for (i=0;irotList[i] = rf[i]; + Set_Min_hw_OR(r,t.rotVerMask,t.rounds); + r->ID = rID++; + return 1; + } + /* here with file exhausted. 
Keep going with randomized values */ + rotFileName = NULL; /* don't use file any more */ + return 0; + } + for (i=goodRotCnt=0;i= i) /* no dup, value ok, so this value is ok */ + i++; + } + hw = Cycle_Min_HW(t.rounds,rotates,t.minHW_or-t.minOffs,t.rotVerMask); + if (hw == 0) /* did we get close? */ + continue; + rCntOK++; + + hwBase = hw; + if (hw >= t.minHW_or) + if (Cycle_Min_HW(t.maxSatRnds, rotates,0,t.rotVerMask) == BITS_PER_WORD) + { + for (i=0;irotList[i] = rotates[i]; + rScale = 1; /* set up for scaling below */ + } + } + /* use odd scaling for randomly generated rotations */ + for (;rScale < BITS_PER_WORD;) + { + for (i=0;irotList[i] = (rotates[i] * rScale) % BITS_PER_WORD; + if (RotCnt_Bad(r->rotList[i])) + break; + } + rScale+=2; /* bump scale factor for next time */ + if (i >= rotsPerCycle) + { /* all values ok: this one's a keeper */ + Set_Min_hw_OR(r,t.rotVerMask,t.rounds); + r->ID = rID++; + return 1; + } + } + /* Try nearby values to see if hw gets better: monotonic hill climb. */ + /* -- exhaustively try all possible values of pairs of changes */ + for (m=0;m k;q--) /* check for dups in the same round */ + if (rotates[k] == rotates[q]) + break; + if (q > k) + continue; + for (i=m+1;i n;q--) /* check for dups in the same round */ + if (rotates[n] == rotates[q]) + break; + if (q > n) + continue; + k = (t.minHW_or > hwBase) ? 
t.minHW_or : hwBase; + hw = Cycle_Min_HW(t.rounds,rotates,k,t.rotVerMask); + if (hw > hwBase) + if (Cycle_Min_HW(t.maxSatRnds, rotates,0,t.rotVerMask) == BITS_PER_WORD) + { /* must improve hw to accept this new rotation set */ + assert(hw >= t.minHW_or); + hwBase = hw; + rScale = 3; /* set up for scaling next time */ + for (i=0;irotList[i] = rotates[i]; + Set_Min_hw_OR(r,t.rotVerMask,t.rounds); + r->ID = rID++; + return 1; + } + } + } + } + hwBase = 0; /* back to random */ + } + } + +/* display a search record result */ +void ShowSearchRec(FILE *f,const rSearchRec *r,testParms t,uint showMode,char markCh,uint showNum) + { + uint i,j,n,hwMin; + const char *s; + char fStr[200]; + + hwMin=BITS_PER_WORD; + for (i=0;i (uint) r->hw_OR[i]) + hwMin = (uint) r->hw_OR[i]; + + switch (showMode) + { + case SHOW_ROTS_FINAL: sprintf(fStr,".final:%02d " ,showNum); s = fStr; break; + case SHOW_ROTS_H: s = ".format"; break; + case SHOW_ROTS_PRELIM: s = ".prelim"; break; + default: s = ""; break; + } + + fprintf(f,"rMin = %5.3f.%c [CRC=%08X. parent=%08X. ID=%08X. hw_OR=%2d. cnt=%5d. bits=%4u]%-10s%s%s\n", + r->rWorst/(double)t.sampleCnt,markCh,r->CRC,r->parentCRC,r->ID, + hwMin,t.sampleCnt,bitsPerBlock,s, + (t.tstFlags & TST_FLG_USE_ABS)?" useAbs":"",(r->ID & ID_RECALC_BIT)?" 
recalc":"" + ); + + switch (showMode) + { + case SHOW_NONE: + break; + case SHOW_ROTS_H: /* format for "skein.h" */ + for (j=n=0;jrotList[n++]); + } + fprintf(f,"\n"); + } + break; + default: + for (i=0;irotList[i],((i+1)%(wordsPerBlock/2))?"":"\n"); + break; + } + } + +/* compute Skein differentials for a given rotation set */ +uint CheckDifferentials(rSearchRec *r,testParms t) + { + enum { HIST_BINS = 20 }; + + uint i,j,k,v,n,d,dMax,minCnt,maxCnt,vCnt,q; + uint rMin,rMax,hwMin,hwMax,hw,rMinCnt,rMaxCnt,iMin,jMin,iMax,jMax; + uint hist[HIST_BINS+1]; + u08b rots[MAX_ROTS_PER_CYCLE]; + u64b totSum,w,y,z,oMask; + double fSum,fSqr,x,var,denom; + static u64b onesCnt[3][MAX_BITS_PER_BLK][MAX_BITS_PER_BLK/8]; /* pack eight 8-bit counts into each u64b (for speed) */ + u64b *oPtr; + struct + { + Block pt,ct; + } a,b; + + r->rWorst = t.sampleCnt; + dMax = 1u << (t.diffBits & (BITS_PER_WORD-1)); + iMin = jMin = iMax = jMax = bitsPerBlock + 1; + + for (v=vCnt=0;v < MAX_ROT_VER_CNT; v++) + { /* different versions of rotation schedule, including "inverse" cipher */ + if ((t.rotVerMask & (1 << v)) == 0) + continue; + vCnt++; /* number of versions processed */ + if (v & 1) + { /* do it on the "half-cycle" */ + for (i=0;irotList[(i >= rotsPerCycle/2) ? 
i - rotsPerCycle/2 : i + rotsPerCycle/2]; + } + } + else + memcpy(rots,r->rotList,rotsPerCycle*sizeof(rots[0])); + for (d=1; d < dMax; d+=2) /* multi-bit difference patterns (must start with a '1' bit) */ + { + hwMax=0; + hwMin=bitsPerBlock+1; + memset(onesCnt,0,sizeof(onesCnt)); /* clear stats before starting */ + + oMask = DUP_64(0x01010101); /* mask for adding, 8 bins at a time */ + for (n=1;n<=t.sampleCnt;n++) + { + for (i=0;i> 1) & oMask; oPtr[1] += y; z += y; + y = (w >> 2) & oMask; oPtr[2] += y; z += y; /* do it 8 times to cover all bits in w */ + y = (w >> 3) & oMask; oPtr[3] += y; z += y; + + y = (w >> 4) & oMask; oPtr[4] += y; z += y; + y = (w >> 5) & oMask; oPtr[5] += y; z += y; + y = (w >> 6) & oMask; oPtr[6] += y; z += y; + y = (w >> 7) & oMask; oPtr[7] += y; z += y; + oPtr += 8; + } + /* sum up the total hamming weight bins (very carefully) */ + z = (z & DUP_64(0x00FF00FF)) + ((z >> 8) & DUP_64(0x00FF00FF)); + hw = (uint) (z + (z >> 16) + (z >> 32) + (z >> 48)) & 0xFFFF; + if (hwMin > hw) hwMin = hw; /* update total hw min/max stats */ + if (hwMax < hw) hwMax = hw; + } + if ((n & 0x7F) == 0) + { /* prevent onesCnt[0] overflow by "transferring" MSBs of 8-bit bytes into onesCnt[1] */ + for (i=0;i> 7) & oMask; + onesCnt[0][i][j] &= ~(oMask << 7); + } + if ((n & 0x3FFF) == 0) + { /* propagate overflow into onesCnt[2] (occasionally, as needed) */ + for (i=0;i> 7) & oMask; + onesCnt[1][i][j] &= ~(oMask << 7); + } + } + } + if (n == 32 && d == 1 && (t.tstFlags & TST_FLG_QUICK_EXIT)) + { /* quick exit if not even close to random looking after a few samples */ + for (i=0;i 1) + { /* show why we stopped, if we already showed something */ + printf("%23s/* quick exit: %d/%d */\n","",(uint)onesCnt[0][i][j],n); + } + return r->rWorst = 0; /* not a good result */ + } + } + } + } + /* now process the stats from the samples we just generated */ + assert(t.sampleCnt < (1 << 22)); /* 2**22 is big enough not to worry! 
*/ + memset(hist,0,sizeof(hist)); + fSum = fSqr = 0.0; + denom = 1.0 / (double) t.sampleCnt; + rMin = minCnt = ~0u; + totSum= rMax = rMinCnt = rMaxCnt = maxCnt = 0; + for (i=0;i>= 8,y >>= 8,z >>= 8) + { + q = (uint) ((w & 0xFF) + ((y & 0xFF) << 7) + ((z & 0xFF) << 14)); + if (maxCnt < q) { maxCnt = q; iMax = i; jMax = j; if (rMax < q) { rMax = q; rMaxCnt = 0; } } + if (minCnt > q) { minCnt = q; iMin = i; jMin = j; if (rMin > q) { rMin = q; rMinCnt = 0; } } + if (rMin == minCnt) rMinCnt++; + if (rMax == maxCnt) rMaxCnt++; + if (t.tstFlags & TST_FLG_SHOW) + { /* compute more extensive stats only if showing results below */ + totSum += q; + x = q*denom; /* update stats for stdDev */ + fSum += x; + fSqr += x*x; + hist[(uint)floor(x*HIST_BINS)]++; /* track histogram */ + } + } + } + } + if (t.tstFlags & TST_FLG_USE_ABS && rMin > t.sampleCnt - rMax) + { + rMin = t.sampleCnt - rMax; /* use max variation from 1/2 */ + iMin = iMax; + jMin = jMax; + } + if (r->rWorst > rMin) + { + r->rWorst = rMin; + if (rMin == 0) + { /* if far worse than current best, stop now (to speed up the search) */ + if (t.tstFlags & TST_FLG_SHOW && (d > 1 || vCnt > 1)) /* show why we stopped, if we already showed something */ + printf("%23s/* early exit */\n",""); + return r->rWorst = 0; + } + } + if (t.tstFlags & TST_FLG_SHOW) + { /* show some detailed results of the test */ + if (d == 1) + { /* put out the rotation info the first time thru */ + if ((t.tstFlags & TST_FLG_DO_RAND) == 0) + { + printf("Rotation set [CRC=%08X. hw_OR=%2d. sampleCnt=%5d. block=%4d bits. v=%d]:\n", + r->CRC,r->hw_OR[v],t.sampleCnt,bitsPerBlock,v); + if (vCnt == 0) + for (i=0;irotList[i],((i+1)%(wordsPerBlock/2))?"":"\n"); + } + } + printf("rnds=%2d,cnt=%5d",t.rounds,t.sampleCnt); + x = fSum/(bitsPerBlock*bitsPerBlock); + var= (fSqr/(bitsPerBlock*bitsPerBlock)) - x*x; + printf(" min=%5.3f.[%c] max=%5.3f.[%c] hw=%3d..%3d. avg=%7.5f. std=%6.4f. d=%X. [%3d,%3d]", + rMin*denom,(rMinCnt > 9) ? 
'+' : '0'+rMinCnt, + rMax*denom,(rMaxCnt > 9) ? '+' : '0'+rMaxCnt, + hwMin,hwMax, + (totSum*denom)/(bitsPerBlock*bitsPerBlock),sqrt(var),(uint)d,iMin,jMin); + if (t.tstFlags & TST_FLG_SHOW_HIST) + { /* very wide histogram display */ + for (i=0;i<=HIST_BINS;i++) + if (hist[i]) + printf(" %7.5f",hist[i]/(double)(bitsPerBlock*bitsPerBlock)); + else + printf(" _ "); + } + if (t.tstFlags & TST_FLG_DO_RAND) + printf(" [RANDOM] "); + printf("\n"); + fflush(stdout); + } + if (t.tstFlags & TST_FLG_DO_RAND) + break; /* no need to do more than one random setting per rotation set */ + } /* for (d=1;drWorst; + } + +/* twiddle a bit with an entry, but keep maxSatRounds satisfied */ +void Twiddle(rSearchRec *r,testParms t) + { + enum { MAX_TWIDDLE_CNT = 100, MAX_ROT_CNT = 6 }; + uint i,j,k,n,v[MAX_ROT_CNT]; + u08b old[MAX_ROT_CNT]; + u64b usedBitmap; + u08b goodRots[BITS_PER_WORD]; + uint goodRotCnt; + + assert(rotsPerCycle <= sizeof(usedBitmap)*8); + r->ID += (1 << TWIDDLE_CNT_BIT0); /* bump count of number of times twiddled */ + r->ID &= ~ID_RECALC_BIT; /* show this one hasn't been had recalc yet */ + r->parentCRC = r->CRC; /* track genealogy */ + + for (i=goodRotCnt=0;i> v[j]) & 1); /* make sure all v[j] values are unique */ + usedBitmap |= (((u64b) 1) << v[j]); + old[j] = r->rotList[v[j]]; /* save current value */ + } + for (k=0;krotList[v[j]] = goodRots[Rand32() % goodRotCnt]; + } /* make sure new rotation value changes */ + while (r->rotList[v[j]] == old[j]); + } + if (Cycle_Min_HW(t.maxSatRnds,r->rotList,0,t.rotVerMask) == BITS_PER_WORD) + { + if (i >= 2 || !(t.tstFlags & TST_FLG_KEEP_MIN_HW) || + Cycle_Min_HW(t.rounds,r->rotList,t.minHW_or,t.rotVerMask) >= (int) t.minHW_or) + { + Set_Min_hw_OR(r,t.rotVerMask,t.rounds); + return; + } + } + for (j=0;jrotList[v[j]] = old[j]; + } + } + /* twiddling failed to produce a valid set (very rare). 
Select a brand new one */ + get_rotation(r,t); + } + +/* run a full search */ +void RunSearch(testParms t) + { + enum { KEEP_DIV = 16, KEEP_REP = 10, SHOW_CNT = 8 }; + rSearchRec popList[MAX_POP_CNT+2]; + uint i,j,k,n,repCnt,genCnt,keepCnt,prevBest[SHOW_CNT],showMask; + const char *timeStr; + time_t t0,t1; + + Rand_Init(t.seed0 + (((u64b) bitsPerBlock) << 32)); + memset(prevBest,0,sizeof(prevBest)); + + /* now set up the globals according to selected Skein blocksize */ + switch (bitsPerBlock) + { + case 256: + t.genCntMax = (t.genCntMax) ? t.genCntMax : DEFAULT_GEN_CNT_4 ; + t.rounds = (t.rounds) ? t.rounds : DEFAULT_ROUND_CNT_4; + t.minHW_or = (t.minHW_or) ? t.minHW_or : MIN_HW_OR_4; + t.maxSatRnds = (t.maxSatRnds)? t.maxSatRnds : MAX_SAT_ROUNDS_4; + fwd_cycle_or_rN = (t.rounds!=8) ? fwd_cycle_4_or : fwd_cycle_4_or_r8 ; + rev_cycle_or_rN = (t.rounds!=8) ? rev_cycle_4_or : rev_cycle_4_or_r8 ; + fwd_cycle_or = fwd_cycle_4_or; + rev_cycle_or = fwd_cycle_4_or; + fwd_cycle = fwd_cycle_4; + rev_cycle = rev_cycle_4; + showMask = 7; + break; + case 512: + t.genCntMax = (t.genCntMax) ? t.genCntMax : DEFAULT_GEN_CNT_8 ; + t.rounds = (t.rounds) ? t.rounds : DEFAULT_ROUND_CNT_8; + t.minHW_or = (t.minHW_or) ? t.minHW_or : MIN_HW_OR_8; + t.maxSatRnds = (t.maxSatRnds)? t.maxSatRnds : MAX_SAT_ROUNDS_8; + fwd_cycle_or_rN = (t.rounds!=8) ? fwd_cycle_8_or : fwd_cycle_8_or_r8 ; + rev_cycle_or_rN = (t.rounds!=8) ? rev_cycle_8_or : rev_cycle_8_or_r8 ; + fwd_cycle_or = fwd_cycle_8_or; + rev_cycle_or = rev_cycle_8_or; + fwd_cycle = fwd_cycle_8; + rev_cycle = rev_cycle_8; + showMask = 3; + break; + case 1024: + t.genCntMax = (t.genCntMax) ? t.genCntMax : DEFAULT_GEN_CNT_16 ; + t.rounds = (t.rounds) ? t.rounds : DEFAULT_ROUND_CNT_16; + t.minHW_or = (t.minHW_or) ? t.minHW_or : MIN_HW_OR_16; + t.maxSatRnds = (t.maxSatRnds)? t.maxSatRnds : MAX_SAT_ROUNDS_16; + fwd_cycle_or_rN = (t.rounds!=9) ? fwd_cycle_16_or: fwd_cycle_16_or_r9 ; + rev_cycle_or_rN = (t.rounds!=9) ? 
rev_cycle_16_or: rev_cycle_16_or_r9 ; + fwd_cycle_or = fwd_cycle_16_or; + rev_cycle_or = rev_cycle_16_or; + fwd_cycle = fwd_cycle_16; + rev_cycle = rev_cycle_16; + showMask = 1; + break; + default: + printf("Invalid block size!"); + exit(2); + } + if (t.popCnt > MAX_POP_CNT) + t.popCnt = MAX_POP_CNT; + if (t.popCnt < MIN_POP_CNT) + t.popCnt = MIN_POP_CNT; + wordsPerBlock = bitsPerBlock / BITS_PER_WORD; + rotsPerCycle = (wordsPerBlock / 2) * ROUNDS_PER_CYCLE; + + keepCnt = t.popCnt/KEEP_DIV; + assert(keepCnt*(1+KEEP_REP) <= t.popCnt); + + printf("******************************************************************\n"); + printf("Random seed = %u. BlockSize =%4d bits. sampleCnt =%6d. rounds = %2d. minHW_or=%d. CPU = %d-bit\n", + t.seed0,bitsPerBlock,t.sampleCnt,t.rounds,t.minHW_or,(uint)sizeof(size_t)*8); + printf("Population = %d. keepCnt = %d. repCnt = %d. rest = %d. keepMinHW = %d\n", + t.popCnt,keepCnt,KEEP_REP,t.popCnt-keepCnt*(1+KEEP_REP),(t.tstFlags & TST_FLG_KEEP_MIN_HW)?1:0); + timeStr = ASCII_TimeDate(); + if (t.tstFlags & TST_FLG_STDERR) + { + fprintf(stderr,"Start: %sBlock size = %d bits. popCnt = %d. sampleCnt = %d. keepMinHW = %d", + timeStr,bitsPerBlock,t.popCnt,t.sampleCnt,(t.tstFlags & TST_FLG_KEEP_MIN_HW)?1:0); + if (t.runHours) + fprintf(stderr,". 
run time = %d hours",t.runHours); + fprintf(stderr,"\n"); + } + else + showMask = 0; + printf("Start: %s \n",timeStr); + time(&t0); + fflush(stdout); + + for (n=0;n= keepCnt) + { + CheckDifferentials(&popList[i],t); + } + else if (i <= keepCnt/2 && (popList[i].ID & ID_RECALC_BIT) == 0) + { /* recalc with bigger sampleCnt for better accuracy */ + t.sampleCnt <<= 2; + CheckDifferentials(&popList[i],t); + t.sampleCnt >>= 2; + popList[i].rWorst = (popList[i].rWorst + 2) / 4; + popList[i].ID |= ID_RECALC_BIT; + } + } + qsort(popList,t.popCnt,sizeof(popList[0]),Compare_SearchRec_Descending); + if (t.genCntMax == 1) + { keepCnt = t.popCnt; break; } /* allow quick processing from file */ + /* now update the population for the next generation */ + n = t.popCnt-1; /* start discarding at the end of the list */ + for (i=0;i= keepCnt); /* sanity check */ + } + } + for (;n>=keepCnt;n--) /* just tweak the rest */ + { + Twiddle(&popList[n],t); + } + time(&t1); + /* show current best */ + if (t.tstFlags & TST_FLG_STDERR) + { /* first to stderr (assuming redirected stdout */ + fprintf(stderr,"\r%4d: ",genCnt+1); + for (i=j=0;i>= n; /* revert to original sampleCnt */ + } + + time(&t1); + printf("End: %s\n",ASCII_TimeDate()); + printf("Elapsed time = %6.3f hours\n\n",(t1-t0)/(double)3600.0); + if (t.tstFlags & TST_FLG_STDERR) + fprintf(stderr,"\r%60s\n",""); /* clear the screen if needed */ + fflush(stdout); + } + +void GiveHelp(void) + { + printf("Usage: skein_rot_search [options/flags]\n" + "Options: -Bnn = set Skein block size in bits (default=512)\n" + " -Cnn = set count of random differentials taken\n" + " -Dnn = set number bits of difference pattern tested (default=1)\n" + " -Gnn = set min invalid rotation value (default 0)\n" + " -Inn = set rotation version mask\n" + " -Onn = set Hamming weight offset\n" + " -Pnn = set population count\n" + " -Rnn = set round count\n" + " -Snn = set initial random seed (0 --> randomize)\n" + " -Tnn = set max time to run (in hours)\n" + " -Wnn = 
set minimum hamming weight\n" + " -Xnn = set max test rotation count\n" + " -Znn = set max rounds needed for saturation using OR\n" + " @file = read rotations from file\n" + "Flags: -A = use min, not absolute difference\n" + " -E = no stderr output\n" + " -H = show histogram (very wide)\n" + " -K = keep minHW_or during twiddling\n" + " -Q = disable quick exit in search\n" + " -U = weighted repeat count (repeat best more frequently)\n" + " -V = verbose mode\n" + ); + exit(0); + } + +int main(int argc,char *argv[]) + { + uint i,bMin,bMax; + testParms t; + uint chkInv = 1; /* check inverse functions at startup (slow for debbuging) */ + uint goodRot= 2; /* first allowed rotation value (+/-) */ + uint seed = 1; /* 0 = randomize based on time, else use specified seed */ + uint do8 = 0; /* optimize 8-bit CPU performance */ + + t.rounds = 0; /* number of Skein rounds to test */ + t.minHW_or = 0; /* minHW (using OR) required */ + t.minOffs = 4; /* heuristic used to speed up rotation search */ + t.diffBits = 1; /* # consecutive bits of differential inputs tested */ + t.sampleCnt = 1024; /* number of differential pairs tested */ + t.genCntMax = 0; /* number of "generations" tested */ + t.maxSatRnds= 0; /* number of rounds to Hamming weight "saturation" */ + t.rotVerMask= 3; /* mask of which versions to run */ + t.runHours = 0; /* stop searching after this many hours */ + t.dupRotMask= 0; /* default is to allow same rotation value in a round */ + t.regradeCnt= 3; /* how many scaled up counts to try */ + t.popCnt = DEFAULT_POP_CNT; /* size of population */ + t.tstFlags = TST_FLG_STDERR | TST_FLG_VERBOSE | TST_FLG_USE_ABS | TST_FLG_CHECK_ONE; /* default flags */ + + for (i=1;i<(uint)argc;i++) + { /* parse command line args */ + if (argv[i][0] == '?') + GiveHelp(); + else if (argv[i][0] == '-' || argv[i][0] == '+') + { +#define arg_toi(s) atoi(s + ((s[2] == '=') ? 
3 : 2)) + switch (toupper(argv[i][1])) + { + case '?': GiveHelp(); break; + + case 'A': t.tstFlags &= ~TST_FLG_USE_ABS; break; + case 'E': t.tstFlags &= ~TST_FLG_STDERR; break; + case 'H': t.tstFlags |= TST_FLG_SHOW_HIST; break; + case 'K': t.tstFlags |= TST_FLG_KEEP_MIN_HW; break; + case 'Q': t.tstFlags |= TST_FLG_QUICK_EXIT; break; + case 'U': t.tstFlags |= TST_FLG_WEIGHT_REP; break; + case 'V': t.tstFlags &= ~TST_FLG_VERBOSE; break; + case '1': t.tstFlags &= ~TST_FLG_CHECK_ONE; break; + + case 'B': bitsPerBlock = arg_toi(argv[i]); break; + case 'C': t.sampleCnt = arg_toi(argv[i]); break; + case 'D': t.diffBits = arg_toi(argv[i]); break; + case 'G': goodRot = arg_toi(argv[i]); break; + case 'I': t.rotVerMask = arg_toi(argv[i]); break; + case 'J': t.regradeCnt = arg_toi(argv[i]); break; + case 'O': t.minOffs = arg_toi(argv[i]); break; + case 'P': t.popCnt = arg_toi(argv[i]); break; + case 'R': t.rounds = arg_toi(argv[i]); break; + case 'S': seed = arg_toi(argv[i]); break; + case 'T': t.runHours = arg_toi(argv[i]); break; + case 'W': t.minHW_or = arg_toi(argv[i]); break; + case 'X': t.genCntMax = arg_toi(argv[i]); break; + case 'Z': t.maxSatRnds = arg_toi(argv[i]); break; + case '2': t.dupRotMask = ~0u; break; + case '0': chkInv = 0; break; + case '8': do8 = 1; break; + + default : printf("Unknown option: %s\n",argv[i]); GiveHelp(); break; + } + } + else if (argv[i][0] == '@') + { + rotFileName = argv[i]+1; + t.genCntMax = 1; /* stop after one generation */ + } + } + + if (chkInv) + InverseChecks(); /* check fwd vs. 
rev transforms (slow in debugger) */ + + t.goodRotCntMask = 0; + for (i=goodRot; i <= BITS_PER_WORD - goodRot ;i++) + t.goodRotCntMask |= (((u64b) 1) << i); + if (do8) + t.goodRotCntMask = (((u64b) 0x03838383) << 32) | 0x83838380; + + if (bitsPerBlock == 0) + { + printf("Running search for all Skein block sizes (256, 512, and 1024)\n"); + t.rounds = 0; /* use defaults, since otherwise it makes little sense */ + t.minHW_or = 0; + } + + bMin = (bitsPerBlock) ? bitsPerBlock : 256; + bMax = (bitsPerBlock) ? bitsPerBlock : 1024; + + for (bitsPerBlock=bMin;bitsPerBlock<=bMax;bitsPerBlock*=2) + { + t.seed0 = (seed) ? seed : (uint) time(NULL); /* randomize based on time if -s0 is given */ + RunSearch(t); + } + + return 0; + } diff --git a/Additional_Implementations/skein_test.c b/Additional_Implementations/skein_test.c new file mode 100644 index 000000000000..9d999e0d49c0 --- /dev/null +++ b/Additional_Implementations/skein_test.c @@ -0,0 +1,1380 @@ +/*********************************************************************** +** +** Test/verification code for the Skein block functions. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +** Testing: +** - buffering of incremental calls (random cnt steps) +** - partial input byte handling +** - output sample hash results (for comparison of ref vs. 
optimized) +** - performance +** +***********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "skein.h" +#include "SHA3api_ref.h" + +static const uint_t HASH_BITS[] = /* list of hash hash lengths to test */ + { 160,224,256,384,512,1024, 256+8,512+8,1024+8,2048+8 }; + +#define HASH_BITS_CNT (sizeof(HASH_BITS)/sizeof(HASH_BITS[0])) + +/* bits of the verbose flag word */ +#define V_KAT_LONG (1u << 0) +#define V_KAT_SHORT (1u << 1) +#define V_KAT_NO_TREE (1u << 2) +#define V_KAT_NO_SEQ (1u << 3) +#define V_KAT_NO_3FISH (1u << 4) +#define V_KAT_DO_3FISH (1u << 5) + +/* automatic compiler version number detection */ +#if !defined(CompilerVersion) + +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +#define CompilerVersion (900) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +#define CompilerVersion (600) +#elif defined(_MSC_VER) && (_MSC_VER >= 1000) +#define CompilerVersion (420) +#elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) +#define CompilerVersion (100*__GNUC__ + 10*__GNUC_MINOR__ + __GNUC_PATCHLEVEL__) +#elif defined(__BORLANDC__) /* this is in hex */ +#define CompilerVersion (100*(__BORLANDC__ >> 8) + 10*((__BORLANDC__ >> 4) & 0xF) + (__BORLANDC__ & 0xF)) +#endif + +#endif + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +/* external functions to determine code size (in bytes) */ +size_t Skein_256_Process_Block_CodeSize(void); +size_t Skein_512_Process_Block_CodeSize(void); +size_t Skein1024_Process_Block_CodeSize(void); +size_t Skein_256_API_CodeSize(void); +size_t Skein_512_API_CodeSize(void); +size_t Skein1024_API_CodeSize(void); +uint_t Skein_256_Unroll_Cnt(void); +uint_t Skein_512_Unroll_Cnt(void); +uint_t Skein1024_Unroll_Cnt(void); +#elif defined(SKEIN_LOOP) +uint_t Skein_256_Unroll_Cnt(void) { return (SKEIN_LOOP / 100) % 10; } +uint_t Skein_512_Unroll_Cnt(void) { return (SKEIN_LOOP / 10) % 10; } +uint_t 
Skein1024_Unroll_Cnt(void) { return (SKEIN_LOOP ) % 10; } +#else +uint_t Skein_256_Unroll_Cnt(void) { return 0; } +uint_t Skein_512_Unroll_Cnt(void) { return 0; } +uint_t Skein1024_Unroll_Cnt(void) { return 0; } +#endif + +/* External function to process blkCnt (nonzero) full block(s) of data. */ +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); + +/********************** debug i/o helper routines **********************/ +void FatalError(const char *s,...) + { /* print out a msg and exit with an error code */ + va_list ap; + va_start(ap,s); + vprintf(s,ap); + va_end(ap); + printf("\n"); + exit(2); + } + +static uint_t _quiet_ = 0; /* quiet processing? */ +static uint_t verbose = 0; /* verbose flag bits */ +static uint_t katHash = ~0u; /* use as a quick check on KAT results */ + +void ShowBytes(uint_t cnt,const u08b_t *b) + { /* formatted output of byte array */ + uint_t i; + + for (i=0;i < cnt;i++) + { + if (i %16 == 0) printf(" "); + else if (i % 4 == 0) printf(" "); + printf(" %02X",b[i]); + katHash = (katHash ^ b[i]) * 0xDEADBEEF; + katHash = (katHash ^ (katHash >> 23) ^ (katHash >> 17) ^ (katHash >> 9)) * 0xCAFEF00D; + if (i %16 == 15 || i==cnt-1) printf("\n"); + } + } + +#ifndef SKEIN_DEBUG +uint_t skein_DebugFlag = 0; /* dummy flags (if not defined elsewhere) */ +#endif + +#define SKEIN_DEBUG_SHORT (SKEIN_DEBUG_HDR | SKEIN_DEBUG_STATE | SKEIN_DEBUG_TWEAK | SKEIN_DEBUG_KEY | SKEIN_DEBUG_INPUT_08 | SKEIN_DEBUG_FINAL) +#define SKEIN_DEBUG_DEFAULT (SKEIN_DEBUG_SHORT) + +void Show_Debug(const char *s,...) + { + if (skein_DebugFlag) /* are we showing debug info? 
*/ + { + va_list ap; + va_start(ap,s); + vprintf(s,ap); + va_end(ap); + } + } + +/************** Timing routine (for performance measurements) ***********/ +/* unfortunately, this is generally assembly code and not very portable */ + +#if defined(_M_IX86) || defined(__i386) || defined(_i386) || defined(__i386__) || defined(i386) || \ + defined(_X86_) || defined(__x86_64__) || defined(_M_X64) || defined(__x86_64) +#define _Is_X86_ 1 +#endif + +#if defined(_Is_X86_) && (!defined(__STRICT_ANSI__)) && (defined(__GNUC__) || !defined(__STDC__)) && \ + (defined(__BORLANDC__) || defined(_MSC_VER) || defined(__MINGW_H) || defined(__GNUC__)) +#define HI_RES_CLK_OK 1 /* it's ok to use RDTSC opcode */ + +#if defined(_MSC_VER) && defined(_M_X64) +#include +#pragma intrinsic(__rdtsc) +#endif + +#endif + +uint_32t HiResTime(void) + { +#if defined(HI_RES_CLK_OK) + uint_32t x[2]; +#if defined(__BORLANDC__) +#define COMPILER_ID "BCC" + _asm { push edx }; + __emit__(0x0F,0x31); /* RDTSC instruction */ + _asm { pop edx }; + _asm { mov x[0],eax }; +#elif defined(_MSC_VER) +#define COMPILER_ID "MSC" +#if defined(_MSC_VER) && defined(_M_X64) + x[0] = (uint_32t) __rdtsc(); +#else + _asm { push edx }; + _asm { _emit 0fh }; _asm { _emit 031h }; + _asm { pop edx }; + _asm { mov x[0],eax }; +#endif +#elif defined(__MINGW_H) || defined(__GNUC__) +#define COMPILER_ID "GCC" + asm volatile("rdtsc" : "=a"(x[0]), "=d"(x[1])); +#else +#error "HI_RES_CLK_OK -- but no assembler code for this platform (?)" +#endif + return x[0]; +#else + /* avoid annoying MSVC 9.0 compiler warning #4720 in ANSI mode! 
*/ +#if (!defined(_MSC_VER)) || (!defined(__STDC__)) || (_MSC_VER < 1300) + FatalError("No support for RDTSC on this CPU platform\n"); +#endif + return 0; +#endif /* defined(HI_RES_CLK_OK) */ + } + +/******** OS-specific calls for setting priorities and sleeping ******/ +#if (defined(_MSC_VER) && (_MSC_VER >= 1300) && !defined(__STRICT_ANSI__) && !defined(__STDC__)) \ + && defined(_M_X64) +#include +#include + +#ifdef SKEIN_FORCE_LOCK_CPU /* NielsF says this is not a good way to do things */ +#define SKEIN_LOCK_CPU_OK (1) +int Lock_CPU(void) + { /* lock this process to this CPU for perf timing */ + /* -- thanks to Brian Gladman for this code */ + HANDLE ph; + DWORD_PTR afp; + DWORD_PTR afs; + ph = GetCurrentProcess(); + if(GetProcessAffinityMask(ph, &afp, &afs)) + { + afp &= (((size_t)1u) << GetCurrentProcessorNumber()); + if(!SetProcessAffinityMask(ph, afp)) + return 1; + } + else + { + return 2; + } + return 0; /* success */ + } +#endif + +#define _GOT_OS_SLEEP (1) +void OS_Sleep(uint_t msec) + { + Sleep(msec); + } + +#define _GOT_OS_SET_PRIORITY (1) +int OS_Set_High_Priority(void) + { + if(!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST)) + return 1; +#ifdef SKEIN_LOCK_CPU_OK + if (Lock_CPU()) + return 2; +#endif + return 0; + } + +int OS_Set_Normal_Priority(void) + { + if(!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL)) + return 1; + return 0; + } +#endif + +#if defined(__linux) || defined(__linux__) || defined(linux) || defined(__gnu_linux__) +#include +#define _GOT_OS_SLEEP (1) +void OS_Sleep(uint_t mSec) + { + usleep(mSec*1000); + } +#endif + +#ifndef _GOT_OS_SET_PRIORITY +/* dummy routines if nothing is available */ +int OS_Set_High_Priority(void) + { + return 0; + } +int OS_Set_Normal_Priority(void) + { + return 0; + } +#endif + +#ifndef _GOT_OS_SLEEP +uint_32t OS_Sleep(uint_32t mSec) + { + return mSec; /* avoid compiler warnings */ + } +#endif + +#ifndef COMPILER_ID +#define COMPILER_ID "(unknown)" +#endif 
+/********************** use RC4 to generate test data ******************/ +/* Note: this works identically on all platforms (big/little-endian) */ +static struct + { + uint_t I,J; /* RC4 vars */ + u08b_t state[256]; + } prng; + +void RandBytes(void *dst,uint_t byteCnt) + { + u08b_t a,b; + u08b_t *d = (u08b_t *) dst; + + for (;byteCnt;byteCnt--,d++) /* run RC4 */ + { + prng.I = (prng.I+1) & 0xFF; + a = prng.state[prng.I]; + prng.J = (prng.J+a) & 0xFF; + b = prng.state[prng.J]; + prng.state[prng.I] = b; + prng.state[prng.J] = a; + *d = prng.state[(a+b) & 0xFF]; + } + } + +/* get a pseudo-random 32-bit integer in a portable way */ +uint_t Rand32(void) + { + uint_t i,n; + u08b_t tmp[4]; + + RandBytes(tmp,sizeof(tmp)); + + for (i=n=0;i> (8*i)); + + /* initialize the permutation */ + for (i=0;i<256;i++) + prng.state[i]=(u08b_t) i; + + /* now run the RC4 key schedule */ + for (i=j=0;i<256;i++) + { + j = (j + prng.state[i] + tmp[i%8]) & 0xFF; + tmp[256] = prng.state[i]; + prng.state[i] = prng.state[j]; + prng.state[j] = tmp[256]; + } + prng.I = prng.J = 0; /* init I,J variables for RC4 */ + + /* discard initial keystream before returning */ + RandBytes(tmp,sizeof(tmp)); + } + +/***********************************************************************/ +/* An AHS-like API that allows explicit setting of block size */ +/* [i.e., the AHS API selects a block size based solely on the ] */ +/* [hash result length, while Skein allows independent hash ] */ +/* [result size and block size ] */ +/***********************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* select the context size and init the context */ +int Skein_Init(int blkSize,hashState *state, int hashbitlen) + { + switch (blkSize) + { + case 256: + state->statebits = 64*SKEIN_256_STATE_WORDS; + return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen); + case 512: + state->statebits = 64*SKEIN_512_STATE_WORDS; + return 
Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen); + case 1024: + state->statebits = 64*SKEIN1024_STATE_WORDS; + return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen); + default: + return SKEIN_FAIL; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* select the context size and init (extended) the context */ +int Skein_InitExt(int blkSize,hashState *state, int hashbitlen,u64b_t treeInfo,const u08b_t *key,size_t keyBytes) + { + switch (blkSize) + { + case 256: + state->statebits = 64*SKEIN_256_STATE_WORDS; + return Skein_256_InitExt(&state->u.ctx_256,(size_t) hashbitlen,treeInfo,key,keyBytes); + case 512: + state->statebits = 64*SKEIN_512_STATE_WORDS; + return Skein_512_InitExt(&state->u.ctx_512,(size_t) hashbitlen,treeInfo,key,keyBytes); + case 1024: + state->statebits = 64*SKEIN1024_STATE_WORDS; + return Skein1024_InitExt(&state->u.ctx1024,(size_t) hashbitlen,treeInfo,key,keyBytes); + default: + return SKEIN_FAIL; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process data to be hashed */ +int Skein_Update(hashState *state, const BitSequence *data, DataLength databitlen) + { + /* only the final Update() call is allowed do partial bytes, else assert an error */ + Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, FAIL); + + if ((databitlen & 7) == 0) + { + switch (state->statebits) + { + case 512: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3); + case 256: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3); + case 1024: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3); + default: return SKEIN_FAIL; + } + } + else + { + size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle */ + u08b_t mask,*p; + +#if (!defined(_MSC_VER)) || (MSC_VER >= 1200) /* MSC v4.2 gives (invalid) warning here!! 
*/ + Skein_assert(&state->u.h == &state->u.ctx_256.h); /* sanity checks: allow u.h --> all contexts */ + Skein_assert(&state->u.h == &state->u.ctx_512.h); + Skein_assert(&state->u.h == &state->u.ctx1024.h); +#endif + switch (state->statebits) + { + case 512: Skein_512_Update(&state->u.ctx_512,data,bCnt); + p = state->u.ctx_512.b; + break; + case 256: Skein_256_Update(&state->u.ctx_256,data,bCnt); + p = state->u.ctx_256.b; + break; + case 1024: Skein1024_Update(&state->u.ctx1024,data,bCnt); + p = state->u.ctx1024.b; + break; + default: return FAIL; + } + Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */ + /* now "pad" the final partial byte the way NIST likes */ + bCnt = state->u.h.bCnt; /* get the bCnt value (same location for all block sizes) */ + Skein_assert(bCnt != 0); /* internal sanity check: there IS a partial byte in the buffer! */ + mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */ + p[bCnt-1] = (u08b_t)((p[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte (in the buffer) */ + + return SUCCESS; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize hash computation and output the result (hashbitlen bits) */ +int Skein_Final(hashState *state, BitSequence *hashval) + { + switch (state->statebits) + { + case 512: return Skein_512_Final(&state->u.ctx_512,hashval); + case 256: return Skein_256_Final(&state->u.ctx_256,hashval); + case 1024: return Skein1024_Final(&state->u.ctx1024,hashval); + default: return SKEIN_FAIL; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* all-in-one hash function */ +int Skein_Hash(int blkSize,int hashbitlen, const BitSequence *data, /* all-in-one call */ + DataLength databitlen,BitSequence *hashval) + { + hashState state; + int r = Skein_Init(blkSize,&state,hashbitlen); + if (r == SKEIN_SUCCESS) + { /* these calls do not fail when called properly */ + r = Skein_Update(&state,data,databitlen); + 
Skein_Final(&state,hashval); + } + return r; + } + +/***********************************************************************/ +/* various self-consistency checks */ +uint_t Skein_Test(uint_t blkSize,uint_t maxLen,uint_t hashLen,uint_t nStep,uint_t oneBlk) + { + enum { MAX_BUF=1024 }; + u08b_t b[MAX_BUF+4],hashVal[2][MAX_BUF+4]; + uint_t i,j,k,n,bCnt,useAHS,step,bitLen,testCnt=0; + hashState s[2]; + + assert(blkSize > 0 && blkSize <= 1024 && (blkSize % 256) == 0); + assert((hashLen % 8) == 0); + + if (maxLen > MAX_BUF*8) /* keep things reasonably small */ + maxLen = MAX_BUF*8; + if (hashLen > MAX_BUF*8) + hashLen = MAX_BUF*8; + if (maxLen == 0) /* default sizes */ + maxLen = blkSize*2; + if (hashLen == 0) + hashLen = blkSize; + + if (oneBlk) + { + if (oneBlk > MAX_BUF*8) + oneBlk = MAX_BUF*8; + for (i=0;i pair */ + FatalError("AHS_API Init() error!"); + skein_DebugFlag = n; /* restore debug display status */ + + useAHS = (s[0].statebits == blkSize); /* does this pair work via AHS_API? */ + + bCnt = (maxLen + 7) / 8; /* convert maxLen to bytes */ + for (n=0;n < bCnt;n+=nStep) /* process all the data lengths (# bytes = n+1)*/ + { + RandBytes(b,maxLen); /* get something to hash */ + for (j=8;j>0;j--) /* j = # bits in final byte */ + { + testCnt++; + memset(hashVal,0,sizeof(hashVal)); + Show_Debug("\n*** Single Hash() call (%d bits)\n",8*n+j); + if (Skein_Hash(blkSize,hashLen,b,8*n+j,hashVal[0]) != SKEIN_SUCCESS) + FatalError("Skein_Hash != SUCCESS"); + for (k=hashLen/8;k<=MAX_BUF;k++) + if (hashVal[0][k] != 0) + FatalError("Skein hash output overrun!: hashLen = %d bits",hashLen); + if (useAHS) /* compare using AHS API, if supported */ + { + Show_Debug("\n*** Single AHS API Hash() call\n"); + if (Hash(hashLen,b,8*n+j,hashVal[1]) != SUCCESS) + FatalError("Skein_Hash != SUCCESS"); + for (k=hashLen/8;k<=MAX_BUF;k++) + if (hashVal[1][k] != 0) + FatalError("Skein AHS_API hash output overrun!: hashLen = %d bits",hashLen); + if (memcmp(hashVal[1],hashVal[0],hashLen/8)) + 
FatalError("Skein vs. AHS API miscompare"); + } + /* now try (randomized) steps thru entire input block */ + for (i=0;i<4;i++) + { + Show_Debug("\n*** Multiple Update() calls [%s]",(i)?"random steps":"step==1"); + if (i >= 2) + { + Show_Debug(" [re-use precomputed state]"); + s[0] = s[1]; + } + else + { + k = (i) ? Skein_Init (blkSize,&s[0],hashLen) : + Skein_InitExt(blkSize,&s[0],hashLen,SKEIN_CFG_TREE_INFO_SEQUENTIAL,NULL,0); + if (k != SKEIN_SUCCESS) + FatalError("Skein_Init != SUCCESS"); + s[1] = s[0]; /* make a copy for next time */ + } + Show_Debug("\n"); + for (k=0;k0, randomly */ + step = (i == 0) ? 1 : 1 + (Rand32() % (n+1-k)); /* # bytes to process */ + bitLen = (k+step >= n+1) ? 8*(step-1) + j: 8*step; /* partial final byte handling */ + if (Skein_Update(&s[0],&b[k],bitLen) != SKEIN_SUCCESS) + FatalError("Skein_Update != SUCCESS"); + } + if (Skein_Final(&s[0],hashVal[1]) != SKEIN_SUCCESS) + FatalError("Skein_Final != SUCCESS"); + for (k=hashLen/8;k<=MAX_BUF;k++) + if (hashVal[0][k] != 0) + FatalError("Skein hash output overrun!: hashLen = %d bits",hashLen); + if (memcmp(hashVal[1],hashVal[0],hashLen/8)) + FatalError("Skein Hash() vs. 
Update() miscompare!"); + } + } + } + return testCnt; + } + +/* filter out pairs in short KAT mode */ +uint_t Short_KAT_OK(uint_t blkSize,uint_t hashBits) + { + switch (blkSize) + { + case 256: + if (hashBits != 256 && hashBits != 224) + return 0; + break; + case 512: + if (hashBits != 256 && hashBits != 384 && hashBits != 512) + return 0; + break; + case 1024: + if (hashBits != 384 && hashBits != 512 && hashBits != 1024) + return 0; + break; + default: + return 0; + } + return 1; + } + +#if SKEIN_TREE_HASH +#define MAX_TREE_MSG_LEN (1 << 12) +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* pad final block, no OUTPUT stage */ +int Skein_Final_Pad(hashState *state, BitSequence *hashval) + { + switch (state->statebits) + { + case 512: return Skein_512_Final_Pad(&state->u.ctx_512,hashval); + case 256: return Skein_256_Final_Pad(&state->u.ctx_256,hashval); + case 1024: return Skein1024_Final_Pad(&state->u.ctx1024,hashval); + default: return SKEIN_FAIL; + } + } +/* just the OUTPUT stage */ +int Skein_Output(hashState *state, BitSequence *hashval) + { + switch (state->statebits) + { + case 512: return Skein_512_Output(&state->u.ctx_512,hashval); + case 256: return Skein_256_Output(&state->u.ctx_256,hashval); + case 1024: return Skein1024_Output(&state->u.ctx1024,hashval); + default: return SKEIN_FAIL; + } + } + +/* generate a KAT test for the given data and tree parameters. */ +/* This is an "all-in-one" call. 
It is not intended to represent */ +/* how a real multi-processor version would be implemented, but */ +/* the results will be the same */ +void Skein_TreeHash + (uint_t blkSize,uint_t hashBits,const u08b_t *msg,size_t msgBytes, + uint_t leaf ,uint_t node ,uint_t maxLevel ,u08b_t *hashRes) + { + enum { MAX_HEIGHT = 32 }; /* how deep we can go here */ + uint_t height; + uint_t blkBytes = blkSize/8; + uint_t saveDebug = skein_DebugFlag; + size_t n,nodeLen,srcOffs,dstOffs,bCnt; + u64b_t treeInfo; + u08b_t M[MAX_TREE_MSG_LEN+4]; + hashState G,s; + + assert(node < 256 && leaf < 256 && maxLevel < 256); + assert(node > 0 && leaf > 0 && maxLevel > 1 ); + assert(blkSize == 256 || blkSize == 512 || blkSize == 1024); + assert(blkBytes <= sizeof(M)); + assert(msgBytes <= sizeof(M)); + + /* precompute the config block result G for multiple uses below */ +#ifdef SKEIN_DEBUG + if (skein_DebugFlag) + skein_DebugFlag |= SKEIN_DEBUG_CONFIG; +#endif + treeInfo = SKEIN_CFG_TREE_INFO(leaf,node,maxLevel); + if (Skein_InitExt(blkSize,&G,hashBits,treeInfo,NULL,0) != SKEIN_SUCCESS) + FatalError("Skein_InitExt() fails in tree"); + skein_DebugFlag = saveDebug; + + bCnt = msgBytes; + memcpy(M,msg,bCnt); + for (height=0;;height++) /* walk up the tree */ + { + if (height && (bCnt==blkBytes)) /* are we done (with only one block left)? */ + break; + if (height+1 == maxLevel) /* is this the final allowed level? */ + { /* if so, do it as one big hash */ + s = G; + Skein_Set_Tree_Level(s.u.h,height+1); + Skein_Update (&s,M,bCnt*8); + Skein_Final_Pad(&s,M); + break; + } + nodeLen = blkBytes << ((height) ? node : leaf); + for (srcOffs=dstOffs=0;srcOffs <= bCnt;) + { + n = bCnt - srcOffs; /* number of bytes left at this level */ + if (n > nodeLen) /* limit to node size */ + n = nodeLen; + s = G; + s.u.h.T[0] = srcOffs; /* nonzero initial offset in tweak! 
*/ + Skein_Set_Tree_Level(s.u.h,height+1); + Skein_Update (&s,M+srcOffs,n*8); + Skein_Final_Pad(&s,M+dstOffs); /* finish up this node, output intermediate result to M[]*/ + dstOffs+=blkBytes; + srcOffs+=n; + if (srcOffs >= bCnt) /* special logic to handle (msgBytes == 0) case */ + break; + } + bCnt = dstOffs; + } + + /* output the result */ + Skein_Output(&s,hashRes); + } + +/* +** Generate tree-mode hash KAT vectors. +** Note: +** Tree vectors are different enough from non-tree vectors that it +** makes sense to separate this out into a different function, rather +** than shoehorn it into the same KAT logic as the other modes. +**/ +void Skein_GenKAT_Tree(uint_t blkSize) + { + static const struct + { + uint_t leaf,node,maxLevel,levels; + } + TREE_PARMS[] = { {2,2,2,2}, {1,2,3,2}, {2,1,0xFF,3} }; +#define TREE_PARM_CNT (sizeof(TREE_PARMS)/sizeof(TREE_PARMS[0])) + + u08b_t msg[MAX_TREE_MSG_LEN+4],hashVal[MAX_TREE_MSG_LEN+4]; + uint_t i,j,k,n,p,q,hashBits,node,leaf,leafBytes,msgBytes,byteCnt,levels,maxLevel; + + assert(blkSize == 256 || blkSize == 512 || blkSize == 1024); + for (i=0;i> 16)); + msg[i+1] = (u08b_t) ((i ^ blkSize) >> 8); + } + for (k=q=n=0;k < HASH_BITS_CNT;k++) + { + hashBits = HASH_BITS[k]; + if (!Short_KAT_OK(blkSize,hashBits)) + continue; + if ((verbose & V_KAT_SHORT) && (hashBits != blkSize)) + continue; + for (p=0;p 0); + if (byteCnt > MAX_TREE_MSG_LEN) + continue; + q = (q+1) % leafBytes; + msgBytes = byteCnt - q; + switch (blkSize) + { + case 256: printf("\n:Skein-256: "); break; + case 512: printf("\n:Skein-512: "); break; + case 1024: printf("\n:Skein-1024:"); break; + } + printf(" %4d-bit hash, msgLen =%6d bits",hashBits,msgBytes*8); + printf(". 
Tree: leaf=%02X, node=%02X, maxLevels=%02X\n",leaf,node,maxLevel); + printf("\nMessage data:\n"); + if (msgBytes == 0) + printf(" (none)\n"); + else + ShowBytes(msgBytes,msg); + + Skein_TreeHash(blkSize,hashBits,msg,msgBytes,leaf,node,maxLevel,hashVal); + + printf("Result:\n"); + ShowBytes((hashBits+7)/8,hashVal); + printf("--------------------------------\n"); + } + } + } + } +#endif + +/* +** Output some KAT values. This output is generally re-directed to a file and +** can be compared across platforms to help validate an implementation on a +** new platform (or compare reference vs. optimized code, for example). The +** file will be provided as part of the Skein submission package to NIST. +** +** When used in conjunction with the debug flag, this will output a VERY long +** result. The verbose flag is used to output even more combinations of +** +** +** Note: this function does NOT output the NIST AHS KAT format. +*/ +void Skein_ShowKAT(uint_t blkSizeMask) + { + enum + { + DATA_TYPE_ZERO = 0, + DATA_TYPE_INC, + DATA_TYPE_RAND, + DATA_TYPE_MAC, + DATA_TYPE_TREE, + DATA_TYPE_CNT, + + MAX_BYTES = 3*1024/8 + }; + static const char *TYPE_NAMES[] = { "zero","incrementing","random","random+MAC","tree",NULL }; + static const uint_t MSG_BITS[] = + { 0,1,2,3,4,5,6,7,8,9,10,32,64,128,192, + 256-1, 256, 256+1, 384, + 512-1, 512, 512+1, 768, + 1024-1,1024,1024+1, + 2048-1,2048,2048+1 + }; +#define MSG_BITS_CNT (sizeof(MSG_BITS)/sizeof(MSG_BITS[0])) + + uint_t i,j,k,blkSize,dataType,hashBits,msgBits,keyBytes,blkBytes,keyType; + u08b_t data[MAX_BYTES+4],key[MAX_BYTES+4],hashVal[MAX_BYTES+4]; + const char *msgType; + hashState s; + + Rand_Init(SKEIN_MK_64(0xDEADBEEF,0)); /* init PRNG with repeatable value */ + katHash = ~0u; + keyType = 0; + +#ifdef SKEIN_DEBUG + /* first, show some "raw" Threefish + feedforward block calls, with round-by-round debug info if enabled */ + if (skein_DebugFlag && !(verbose & V_KAT_NO_3FISH)) + { + k = skein_DebugFlag; /* save debug flag value */ 
+ skein_DebugFlag = THREEFISH_DEBUG_ALL & ~ SKEIN_DEBUG_HDR; /* turn on full debug detail, use Threefish name */ + skein_DebugFlag |= (k & SKEIN_DEBUG_PERMUTE); +#else + if (verbose & V_KAT_DO_3FISH) /* allow non-SKEIN_DEBUG testing */ + { +#endif + for (blkSize = 256;blkSize <= 1024; blkSize*=2) + { + if (blkSizeMask && (blkSize & blkSizeMask) == 0) + continue; + for (dataType=DATA_TYPE_ZERO; dataType <= DATA_TYPE_INC; dataType++) + { + switch (dataType) + { + case DATA_TYPE_ZERO: + memset(data,0,sizeof(data)); + memset(key ,0,sizeof(key)); + break; + case DATA_TYPE_INC: + for (i=0;i '%s'",dataType,msgType); + break; + } + + for (blkSize = 256;blkSize <= 1024; blkSize*=2) + { + if (blkSizeMask && (blkSize & blkSizeMask) == 0) + continue; + if (dataType == DATA_TYPE_TREE) + { +#if SKEIN_TREE_HASH + Skein_GenKAT_Tree(blkSize); +#endif + continue; + } + if (verbose & V_KAT_NO_SEQ) + continue; + blkBytes = blkSize/8; + for (j=0;j < MSG_BITS_CNT;j++) + for (k=0;k < HASH_BITS_CNT;k++) + { + msgBits = MSG_BITS[j]; /* message length */ + hashBits = HASH_BITS[k]; /* hash result size */ + assert(MAX_BYTES*8 >= hashBits && MAX_BYTES*8 >= msgBits); + if (msgBits != 1024 && hashBits != blkSize && !(verbose & V_KAT_LONG)) + continue; /* keep the output size reasonable, unless verbose */ + if (verbose & V_KAT_SHORT) + { /* -v2 ==> generate "short" KAT set by filtering out most vectors */ + if (dataType != DATA_TYPE_INC) + continue; + if (msgBits != 8 && msgBits != blkSize && msgBits != 2*blkSize) + continue; + if (!Short_KAT_OK(blkSize,hashBits)) + continue; + } + switch (blkSize) + { + case 256: printf("\n:Skein-256: "); break; + case 512: printf("\n:Skein-512: "); break; + case 1024: printf("\n:Skein-1024:"); break; + } + printf(" %4d-bit hash, msgLen =%6d bits",hashBits,msgBits); + if (!(verbose & V_KAT_SHORT)) + printf(", data = '%s'",msgType); + printf("\n\nMessage data:\n"); + if (msgBits == 0) + printf(" (none)\n"); + else + ShowBytes((msgBits+7)/8,data); + switch 
(dataType) + { + default: /* straight hash value */ + if (Skein_Hash(blkSize,hashBits,data,msgBits,hashVal) != SKEIN_SUCCESS) + FatalError("Skein_Hash() error!"); + break; + case DATA_TYPE_MAC: /* include some MAC computations in KAT file */ + switch (keyType++) /* sequence thru different MAC key lengths */ + { + case 0: keyBytes = blkBytes/2; break; + case 1: keyBytes = blkBytes; break; + case 2: keyBytes = blkBytes +1; break; + case 3: keyBytes = blkBytes*2+1; break; + default:keyBytes = 0; /* not actually a MAC this time, but use InitExt() */ + keyType = 0; /* start the cycle again next time */ + } + printf("MAC key = %4d bytes:\n",keyBytes); + if (keyBytes) /* show MAC key, if any */ + ShowBytes(keyBytes,key); + else + printf(" (none) /* use InitExt() call */\n"); + + if (Skein_InitExt(blkSize,&s,hashBits,SKEIN_CFG_TREE_INFO_SEQUENTIAL,key,keyBytes) != SKEIN_SUCCESS) + FatalError("Skein_InitExt() error!"); + if (Skein_Update(&s,data,msgBits) != SKEIN_SUCCESS) + FatalError("Skein_Update() error!"); + if (Skein_Final(&s,hashVal) != SKEIN_SUCCESS) + FatalError("Skein_Final() error!"); + break; + case DATA_TYPE_TREE: + assert(0); + break; + } + printf("Result:\n"); + ShowBytes((hashBits+7)/8,hashVal); + printf("--------------------------------\n"); + } + } + } + if (!_quiet_) + fprintf(stderr,"katHash = %08X\n",katHash ^ 0x150183D2); + } + +/* generate pre-computed IVs for inclusion in Skein C code */ +void Skein_GenerateIV(void) + { + static const struct + { uint_t blkSize,hashBits; } + IV_TAB[] = /* which pairs to precompute */ + { { 256, 128 }, { 256, 160 }, { 256, 224 }, { 256, 256 }, + { 512, 128 }, { 512, 160 }, { 512, 224 }, { 512, 256 }, + { 512, 384 }, { 512, 512 }, + {1024, 384 }, {1024, 512 }, {1024,1024 } + }; + uint_t i,j,blkSize,hashBits; + hashState state; + const u64b_t *w; + const char *s; + + printf("#ifndef _SKEIN_IV_H_\n" + "#define _SKEIN_IV_H_\n\n" + "#include \"skein.h\" /* get Skein macros and types */\n\n" + "/*\n" + "***************** 
Pre-computed Skein IVs *******************\n" + "**\n" + "** NOTE: these values are not \"magic\" constants, but\n" + "** are generated using the Threefish block function.\n" + "** They are pre-computed here only for speed; i.e., to\n" + "** avoid the need for a Threefish call during Init().\n" + "**\n" + "** The IV for any fixed hash length may be pre-computed.\n" + "** Only the most common values are included here.\n" + "**\n" + "************************************************************\n" + "**/\n\n" + "#define MK_64 SKEIN_MK_64\n\n" + ); + for (i=0;i < sizeof(IV_TAB)/sizeof(IV_TAB[0]); i++) + { + blkSize = IV_TAB[i].blkSize; + hashBits = IV_TAB[i].hashBits; + switch (blkSize) + { + case 256: w = state.u.ctx_256.X; s = "_256"; break; + case 512: w = state.u.ctx_512.X; s = "_512"; break; + case 1024: w = state.u.ctx1024.X; s = "1024"; break; + default: FatalError("Invalid blkSize"); + continue; /* should never happen, but avoids gcc warning */ + } + if (Skein_Init(blkSize,&state,hashBits) != SKEIN_SUCCESS) + FatalError("Error generating IV: blkSize=%d, hashBits=%d",blkSize,hashBits); + printf("/* blkSize = %4d bits. hashSize = %4d bits */\n",blkSize,hashBits); + printf("const u64b_t SKEIN%s_IV_%d[] =\n {\n",s,hashBits); + for (j=0;j> 32),(uint_32t)w[j],(j+1 == blkSize/64)?"":","); + printf(" };\n\n"); + } + printf("#endif /* _SKEIN_IV_H_ */\n"); + } + +/* qsort routine */ +int compare_uint_32t(const void *aPtr,const void *bPtr) + { + uint_32t a = * ((uint_32t *) aPtr); + uint_32t b = * ((uint_32t *) bPtr); + + if (a > b) return 1; + if (a < b) return -1; + return 0; + } + +void ShowCompiler(const char *CVER) + { + printf(" //:"); +#if defined(SKEIN_XMM) + printf(" 32-XMM, "); +#else + printf(" %2u-bit, ",(uint_t)(8*sizeof(size_t))); +#endif + printf("%s%s",COMPILER_ID,CVER); + + /* do we need to show unroll amount? 
*/ +#if defined(SKEIN_USE_ASM) && SKEIN_USE_ASM + printf(" [asm="); +#define _SC_DO_LOOP_ (1) +#elif defined(SKEIN_LOOP) + printf(" [ C ="); +#define _SC_DO_LOOP_ (1) +#endif + +#ifdef _SC_DO_LOOP_ + printf("%c",(Skein_256_Unroll_Cnt())?'0'+Skein_256_Unroll_Cnt():'.'); + printf("%c",(Skein_512_Unroll_Cnt())?'0'+Skein_512_Unroll_Cnt():'.'); + printf("%c",(Skein1024_Unroll_Cnt())?'0'+Skein1024_Unroll_Cnt():'.'); + printf("]"); +#endif + } + +/* measure the speed (in CPU clks/byte) for a Skein implementation */ +void Skein_MeasurePerformance(const char *target) + { + const uint_t MSG_BYTES[] = {1,2,4,8,10,16,32,64,100,128,256,512,1000,1024,2048,4096,8192,10000,16384,32768,100000,0}; + enum { TIMER_SAMPLE_CNT = 13, MAX_BUFFER=1024*100, PERF_TIMEOUT_CLKS = 500000 }; + enum { _256 = 256, _512 = 512 }; + uint_32t dt[24][3][TIMER_SAMPLE_CNT],t0,t1; + uint_32t dtMin = ~0u; + uint_t targetSize = 0; + uint_t repCnt = 1; + uint_t i,k,n,r,blkSize,msgBytes; + u08b_t b[MAX_BUFFER],hashVal[SKEIN1024_BLOCK_BYTES*4]; + hashState s; +#ifdef CompilerVersion + char CVER[20]; /* avoid ANSI compiler warnings for sprintf()! 
:-(( */ + n = CompilerVersion; + CVER[0] = '_'; + CVER[1] = 'v'; + CVER[2] = (char)('0'+((n /100)%10)); + CVER[3] = '.'; + CVER[4] = (char)('0'+((n / 10)%10)); + CVER[5] = (char)('0'+((n / 1)%10)); + CVER[6] = 0; +#else +#define CVER "" +#endif + if (target && target[0]) + { + targetSize = atoi(target); + for (i=0;target[i];i++) + if (target[i] == '.') + { + repCnt = atoi(target+i+1); + break; + } + if (repCnt == 0) + repCnt = 1; + } + + assert(sizeof(dt)/(3*TIMER_SAMPLE_CNT*sizeof(dt[0][0][0])) >= + sizeof(MSG_BYTES)/sizeof(MSG_BYTES[0])); + if (OS_Set_High_Priority()) + printf("Unable to set thread to high priority\n"); + fflush(stdout); /* let things calm down */ + OS_Sleep(200); /* let things settle down for a bit */ + memset(dt,0,sizeof(dt)); + RandBytes(b,sizeof(b)); /* use random data for testing */ + for (i=0;i<4*TIMER_SAMPLE_CNT;i++) /* calibrate the overhead for measuring time */ + { + t0 = HiResTime(); + t1 = HiResTime(); + if (dtMin > t1-t0) /* keep only the minimum time */ + dtMin = t1-t0; + } + for (r=0;r MAX_BUFFER || msgBytes == 0) + break; + if (targetSize && targetSize != msgBytes) + continue; + for (k=0;k<3;k++) + { /* cycle thru the different block sizes */ + blkSize=256 << k; + t0=HiResTime(); + t1=HiResTime(); +#define OneTest(BITS) \ + Skein##BITS##_Init (&s.u.ctx##BITS,BITS); \ + Skein##BITS##_Update(&s.u.ctx##BITS,b,msgBytes);\ + Skein##BITS##_Final (&s.u.ctx##BITS,hashVal); + + OS_Sleep(0); /* yield the time slice to OS */ + for (i=0;i MAX_BUFFER || msgBytes == 0) + break; + if (targetSize && targetSize != msgBytes) + continue; + printf("%7d_ ||",msgBytes); + for (k=0;k<3;k++) /* cycle thru the different Skein block sizes */ + { /* here with dt[n][k][] full of time differences */ + /* discard high/low, then show min/median of the rest, in clks/byte */ + qsort(dt[n][k],TIMER_SAMPLE_CNT,sizeof(dt[0][0][0]),compare_uint_32t); + printf(" %8.2f %8.2f |",dt[n][k][1]/(double)msgBytes,dt[n][k][TIMER_SAMPLE_CNT/2]/(double)msgBytes); + } + 
ShowCompiler(CVER); + printf("\n"); + if (targetSize == 0 && target && target[0] && repCnt == 1) + { /* show the details */ + for (k=0;k<3;k++) + { + printf("%4d: ",256 << k); + for (i=0;i all) */ + uint_t seed0 = (uint_t) time(NULL); /* randomize based on time */ + uint_t oneBlk = 0; /* test block size */ + + for (i=1;i /* get the memcpy/memset functions */ +#include "skein.h" /* get the Skein API definitions */ +#include "SHA3api_ref.h"/* get the AHS API definitions */ + +/******************************************************************/ +/* AHS API code */ +/******************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* select the context size and init the context */ +HashReturn Init(hashState *state, int hashbitlen) + { +#if SKEIN_256_NIST_MAX_HASH_BITS + if (hashbitlen <= SKEIN_256_NIST_MAX_HASHBITS) + { + Skein_Assert(hashbitlen > 0,BAD_HASHLEN); + state->statebits = 64*SKEIN_256_STATE_WORDS; + return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen); + } +#endif + if (hashbitlen <= SKEIN_512_NIST_MAX_HASHBITS) + { + state->statebits = 64*SKEIN_512_STATE_WORDS; + return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen); + } + else + { + state->statebits = 64*SKEIN1024_STATE_WORDS; + return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen); + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process data to be hashed */ +HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) + { + /* only the final Update() call is allowed do partial bytes, else assert an error */ + Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, FAIL); + + Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); + if ((databitlen & 7) == 0) /* partial bytes? 
*/ + { + switch ((state->statebits >> 8) & 3) + { + case 2: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3); + case 1: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3); + case 0: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3); + default: return FAIL; + } + } + else + { /* handle partial final byte */ + size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) */ + u08b_t b,mask; + + mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */ + b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ + + switch ((state->statebits >> 8) & 3) + { + case 2: Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */ + Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ + break; + case 1: Skein_256_Update(&state->u.ctx_256,data,bCnt-1); /* process all but the final byte */ + Skein_256_Update(&state->u.ctx_256,&b , 1 ); /* process the (masked) partial byte */ + break; + case 0: Skein1024_Update(&state->u.ctx1024,data,bCnt-1); /* process all but the final byte */ + Skein1024_Update(&state->u.ctx1024,&b , 1 ); /* process the (masked) partial byte */ + break; + default: return FAIL; + } + Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */ + + return SUCCESS; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize hash computation and output the result (hashbitlen bits) */ +HashReturn Final(hashState *state, BitSequence *hashval) + { + Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); + switch ((state->statebits >> 8) & 3) + { + case 2: return Skein_512_Final(&state->u.ctx_512,hashval); + case 1: return Skein_256_Final(&state->u.ctx_256,hashval); + case 0: return Skein1024_Final(&state->u.ctx1024,hashval); + default: return FAIL; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 
+/* all-in-one hash function */ +HashReturn Hash(int hashbitlen, const BitSequence *data, /* all-in-one call */ + DataLength databitlen,BitSequence *hashval) + { + hashState state; + HashReturn r = Init(&state,hashbitlen); + if (r == SUCCESS) + { /* these calls do not fail when called properly */ + r = Update(&state,data,databitlen); + Final(&state,hashval); + } + return r; + } diff --git a/Optimized_32bit/SHA3api_ref.h b/Optimized_32bit/SHA3api_ref.h new file mode 100644 index 000000000000..6d62304e59b7 --- /dev/null +++ b/Optimized_32bit/SHA3api_ref.h @@ -0,0 +1,66 @@ +#ifndef _AHS_API_H_ +#define _AHS_API_H_ + +/*********************************************************************** +** +** Interface declarations of the AHS API using the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#include "skein.h" + +typedef enum + { + SUCCESS = SKEIN_SUCCESS, + FAIL = SKEIN_FAIL, + BAD_HASHLEN = SKEIN_BAD_HASHLEN + } + HashReturn; + +typedef size_t DataLength; /* bit count type */ +typedef u08b_t BitSequence; /* bit stream type */ + +typedef struct + { + uint_t statebits; /* 256, 512, or 1024 */ + union + { + Skein_Ctxt_Hdr_t h; /* common header "overlay" */ + Skein_256_Ctxt_t ctx_256; + Skein_512_Ctxt_t ctx_512; + Skein1024_Ctxt_t ctx1024; + } u; + } + hashState; + +/* "incremental" hashing API */ +HashReturn Init (hashState *state, int hashbitlen); +HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); +HashReturn Final (hashState *state, BitSequence *hashval); + +/* "all-in-one" call */ +HashReturn Hash (int hashbitlen, const BitSequence *data, + DataLength databitlen, BitSequence *hashval); + + +/* +** Re-define the compile-time constants below to change the selection +** of the Skein state size in the Init() function in SHA3api_ref.c. 
+** +** That is, the NIST API does not allow for explicit selection of the +** Skein block size, so it must be done implicitly in the Init() function. +** The selection is controlled by these constants. +*/ +#ifndef SKEIN_256_NIST_MAX_HASHBITS +#define SKEIN_256_NIST_MAX_HASHBITS (0) +#endif + +#ifndef SKEIN_512_NIST_MAX_HASHBITS +#define SKEIN_512_NIST_MAX_HASHBITS (512) +#endif + +#endif /* ifdef _AHS_API_H_ */ diff --git a/Optimized_32bit/brg_endian.h b/Optimized_32bit/brg_endian.h new file mode 100644 index 000000000000..978eb33f08cf --- /dev/null +++ b/Optimized_32bit/brg_endian.h @@ -0,0 +1,148 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
#ifndef BRG_ENDIAN_H
#define BRG_ENDIAN_H

/*
** Determines the byte order of the target platform at compile time.
**
** Defines:
**   IS_BIG_ENDIAN / IS_LITTLE_ENDIAN  symbolic byte-order values
**   PLATFORM_BYTE_ORDER               one of the two values above
**   PLATFORM_MUST_ALIGN               1 on IA64, 0 everywhere else
**
** Compilation stops with #error if no rule below can decide the order.
*/

#define IS_BIG_ENDIAN      4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN   1234 /* byte 0 is least significant (i386) */

/* Include files where endian defines and byteswap functions may reside */
#if defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
#  include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
      defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
#  include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#  if !defined( __MINGW32__ ) && !defined(AVR)
#    include <endian.h>
#    if !defined( __BEOS__ )
#      include <byteswap.h>
#    endif
#  endif
#endif

/*  Now attempt to set the define for platform byte order using any  */
/*  of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which  */
/*  seem to encompass most endian symbol definitions                 */

#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
#  if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( BIG_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
#  if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( _BIG_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
#  if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( __BIG_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
#  if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#  elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
#    define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#  endif
#elif defined( __BIG_ENDIAN__ )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

/*  if the platform byte order could not be determined, then try to  */
/*  set this define using common machine defines                     */
#if !defined(PLATFORM_BYTE_ORDER)

#if   defined( __alpha__ ) || defined( __alpha ) || defined( i386 )       || \
      defined( __i386__ )  || defined( _M_I86 )  || defined( _M_IX86 )    || \
      defined( __OS2__ )   || defined( sun386 )  || defined( __TURBOC__ ) || \
      defined( vax )       || defined( vms )     || defined( VMS )        || \
      defined( __VMS )     || defined( _M_X64 )  || defined( AVR )
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN

#elif defined( AMIGA )    || defined( applec )    || defined( __AS400__ )  || \
      defined( _CRAY )    || defined( __hppa )    || defined( __hp9000 )   || \
      defined( ibm370 )   || defined( mc68000 )   || defined( m68k )       || \
      defined( __MRC__ )  || defined( __MVS__ )   || defined( __MWERKS__ ) || \
      defined( sparc )    || defined( __sparc)    || defined( SYMANTEC_C ) || \
      defined( __VOS__ )  || defined( __TIGCC__ ) || defined( __TANDEM )   || \
      defined( THINK_C )  || defined( __VMCMS__ )
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN

#elif 0     /* **** EDIT HERE IF NECESSARY **** */
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0     /* **** EDIT HERE IF NECESSARY **** */
#  define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
#  error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order
#endif
#endif

/* special handler for IA64, which may be either endianness (?)  */
/* here we assume little-endian, but this may need to be changed */
#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
#  define PLATFORM_MUST_ALIGN (1)
#ifndef PLATFORM_BYTE_ORDER
#  define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#endif

#ifndef   PLATFORM_MUST_ALIGN
#  define PLATFORM_MUST_ALIGN (0)
#endif

#endif  /* ifndef BRG_ENDIAN_H */
#ifndef BRG_TYPES_H
#define BRG_TYPES_H

/*
** Fixed-width unsigned integer typedefs (uint_8t, uint_16t, uint_32t and,
** where a 64-bit type can be found, uint_64t), selected from the limits
** published by <limits.h>, plus the li_32()/li_64() literal-suffix
** helpers, the VOID_RETURN/INT_RETURN linkage macros and the buffer
** declaration helpers at the end of the file.
*/

#if defined(__cplusplus)
extern "C" {
#endif

#include <limits.h>     /* UCHAR_MAX, USHRT_MAX, UINT_MAX, ULONG_MAX, ... */

#ifndef BRG_UI8
#  define BRG_UI8
#  if UCHAR_MAX == 255u
     typedef unsigned char uint_8t;
#  else
#    error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h
#  endif
#endif

#ifndef BRG_UI16
#  define BRG_UI16
#  if USHRT_MAX == 65535u
     typedef unsigned short uint_16t;
#  else
#    error Please define uint_16t as a 16-bit unsigned short type in brg_types.h
#  endif
#endif

#ifndef BRG_UI32
#  define BRG_UI32
#  if UINT_MAX == 4294967295u
#    define li_32(h) 0x##h##u
     typedef unsigned int uint_32t;
#  elif ULONG_MAX == 4294967295u
#    define li_32(h) 0x##h##ul
     typedef unsigned long uint_32t;
#  elif defined( _CRAY )
#    error This code needs 32-bit data types, which Cray machines do not provide
#  else
#    error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h
#  endif
#endif

#ifndef BRG_UI64
#  if defined( __BORLANDC__ ) && !defined( __MSDOS__ )
#    define BRG_UI64
#    define li_64(h) 0x##h##ui64
     typedef unsigned __int64 uint_64t;
#  elif defined( _MSC_VER ) && ( _MSC_VER < 1300 )    /* 1300 == VC++ 7.0 */
#    define BRG_UI64
#    define li_64(h) 0x##h##ui64
     typedef unsigned __int64 uint_64t;
#  elif defined( __sun ) && defined(ULONG_MAX) && ULONG_MAX == 0xfffffffful
#    define BRG_UI64
#    define li_64(h) 0x##h##ull
     typedef unsigned long long uint_64t;
#  elif defined( UINT_MAX ) && UINT_MAX > 4294967295u
#    if UINT_MAX == 18446744073709551615u
#      define BRG_UI64
#      define li_64(h) 0x##h##u
       typedef unsigned int uint_64t;
#    endif
#  elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u
#    if ULONG_MAX == 18446744073709551615ul
#      define BRG_UI64
#      define li_64(h) 0x##h##ul
       typedef unsigned long uint_64t;
#    endif
#  elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u
#    if ULLONG_MAX == 18446744073709551615ull
#      define BRG_UI64
#      define li_64(h) 0x##h##ull
       typedef unsigned long long uint_64t;
#    endif
#  elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u
#    if ULONG_LONG_MAX == 18446744073709551615ull
#      define BRG_UI64
#      define li_64(h) 0x##h##ull
       typedef unsigned long long uint_64t;
#    endif
#  elif defined(__GNUC__)  /* DLW: avoid mingw problem with -ansi */
#      define BRG_UI64
#      define li_64(h) 0x##h##ull
       typedef unsigned long long uint_64t;
#  endif
#endif

#if defined( NEED_UINT_64T ) && !defined( BRG_UI64 )
#  error Please define uint_64t as an unsigned 64 bit type in brg_types.h
#endif

#ifndef RETURN_VALUES
#  define RETURN_VALUES
#  if defined( DLL_EXPORT )
#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
#      define VOID_RETURN    __declspec( dllexport ) void __stdcall
#      define INT_RETURN     __declspec( dllexport ) int  __stdcall
#    elif defined( __GNUC__ )
#      define VOID_RETURN    __declspec( __dllexport__ ) void
#      define INT_RETURN     __declspec( __dllexport__ ) int
#    else
#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
#    endif
#  elif defined( DLL_IMPORT )
#    if defined( _MSC_VER ) || defined ( __INTEL_COMPILER )
#      define VOID_RETURN    __declspec( dllimport ) void __stdcall
#      define INT_RETURN     __declspec( dllimport ) int  __stdcall
#    elif defined( __GNUC__ )
#      define VOID_RETURN    __declspec( __dllimport__ ) void
#      define INT_RETURN     __declspec( __dllimport__ ) int
#    else
#      error Use of the DLL is only available on the Microsoft, Intel and GCC compilers
#    endif
#  elif defined( __WATCOMC__ )
#    define VOID_RETURN  void __cdecl
#    define INT_RETURN   int  __cdecl
#  else
#    define VOID_RETURN  void
#    define INT_RETURN   int
#  endif
#endif

/*  These defines are used to declare buffers in a way that allows
    faster operations on longer variables to be used.  In all these
    defines 'size' must be a power of 2 and >= 8

    dec_unit_type(size,x)       declares a variable 'x' of length
                                'size' bits

    dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize'
                                bytes defined as an array of variables
                                each of 'size' bits (bsize must be a
                                multiple of size / 8)

    ptr_cast(x,size)            casts a pointer to a pointer to a
                                variable of length 'size' bits
*/

#define ui_type(size)               uint_##size##t
#define dec_unit_type(size,x)       typedef ui_type(size) x
#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)]
#define ptr_cast(x,size)            ((ui_type(size)*)(x))

#if defined(__cplusplus)
}
#endif

#endif
+** +************************************************************************/ + +#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ + +#include /* get the memcpy/memset functions */ +#include "skein.h" /* get the Skein API definitions */ +#include "skein_iv.h" /* get precomputed IVs */ + +/*****************************************************************/ +/* External function to process blkCnt (nonzero) full block(s) of data. */ +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); + +/*****************************************************************/ +/* 256-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break; + case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break; + case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break; + case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = 
Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_256_Update(ctx,key,keyBytes); /* hash the key */ + Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;iX[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(256,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + 
ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES); + Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_256_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */ + Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_256_BLOCK_BYTES; + msg += n * SKEIN_256_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + 
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_256_API_CodeSize(void) + { + return ((u08b_t *) Skein_256_API_CodeSize) - + ((u08b_t *) Skein_256_Init); + } +#endif + +/*****************************************************************/ +/* 512-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break; + case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break; + case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break; + case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break; +#endif + default: + /* here 
if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_512_Update(ctx,key,keyBytes); /* hash the key */ + Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;iX[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(512,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + 
ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); + Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_512_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ + Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; + msg += n * SKEIN_512_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + 
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_512_API_CodeSize(void) + { + return ((u08b_t *) Skein_512_API_CodeSize) - + ((u08b_t *) Skein_512_Init); + } +#endif + +/*****************************************************************/ +/* 1024-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X)); break; + case 384: memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X)); break; + case 1024: memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* 
build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein1024_Update(ctx,key,keyBytes); /* hash the key */ + Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;iX[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(1024,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + 
ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES); + Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN1024_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */ + Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES); + msgByteCnt -= n * SKEIN1024_BLOCK_BYTES; + msg += n * SKEIN1024_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + 
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein1024_API_CodeSize(void) + { + return ((u08b_t *) Skein1024_API_CodeSize) - + ((u08b_t *) Skein1024_Init); + } +#endif + +/**************** Functions to support MAC/tree hashing ***************/ +/* (this code is identical for Optimized and Reference versions) */ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash 
computation and output the block, no OUTPUT stage */ +int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +#if SKEIN_TREE_HASH +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + 
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); /* bits = 512 for this variant */ + 
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); /* bits = 1024 for this variant */ + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } +#endif diff --git a/Optimized_32bit/skein.h b/Optimized_32bit/skein.h new file mode 100644 index 000000000000..721c9bc9ce0d --- /dev/null +++ b/Optimized_32bit/skein.h @@ -0,0 +1,327 @@ +#ifndef _SKEIN_H_ +#define _SKEIN_H_ 1 +/************************************************************************** +** +** Interface declarations and internal definitions for Skein hashing. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +*************************************************************************** +** +** The following compile-time switches may be defined to control some +** tradeoffs between speed, code size, error checking, and security. +** +** The "default" note explains what happens when the switch is not defined. +** +** SKEIN_DEBUG -- make callouts from inside Skein code +** to examine/display intermediate values. +** [default: no callouts (no overhead)] +** +** SKEIN_ERR_CHECK -- how error checking is handled inside Skein +** code. If not defined, most error checking +** is disabled (for performance). Otherwise, +** checks are compiled in, selected by SKEIN_ASSERT: +** defined: use assert() to flag all errors +** not defined: return an error code (e.g. SKEIN_FAIL) for caller errors, assert() on internal errors +** +***************************************************************************/ +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stddef.h> /* get size_t definition */ +#include "skein_port.h" /* get platform-specific definitions */ + +enum + { + SKEIN_SUCCESS = 0, /* return codes from Skein calls */ + SKEIN_FAIL = 1, + SKEIN_BAD_HASHLEN = 2 + }; + +#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ + +#define SKEIN_256_STATE_WORDS ( 4) +#define SKEIN_512_STATE_WORDS ( 8) +#define SKEIN1024_STATE_WORDS (16) +#define SKEIN_MAX_STATE_WORDS (16) + +#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS) + +typedef struct + { + size_t hashBitLen; /* size of hash result, in bits */ + size_t bCnt; /* current byte count in buffer b[] */ + u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak 
words: T[0]=byte cnt, T[1]=flags */ + } Skein_Ctxt_Hdr_t; + +typedef struct /* 256-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein_256_Ctxt_t; + +typedef struct /* 512-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein_512_Ctxt_t; + +typedef struct /* 1024-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein1024_Ctxt_t; + +/* Skein APIs for (incremental) "straight hashing" */ +int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen); +int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen); +int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen); + +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); + +int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +/* +** Skein APIs for "extended" initialization: MAC keys, tree hashing. +** After an InitExt() call, just use Update/Final calls as with Init(). +** +** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes. +** When keyBytes == 0 and treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL, +** the results of InitExt() are identical to calling Init(). 
+** The function Init() may be called once to "precompute" the IV for +** a given hashBitLen value, then by saving a copy of the context +** the IV computation may be avoided in later calls. +** Similarly, the function InitExt() may be called once per MAC key +** to precompute the MAC IV, then a copy of the context saved and +** reused for each new MAC computation. +**/ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); + +/* +** Skein APIs for MAC and tree hash: +** Final_Pad: pad, do final block, but no OUTPUT type +** Output: do just the output stage +*/ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +#ifndef SKEIN_TREE_HASH +#define SKEIN_TREE_HASH (1) +#endif +#if SKEIN_TREE_HASH +int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); +#endif + +/***************************************************************** +** "Internal" Skein definitions +** -- not needed for sequential hashing API, but will be +** helpful for other uses of Skein (e.g., tree hash mode). +** -- included here so that they can be shared between +** reference and optimized code. 
+******************************************************************/ + +/* tweak word T[1]: bit field starting positions */ +#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ + +#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ +#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ +#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ +#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ +#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ + +/* tweak word T[1]: flag bit definition(s) */ +#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST) +#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL) +#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD) + +/* tweak word T[1]: tree level bit field mask */ +#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL) +#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL) + +/* tweak word T[1]: block type field */ +#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */ +#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */ +#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */ +#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ +#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ +#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ +#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ +#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ +#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ + +#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) +#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ +#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */ +#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization 
string */ +#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ +#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ +#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ +#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ +#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ +#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ + +#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) +#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) + +#define SKEIN_VERSION (1) + +#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ +#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/ +#endif + +#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32)) +#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) +#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) + +#define SKEIN_CFG_STR_LEN (4*8) + +/* bit field definitions in config block treeInfo word */ +#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0) +#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8) +#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) + +#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) +#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) +#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) + +#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \ + ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ + (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ + (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) ) + +#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */ + +/* +** Skein macros for getting/setting tweak words, etc. 
+** These are useful for partial input bytes, hash tree init/update, etc. +**/ +#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) +#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} + +#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0) +#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1) +#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) +#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) + +/* set both tweak words at once */ +#define Skein_Set_T0_T1(ctxPtr,T0,T1) \ + { \ + Skein_Set_T0(ctxPtr,(T0)); \ + Skein_Set_T1(ctxPtr,(T1)); \ + } + +#define Skein_Set_Type(ctxPtr,BLK_TYPE) \ + Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE) + +/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ +#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ + { Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } + +#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } +#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } + +#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);} + +/***************************************************************** +** "Internal" Skein definitions for debugging and error checking +******************************************************************/ +#ifdef SKEIN_DEBUG /* examine/display intermediate values? */ +#include "skein_debug.h" +#else /* default is no callouts */ +#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr) +#define Skein_Show_Round(bits,ctx,r,X) +#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr) +#define Skein_Show_Final(bits,ctx,cnt,outPtr) +#define Skein_Show_Key(bits,ctx,key,keyBytes) +#endif + +#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? 
*/ +#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ +#define Skein_assert(x) +#elif defined(SKEIN_ASSERT) +#include +#define Skein_Assert(x,retCode) assert(x) +#define Skein_assert(x) assert(x) +#else +#include +#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ +#define Skein_assert(x) assert(x) /* internal error */ +#endif + +/***************************************************************** +** Skein block function constants (shared across Ref and Opt code) +******************************************************************/ +enum + { + /* Skein_256 round rotation constants */ + R_256_0_0=14, R_256_0_1=16, + R_256_1_0=52, R_256_1_1=57, + R_256_2_0=23, R_256_2_1=40, + R_256_3_0= 5, R_256_3_1=37, + R_256_4_0=25, R_256_4_1=33, + R_256_5_0=46, R_256_5_1=12, + R_256_6_0=58, R_256_6_1=22, + R_256_7_0=32, R_256_7_1=32, + + /* Skein_512 round rotation constants */ + R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, + R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, + R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, + R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, + R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, + R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, + R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, + R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22, + + /* Skein1024 round rotation constants */ + R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37, + R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52, + R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17, + R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25, + R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, 
R1024_4_6=44, R1024_4_7=30, + R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41, + R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25, + R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20 + }; + +#ifndef SKEIN_ROUNDS +#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */ +#define SKEIN_512_ROUNDS_TOTAL (72) +#define SKEIN1024_ROUNDS_TOTAL (80) +#else /* allow command-line define in range 8*(5..14) */ +#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5)) +#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5)) +#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5)) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef _SKEIN_H_ */ diff --git a/Optimized_32bit/skein_block.c b/Optimized_32bit/skein_block.c new file mode 100644 index 000000000000..bfd29d1eee2d --- /dev/null +++ b/Optimized_32bit/skein_block.c @@ -0,0 +1,689 @@ +/*********************************************************************** +** +** Implementation of the Skein block functions. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +** Compile-time switches: +** +** SKEIN_USE_ASM -- set bits (256/512/1024) to select which +** versions use ASM code for block processing +** [default: use C for all block sizes] +** +************************************************************************/ + +#include +#include "skein.h" + +#ifndef SKEIN_USE_ASM +#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ +#endif + +#ifndef SKEIN_LOOP +#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ +#endif + +#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ +#define KW_TWK_BASE (0) +#define KW_KEY_BASE (3) +#define ks (kw + KW_KEY_BASE) +#define ts (kw + KW_TWK_BASE) + +#ifdef SKEIN_DEBUG +#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } +#else +#define DebugSaveTweak(ctx) +#endif + +/***************************** Skein_256 ******************************/ +#if !(SKEIN_USE_ASM & 256) +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_256_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_256_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10) +#else +#define SKEIN_UNROLL_256 (0) +#endif + +#if SKEIN_UNROLL_256 +#if (RCNT % SKEIN_UNROLL_256) +#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */ + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; +#endif + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1] + ts[0]; + X2 = w[2] + ks[2] + ts[1]; + X3 = w[3] + ks[3]; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */ + + blkPtr += SKEIN_256_BLOCK_BYTES; + + /* run the rounds */ + +#define Round256(p0,p1,p2,p3,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + +#if SKEIN_UNROLL_256 == 0 +#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \ + Round256(p0,p1,p2,p3,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); + +#define I256(R) \ + X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \ + X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \ + X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \ + X3 += ks[((R)+4) % 5] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R256(p0,p1,p2,p3,ROT,rNum) \ + Round256(p0,p1,p2,p3,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); + +#define I256(R) \ + X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ + X1 += ks[r+(R)+1] + ts[r+(R)+0]; \ + X2 += ks[r+(R)+2] + ts[r+(R)+1]; \ + X3 += ks[r+(R)+3] + r+(R) ; \ + ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\ + ts[r + (R)+2 ] = ts[r+(R)-1]; \ + 
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */ +#endif + { +#define R256_8_rounds(R) \ + R256(0,1,2,3,R_256_0,8*(R) + 1); \ + R256(0,3,2,1,R_256_1,8*(R) + 2); \ + R256(0,1,2,3,R_256_2,8*(R) + 3); \ + R256(0,3,2,1,R_256_3,8*(R) + 4); \ + I256(2*(R)); \ + R256(0,1,2,3,R_256_4,8*(R) + 5); \ + R256(0,3,2,1,R_256_5,8*(R) + 6); \ + R256(0,1,2,3,R_256_6,8*(R) + 7); \ + R256(0,3,2,1,R_256_7,8*(R) + 8); \ + I256(2*(R)+1); + + R256_8_rounds( 0); + +#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN))) + + #if R256_Unroll_R( 1) + R256_8_rounds( 1); + #endif + #if R256_Unroll_R( 2) + R256_8_rounds( 2); + #endif + #if R256_Unroll_R( 3) + R256_8_rounds( 3); + #endif + #if R256_Unroll_R( 4) + R256_8_rounds( 4); + #endif + #if R256_Unroll_R( 5) + R256_8_rounds( 5); + #endif + #if R256_Unroll_R( 6) + R256_8_rounds( 6); + #endif + #if R256_Unroll_R( 7) + R256_8_rounds( 7); + #endif + #if R256_Unroll_R( 8) + R256_8_rounds( 8); + #endif + #if R256_Unroll_R( 9) + R256_8_rounds( 9); + #endif + #if R256_Unroll_R(10) + R256_8_rounds(10); + #endif + #if R256_Unroll_R(11) + R256_8_rounds(11); + #endif + #if R256_Unroll_R(12) + R256_8_rounds(12); + #endif + #if R256_Unroll_R(13) + R256_8_rounds(13); + #endif + #if R256_Unroll_R(14) + R256_8_rounds(14); + #endif + #if (SKEIN_UNROLL_256 > 14) +#error "need more unrolling in Skein_256_Process_Block" + #endif + } + /* do the final "feedforward" xor, update context chaining vars */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_256_Process_Block_CodeSize(void) + { + return ((u08b_t *) 
Skein_256_Process_Block_CodeSize) - + ((u08b_t *) Skein_256_Process_Block); + } +uint_t Skein_256_Unroll_Cnt(void) + { + return SKEIN_UNROLL_256; + } +#endif +#endif + +/***************************** Skein_512 ******************************/ +#if !(SKEIN_USE_ASM & 512) +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_512_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) +#else +#define SKEIN_UNROLL_512 (0) +#endif + +#if SKEIN_UNROLL_512 +#if (RCNT % SKEIN_UNROLL_512) +#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */ + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; + Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ctx->X[4]; + ks[5] = ctx->X[5]; + ks[6] = ctx->X[6]; + ks[7] = ctx->X[7]; + ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; + + blkPtr += SKEIN_512_BLOCK_BYTES; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); + /* run the rounds */ +#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ + +#if SKEIN_UNROLL_512 == 0 +#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \ + Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); + +#define I512(R) \ + X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \ + X1 += ks[((R)+2) % 9]; \ + X2 += ks[((R)+3) % 9]; \ + X3 += ks[((R)+4) % 9]; \ + X4 += ks[((R)+5) % 9]; \ + X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \ + X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \ + X7 += ks[((R)+8) % 9] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define 
R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); + +#define I512(R) \ + X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ + X1 += ks[r+(R)+1]; \ + X2 += ks[r+(R)+2]; \ + X3 += ks[r+(R)+3]; \ + X4 += ks[r+(R)+4]; \ + X5 += ks[r+(R)+5] + ts[r+(R)+0]; \ + X6 += ks[r+(R)+6] + ts[r+(R)+1]; \ + X7 += ks[r+(R)+7] + r+(R) ; \ + ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \ + ts[r + (R)+2] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */ +#endif /* end of looped code definitions */ + { +#define R512_8_rounds(R) /* do 8 full rounds */ \ + R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \ + R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \ + R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \ + R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \ + I512(2*(R)); \ + R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \ + R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \ + R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \ + R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \ + I512(2*(R)+1); /* and key injection */ + + R512_8_rounds( 0); + +#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN))) + + #if R512_Unroll_R( 1) + R512_8_rounds( 1); + #endif + #if R512_Unroll_R( 2) + R512_8_rounds( 2); + #endif + #if R512_Unroll_R( 3) + R512_8_rounds( 3); + #endif + #if R512_Unroll_R( 4) + R512_8_rounds( 4); + #endif + #if R512_Unroll_R( 5) + R512_8_rounds( 5); + #endif + #if R512_Unroll_R( 6) + R512_8_rounds( 6); + #endif + #if R512_Unroll_R( 7) + R512_8_rounds( 7); + #endif + #if R512_Unroll_R( 8) + R512_8_rounds( 8); + #endif + #if R512_Unroll_R( 9) + R512_8_rounds( 9); + #endif + #if R512_Unroll_R(10) + R512_8_rounds(10); + #endif + #if R512_Unroll_R(11) + R512_8_rounds(11); + #endif + #if R512_Unroll_R(12) + R512_8_rounds(12); + #endif + #if R512_Unroll_R(13) + R512_8_rounds(13); + #endif 
+ #if R512_Unroll_R(14) + R512_8_rounds(14); + #endif + #if (SKEIN_UNROLL_512 > 14) +#error "need more unrolling in Skein_512_Process_Block" + #endif + } + + /* do the final "feedforward" xor, update context chaining vars */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + ctx->X[4] = X4 ^ w[4]; + ctx->X[5] = X5 ^ w[5]; + ctx->X[6] = X6 ^ w[6]; + ctx->X[7] = X7 ^ w[7]; + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_512_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein_512_Process_Block_CodeSize) - + ((u08b_t *) Skein_512_Process_Block); + } +uint_t Skein_512_Unroll_Cnt(void) + { + return SKEIN_UNROLL_512; + } +#endif +#endif + +/***************************** Skein1024 ******************************/ +#if !(SKEIN_USE_ASM & 1024) +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C, always looping (unrolled is bigger AND slower!) 
*/ + enum + { + WCNT = SKEIN1024_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN1024_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) +#else +#define SKEIN_UNROLL_1024 (0) +#endif + +#if (SKEIN_UNROLL_1024 != 0) +#if (RCNT % SKEIN_UNROLL_1024) +#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + + u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */ + X08,X09,X10,X11,X12,X13,X14,X15; + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03; + Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07; + Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11; + Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[ 0] = ctx->X[ 0]; + ks[ 1] = ctx->X[ 1]; + ks[ 2] = ctx->X[ 2]; + ks[ 3] = ctx->X[ 3]; + ks[ 4] = ctx->X[ 4]; + ks[ 5] = ctx->X[ 5]; + ks[ 6] = ctx->X[ 6]; + ks[ 7] = ctx->X[ 7]; + ks[ 8] = ctx->X[ 8]; + ks[ 9] = ctx->X[ 9]; + ks[10] = ctx->X[10]; + ks[11] = ctx->X[11]; + ks[12] = ctx->X[12]; + ks[13] = ctx->X[13]; + ks[14] = ctx->X[14]; + ks[15] = ctx->X[15]; + ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^ + ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^ + ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^ + ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X00 = w[ 0] + ks[ 0]; /* do the first full key injection */ + X01 = w[ 1] + ks[ 1]; + X02 = w[ 2] + ks[ 2]; + X03 = w[ 3] + ks[ 3]; + X04 = w[ 4] + ks[ 4]; + X05 = w[ 5] + ks[ 5]; + X06 = w[ 6] + ks[ 6]; + X07 = w[ 7] + ks[ 7]; + X08 = w[ 8] + ks[ 8]; + X09 = w[ 9] + ks[ 9]; + X10 = w[10] + ks[10]; + X11 = w[11] + ks[11]; + X12 = w[12] + ks[12]; + X13 = w[13] + ks[13] + ts[0]; + X14 = w[14] + ks[14] + ts[1]; + X15 = w[15] + ks[15]; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); + +#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ + X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \ + X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \ + X##pC += X##pD; 
X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \ + X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \ + +#if SKEIN_UNROLL_1024 == 0 +#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr); + +#define I1024(R) \ + X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \ + X01 += ks[((R)+ 2) % 17]; \ + X02 += ks[((R)+ 3) % 17]; \ + X03 += ks[((R)+ 4) % 17]; \ + X04 += ks[((R)+ 5) % 17]; \ + X05 += ks[((R)+ 6) % 17]; \ + X06 += ks[((R)+ 7) % 17]; \ + X07 += ks[((R)+ 8) % 17]; \ + X08 += ks[((R)+ 9) % 17]; \ + X09 += ks[((R)+10) % 17]; \ + X10 += ks[((R)+11) % 17]; \ + X11 += ks[((R)+12) % 17]; \ + X12 += ks[((R)+13) % 17]; \ + X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \ + X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \ + X15 += ks[((R)+16) % 17] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr); + +#define I1024(R) \ + X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \ + X01 += ks[r+(R)+ 1]; \ + X02 += ks[r+(R)+ 2]; \ + X03 += ks[r+(R)+ 3]; \ + X04 += ks[r+(R)+ 4]; \ + X05 += ks[r+(R)+ 5]; \ + X06 += ks[r+(R)+ 6]; \ + X07 += ks[r+(R)+ 7]; \ + X08 += ks[r+(R)+ 8]; \ + X09 += ks[r+(R)+ 9]; \ + X10 += ks[r+(R)+10]; \ + X11 += ks[r+(R)+11]; \ + X12 += ks[r+(R)+12]; \ + X13 += ks[r+(R)+13] + ts[r+(R)+0]; \ + X14 += ks[r+(R)+14] + ts[r+(R)+1]; \ + X15 += ks[r+(R)+15] + r+(R) ; \ + ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \ + ts[r + (R)+ 2] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */ +#endif + { +#define R1024_8_rounds(R) /* do 8 full rounds */ \ + 
R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \ + R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \ + R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \ + R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \ + I1024(2*(R)); \ + R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \ + R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \ + R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \ + R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \ + I1024(2*(R)+1); + + R1024_8_rounds( 0); + +#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN))) + + #if R1024_Unroll_R( 1) + R1024_8_rounds( 1); + #endif + #if R1024_Unroll_R( 2) + R1024_8_rounds( 2); + #endif + #if R1024_Unroll_R( 3) + R1024_8_rounds( 3); + #endif + #if R1024_Unroll_R( 4) + R1024_8_rounds( 4); + #endif + #if R1024_Unroll_R( 5) + R1024_8_rounds( 5); + #endif + #if R1024_Unroll_R( 6) + R1024_8_rounds( 6); + #endif + #if R1024_Unroll_R( 7) + R1024_8_rounds( 7); + #endif + #if R1024_Unroll_R( 8) + R1024_8_rounds( 8); + #endif + #if R1024_Unroll_R( 9) + R1024_8_rounds( 9); + #endif + #if R1024_Unroll_R(10) + R1024_8_rounds(10); + #endif + #if R1024_Unroll_R(11) + R1024_8_rounds(11); + #endif + #if R1024_Unroll_R(12) + R1024_8_rounds(12); + #endif + #if R1024_Unroll_R(13) + R1024_8_rounds(13); + #endif + #if R1024_Unroll_R(14) + R1024_8_rounds(14); + #endif + #if (SKEIN_UNROLL_1024 > 14) +#error "need more unrolling in Skein_1024_Process_Block" + #endif + } + /* do the final "feedforward" xor, update context chaining vars */ + + ctx->X[ 0] = X00 ^ w[ 0]; + ctx->X[ 1] = X01 ^ w[ 1]; + ctx->X[ 2] = X02 ^ w[ 2]; + ctx->X[ 3] = X03 ^ w[ 3]; + ctx->X[ 4] = X04 ^ w[ 4]; + ctx->X[ 5] = X05 ^ w[ 5]; + ctx->X[ 6] = X06 ^ w[ 6]; + ctx->X[ 7] = X07 ^ w[ 7]; + ctx->X[ 8] 
= X08 ^ w[ 8];
+        ctx->X[ 9] = X09 ^ w[ 9];
+        ctx->X[10] = X10 ^ w[10];
+        ctx->X[11] = X11 ^ w[11];
+        ctx->X[12] = X12 ^ w[12];
+        ctx->X[13] = X13 ^ w[13];
+        ctx->X[14] = X14 ^ w[14];
+        ctx->X[15] = X15 ^ w[15];
+
+        Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X);
+
+        ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+        blkPtr += SKEIN1024_BLOCK_BYTES;
+        }
+    while (--blkCnt);
+    ctx->h.T[0] = ts[0];
+    ctx->h.T[1] = ts[1];
+    }
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t Skein1024_Process_Block_CodeSize(void)
+    {
+    return ((u08b_t *) Skein1024_Process_Block_CodeSize) -
+           ((u08b_t *) Skein1024_Process_Block);
+    }
+uint_t Skein1024_Unroll_Cnt(void)
+    {
+    return SKEIN_UNROLL_1024;
+    }
+#endif
+#endif
diff --git a/Optimized_32bit/skein_debug.c b/Optimized_32bit/skein_debug.c
new file mode 100644
index 000000000000..fac5038598ea
--- /dev/null
+++ b/Optimized_32bit/skein_debug.c
@@ -0,0 +1,247 @@
+/***********************************************************************
+**
+** Debug output functions for Skein hashing.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+#include <stdio.h>
+
+#ifdef SKEIN_DEBUG /* only instantiate this code if SKEIN_DEBUG is on */
+#include "skein.h"
+
+static const char INDENT[] = " "; /* how much to indent on new line */
+
+uint_t skein_DebugFlag = 0; /* off by default. Must be set externally */
+
+static void Show64_step(size_t cnt,const u64b_t *X,size_t step)
+    {
+    size_t i,j;
+    for (i=j=0;i < cnt;i++,j+=step)
+        {
+        if (i % 4 == 0) printf(INDENT);
+        printf(" %08X.%08X ",(uint_32t)(X[j] >> 32),(uint_32t)X[j]);
+        if (i % 4 == 3 || i==cnt-1) printf("\n");
+        fflush(stdout);
+        }
+    }
+
+#define Show64(cnt,X) Show64_step(cnt,X,1)
+
+static void Show64_flag(size_t cnt,const u64b_t *X)
+    {
+    size_t xptr = (size_t) X;
+    size_t step = (xptr & 1) ? 2 : 1;
+    if (step != 1)
+        {
+        X = (const u64b_t *) (xptr & ~1);
+        }
+    Show64_step(cnt,X,step);
+    }
+
+static void Show08(size_t cnt,const u08b_t *b)
+    {
+    size_t i;
+    for (i=0;i < cnt;i++)
+        {
+        if (i %16 == 0) printf(INDENT);
+        else if (i % 4 == 0) printf(" ");
+        printf(" %02X",b[i]);
+        if (i %16 == 15 || i==cnt-1) printf("\n");
+        fflush(stdout);
+        }
+    }
+
+static const char *AlgoHeader(uint_t bits)
+    {
+    if (skein_DebugFlag & SKEIN_DEBUG_THREEFISH)
+        switch (bits)
+            {
+            case 256: return ":Threefish-256: ";
+            case 512: return ":Threefish-512: ";
+            case 1024: return ":Threefish-1024:";
+            }
+    else
+        switch (bits)
+            {
+            case 256: return ":Skein-256: ";
+            case 512: return ":Skein-512: ";
+            case 1024: return ":Skein-1024:";
+            }
+    return NULL;
+    }
+
+void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr)
+    {
+    if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
+    if (skein_DebugFlag & SKEIN_DEBUG_FINAL)
+        {
+        printf("\n%s Final output=\n",AlgoHeader(bits));
+        Show08(cnt,outPtr);
+        printf(" ++++++++++\n");
+        fflush(stdout);
+        }
+    }
+
+/* show state after a round (or "pseudo-round") */
+void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X)
+    {
+    static uint_t injectNum=0; /* not multi-thread safe! */
+
+    if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
+    if (skein_DebugFlag)
+        {
+        if (r >= SKEIN_RND_SPECIAL)
+            { /* a key injection (or feedforward) point */
+            injectNum = (r == SKEIN_RND_KEY_INITIAL) ? 0 : injectNum+1;
+            if ( skein_DebugFlag & SKEIN_DEBUG_INJECT ||
+                ((skein_DebugFlag & SKEIN_DEBUG_FINAL) && r == SKEIN_RND_FEED_FWD))
+                {
+                printf("\n%s",AlgoHeader(bits));
+                switch (r)
+                    {
+                    case SKEIN_RND_KEY_INITIAL:
+                        printf(" [state after initial key injection]");
+                        break;
+                    case SKEIN_RND_KEY_INJECT:
+                        printf(" [state after key injection #%02u]",injectNum);
+                        break;
+                    case SKEIN_RND_FEED_FWD:
+                        printf(" [state after plaintext feedforward]");
+                        injectNum = 0;
+                        break;
+                    }
+                printf("=\n");
+                Show64(bits/64,X);
+                if (r== SKEIN_RND_FEED_FWD)
+                    printf(" ----------\n");
+                }
+            }
+        else if (skein_DebugFlag & SKEIN_DEBUG_ROUNDS)
+            {
+            uint_t j;
+            u64b_t p[SKEIN_MAX_STATE_WORDS];
+            const u08b_t *perm;
+            static const u08b_t PERM_256 [4][ 4] = { { 0,1,2,3 }, { 0,3,2,1 }, { 0,1,2,3 }, { 0,3,2,1 } };
+            static const u08b_t PERM_512 [4][ 8] = { { 0,1,2,3,4,5,6,7 },
+                                                     { 2,1,4,7,6,5,0,3 },
+                                                     { 4,1,6,3,0,5,2,7 },
+                                                     { 6,1,0,7,2,5,4,3 }
+                                                   };
+            static const u08b_t PERM_1024[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 },
+                                                     { 0, 9, 2,13, 6,11, 4,15,10, 7,12, 3,14, 5, 8, 1 },
+                                                     { 0, 7, 2, 5, 4, 3, 6, 1,12,15,14,13, 8,11,10, 9 },
+                                                     { 0,15, 2,11, 6,13, 4, 9,14, 1, 8, 5,10, 3,12, 7 }
+                                                   };
+
+            if ((skein_DebugFlag & SKEIN_DEBUG_PERMUTE) && (r & 3))
+                {
+                printf("\n%s [state after round %2d (permuted)]=\n",AlgoHeader(bits),(int)r);
+                switch (bits)
+                    {
+                    case 256: perm = PERM_256 [r&3]; break;
+                    case 512: perm = PERM_512 [r&3]; break;
+                    default: perm = PERM_1024[r&3]; break;
+                    }
+                for (j=0;j<bits/64;j++)
+                    p[j] = X[perm[j]];
+                Show64(bits/64,p);
+                }
+            else
+                {
+                printf("\n%s [state after round %2d]=\n",AlgoHeader(bits),(int)r);
+                Show64(bits/64,X);
+                }
+            }
+        }
+    }
+
+/* show state after a round (or "pseudo-round"), given a list of pointers */
+void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[])
+    {
+    uint_t i;
+    u64b_t X[SKEIN_MAX_STATE_WORDS];
+
+    for (i=0;i<bits/64;i++) /* copy over the words */
+        X[i] = X_ptr[i][0];
+    Skein_Show_Round(bits,h,r,X);
+    }
+
+
+/* show the state at the start of a block */
+void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr,
+                      const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr)
+    {
+    uint_t n;
+    if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
+    if (skein_DebugFlag)
+        {
+        if (skein_DebugFlag & SKEIN_DEBUG_HDR)
+            {
+            printf("\n%s Block: outBits=%4u. T0=%06X.",AlgoHeader(bits),(uint_t) h->hashBitLen,(uint_t)h->T[0]);
+            printf(" Type=");
+            n = (uint_t) ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) >> SKEIN_T1_POS_BLK_TYPE);
+            switch (n)
+                {
+                case SKEIN_BLK_TYPE_KEY: printf("KEY. "); break;
+                case SKEIN_BLK_TYPE_CFG: printf("CFG. "); break;
+                case SKEIN_BLK_TYPE_PERS: printf("PERS."); break;
+                case SKEIN_BLK_TYPE_PK : printf("PK. "); break;
+                case SKEIN_BLK_TYPE_KDF: printf("KDF. "); break;
+                case SKEIN_BLK_TYPE_MSG: printf("MSG. "); break;
+                case SKEIN_BLK_TYPE_OUT: printf("OUT. "); break;
+                default: printf("0x%02X.",n); break;
+                }
+            printf(" Flags=");
+            printf((h->T[1] & SKEIN_T1_FLAG_FIRST) ? " First":" ");
+            printf((h->T[1] & SKEIN_T1_FLAG_FINAL) ? " Final":" ");
+            printf((h->T[1] & SKEIN_T1_FLAG_BIT_PAD) ? " Pad" :" ");
+            n = (uint_t) ((h->T[1] & SKEIN_T1_TREE_LVL_MASK) >> SKEIN_T1_POS_TREE_LVL);
+            if (n)
+                printf(" TreeLevel = %02X",n);
+            printf("\n");
+            fflush(stdout);
+            }
+        if (skein_DebugFlag & SKEIN_DEBUG_TWEAK)
+            {
+            printf(" Tweak:\n");
+            Show64(2,h->T);
+            }
+        if (skein_DebugFlag & SKEIN_DEBUG_STATE)
+            {
+            printf(" %s words:\n",(skein_DebugFlag & SKEIN_DEBUG_THREEFISH)?"Key":"State");
+            Show64(bits/64,X);
+            }
+        if (skein_DebugFlag & SKEIN_DEBUG_KEYSCHED)
+            {
+            printf(" Tweak schedule:\n");
+            Show64_flag(3,tsPtr);
+            printf(" Key schedule:\n");
+            Show64_flag((bits/64)+1,ksPtr);
+            }
+        if (skein_DebugFlag & SKEIN_DEBUG_INPUT_64)
+            {
+            printf(" Input block (words):\n");
+            Show64(bits/64,wPtr);
+            }
+        if (skein_DebugFlag & SKEIN_DEBUG_INPUT_08)
+            {
+            printf(" Input block (bytes):\n");
+            Show08(bits/8,blkPtr);
+            }
+        }
+    }
+
+void Skein_Show_Key(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes)
+    {
+    if (keyBytes)
+    if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG))
+    if (skein_DebugFlag & SKEIN_DEBUG_KEY)
+        {
+        printf("\n%s MAC key = %4u bytes\n",AlgoHeader(bits),(unsigned) keyBytes);
+        Show08(keyBytes,key);
+        }
+    }
+#endif
diff --git a/Optimized_32bit/skein_debug.h b/Optimized_32bit/skein_debug.h
new file mode 100644
index 000000000000..7775c0165c0a
--- /dev/null
+++ b/Optimized_32bit/skein_debug.h
@@ -0,0 +1,48 @@
+#ifndef _SKEIN_DEBUG_H_
+#define _SKEIN_DEBUG_H_
+/***********************************************************************
+**
+** Interface definitions for Skein hashing debug output.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+
+#ifdef SKEIN_DEBUG
+/* callout functions used inside Skein code */
+void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr,
+ const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr);
+void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X);
+void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[]);
+void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr);
+void Skein_Show_Key (uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes);
+
+extern uint_t skein_DebugFlag; /* flags to control debug output (0 --> none) */
+
+#define SKEIN_RND_SPECIAL (1000u)
+#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u)
+#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u)
+#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u)
+
+/* flag bits: skein_DebugFlag */
+#define SKEIN_DEBUG_KEY (1u << 1) /* show MAC key */
+#define SKEIN_DEBUG_CONFIG (1u << 2) /* show config block processing */
+#define SKEIN_DEBUG_STATE (1u << 3) /* show input state during Show_Block() */
+#define SKEIN_DEBUG_TWEAK (1u << 4) /* show tweak words during Show_Block() */
+#define SKEIN_DEBUG_KEYSCHED (1u << 5) /* show expanded key schedule */
+#define SKEIN_DEBUG_INPUT_64 (1u << 6) /* show input block as 64-bit words */
+#define SKEIN_DEBUG_INPUT_08 (1u << 7) /* show input block as 8-bit bytes */
+#define SKEIN_DEBUG_INJECT (1u << 8) /* show state after key injection & feedforward points */
+#define SKEIN_DEBUG_ROUNDS (1u << 9) /* show state after all rounds */
+#define SKEIN_DEBUG_FINAL (1u <<10) /* show final output of Skein */
+#define SKEIN_DEBUG_HDR (1u <<11) /* show block header */
+#define SKEIN_DEBUG_THREEFISH (1u <<12) /* use Threefish name instead of Skein */
+#define SKEIN_DEBUG_PERMUTE (1u <<13) /* use word permutations */
+#define SKEIN_DEBUG_ALL ((~0u) & ~(SKEIN_DEBUG_THREEFISH | SKEIN_DEBUG_PERMUTE))
+#define THREEFISH_DEBUG_ALL (SKEIN_DEBUG_ALL | SKEIN_DEBUG_THREEFISH)
+
+#endif /* SKEIN_DEBUG */
+
+#endif /* _SKEIN_DEBUG_H_ */
diff --git a/Optimized_32bit/skein_iv.h b/Optimized_32bit/skein_iv.h
new file mode 100644
index 000000000000..a8f54a41d345
--- /dev/null
+++ b/Optimized_32bit/skein_iv.h
@@ -0,0 +1,199 @@
+#ifndef _SKEIN_IV_H_
+#define _SKEIN_IV_H_
+
+#include "skein.h" /* get Skein macros and types */
+
+/*
+***************** Pre-computed Skein IVs *******************
+**
+** NOTE: these values are not "magic" constants, but
+** are generated using the Threefish block function.
+** They are pre-computed here only for speed; i.e., to
+** avoid the need for a Threefish call during Init().
+**
+** The IV for any fixed hash length may be pre-computed.
+** Only the most common values are included here.
+**
+************************************************************
+**/
+
+#define MK_64 SKEIN_MK_64
+
+/* blkSize = 256 bits. hashSize = 128 bits */
+const u64b_t SKEIN_256_IV_128[] =
+ {
+ MK_64(0xE1111906,0x964D7260),
+ MK_64(0x883DAAA7,0x7C8D811C),
+ MK_64(0x10080DF4,0x91960F7A),
+ MK_64(0xCCF7DDE5,0xB45BC1C2)
+ };
+
+/* blkSize = 256 bits. hashSize = 160 bits */
+const u64b_t SKEIN_256_IV_160[] =
+ {
+ MK_64(0x14202314,0x72825E98),
+ MK_64(0x2AC4E9A2,0x5A77E590),
+ MK_64(0xD47A5856,0x8838D63E),
+ MK_64(0x2DD2E496,0x8586AB7D)
+ };
+
+/* blkSize = 256 bits. hashSize = 224 bits */
+const u64b_t SKEIN_256_IV_224[] =
+ {
+ MK_64(0xC6098A8C,0x9AE5EA0B),
+ MK_64(0x876D5686,0x08C5191C),
+ MK_64(0x99CB88D7,0xD7F53884),
+ MK_64(0x384BDDB1,0xAEDDB5DE)
+ };
+
+/* blkSize = 256 bits.
hashSize = 256 bits */ +const u64b_t SKEIN_256_IV_256[] = + { + MK_64(0xFC9DA860,0xD048B449), + MK_64(0x2FCA6647,0x9FA7D833), + MK_64(0xB33BC389,0x6656840F), + MK_64(0x6A54E920,0xFDE8DA69) + }; + +/* blkSize = 512 bits. hashSize = 128 bits */ +const u64b_t SKEIN_512_IV_128[] = + { + MK_64(0xA8BC7BF3,0x6FBF9F52), + MK_64(0x1E9872CE,0xBD1AF0AA), + MK_64(0x309B1790,0xB32190D3), + MK_64(0xBCFBB854,0x3F94805C), + MK_64(0x0DA61BCD,0x6E31B11B), + MK_64(0x1A18EBEA,0xD46A32E3), + MK_64(0xA2CC5B18,0xCE84AA82), + MK_64(0x6982AB28,0x9D46982D) + }; + +/* blkSize = 512 bits. hashSize = 160 bits */ +const u64b_t SKEIN_512_IV_160[] = + { + MK_64(0x28B81A2A,0xE013BD91), + MK_64(0xC2F11668,0xB5BDF78F), + MK_64(0x1760D8F3,0xF6A56F12), + MK_64(0x4FB74758,0x8239904F), + MK_64(0x21EDE07F,0x7EAF5056), + MK_64(0xD908922E,0x63ED70B8), + MK_64(0xB8EC76FF,0xECCB52FA), + MK_64(0x01A47BB8,0xA3F27A6E) + }; + +/* blkSize = 512 bits. hashSize = 224 bits */ +const u64b_t SKEIN_512_IV_224[] = + { + MK_64(0xCCD06162,0x48677224), + MK_64(0xCBA65CF3,0xA92339EF), + MK_64(0x8CCD69D6,0x52FF4B64), + MK_64(0x398AED7B,0x3AB890B4), + MK_64(0x0F59D1B1,0x457D2BD0), + MK_64(0x6776FE65,0x75D4EB3D), + MK_64(0x99FBC70E,0x997413E9), + MK_64(0x9E2CFCCF,0xE1C41EF7) + }; + +/* blkSize = 512 bits. hashSize = 256 bits */ +const u64b_t SKEIN_512_IV_256[] = + { + MK_64(0xCCD044A1,0x2FDB3E13), + MK_64(0xE8359030,0x1A79A9EB), + MK_64(0x55AEA061,0x4F816E6F), + MK_64(0x2A2767A4,0xAE9B94DB), + MK_64(0xEC06025E,0x74DD7683), + MK_64(0xE7A436CD,0xC4746251), + MK_64(0xC36FBAF9,0x393AD185), + MK_64(0x3EEDBA18,0x33EDFC13) + }; + +/* blkSize = 512 bits. hashSize = 384 bits */ +const u64b_t SKEIN_512_IV_384[] = + { + MK_64(0xA3F6C6BF,0x3A75EF5F), + MK_64(0xB0FEF9CC,0xFD84FAA4), + MK_64(0x9D77DD66,0x3D770CFE), + MK_64(0xD798CBF3,0xB468FDDA), + MK_64(0x1BC4A666,0x8A0E4465), + MK_64(0x7ED7D434,0xE5807407), + MK_64(0x548FC1AC,0xD4EC44D6), + MK_64(0x266E1754,0x6AA18FF8) + }; + +/* blkSize = 512 bits. 
hashSize = 512 bits */ +const u64b_t SKEIN_512_IV_512[] = + { + MK_64(0x4903ADFF,0x749C51CE), + MK_64(0x0D95DE39,0x9746DF03), + MK_64(0x8FD19341,0x27C79BCE), + MK_64(0x9A255629,0xFF352CB1), + MK_64(0x5DB62599,0xDF6CA7B0), + MK_64(0xEABE394C,0xA9D5C3F4), + MK_64(0x991112C7,0x1A75B523), + MK_64(0xAE18A40B,0x660FCC33) + }; + +/* blkSize = 1024 bits. hashSize = 384 bits */ +const u64b_t SKEIN1024_IV_384[] = + { + MK_64(0x5102B6B8,0xC1894A35), + MK_64(0xFEEBC9E3,0xFE8AF11A), + MK_64(0x0C807F06,0xE32BED71), + MK_64(0x60C13A52,0xB41A91F6), + MK_64(0x9716D35D,0xD4917C38), + MK_64(0xE780DF12,0x6FD31D3A), + MK_64(0x797846B6,0xC898303A), + MK_64(0xB172C2A8,0xB3572A3B), + MK_64(0xC9BC8203,0xA6104A6C), + MK_64(0x65909338,0xD75624F4), + MK_64(0x94BCC568,0x4B3F81A0), + MK_64(0x3EBBF51E,0x10ECFD46), + MK_64(0x2DF50F0B,0xEEB08542), + MK_64(0x3B5A6530,0x0DBC6516), + MK_64(0x484B9CD2,0x167BBCE1), + MK_64(0x2D136947,0xD4CBAFEA) + }; + +/* blkSize = 1024 bits. hashSize = 512 bits */ +const u64b_t SKEIN1024_IV_512[] = + { + MK_64(0xCAEC0E5D,0x7C1B1B18), + MK_64(0xA01B0E04,0x5F03E802), + MK_64(0x33840451,0xED912885), + MK_64(0x374AFB04,0xEAEC2E1C), + MK_64(0xDF25A0E2,0x813581F7), + MK_64(0xE4004093,0x8B12F9D2), + MK_64(0xA662D539,0xC2ED39B6), + MK_64(0xFA8B85CF,0x45D8C75A), + MK_64(0x8316ED8E,0x29EDE796), + MK_64(0x053289C0,0x2E9F91B8), + MK_64(0xC3F8EF1D,0x6D518B73), + MK_64(0xBDCEC3C4,0xD5EF332E), + MK_64(0x549A7E52,0x22974487), + MK_64(0x67070872,0x5B749816), + MK_64(0xB9CD28FB,0xF0581BD1), + MK_64(0x0E2940B8,0x15804974) + }; + +/* blkSize = 1024 bits. 
hashSize = 1024 bits */ +const u64b_t SKEIN1024_IV_1024[] = + { + MK_64(0xD593DA07,0x41E72355), + MK_64(0x15B5E511,0xAC73E00C), + MK_64(0x5180E5AE,0xBAF2C4F0), + MK_64(0x03BD41D3,0xFCBCAFAF), + MK_64(0x1CAEC6FD,0x1983A898), + MK_64(0x6E510B8B,0xCDD0589F), + MK_64(0x77E2BDFD,0xC6394ADA), + MK_64(0xC11E1DB5,0x24DCB0A3), + MK_64(0xD6D14AF9,0xC6329AB5), + MK_64(0x6A9B0BFC,0x6EB67E0D), + MK_64(0x9243C60D,0xCCFF1332), + MK_64(0x1A1F1DDE,0x743F02D4), + MK_64(0x0996753C,0x10ED0BB8), + MK_64(0x6572DD22,0xF2B4969A), + MK_64(0x61FD3062,0xD00A579A), + MK_64(0x1DE0536E,0x8682E539) + }; + +#endif /* _SKEIN_IV_H_ */ diff --git a/Optimized_32bit/skein_port.h b/Optimized_32bit/skein_port.h new file mode 100644 index 000000000000..653302de7467 --- /dev/null +++ b/Optimized_32bit/skein_port.h @@ -0,0 +1,124 @@ +#ifndef _SKEIN_PORT_H_ +#define _SKEIN_PORT_H_ +/******************************************************************* +** +** Platform-specific definitions for Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +** Many thanks to Brian Gladman for his portable header files. +** +** To port Skein to an "unsupported" platform, change the definitions +** in this file appropriately. +** +********************************************************************/ + +#include "brg_types.h" /* get integer type definitions */ + +typedef unsigned int uint_t; /* native unsigned integer */ +typedef uint_8t u08b_t; /* 8-bit unsigned integer */ +typedef uint_64t u64b_t; /* 64-bit unsigned integer */ + +#ifndef RotL_64 +#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N)))) +#endif + +/* + * Skein is "natively" little-endian (unlike SHA-xxx), for optimal + * performance on x86 CPUs. 
The Skein code requires the following + * definitions for dealing with endianness: + * + * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian + * Skein_Put64_LSB_First + * Skein_Get64_LSB_First + * Skein_Swap64 + * + * If SKEIN_NEED_SWAP is defined at compile time, it is used here + * along with the portable versions of Put64/Get64/Swap64, which + * are slow in general. + * + * Otherwise, an "auto-detect" of endianness is attempted below. + * If the default handling doesn't work well, the user may insert + * platform-specific code instead (e.g., for big-endian CPUs). + * + */ +#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */ + +#include "brg_endian.h" /* get endianness selection */ +#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN + /* here for big-endian CPUs */ +#define SKEIN_NEED_SWAP (1) +#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN + /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ +#define SKEIN_NEED_SWAP (0) +#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */ +#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt) +#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt)) +#endif +#else +#error "Skein needs endianness setting!" +#endif + +#endif /* ifndef SKEIN_NEED_SWAP */ + +/* + ****************************************************************** + * Provide any definitions still needed. 
+ ******************************************************************
+ */
+#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
+#if SKEIN_NEED_SWAP
+#define Skein_Swap64(w64) \
+  ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \
+    (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \
+    (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \
+    (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \
+    (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \
+    (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \
+    (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \
+    (((((u64b_t)(w64)) >>56) & 0xFF) ) )
+#else
+#define Skein_Swap64(w64) (w64)
+#endif
+#endif /* ifndef Skein_Swap64 */
+
+
+#ifndef Skein_Put64_LSB_First
+void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+    { /* this version is fully portable (big-endian or little-endian), but slow */
+    size_t n;
+
+    for (n=0;n<bCnt;n++)
+        dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7)));
+    }
+#else
+    ; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Put64_LSB_First */
+
+
+#ifndef Skein_Get64_LSB_First
+void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here?
*/
+    { /* this version is fully portable (big-endian or little-endian), but slow */
+    size_t n;
+
+    for (n=0;n<8*wCnt;n+=8)
+        dst[n/8] = (((u64b_t) src[n ]) ) +
+                   (((u64b_t) src[n+1]) << 8) +
+                   (((u64b_t) src[n+2]) << 16) +
+                   (((u64b_t) src[n+3]) << 24) +
+                   (((u64b_t) src[n+4]) << 32) +
+                   (((u64b_t) src[n+5]) << 40) +
+                   (((u64b_t) src[n+6]) << 48) +
+                   (((u64b_t) src[n+7]) << 56) ;
+    }
+#else
+    ; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Get64_LSB_First */
+
+#endif /* ifndef _SKEIN_PORT_H_ */
diff --git a/Optimized_64bit/SHA3api_ref.c b/Optimized_64bit/SHA3api_ref.c
new file mode 100644
index 000000000000..6861a3e4bffb
--- /dev/null
+++ b/Optimized_64bit/SHA3api_ref.c
@@ -0,0 +1,115 @@
+/***********************************************************************
+**
+** Implementation of the AHS API using the Skein hash function.
+**
+** Source code author: Doug Whiting, 2008.
+**
+** This algorithm and source code is released to the public domain.
+**
+************************************************************************/
+
+#include <string.h> /* get the memcpy/memset functions */
+#include "skein.h" /* get the Skein API definitions */
+#include "SHA3api_ref.h"/* get the AHS API definitions */
+
+/******************************************************************/
+/* AHS API code */
+/******************************************************************/
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* select the context size and init the context */
+HashReturn Init(hashState *state, int hashbitlen)
+    {
+#if SKEIN_256_NIST_MAX_HASHBITS
+    if (hashbitlen <= SKEIN_256_NIST_MAX_HASHBITS)
+        {
+        Skein_Assert(hashbitlen > 0,BAD_HASHLEN);
+        state->statebits = 64*SKEIN_256_STATE_WORDS;
+        return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen);
+        }
+#endif
+    if (hashbitlen <= SKEIN_512_NIST_MAX_HASHBITS)
+        {
+        state->statebits = 64*SKEIN_512_STATE_WORDS;
+        return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen);
+        }
+    else
+        {
+        state->statebits = 64*SKEIN1024_STATE_WORDS;
+        return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen);
+        }
+    }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* process data to be hashed */
+HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
+    {
+    /* only the final Update() call is allowed to do partial bytes, else assert an error */
+    Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, FAIL);
+
+    Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
+    if ((databitlen & 7) == 0) /* partial bytes? */
+        {
+        switch ((state->statebits >> 8) & 3)
+            {
+            case 2: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3);
+            case 1: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3);
+            case 0: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3);
+            default: return FAIL;
+            }
+        }
+    else
+        { /* handle partial final byte */
+        size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) */
+        u08b_t b,mask;
+
+        mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */
+        b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */
+
+        switch ((state->statebits >> 8) & 3)
+            {
+            case 2: Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */
+                    Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */
+                    break;
+            case 1: Skein_256_Update(&state->u.ctx_256,data,bCnt-1); /* process all but the final byte */
+                    Skein_256_Update(&state->u.ctx_256,&b , 1 ); /* process the (masked) partial byte */
+                    break;
+            case 0: Skein1024_Update(&state->u.ctx1024,data,bCnt-1); /* process all but the final byte */
+                    Skein1024_Update(&state->u.ctx1024,&b , 1 ); /* process the (masked) partial byte */
+                    break;
+            default: return FAIL;
+            }
+        Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */
+
+        return SUCCESS;
+        }
+    }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* finalize hash computation and output the result (hashbitlen bits) */
+HashReturn Final(hashState *state, BitSequence *hashval)
+    {
+    Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL);
+    switch ((state->statebits >> 8) & 3)
+        {
+        case 2: return Skein_512_Final(&state->u.ctx_512,hashval);
+        case 1: return Skein_256_Final(&state->u.ctx_256,hashval);
+        case 0: return Skein1024_Final(&state->u.ctx1024,hashval);
+        default: return FAIL;
+        }
+    }
+
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
+/* all-in-one hash function */
+HashReturn Hash(int hashbitlen, const BitSequence *data, /* all-in-one call */
+                DataLength databitlen,BitSequence *hashval)
+    {
+    hashState state;
+    HashReturn r = Init(&state,hashbitlen);
+    if (r == SUCCESS)
+        { /* these calls do not fail when called properly */
+        r = Update(&state,data,databitlen);
+        Final(&state,hashval);
+        }
+    return r;
+    }
diff --git a/Optimized_64bit/SHA3api_ref.h
b/Optimized_64bit/SHA3api_ref.h new file mode 100644 index 000000000000..6d62304e59b7 --- /dev/null +++ b/Optimized_64bit/SHA3api_ref.h @@ -0,0 +1,66 @@ +#ifndef _AHS_API_H_ +#define _AHS_API_H_ + +/*********************************************************************** +** +** Interface declarations of the AHS API using the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#include "skein.h" + +typedef enum + { + SUCCESS = SKEIN_SUCCESS, + FAIL = SKEIN_FAIL, + BAD_HASHLEN = SKEIN_BAD_HASHLEN + } + HashReturn; + +typedef size_t DataLength; /* bit count type */ +typedef u08b_t BitSequence; /* bit stream type */ + +typedef struct + { + uint_t statebits; /* 256, 512, or 1024 */ + union + { + Skein_Ctxt_Hdr_t h; /* common header "overlay" */ + Skein_256_Ctxt_t ctx_256; + Skein_512_Ctxt_t ctx_512; + Skein1024_Ctxt_t ctx1024; + } u; + } + hashState; + +/* "incremental" hashing API */ +HashReturn Init (hashState *state, int hashbitlen); +HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); +HashReturn Final (hashState *state, BitSequence *hashval); + +/* "all-in-one" call */ +HashReturn Hash (int hashbitlen, const BitSequence *data, + DataLength databitlen, BitSequence *hashval); + + +/* +** Re-define the compile-time constants below to change the selection +** of the Skein state size in the Init() function in SHA3api_ref.c. +** +** That is, the NIST API does not allow for explicit selection of the +** Skein block size, so it must be done implicitly in the Init() function. +** The selection is controlled by these constants. 
+*/ +#ifndef SKEIN_256_NIST_MAX_HASHBITS +#define SKEIN_256_NIST_MAX_HASHBITS (0) +#endif + +#ifndef SKEIN_512_NIST_MAX_HASHBITS +#define SKEIN_512_NIST_MAX_HASHBITS (512) +#endif + +#endif /* ifdef _AHS_API_H_ */ diff --git a/Optimized_64bit/brg_endian.h b/Optimized_64bit/brg_endian.h new file mode 100644 index 000000000000..978eb33f08cf --- /dev/null +++ b/Optimized_64bit/brg_endian.h @@ -0,0 +1,148 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue 20/10/2006 +*/ + +#ifndef BRG_ENDIAN_H +#define BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include <sys/endian.h> +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include <machine/endian.h> +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined(AVR) +# include <endian.h> +# if !defined( __BEOS__ ) +# include <byteswap.h> +# endif +# endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && 
__BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) || defined( AVR ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* 
**** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order +#endif +#endif + +/* special handler for IA64, which may be either endianness (?) */ +/* here we assume little-endian, but this may need to be changed */ +#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) +# define PLATFORM_MUST_ALIGN (1) +#ifndef PLATFORM_BYTE_ORDER +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif +#endif + +#ifndef PLATFORM_MUST_ALIGN +# define PLATFORM_MUST_ALIGN (0) +#endif + +#endif /* ifndef BRG_ENDIAN_H */ diff --git a/Optimized_64bit/brg_types.h b/Optimized_64bit/brg_types.h new file mode 100644 index 000000000000..d6d6cdab9fbf --- /dev/null +++ b/Optimized_64bit/brg_types.h @@ -0,0 +1,188 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. 
+ + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 + + The unsigned integer types defined here are of the form uint_<nn>t where + <nn> is the length of the type; for example, the unsigned 32-bit type is + 'uint_32t'. These are NOT the same as the 'C99 integer types' that are + defined in the inttypes.h and stdint.h headers since attempts to use these + types have shown that support for them is still highly variable. However, + since the latter are of the form uint<nn>_t, a regular expression search + and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t') + can be used to convert the types used here to the C99 standard types. +*/ + +#ifndef BRG_TYPES_H +#define BRG_TYPES_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <limits.h> + +#ifndef BRG_UI8 +# define BRG_UI8 +# if UCHAR_MAX == 255u + typedef unsigned char uint_8t; +# else +# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h +# endif +#endif + +#ifndef BRG_UI16 +# define BRG_UI16 +# if USHRT_MAX == 65535u + typedef unsigned short uint_16t; +# else +# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h +# endif +#endif + +#ifndef BRG_UI32 +# define BRG_UI32 +# if UINT_MAX == 4294967295u +# define li_32(h) 0x##h##u + typedef unsigned int uint_32t; +# elif ULONG_MAX == 4294967295u +# define li_32(h) 0x##h##ul + typedef unsigned long uint_32t; +# elif defined( _CRAY ) +# error This code needs 32-bit data types, which Cray machines do not provide +# else +# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h +# endif +#endif + +#ifndef BRG_UI64 +# if defined( __BORLANDC__ ) && !defined( __MSDOS__ ) +# define BRG_UI64 +# define li_64(h) 0x##h##ui64 + typedef unsigned __int64 uint_64t; +# elif 
defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */ +# define BRG_UI64 +# define li_64(h) 0x##h##ui64 + typedef unsigned __int64 uint_64t; +# elif defined( __sun ) && defined(ULONG_MAX) && ULONG_MAX == 0xfffffffful +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u +# if UINT_MAX == 18446744073709551615u +# define BRG_UI64 +# define li_64(h) 0x##h##u + typedef unsigned int uint_64t; +# endif +# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u +# if ULONG_MAX == 18446744073709551615ul +# define BRG_UI64 +# define li_64(h) 0x##h##ul + typedef unsigned long uint_64t; +# endif +# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u +# if ULLONG_MAX == 18446744073709551615ull +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# endif +# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u +# if ULONG_LONG_MAX == 18446744073709551615ull +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# endif +# elif defined(__GNUC__) /* DLW: avoid mingw problem with -ansi */ +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# endif +#endif + +#if defined( NEED_UINT_64T ) && !defined( BRG_UI64 ) +# error Please define uint_64t as an unsigned 64 bit type in brg_types.h +#endif + +#ifndef RETURN_VALUES +# define RETURN_VALUES +# if defined( DLL_EXPORT ) +# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) +# define VOID_RETURN __declspec( dllexport ) void __stdcall +# define INT_RETURN __declspec( dllexport ) int __stdcall +# elif defined( __GNUC__ ) +# define VOID_RETURN __declspec( __dllexport__ ) void +# define INT_RETURN __declspec( __dllexport__ ) int +# else +# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers +# endif +# elif defined( DLL_IMPORT ) +# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) +# define 
VOID_RETURN __declspec( dllimport ) void __stdcall +# define INT_RETURN __declspec( dllimport ) int __stdcall +# elif defined( __GNUC__ ) +# define VOID_RETURN __declspec( __dllimport__ ) void +# define INT_RETURN __declspec( __dllimport__ ) int +# else +# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers +# endif +# elif defined( __WATCOMC__ ) +# define VOID_RETURN void __cdecl +# define INT_RETURN int __cdecl +# else +# define VOID_RETURN void +# define INT_RETURN int +# endif +#endif + +/* These defines are used to declare buffers in a way that allows + faster operations on longer variables to be used. In all these + defines 'size' must be a power of 2 and >= 8 + + dec_unit_type(size,x) declares a variable 'x' of length + 'size' bits + + dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize' + bytes defined as an array of variables + each of 'size' bits (bsize must be a + multiple of size / 8) + + ptr_cast(x,size) casts a pointer to a pointer to a + varaiable of length 'size' bits +*/ + +#define ui_type(size) uint_##size##t +#define dec_unit_type(size,x) typedef ui_type(size) x +#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)] +#define ptr_cast(x,size) ((ui_type(size)*)(x)) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/Optimized_64bit/skein.c b/Optimized_64bit/skein.c new file mode 100644 index 000000000000..c9289cd49e8e --- /dev/null +++ b/Optimized_64bit/skein.c @@ -0,0 +1,753 @@ +/*********************************************************************** +** +** Implementation of the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +************************************************************************/ + +#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */ + +#include <string.h> /* get the memcpy/memset functions */ +#include "skein.h" /* get the Skein API definitions */ +#include "skein_iv.h" /* get precomputed IVs */ + +/*****************************************************************/ +/* External function to process blkCnt (nonzero) full block(s) of data. */ +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); + +/*****************************************************************/ +/* 256-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 256: memcpy(ctx->X,SKEIN_256_IV_256,sizeof(ctx->X)); break; + case 224: memcpy(ctx->X,SKEIN_256_IV_224,sizeof(ctx->X)); break; + case 160: memcpy(ctx->X,SKEIN_256_IV_160,sizeof(ctx->X)); break; + case 128: memcpy(ctx->X,SKEIN_256_IV_128,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = 
Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_256_Update(ctx,key,keyBytes); /* hash the key */ + Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;i<SKEIN_256_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(256,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + 
ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES); + Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_256_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */ + Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_256_BLOCK_BYTES; + msg += n * SKEIN_256_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + 
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_256_API_CodeSize(void) + { + return ((u08b_t *) Skein_256_API_CodeSize) - + ((u08b_t *) Skein_256_Init); + } +#endif + +/*****************************************************************/ +/* 512-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: memcpy(ctx->X,SKEIN_512_IV_512,sizeof(ctx->X)); break; + case 384: memcpy(ctx->X,SKEIN_512_IV_384,sizeof(ctx->X)); break; + case 256: memcpy(ctx->X,SKEIN_512_IV_256,sizeof(ctx->X)); break; + case 224: memcpy(ctx->X,SKEIN_512_IV_224,sizeof(ctx->X)); break; +#endif + default: + /* here 
if there is no precomputed IV value available */ + /* build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_512_Update(ctx,key,keyBytes); /* hash the key */ + Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;i<SKEIN_512_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(512,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + 
ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); + Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_512_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ + Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; + msg += n * SKEIN_512_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + 
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_512_API_CodeSize(void) + { + return ((u08b_t *) Skein_512_API_CodeSize) - + ((u08b_t *) Skein_512_Init); + } +#endif + +/*****************************************************************/ +/* 1024-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + + switch (hashBitLen) + { /* use pre-computed values, where available */ +#ifndef SKEIN_NO_PRECOMP + case 512: memcpy(ctx->X,SKEIN1024_IV_512 ,sizeof(ctx->X)); break; + case 384: memcpy(ctx->X,SKEIN1024_IV_384 ,sizeof(ctx->X)); break; + case 1024: memcpy(ctx->X,SKEIN1024_IV_1024,sizeof(ctx->X)); break; +#endif + default: + /* here if there is no precomputed IV value available */ + /* 
build/process the config block, type == CONFIG (could be precomputed) */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + memset(&cfg.w[3],0,sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + break; + } + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein1024_Update(ctx,key,keyBytes); /* hash the key */ + Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ +#if SKEIN_NEED_SWAP + { + uint_t i; + for (i=0;i<SKEIN1024_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } +#endif + } + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(1024,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash (default) */ + ctx->h.bCnt = 0; /* buffer b[] starts out empty */ + Skein_Start_New_Type(ctx,MSG); + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + 
ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES); + Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN1024_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */ + Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES); + msgByteCnt -= n * SKEIN1024_BLOCK_BYTES; + msg += n * SKEIN1024_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + 
memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein1024_API_CodeSize(void) + { + return ((u08b_t *) Skein1024_API_CodeSize) - + ((u08b_t *) Skein1024_Init); + } +#endif + +/**************** Functions to support MAC/tree hashing ***************/ +/* (this code is identical for Optimized and Reference versions) */ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash 
computation and output the block, no OUTPUT stage */ +int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +#if SKEIN_TREE_HASH +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + 
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); + 
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } +#endif diff --git a/Optimized_64bit/skein.h b/Optimized_64bit/skein.h new file mode 100644 index 000000000000..721c9bc9ce0d --- /dev/null +++ b/Optimized_64bit/skein.h @@ -0,0 +1,327 @@ +#ifndef _SKEIN_H_ +#define _SKEIN_H_ 1 +/************************************************************************** +** +** Interface declarations and internal definitions for Skein hashing. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +*************************************************************************** +** +** The following compile-time switches may be defined to control some +** tradeoffs between speed, code size, error checking, and security. +** +** The "default" note explains what happens when the switch is not defined. +** +** SKEIN_DEBUG -- make callouts from inside Skein code +** to examine/display intermediate values. +** [default: no callouts (no overhead)] +** +** SKEIN_ERR_CHECK -- how error checking is handled inside Skein +** code. If not defined, most error checking +** is disabled (for performance). Otherwise, +** the switch value is interpreted as: +** 0: use assert() to flag errors +** 1: return SKEIN_FAIL to flag errors +** +***************************************************************************/ +#ifdef __cplusplus +extern "C" +{ +#endif + +#include <stddef.h> /* get size_t definition */ +#include "skein_port.h" /* get platform-specific definitions */ + +enum + { + SKEIN_SUCCESS = 0, /* return codes from Skein calls */ + SKEIN_FAIL = 1, + SKEIN_BAD_HASHLEN = 2 + }; + +#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ + +#define SKEIN_256_STATE_WORDS ( 4) +#define SKEIN_512_STATE_WORDS ( 8) +#define SKEIN1024_STATE_WORDS (16) +#define SKEIN_MAX_STATE_WORDS (16) + +#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS) + +typedef struct + { + size_t hashBitLen; /* size of hash result, in bits */ + size_t bCnt; /* current byte count in buffer b[] */ + u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak 
words: T[0]=byte cnt, T[1]=flags */ + } Skein_Ctxt_Hdr_t; + +typedef struct /* 256-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein_256_Ctxt_t; + +typedef struct /* 512-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein_512_Ctxt_t; + +typedef struct /* 1024-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein1024_Ctxt_t; + +/* Skein APIs for (incremental) "straight hashing" */ +int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen); +int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen); +int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen); + +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); + +int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +/* +** Skein APIs for "extended" initialization: MAC keys, tree hashing. +** After an InitExt() call, just use Update/Final calls as with Init(). +** +** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes. +** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, +** the results of InitExt() are identical to calling Init(). 
+** The function Init() may be called once to "precompute" the IV for +** a given hashBitLen value, then by saving a copy of the context +** the IV computation may be avoided in later calls. +** Similarly, the function InitExt() may be called once per MAC key +** to precompute the MAC IV, then a copy of the context saved and +** reused for each new MAC computation. +**/ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); + +/* +** Skein APIs for MAC and tree hash: +** Final_Pad: pad, do final block, but no OUTPUT type +** Output: do just the output stage +*/ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +#ifndef SKEIN_TREE_HASH +#define SKEIN_TREE_HASH (1) +#endif +#if SKEIN_TREE_HASH +int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); +#endif + +/***************************************************************** +** "Internal" Skein definitions +** -- not needed for sequential hashing API, but will be +** helpful for other uses of Skein (e.g., tree hash mode). +** -- included here so that they can be shared between +** reference and optimized code. 
+******************************************************************/ + +/* tweak word T[1]: bit field starting positions */ +#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ + +#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ +#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ +#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ +#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ +#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ + +/* tweak word T[1]: flag bit definition(s) */ +#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST) +#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL) +#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD) + +/* tweak word T[1]: tree level bit field mask */ +#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL) +#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL) + +/* tweak word T[1]: block type field */ +#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */ +#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */ +#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */ +#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ +#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ +#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ +#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ +#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ +#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ + +#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) +#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ +#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */ +#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization 
string */ +#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ +#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ +#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ +#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ +#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ +#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ + +#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) +#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) + +#define SKEIN_VERSION (1) + +#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ +#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/ +#endif + +#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32)) +#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) +#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) + +#define SKEIN_CFG_STR_LEN (4*8) + +/* bit field definitions in config block treeInfo word */ +#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0) +#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8) +#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) + +#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) +#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) +#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) + +#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \ + ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ + (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ + (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) ) + +#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */ + +/* +** Skein macros for getting/setting tweak words, etc. 
+** These are useful for partial input bytes, hash tree init/update, etc. +**/ +#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) +#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} + +#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0) +#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1) +#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) +#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) + +/* set both tweak words at once */ +#define Skein_Set_T0_T1(ctxPtr,T0,T1) \ + { \ + Skein_Set_T0(ctxPtr,(T0)); \ + Skein_Set_T1(ctxPtr,(T1)); \ + } + +#define Skein_Set_Type(ctxPtr,BLK_TYPE) \ + Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE) + +/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ +#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ + { Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } + +#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } +#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } + +#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);} + +/***************************************************************** +** "Internal" Skein definitions for debugging and error checking +******************************************************************/ +#ifdef SKEIN_DEBUG /* examine/display intermediate values? */ +#include "skein_debug.h" +#else /* default is no callouts */ +#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr) +#define Skein_Show_Round(bits,ctx,r,X) +#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr) +#define Skein_Show_Final(bits,ctx,cnt,outPtr) +#define Skein_Show_Key(bits,ctx,key,keyBytes) +#endif + +#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? 
*/ +#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ +#define Skein_assert(x) +#elif defined(SKEIN_ASSERT) +#include <assert.h> +#define Skein_Assert(x,retCode) assert(x) +#define Skein_assert(x) assert(x) +#else +#include <assert.h> +#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ +#define Skein_assert(x) assert(x) /* internal error */ +#endif + +/***************************************************************** +** Skein block function constants (shared across Ref and Opt code) +******************************************************************/ +enum + { + /* Skein_256 round rotation constants */ + R_256_0_0=14, R_256_0_1=16, + R_256_1_0=52, R_256_1_1=57, + R_256_2_0=23, R_256_2_1=40, + R_256_3_0= 5, R_256_3_1=37, + R_256_4_0=25, R_256_4_1=33, + R_256_5_0=46, R_256_5_1=12, + R_256_6_0=58, R_256_6_1=22, + R_256_7_0=32, R_256_7_1=32, + + /* Skein_512 round rotation constants */ + R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, + R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, + R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, + R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, + R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, + R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, + R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, + R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22, + + /* Skein1024 round rotation constants */ + R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37, + R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52, + R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17, + R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25, + R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, 
R1024_4_6=44, R1024_4_7=30, + R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41, + R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25, + R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20 + }; + +#ifndef SKEIN_ROUNDS +#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */ +#define SKEIN_512_ROUNDS_TOTAL (72) +#define SKEIN1024_ROUNDS_TOTAL (80) +#else /* allow command-line define in range 8*(5..14) */ +#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5)) +#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5)) +#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5)) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef _SKEIN_H_ */ diff --git a/Optimized_64bit/skein_block.c b/Optimized_64bit/skein_block.c new file mode 100644 index 000000000000..bfd29d1eee2d --- /dev/null +++ b/Optimized_64bit/skein_block.c @@ -0,0 +1,689 @@ +/*********************************************************************** +** +** Implementation of the Skein block functions. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +** Compile-time switches: +** +** SKEIN_USE_ASM -- set bits (256/512/1024) to select which +** versions use ASM code for block processing +** [default: use C for all block sizes] +** +************************************************************************/ + +#include <string.h> +#include "skein.h" + +#ifndef SKEIN_USE_ASM +#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */ +#endif + +#ifndef SKEIN_LOOP +#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */ +#endif + +#define BLK_BITS (WCNT*64) /* some useful definitions for code here */ +#define KW_TWK_BASE (0) +#define KW_KEY_BASE (3) +#define ks (kw + KW_KEY_BASE) +#define ts (kw + KW_TWK_BASE) + +#ifdef SKEIN_DEBUG +#define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; } +#else +#define DebugSaveTweak(ctx) +#endif + +/***************************** Skein_256 ******************************/ +#if !(SKEIN_USE_ASM & 256) +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_256_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_256_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10) +#else +#define SKEIN_UNROLL_256 (0) +#endif + +#if SKEIN_UNROLL_256 +#if (RCNT % SKEIN_UNROLL_256) +#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + u64b_t X0,X1,X2,X3; /* local copy of context vars, for speed */ + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; +#endif + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1] + ts[0]; + X2 = w[2] + ks[2] + ts[1]; + X3 = w[3] + ks[3]; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); /* show starting state values */ + + blkPtr += SKEIN_256_BLOCK_BYTES; + + /* run the rounds */ + +#define Round256(p0,p1,p2,p3,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + +#if SKEIN_UNROLL_256 == 0 +#define R256(p0,p1,p2,p3,ROT,rNum) /* fully unrolled */ \ + Round256(p0,p1,p2,p3,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); + +#define I256(R) \ + X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \ + X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \ + X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \ + X3 += ks[((R)+4) % 5] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R256(p0,p1,p2,p3,ROT,rNum) \ + Round256(p0,p1,p2,p3,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); + +#define I256(R) \ + X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ + X1 += ks[r+(R)+1] + ts[r+(R)+0]; \ + X2 += ks[r+(R)+2] + ts[r+(R)+1]; \ + X3 += ks[r+(R)+3] + r+(R) ; \ + ks[r + (R)+4 ] = ks[r+(R)-1]; /* rotate key schedule */\ + ts[r + (R)+2 ] = ts[r+(R)-1]; \ + 
Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_256) /* loop thru it */ +#endif + { +#define R256_8_rounds(R) \ + R256(0,1,2,3,R_256_0,8*(R) + 1); \ + R256(0,3,2,1,R_256_1,8*(R) + 2); \ + R256(0,1,2,3,R_256_2,8*(R) + 3); \ + R256(0,3,2,1,R_256_3,8*(R) + 4); \ + I256(2*(R)); \ + R256(0,1,2,3,R_256_4,8*(R) + 5); \ + R256(0,3,2,1,R_256_5,8*(R) + 6); \ + R256(0,1,2,3,R_256_6,8*(R) + 7); \ + R256(0,3,2,1,R_256_7,8*(R) + 8); \ + I256(2*(R)+1); + + R256_8_rounds( 0); + +#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN))) + + #if R256_Unroll_R( 1) + R256_8_rounds( 1); + #endif + #if R256_Unroll_R( 2) + R256_8_rounds( 2); + #endif + #if R256_Unroll_R( 3) + R256_8_rounds( 3); + #endif + #if R256_Unroll_R( 4) + R256_8_rounds( 4); + #endif + #if R256_Unroll_R( 5) + R256_8_rounds( 5); + #endif + #if R256_Unroll_R( 6) + R256_8_rounds( 6); + #endif + #if R256_Unroll_R( 7) + R256_8_rounds( 7); + #endif + #if R256_Unroll_R( 8) + R256_8_rounds( 8); + #endif + #if R256_Unroll_R( 9) + R256_8_rounds( 9); + #endif + #if R256_Unroll_R(10) + R256_8_rounds(10); + #endif + #if R256_Unroll_R(11) + R256_8_rounds(11); + #endif + #if R256_Unroll_R(12) + R256_8_rounds(12); + #endif + #if R256_Unroll_R(13) + R256_8_rounds(13); + #endif + #if R256_Unroll_R(14) + R256_8_rounds(14); + #endif + #if (SKEIN_UNROLL_256 > 14) +#error "need more unrolling in Skein_256_Process_Block" + #endif + } + /* do the final "feedforward" xor, update context chaining vars */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_256_Process_Block_CodeSize(void) + { + return ((u08b_t *) 
Skein_256_Process_Block_CodeSize) - + ((u08b_t *) Skein_256_Process_Block); + } +uint_t Skein_256_Unroll_Cnt(void) + { + return SKEIN_UNROLL_256; + } +#endif +#endif + +/***************************** Skein_512 ******************************/ +#if !(SKEIN_USE_ASM & 512) +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_512_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN_512_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10) +#else +#define SKEIN_UNROLL_512 (0) +#endif + +#if SKEIN_UNROLL_512 +#if (RCNT % SKEIN_UNROLL_512) +#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + u64b_t X0,X1,X2,X3,X4,X5,X6,X7; /* local copy of vars, for speed */ + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; + Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[0] = ctx->X[0]; + ks[1] = ctx->X[1]; + ks[2] = ctx->X[2]; + ks[3] = ctx->X[3]; + ks[4] = ctx->X[4]; + ks[5] = ctx->X[5]; + ks[6] = ctx->X[6]; + ks[7] = ctx->X[7]; + ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ + ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X0 = w[0] + ks[0]; /* do the first full key injection */ + X1 = w[1] + ks[1]; + X2 = w[2] + ks[2]; + X3 = w[3] + ks[3]; + X4 = w[4] + ks[4]; + X5 = w[5] + ks[5] + ts[0]; + X6 = w[6] + ks[6] + ts[1]; + X7 = w[7] + ks[7]; + + blkPtr += SKEIN_512_BLOCK_BYTES; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); + /* run the rounds */ +#define Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ + +#if SKEIN_UNROLL_512 == 0 +#define R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) /* unrolled */ \ + Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rNum,Xptr); + +#define I512(R) \ + X0 += ks[((R)+1) % 9]; /* inject the key schedule value */ \ + X1 += ks[((R)+2) % 9]; \ + X2 += ks[((R)+3) % 9]; \ + X3 += ks[((R)+4) % 9]; \ + X4 += ks[((R)+5) % 9]; \ + X5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; \ + X6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; \ + X7 += ks[((R)+8) % 9] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define 
R512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Round512(p0,p1,p2,p3,p4,p5,p6,p7,ROT,rNum) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rNum,Xptr); + +#define I512(R) \ + X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ + X1 += ks[r+(R)+1]; \ + X2 += ks[r+(R)+2]; \ + X3 += ks[r+(R)+3]; \ + X4 += ks[r+(R)+4]; \ + X5 += ks[r+(R)+5] + ts[r+(R)+0]; \ + X6 += ks[r+(R)+6] + ts[r+(R)+1]; \ + X7 += ks[r+(R)+7] + r+(R) ; \ + ks[r + (R)+8] = ks[r+(R)-1]; /* rotate key schedule */ \ + ts[r + (R)+2] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r < 2*RCNT;r+=2*SKEIN_UNROLL_512) /* loop thru it */ +#endif /* end of looped code definitions */ + { +#define R512_8_rounds(R) /* do 8 full rounds */ \ + R512(0,1,2,3,4,5,6,7,R_512_0,8*(R)+ 1); \ + R512(2,1,4,7,6,5,0,3,R_512_1,8*(R)+ 2); \ + R512(4,1,6,3,0,5,2,7,R_512_2,8*(R)+ 3); \ + R512(6,1,0,7,2,5,4,3,R_512_3,8*(R)+ 4); \ + I512(2*(R)); \ + R512(0,1,2,3,4,5,6,7,R_512_4,8*(R)+ 5); \ + R512(2,1,4,7,6,5,0,3,R_512_5,8*(R)+ 6); \ + R512(4,1,6,3,0,5,2,7,R_512_6,8*(R)+ 7); \ + R512(6,1,0,7,2,5,4,3,R_512_7,8*(R)+ 8); \ + I512(2*(R)+1); /* and key injection */ + + R512_8_rounds( 0); + +#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN))) + + #if R512_Unroll_R( 1) + R512_8_rounds( 1); + #endif + #if R512_Unroll_R( 2) + R512_8_rounds( 2); + #endif + #if R512_Unroll_R( 3) + R512_8_rounds( 3); + #endif + #if R512_Unroll_R( 4) + R512_8_rounds( 4); + #endif + #if R512_Unroll_R( 5) + R512_8_rounds( 5); + #endif + #if R512_Unroll_R( 6) + R512_8_rounds( 6); + #endif + #if R512_Unroll_R( 7) + R512_8_rounds( 7); + #endif + #if R512_Unroll_R( 8) + R512_8_rounds( 8); + #endif + #if R512_Unroll_R( 9) + R512_8_rounds( 9); + #endif + #if R512_Unroll_R(10) + R512_8_rounds(10); + #endif + #if R512_Unroll_R(11) + R512_8_rounds(11); + #endif + #if R512_Unroll_R(12) + R512_8_rounds(12); + #endif + #if R512_Unroll_R(13) + R512_8_rounds(13); + #endif 
+ #if R512_Unroll_R(14) + R512_8_rounds(14); + #endif + #if (SKEIN_UNROLL_512 > 14) +#error "need more unrolling in Skein_512_Process_Block" + #endif + } + + /* do the final "feedforward" xor, update context chaining vars */ + ctx->X[0] = X0 ^ w[0]; + ctx->X[1] = X1 ^ w[1]; + ctx->X[2] = X2 ^ w[2]; + ctx->X[3] = X3 ^ w[3]; + ctx->X[4] = X4 ^ w[4]; + ctx->X[5] = X5 ^ w[5]; + ctx->X[6] = X6 ^ w[6]; + ctx->X[7] = X7 ^ w[7]; + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_512_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein_512_Process_Block_CodeSize) - + ((u08b_t *) Skein_512_Process_Block); + } +uint_t Skein_512_Unroll_Cnt(void) + { + return SKEIN_UNROLL_512; + } +#endif +#endif + +/***************************** Skein1024 ******************************/ +#if !(SKEIN_USE_ASM & 1024) +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C, always looping (unrolled is bigger AND slower!) 
*/ + enum + { + WCNT = SKEIN1024_STATE_WORDS + }; +#undef RCNT +#define RCNT (SKEIN1024_ROUNDS_TOTAL/8) + +#ifdef SKEIN_LOOP /* configure how much to unroll the loop */ +#define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10) +#else +#define SKEIN_UNROLL_1024 (0) +#endif + +#if (SKEIN_UNROLL_1024 != 0) +#if (RCNT % SKEIN_UNROLL_1024) +#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ +#endif + size_t r; + u64b_t kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ +#else + u64b_t kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ +#endif + + u64b_t X00,X01,X02,X03,X04,X05,X06,X07, /* local copy of vars, for speed */ + X08,X09,X10,X11,X12,X13,X14,X15; + u64b_t w [WCNT]; /* local copy of input block */ +#ifdef SKEIN_DEBUG + const u64b_t *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */ + Xptr[ 0] = &X00; Xptr[ 1] = &X01; Xptr[ 2] = &X02; Xptr[ 3] = &X03; + Xptr[ 4] = &X04; Xptr[ 5] = &X05; Xptr[ 6] = &X06; Xptr[ 7] = &X07; + Xptr[ 8] = &X08; Xptr[ 9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11; + Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15; +#endif + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ts[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[ 0] = ctx->X[ 0]; + ks[ 1] = ctx->X[ 1]; + ks[ 2] = ctx->X[ 2]; + ks[ 3] = ctx->X[ 3]; + ks[ 4] = ctx->X[ 4]; + ks[ 5] = ctx->X[ 5]; + ks[ 6] = ctx->X[ 6]; + ks[ 7] = ctx->X[ 7]; + ks[ 8] = ctx->X[ 8]; + ks[ 9] = ctx->X[ 9]; + ks[10] = ctx->X[10]; + ks[11] = ctx->X[11]; + ks[12] = ctx->X[12]; + ks[13] = ctx->X[13]; + ks[14] = ctx->X[14]; + ks[15] = ctx->X[15]; + ks[16] = ks[ 0] ^ ks[ 1] ^ ks[ 2] ^ ks[ 3] ^ + ks[ 4] ^ ks[ 5] ^ ks[ 6] ^ ks[ 7] ^ + ks[ 8] ^ ks[ 9] ^ ks[10] ^ ks[11] ^ + ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY; + + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + DebugSaveTweak(ctx); + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + + X00 = w[ 0] + ks[ 0]; /* do the first full key injection */ + X01 = w[ 1] + ks[ 1]; + X02 = w[ 2] + ks[ 2]; + X03 = w[ 3] + ks[ 3]; + X04 = w[ 4] + ks[ 4]; + X05 = w[ 5] + ks[ 5]; + X06 = w[ 6] + ks[ 6]; + X07 = w[ 7] + ks[ 7]; + X08 = w[ 8] + ks[ 8]; + X09 = w[ 9] + ks[ 9]; + X10 = w[10] + ks[10]; + X11 = w[11] + ks[11]; + X12 = w[12] + ks[12]; + X13 = w[13] + ks[13] + ts[0]; + X14 = w[14] + ks[14] + ts[1]; + X15 = w[15] + ks[15]; + + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,Xptr); + +#define Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rNum) \ + X##p0 += X##p1; X##p1 = RotL_64(X##p1,ROT##_0); X##p1 ^= X##p0; \ + X##p2 += X##p3; X##p3 = RotL_64(X##p3,ROT##_1); X##p3 ^= X##p2; \ + X##p4 += X##p5; X##p5 = RotL_64(X##p5,ROT##_2); X##p5 ^= X##p4; \ + X##p6 += X##p7; X##p7 = RotL_64(X##p7,ROT##_3); X##p7 ^= X##p6; \ + X##p8 += X##p9; X##p9 = RotL_64(X##p9,ROT##_4); X##p9 ^= X##p8; \ + X##pA += X##pB; X##pB = RotL_64(X##pB,ROT##_5); X##pB ^= X##pA; \ + X##pC += X##pD; 
X##pD = RotL_64(X##pD,ROT##_6); X##pD ^= X##pC; \ + X##pE += X##pF; X##pF = RotL_64(X##pF,ROT##_7); X##pF ^= X##pE; \ + +#if SKEIN_UNROLL_1024 == 0 +#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,rn,Xptr); + +#define I1024(R) \ + X00 += ks[((R)+ 1) % 17]; /* inject the key schedule value */ \ + X01 += ks[((R)+ 2) % 17]; \ + X02 += ks[((R)+ 3) % 17]; \ + X03 += ks[((R)+ 4) % 17]; \ + X04 += ks[((R)+ 5) % 17]; \ + X05 += ks[((R)+ 6) % 17]; \ + X06 += ks[((R)+ 7) % 17]; \ + X07 += ks[((R)+ 8) % 17]; \ + X08 += ks[((R)+ 9) % 17]; \ + X09 += ks[((R)+10) % 17]; \ + X10 += ks[((R)+11) % 17]; \ + X11 += ks[((R)+12) % 17]; \ + X12 += ks[((R)+13) % 17]; \ + X13 += ks[((R)+14) % 17] + ts[((R)+1) % 3]; \ + X14 += ks[((R)+15) % 17] + ts[((R)+2) % 3]; \ + X15 += ks[((R)+16) % 17] + (R)+1; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); +#else /* looping version */ +#define R1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Round1024(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pA,pB,pC,pD,pE,pF,ROT,rn) \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,4*(r-1)+rn,Xptr); + +#define I1024(R) \ + X00 += ks[r+(R)+ 0]; /* inject the key schedule value */ \ + X01 += ks[r+(R)+ 1]; \ + X02 += ks[r+(R)+ 2]; \ + X03 += ks[r+(R)+ 3]; \ + X04 += ks[r+(R)+ 4]; \ + X05 += ks[r+(R)+ 5]; \ + X06 += ks[r+(R)+ 6]; \ + X07 += ks[r+(R)+ 7]; \ + X08 += ks[r+(R)+ 8]; \ + X09 += ks[r+(R)+ 9]; \ + X10 += ks[r+(R)+10]; \ + X11 += ks[r+(R)+11]; \ + X12 += ks[r+(R)+12]; \ + X13 += ks[r+(R)+13] + ts[r+(R)+0]; \ + X14 += ks[r+(R)+14] + ts[r+(R)+1]; \ + X15 += ks[r+(R)+15] + r+(R) ; \ + ks[r + (R)+16] = ks[r+(R)-1]; /* rotate key schedule */ \ + ts[r + (R)+ 2] = ts[r+(R)-1]; \ + Skein_Show_R_Ptr(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,Xptr); + + for (r=1;r <= 2*RCNT;r+=2*SKEIN_UNROLL_1024) /* loop thru it */ +#endif + { +#define R1024_8_rounds(R) /* do 8 full rounds */ \ + 
R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_0,8*(R) + 1); \ + R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_1,8*(R) + 2); \ + R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_2,8*(R) + 3); \ + R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_3,8*(R) + 4); \ + I1024(2*(R)); \ + R1024(00,01,02,03,04,05,06,07,08,09,10,11,12,13,14,15,R1024_4,8*(R) + 5); \ + R1024(00,09,02,13,06,11,04,15,10,07,12,03,14,05,08,01,R1024_5,8*(R) + 6); \ + R1024(00,07,02,05,04,03,06,01,12,15,14,13,08,11,10,09,R1024_6,8*(R) + 7); \ + R1024(00,15,02,11,06,13,04,09,14,01,08,05,10,03,12,07,R1024_7,8*(R) + 8); \ + I1024(2*(R)+1); + + R1024_8_rounds( 0); + +#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN))) + + #if R1024_Unroll_R( 1) + R1024_8_rounds( 1); + #endif + #if R1024_Unroll_R( 2) + R1024_8_rounds( 2); + #endif + #if R1024_Unroll_R( 3) + R1024_8_rounds( 3); + #endif + #if R1024_Unroll_R( 4) + R1024_8_rounds( 4); + #endif + #if R1024_Unroll_R( 5) + R1024_8_rounds( 5); + #endif + #if R1024_Unroll_R( 6) + R1024_8_rounds( 6); + #endif + #if R1024_Unroll_R( 7) + R1024_8_rounds( 7); + #endif + #if R1024_Unroll_R( 8) + R1024_8_rounds( 8); + #endif + #if R1024_Unroll_R( 9) + R1024_8_rounds( 9); + #endif + #if R1024_Unroll_R(10) + R1024_8_rounds(10); + #endif + #if R1024_Unroll_R(11) + R1024_8_rounds(11); + #endif + #if R1024_Unroll_R(12) + R1024_8_rounds(12); + #endif + #if R1024_Unroll_R(13) + R1024_8_rounds(13); + #endif + #if R1024_Unroll_R(14) + R1024_8_rounds(14); + #endif + #if (SKEIN_UNROLL_1024 > 14) +#error "need more unrolling in Skein_1024_Process_Block" + #endif + } + /* do the final "feedforward" xor, update context chaining vars */ + + ctx->X[ 0] = X00 ^ w[ 0]; + ctx->X[ 1] = X01 ^ w[ 1]; + ctx->X[ 2] = X02 ^ w[ 2]; + ctx->X[ 3] = X03 ^ w[ 3]; + ctx->X[ 4] = X04 ^ w[ 4]; + ctx->X[ 5] = X05 ^ w[ 5]; + ctx->X[ 6] = X06 ^ w[ 6]; + ctx->X[ 7] = X07 ^ w[ 7]; + ctx->X[ 8] 
= X08 ^ w[ 8]; + ctx->X[ 9] = X09 ^ w[ 9]; + ctx->X[10] = X10 ^ w[10]; + ctx->X[11] = X11 ^ w[11]; + ctx->X[12] = X12 ^ w[12]; + ctx->X[13] = X13 ^ w[13]; + ctx->X[14] = X14 ^ w[14]; + ctx->X[15] = X15 ^ w[15]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + ts[1] &= ~SKEIN_T1_FLAG_FIRST; + blkPtr += SKEIN1024_BLOCK_BYTES; + } + while (--blkCnt); + ctx->h.T[0] = ts[0]; + ctx->h.T[1] = ts[1]; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein1024_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein1024_Process_Block_CodeSize) - + ((u08b_t *) Skein1024_Process_Block); + } +uint_t Skein1024_Unroll_Cnt(void) + { + return SKEIN_UNROLL_1024; + } +#endif +#endif diff --git a/Optimized_64bit/skein_debug.c b/Optimized_64bit/skein_debug.c new file mode 100644 index 000000000000..fac5038598ea --- /dev/null +++ b/Optimized_64bit/skein_debug.c @@ -0,0 +1,247 @@ +/*********************************************************************** +** +** Debug output functions for Skein hashing. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ +#include <stdio.h> + +#ifdef SKEIN_DEBUG /* only instantiate this code if SKEIN_DEBUG is on */ +#include "skein.h" + +static const char INDENT[] = " "; /* how much to indent on new line */ + +uint_t skein_DebugFlag = 0; /* off by default. Must be set externally */ + +static void Show64_step(size_t cnt,const u64b_t *X,size_t step) + { + size_t i,j; + for (i=j=0;i < cnt;i++,j+=step) + { + if (i % 4 == 0) printf(INDENT); + printf(" %08X.%08X ",(uint_32t)(X[j] >> 32),(uint_32t)X[j]); + if (i % 4 == 3 || i==cnt-1) printf("\n"); + fflush(stdout); + } + } + +#define Show64(cnt,X) Show64_step(cnt,X,1) + +static void Show64_flag(size_t cnt,const u64b_t *X) + { + size_t xptr = (size_t) X; + size_t step = (xptr & 1) ? 
2 : 1; + if (step != 1) + { + X = (const u64b_t *) (xptr & ~1); + } + Show64_step(cnt,X,step); + } + +static void Show08(size_t cnt,const u08b_t *b) + { + size_t i; + for (i=0;i < cnt;i++) + { + if (i %16 == 0) printf(INDENT); + else if (i % 4 == 0) printf(" "); + printf(" %02X",b[i]); + if (i %16 == 15 || i==cnt-1) printf("\n"); + fflush(stdout); + } + } + +static const char *AlgoHeader(uint_t bits) + { + if (skein_DebugFlag & SKEIN_DEBUG_THREEFISH) + switch (bits) + { + case 256: return ":Threefish-256: "; + case 512: return ":Threefish-512: "; + case 1024: return ":Threefish-1024:"; + } + else + switch (bits) + { + case 256: return ":Skein-256: "; + case 512: return ":Skein-512: "; + case 1024: return ":Skein-1024:"; + } + return NULL; + } + +void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr) + { + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag & SKEIN_DEBUG_FINAL) + { + printf("\n%s Final output=\n",AlgoHeader(bits)); + Show08(cnt,outPtr); + printf(" ++++++++++\n"); + fflush(stdout); + } + } + +/* show state after a round (or "pseudo-round") */ +void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X) + { + static uint_t injectNum=0; /* not multi-thread safe! */ + + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag) + { + if (r >= SKEIN_RND_SPECIAL) + { /* a key injection (or feedforward) point */ + injectNum = (r == SKEIN_RND_KEY_INITIAL) ? 
0 : injectNum+1; + if ( skein_DebugFlag & SKEIN_DEBUG_INJECT || + ((skein_DebugFlag & SKEIN_DEBUG_FINAL) && r == SKEIN_RND_FEED_FWD)) + { + printf("\n%s",AlgoHeader(bits)); + switch (r) + { + case SKEIN_RND_KEY_INITIAL: + printf(" [state after initial key injection]"); + break; + case SKEIN_RND_KEY_INJECT: + printf(" [state after key injection #%02d]",injectNum); + break; + case SKEIN_RND_FEED_FWD: + printf(" [state after plaintext feedforward]"); + injectNum = 0; + break; + } + printf("=\n"); + Show64(bits/64,X); + if (r== SKEIN_RND_FEED_FWD) + printf(" ----------\n"); + } + } + else if (skein_DebugFlag & SKEIN_DEBUG_ROUNDS) + { + uint_t j; + u64b_t p[SKEIN_MAX_STATE_WORDS]; + const u08b_t *perm; + const static u08b_t PERM_256 [4][ 4] = { { 0,1,2,3 }, { 0,3,2,1 }, { 0,1,2,3 }, { 0,3,2,1 } }; + const static u08b_t PERM_512 [4][ 8] = { { 0,1,2,3,4,5,6,7 }, + { 2,1,4,7,6,5,0,3 }, + { 4,1,6,3,0,5,2,7 }, + { 6,1,0,7,2,5,4,3 } + }; + const static u08b_t PERM_1024[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }, + { 0, 9, 2,13, 6,11, 4,15,10, 7,12, 3,14, 5, 8, 1 }, + { 0, 7, 2, 5, 4, 3, 6, 1,12,15,14,13, 8,11,10, 9 }, + { 0,15, 2,11, 6,13, 4, 9,14, 1, 8, 5,10, 3,12, 7 } + }; + + if ((skein_DebugFlag & SKEIN_DEBUG_PERMUTE) && (r & 3)) + { + printf("\n%s [state after round %2d (permuted)]=\n",AlgoHeader(bits),(int)r); + switch (bits) + { + case 256: perm = PERM_256 [r&3]; break; + case 512: perm = PERM_512 [r&3]; break; + default: perm = PERM_1024[r&3]; break; + } + for (j=0;j<bits/64;j++) + p[j] = X[perm[j]]; + Show64(bits/64,p); + } + else + { + printf("\n%s [state after round %2d]=\n",AlgoHeader(bits),(int)r); + Show64(bits/64,X); + } + } + } + } + +/* show state after a round (or "pseudo-round"), given a list of pointers */ +void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[]) + { + uint_t i; + u64b_t X[SKEIN_MAX_STATE_WORDS]; + + for (i=0;i<bits/64;i++) /* copy over the words */ + X[i] = X_ptr[i][0]; + Skein_Show_Round(bits,h,r,X); + } + + +/* show the state at the start of a block */ +void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr, + const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr) + { + uint_t n; + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag) + { + if (skein_DebugFlag & SKEIN_DEBUG_HDR) + { + printf("\n%s Block: outBits=%4d. T0=%06X.",AlgoHeader(bits),(uint_t) h->hashBitLen,(uint_t)h->T[0]); + printf(" Type="); + n = (uint_t) ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) >> SKEIN_T1_POS_BLK_TYPE); + switch (n) + { + case SKEIN_BLK_TYPE_KEY: printf("KEY. "); break; + case SKEIN_BLK_TYPE_CFG: printf("CFG. 
"); break; + case SKEIN_BLK_TYPE_PERS: printf("PERS."); break; + case SKEIN_BLK_TYPE_PK : printf("PK. "); break; + case SKEIN_BLK_TYPE_KDF: printf("KDF. "); break; + case SKEIN_BLK_TYPE_MSG: printf("MSG. "); break; + case SKEIN_BLK_TYPE_OUT: printf("OUT. "); break; + default: printf("0x%02X.",n); break; + } + printf(" Flags="); + printf((h->T[1] & SKEIN_T1_FLAG_FIRST) ? " First":" "); + printf((h->T[1] & SKEIN_T1_FLAG_FINAL) ? " Final":" "); + printf((h->T[1] & SKEIN_T1_FLAG_BIT_PAD) ? " Pad" :" "); + n = (uint_t) ((h->T[1] & SKEIN_T1_TREE_LVL_MASK) >> SKEIN_T1_POS_TREE_LVL); + if (n) + printf(" TreeLevel = %02X",n); + printf("\n"); + fflush(stdout); + } + if (skein_DebugFlag & SKEIN_DEBUG_TWEAK) + { + printf(" Tweak:\n"); + Show64(2,h->T); + } + if (skein_DebugFlag & SKEIN_DEBUG_STATE) + { + printf(" %s words:\n",(skein_DebugFlag & SKEIN_DEBUG_THREEFISH)?"Key":"State"); + Show64(bits/64,X); + } + if (skein_DebugFlag & SKEIN_DEBUG_KEYSCHED) + { + printf(" Tweak schedule:\n"); + Show64_flag(3,tsPtr); + printf(" Key schedule:\n"); + Show64_flag((bits/64)+1,ksPtr); + } + if (skein_DebugFlag & SKEIN_DEBUG_INPUT_64) + { + printf(" Input block (words):\n"); + Show64(bits/64,wPtr); + } + if (skein_DebugFlag & SKEIN_DEBUG_INPUT_08) + { + printf(" Input block (bytes):\n"); + Show08(bits/8,blkPtr); + } + } + } + +void Skein_Show_Key(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes) + { + if (keyBytes) + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag & SKEIN_DEBUG_KEY) + { + printf("\n%s MAC key = %4u bytes\n",AlgoHeader(bits),(unsigned) keyBytes); + Show08(keyBytes,key); + } + } +#endif diff --git a/Optimized_64bit/skein_debug.h b/Optimized_64bit/skein_debug.h new file mode 100644 index 000000000000..7775c0165c0a --- /dev/null +++ b/Optimized_64bit/skein_debug.h @@ -0,0 +1,48 @@ +#ifndef _SKEIN_DEBUG_H_ +#define _SKEIN_DEBUG_H_ 
+/*********************************************************************** +** +** Interface definitions for Skein hashing debug output. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#ifdef SKEIN_DEBUG +/* callout functions used inside Skein code */ +void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr, + const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr); +void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X); +void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[]); +void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr); +void Skein_Show_Key (uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes); + +extern uint_t skein_DebugFlag; /* flags to control debug output (0 --> none) */ + +#define SKEIN_RND_SPECIAL (1000u) +#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u) +#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u) +#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u) + +/* flag bits: skein_DebugFlag */ +#define SKEIN_DEBUG_KEY (1u << 1) /* show MAC key */ +#define SKEIN_DEBUG_CONFIG (1u << 2) /* show config block processing */ +#define SKEIN_DEBUG_STATE (1u << 3) /* show input state during Show_Block() */ +#define SKEIN_DEBUG_TWEAK (1u << 4) /* show input tweak during Show_Block() */ +#define SKEIN_DEBUG_KEYSCHED (1u << 5) /* show expanded key schedule */ +#define SKEIN_DEBUG_INPUT_64 (1u << 6) /* show input block as 64-bit words */ +#define SKEIN_DEBUG_INPUT_08 (1u << 7) /* show input block as 8-bit bytes */ +#define SKEIN_DEBUG_INJECT (1u << 8) /* show state after key injection & feedforward points */ +#define SKEIN_DEBUG_ROUNDS (1u << 9) /* show state after all rounds */ +#define SKEIN_DEBUG_FINAL (1u <<10) /* show final output 
of Skein */ +#define SKEIN_DEBUG_HDR (1u <<11) /* show block header */ +#define SKEIN_DEBUG_THREEFISH (1u <<12) /* use Threefish name instead of Skein */ +#define SKEIN_DEBUG_PERMUTE (1u <<13) /* use word permutations */ +#define SKEIN_DEBUG_ALL ((~0u) & ~(SKEIN_DEBUG_THREEFISH | SKEIN_DEBUG_PERMUTE)) +#define THREEFISH_DEBUG_ALL (SKEIN_DEBUG_ALL | SKEIN_DEBUG_THREEFISH) + +#endif /* SKEIN_DEBUG */ + +#endif /* _SKEIN_DEBUG_H_ */ diff --git a/Optimized_64bit/skein_iv.h b/Optimized_64bit/skein_iv.h new file mode 100644 index 000000000000..a8f54a41d345 --- /dev/null +++ b/Optimized_64bit/skein_iv.h @@ -0,0 +1,199 @@ +#ifndef _SKEIN_IV_H_ +#define _SKEIN_IV_H_ + +#include "skein.h" /* get Skein macros and types */ + +/* +***************** Pre-computed Skein IVs ******************* +** +** NOTE: these values are not "magic" constants, but +** are generated using the Threefish block function. +** They are pre-computed here only for speed; i.e., to +** avoid the need for a Threefish call during Init(). +** +** The IV for any fixed hash length may be pre-computed. +** Only the most common values are included here. +** +************************************************************ +**/ + +#define MK_64 SKEIN_MK_64 + +/* blkSize = 256 bits. hashSize = 128 bits */ +const u64b_t SKEIN_256_IV_128[] = + { + MK_64(0xE1111906,0x964D7260), + MK_64(0x883DAAA7,0x7C8D811C), + MK_64(0x10080DF4,0x91960F7A), + MK_64(0xCCF7DDE5,0xB45BC1C2) + }; + +/* blkSize = 256 bits. hashSize = 160 bits */ +const u64b_t SKEIN_256_IV_160[] = + { + MK_64(0x14202314,0x72825E98), + MK_64(0x2AC4E9A2,0x5A77E590), + MK_64(0xD47A5856,0x8838D63E), + MK_64(0x2DD2E496,0x8586AB7D) + }; + +/* blkSize = 256 bits. hashSize = 224 bits */ +const u64b_t SKEIN_256_IV_224[] = + { + MK_64(0xC6098A8C,0x9AE5EA0B), + MK_64(0x876D5686,0x08C5191C), + MK_64(0x99CB88D7,0xD7F53884), + MK_64(0x384BDDB1,0xAEDDB5DE) + }; + +/* blkSize = 256 bits. 
hashSize = 256 bits */ +const u64b_t SKEIN_256_IV_256[] = + { + MK_64(0xFC9DA860,0xD048B449), + MK_64(0x2FCA6647,0x9FA7D833), + MK_64(0xB33BC389,0x6656840F), + MK_64(0x6A54E920,0xFDE8DA69) + }; + +/* blkSize = 512 bits. hashSize = 128 bits */ +const u64b_t SKEIN_512_IV_128[] = + { + MK_64(0xA8BC7BF3,0x6FBF9F52), + MK_64(0x1E9872CE,0xBD1AF0AA), + MK_64(0x309B1790,0xB32190D3), + MK_64(0xBCFBB854,0x3F94805C), + MK_64(0x0DA61BCD,0x6E31B11B), + MK_64(0x1A18EBEA,0xD46A32E3), + MK_64(0xA2CC5B18,0xCE84AA82), + MK_64(0x6982AB28,0x9D46982D) + }; + +/* blkSize = 512 bits. hashSize = 160 bits */ +const u64b_t SKEIN_512_IV_160[] = + { + MK_64(0x28B81A2A,0xE013BD91), + MK_64(0xC2F11668,0xB5BDF78F), + MK_64(0x1760D8F3,0xF6A56F12), + MK_64(0x4FB74758,0x8239904F), + MK_64(0x21EDE07F,0x7EAF5056), + MK_64(0xD908922E,0x63ED70B8), + MK_64(0xB8EC76FF,0xECCB52FA), + MK_64(0x01A47BB8,0xA3F27A6E) + }; + +/* blkSize = 512 bits. hashSize = 224 bits */ +const u64b_t SKEIN_512_IV_224[] = + { + MK_64(0xCCD06162,0x48677224), + MK_64(0xCBA65CF3,0xA92339EF), + MK_64(0x8CCD69D6,0x52FF4B64), + MK_64(0x398AED7B,0x3AB890B4), + MK_64(0x0F59D1B1,0x457D2BD0), + MK_64(0x6776FE65,0x75D4EB3D), + MK_64(0x99FBC70E,0x997413E9), + MK_64(0x9E2CFCCF,0xE1C41EF7) + }; + +/* blkSize = 512 bits. hashSize = 256 bits */ +const u64b_t SKEIN_512_IV_256[] = + { + MK_64(0xCCD044A1,0x2FDB3E13), + MK_64(0xE8359030,0x1A79A9EB), + MK_64(0x55AEA061,0x4F816E6F), + MK_64(0x2A2767A4,0xAE9B94DB), + MK_64(0xEC06025E,0x74DD7683), + MK_64(0xE7A436CD,0xC4746251), + MK_64(0xC36FBAF9,0x393AD185), + MK_64(0x3EEDBA18,0x33EDFC13) + }; + +/* blkSize = 512 bits. hashSize = 384 bits */ +const u64b_t SKEIN_512_IV_384[] = + { + MK_64(0xA3F6C6BF,0x3A75EF5F), + MK_64(0xB0FEF9CC,0xFD84FAA4), + MK_64(0x9D77DD66,0x3D770CFE), + MK_64(0xD798CBF3,0xB468FDDA), + MK_64(0x1BC4A666,0x8A0E4465), + MK_64(0x7ED7D434,0xE5807407), + MK_64(0x548FC1AC,0xD4EC44D6), + MK_64(0x266E1754,0x6AA18FF8) + }; + +/* blkSize = 512 bits. 
hashSize = 512 bits */ +const u64b_t SKEIN_512_IV_512[] = + { + MK_64(0x4903ADFF,0x749C51CE), + MK_64(0x0D95DE39,0x9746DF03), + MK_64(0x8FD19341,0x27C79BCE), + MK_64(0x9A255629,0xFF352CB1), + MK_64(0x5DB62599,0xDF6CA7B0), + MK_64(0xEABE394C,0xA9D5C3F4), + MK_64(0x991112C7,0x1A75B523), + MK_64(0xAE18A40B,0x660FCC33) + }; + +/* blkSize = 1024 bits. hashSize = 384 bits */ +const u64b_t SKEIN1024_IV_384[] = + { + MK_64(0x5102B6B8,0xC1894A35), + MK_64(0xFEEBC9E3,0xFE8AF11A), + MK_64(0x0C807F06,0xE32BED71), + MK_64(0x60C13A52,0xB41A91F6), + MK_64(0x9716D35D,0xD4917C38), + MK_64(0xE780DF12,0x6FD31D3A), + MK_64(0x797846B6,0xC898303A), + MK_64(0xB172C2A8,0xB3572A3B), + MK_64(0xC9BC8203,0xA6104A6C), + MK_64(0x65909338,0xD75624F4), + MK_64(0x94BCC568,0x4B3F81A0), + MK_64(0x3EBBF51E,0x10ECFD46), + MK_64(0x2DF50F0B,0xEEB08542), + MK_64(0x3B5A6530,0x0DBC6516), + MK_64(0x484B9CD2,0x167BBCE1), + MK_64(0x2D136947,0xD4CBAFEA) + }; + +/* blkSize = 1024 bits. hashSize = 512 bits */ +const u64b_t SKEIN1024_IV_512[] = + { + MK_64(0xCAEC0E5D,0x7C1B1B18), + MK_64(0xA01B0E04,0x5F03E802), + MK_64(0x33840451,0xED912885), + MK_64(0x374AFB04,0xEAEC2E1C), + MK_64(0xDF25A0E2,0x813581F7), + MK_64(0xE4004093,0x8B12F9D2), + MK_64(0xA662D539,0xC2ED39B6), + MK_64(0xFA8B85CF,0x45D8C75A), + MK_64(0x8316ED8E,0x29EDE796), + MK_64(0x053289C0,0x2E9F91B8), + MK_64(0xC3F8EF1D,0x6D518B73), + MK_64(0xBDCEC3C4,0xD5EF332E), + MK_64(0x549A7E52,0x22974487), + MK_64(0x67070872,0x5B749816), + MK_64(0xB9CD28FB,0xF0581BD1), + MK_64(0x0E2940B8,0x15804974) + }; + +/* blkSize = 1024 bits. 
hashSize = 1024 bits */ +const u64b_t SKEIN1024_IV_1024[] = + { + MK_64(0xD593DA07,0x41E72355), + MK_64(0x15B5E511,0xAC73E00C), + MK_64(0x5180E5AE,0xBAF2C4F0), + MK_64(0x03BD41D3,0xFCBCAFAF), + MK_64(0x1CAEC6FD,0x1983A898), + MK_64(0x6E510B8B,0xCDD0589F), + MK_64(0x77E2BDFD,0xC6394ADA), + MK_64(0xC11E1DB5,0x24DCB0A3), + MK_64(0xD6D14AF9,0xC6329AB5), + MK_64(0x6A9B0BFC,0x6EB67E0D), + MK_64(0x9243C60D,0xCCFF1332), + MK_64(0x1A1F1DDE,0x743F02D4), + MK_64(0x0996753C,0x10ED0BB8), + MK_64(0x6572DD22,0xF2B4969A), + MK_64(0x61FD3062,0xD00A579A), + MK_64(0x1DE0536E,0x8682E539) + }; + +#endif /* _SKEIN_IV_H_ */ diff --git a/Optimized_64bit/skein_port.h b/Optimized_64bit/skein_port.h new file mode 100644 index 000000000000..653302de7467 --- /dev/null +++ b/Optimized_64bit/skein_port.h @@ -0,0 +1,124 @@ +#ifndef _SKEIN_PORT_H_ +#define _SKEIN_PORT_H_ +/******************************************************************* +** +** Platform-specific definitions for Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +** Many thanks to Brian Gladman for his portable header files. +** +** To port Skein to an "unsupported" platform, change the definitions +** in this file appropriately. +** +********************************************************************/ + +#include "brg_types.h" /* get integer type definitions */ + +typedef unsigned int uint_t; /* native unsigned integer */ +typedef uint_8t u08b_t; /* 8-bit unsigned integer */ +typedef uint_64t u64b_t; /* 64-bit unsigned integer */ + +#ifndef RotL_64 +#define RotL_64(x,N) (((x) << (N)) | ((x) >> (64-(N)))) +#endif + +/* + * Skein is "natively" little-endian (unlike SHA-xxx), for optimal + * performance on x86 CPUs. 
The Skein code requires the following + * definitions for dealing with endianness: + * + * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian + * Skein_Put64_LSB_First + * Skein_Get64_LSB_First + * Skein_Swap64 + * + * If SKEIN_NEED_SWAP is defined at compile time, it is used here + * along with the portable versions of Put64/Get64/Swap64, which + * are slow in general. + * + * Otherwise, an "auto-detect" of endianness is attempted below. + * If the default handling doesn't work well, the user may insert + * platform-specific code instead (e.g., for big-endian CPUs). + * + */ +#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */ + +#include "brg_endian.h" /* get endianness selection */ +#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN + /* here for big-endian CPUs */ +#define SKEIN_NEED_SWAP (1) +#elif PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN + /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ +#define SKEIN_NEED_SWAP (0) +#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */ +#define Skein_Put64_LSB_First(dst08,src64,bCnt) memcpy(dst08,src64,bCnt) +#define Skein_Get64_LSB_First(dst64,src08,wCnt) memcpy(dst64,src08,8*(wCnt)) +#endif +#else +#error "Skein needs endianness setting!" +#endif + +#endif /* ifndef SKEIN_NEED_SWAP */ + +/* + ****************************************************************** + * Provide any definitions still needed. 
+ ****************************************************************** + */ +#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */ +#if SKEIN_NEED_SWAP +#define Skein_Swap64(w64) \ + ( (( ((u64b_t)(w64)) & 0xFF) << 56) | \ + (((((u64b_t)(w64)) >> 8) & 0xFF) << 48) | \ + (((((u64b_t)(w64)) >>16) & 0xFF) << 40) | \ + (((((u64b_t)(w64)) >>24) & 0xFF) << 32) | \ + (((((u64b_t)(w64)) >>32) & 0xFF) << 24) | \ + (((((u64b_t)(w64)) >>40) & 0xFF) << 16) | \ + (((((u64b_t)(w64)) >>48) & 0xFF) << 8) | \ + (((((u64b_t)(w64)) >>56) & 0xFF) ) ) +#else +#define Skein_Swap64(w64) (w64) +#endif +#endif /* ifndef Skein_Swap64 */ + + +#ifndef Skein_Put64_LSB_First +void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt) +#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */ + { /* this version is fully portable (big-endian or little-endian), but slow */ + size_t n; + + for (n=0;n<bCnt;n++) /* output byte n is byte (n&7) of word (n>>3), least-significant byte first */ + dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7))); + } +#else + ; /* output only the function prototype */ +#endif +#endif /* ifndef Skein_Put64_LSB_First */ + + +#ifndef Skein_Get64_LSB_First +void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt) +#ifdef SKEIN_PORT_CODE /* instantiate the function code here? 
*/ + { /* this version is fully portable (big-endian or little-endian), but slow */ + size_t n; + + for (n=0;n<8*wCnt;n+=8) + dst[n/8] = (((u64b_t) src[n ]) ) + + (((u64b_t) src[n+1]) << 8) + + (((u64b_t) src[n+2]) << 16) + + (((u64b_t) src[n+3]) << 24) + + (((u64b_t) src[n+4]) << 32) + + (((u64b_t) src[n+5]) << 40) + + (((u64b_t) src[n+6]) << 48) + + (((u64b_t) src[n+7]) << 56) ; + } +#else + ; /* output only the function prototype */ +#endif +#endif /* ifndef Skein_Get64_LSB_First */ + +#endif /* ifndef _SKEIN_PORT_H_ */ diff --git a/README/readme.txt b/README/readme.txt new file mode 100644 index 000000000000..c827482e8098 --- /dev/null +++ b/README/readme.txt @@ -0,0 +1,166 @@ +Below is a list of Skein files included on the NIST submission CD, along +with a very brief description of each file. In both the reference and +optimized directories, all C files should be compiled to generate a +SHA3 NIST API "library" for Skein. + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The following files are identical and common between the reference and optimized +versions of the code: + +File Name Description +-------------------------------------------------------------------------------- +brg_endian.h Brian Gladman's header file to auto-detect CPU endianness + (with a few extensions for handling various platforms/compilers) + + +brg_types.h Brian Gladman's header file to auto-detect integer types + (with a few extensions for handling various platforms/compilers) + + +SHA3api_ref.h API definitions for SHA3 API, implemented in SHA3api_ref.c + + +SHA3api_ref.c "Wrapper" code that implements the NIST SHA3 API on top of the + Skein API. 
+ + +skein_debug.h Header file with routines used internally by Skein routines for + generating debug i/o (e.g., round-by-round intermediate values) + If SKEIN_DEBUG is not defined at compile time, these interface + declarations instead become "dummy" macros so that there is + no performance impact. + + +skein_debug.c Debug i/o routines called by Skein functions. + + +skein.h Function prototypes, data structures, and constant definitions + for Skein. The Skein API is more general than the NIST API + (e.g., MAC functions). + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The following files are different for the reference and optimized versions +of the code. Note that the source files in Optimized_32bit and Optimized_64bit +directories are identical. + +File Name Description +-------------------------------------------------------------------------------- +skein_port.h Definitions that might need to be changed to port Skein to + a different CPU platform (e.g., big-endian). The Skein code + should run on most CPU platforms, but the macros/functions here + may be helpful in making the code run more efficiently. + +skein.c The main Skein interface functions: Init, Update, and Final, for + all three Skein block sizes. Additionally, the InitExt() function + allows for MAC and other extended functionality. + +skein_block.c The Skein block processing function, based on the Threefish block + cipher. This module contains the most performance-sensitive code + and can be replaced by the assembly modules for slight speedups + on some platforms. The functions here are only for internal use + inside "skein.c" and are not intended for external APIs. + +skein_iv.h Initial values for various Skein hash functions. Note that these + values are NOT "magic constants", as they are computed using + the initial Skein "configuration" block. 
These values are used + only by the optimized code, in order to speed up the hash + computations. + +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The following files are included in the Additional_Implementations directory: + +File Name Description +-------------------------------------------------------------------------------- +skein_test.c The Skein test module, used to measure performance and generate + KAT vectors for testing. This module should be compiled together + with the Skein source files (i.e., from the Reference or the + Optimized directories) to generate an executable, skein_test.exe. + This program is used internally to test/validate/compare different + implementations (e.g., Reference, Optimized, Assembly). + +skein_block_x64.asm This is the 64-bit assembly language version of skein_block.c. + It may be used to replace that file in the Optimized_64bit + directory to improve performance on 64-bit Intel/AMD systems. + It should be assembled with ml64.exe. + +skein_block_x86.asm This is the 32-bit assembly language version of skein_block.c. + It may be used to replace that file in the Optimized_32bit + directory to improve performance on 32-bit Intel/AMD systems. + It should be assembled with ml.exe. + +skein_rot_search.c This is the program that searches for the Threefish rotation + constants. It has many different command-line switches, but by + default it generates the constants used in the Skein paper. + This file is a stand-alone C file. To run it, simply re-direct + the output to a test file: "skein_rot_search > srs_log.txt". + Note that it takes nearly 3 DAYS on a Core 2 Duo to complete + program execution in this case. 
Alternately, to generate individual + files, run the following command lines: + skein_rot_search -b256 > srs_256.txt + skein_rot_search -b512 > srs_512.txt + skein_rot_search -b1024 > srs_1024.txt + +srs_256.txt These three files contain the results of running skein_rot_search.exe +srs_512.txt for the three different Skein block sizes. They are rather large. +srs_1024.txt At the end of each file, the "finalists" are re-graded with different + numbers of random samples. + +Atmel_AVR.c This file was used to compile on the Atmel AVR 8-bit CPU. + It includes the optimized versions of skein.c and skein_block.c + with compile-time settings to only implement one at a time. + This was compiled with the free AVR tool set from Atmel + and simulated to give the 8-bit C performance numbers. + +skein_8bit_estimates.xls + This file is a spreadsheet used to generate the estimates for + code size and speed of assembly versions of Skein on the Atmel + 8-bit CPU family. Note that this is MUCH faster than the C + versions, since it uses static variables, with optimized loading + and rotations. No attempt is made here to minimize code size by + sharing code using calls, although the code size could be shrunk + significantly using calls, at some cost in performance. + +skein_perf_core2.txt + This file contains code size and performance data running on + an Intel Core 2 Duo CPU under Windows Vista 64-bit, using the + Microsoft and other compilers and assemblers. It includes + results for both 32-bit and 64-bit code. + +skein_MSC_v9_perf.txt + This file contains a subset of the skein_perf_core2.txt file, + including only results from the MSVC 2008 compiler, with message + sizes that are powers of 10. 
+ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The following files are included in the KAT_MCT directory, in addition to the +KAT/MCT files required by NIST: + +genKAT.c NIST-supplied source file for generating KAT_MCT vectors. + This module should be compiled together with the Skein source + files (i.e., from the Reference or the Optimized directories) + to generate an executable genKAT.exe, which can generate the + KAT_MCT vectors. + [FWIW, compiling this source file under gcc gives several nasty compiler warnings!] + +skein_golden_kat.txt + The "golden" KAT file generated using "skein_test.exe -k". This + file tries to cover various block sizes, message sizes, and output + sizes, as well as MAC modes. It is used for testing compliance of + a Skein implementation, using skein_test.c + +skein_golden_kat_internals.txt + The KAT file generated using "skein_test.exe -k -dc". It covers + the same test as "skein_golden_kat.txt" , but also prints out + intermediate (round-by-round) values. The file is very large, but + it is quite useful in debugging when porting Skein to a new + CPU platform and/or programming language. + +skein_golden_kat_short.txt + This is a shorter version (subset) of skein_golden_kat.txt + +skein_golden_kat_short_internals.txt + This is a shorter version (subset) of skein_golden_kat_internals.txt diff --git a/Reference_Implementation/SHA3api_ref.c b/Reference_Implementation/SHA3api_ref.c new file mode 100644 index 000000000000..6861a3e4bffb --- /dev/null +++ b/Reference_Implementation/SHA3api_ref.c @@ -0,0 +1,115 @@ +/*********************************************************************** +** +** Implementation of the AHS API using the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +************************************************************************/ + +#include <string.h> /* get the memcpy/memset functions */ +#include "skein.h" /* get the Skein API definitions */ +#include "SHA3api_ref.h"/* get the AHS API definitions */ + +/******************************************************************/ +/* AHS API code */ +/******************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* select the context size (from hashbitlen and the *_NIST_MAX_HASHBITS limits) and init the context */ +HashReturn Init(hashState *state, int hashbitlen) + { +#if SKEIN_256_NIST_MAX_HASHBITS /* a nonzero limit enables the Skein-256 choice */ + if (hashbitlen <= SKEIN_256_NIST_MAX_HASHBITS) + { + Skein_Assert(hashbitlen > 0,BAD_HASHLEN); + state->statebits = 64*SKEIN_256_STATE_WORDS; + return Skein_256_Init(&state->u.ctx_256,(size_t) hashbitlen); + } +#endif + if (hashbitlen <= SKEIN_512_NIST_MAX_HASHBITS) + { + state->statebits = 64*SKEIN_512_STATE_WORDS; + return Skein_512_Init(&state->u.ctx_512,(size_t) hashbitlen); + } + else + { + state->statebits = 64*SKEIN1024_STATE_WORDS; + return Skein1024_Init(&state->u.ctx1024,(size_t) hashbitlen); + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process data to be hashed */ +HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen) + { + /* only the final Update() call is allowed to do partial bytes, else assert an error */ + Skein_Assert((state->u.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || databitlen == 0, FAIL); + + Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); + if ((databitlen & 7) == 0) /* partial bytes? 
*/ + { + switch ((state->statebits >> 8) & 3) + { + case 2: return Skein_512_Update(&state->u.ctx_512,data,databitlen >> 3); + case 1: return Skein_256_Update(&state->u.ctx_256,data,databitlen >> 3); + case 0: return Skein1024_Update(&state->u.ctx1024,data,databitlen >> 3); + default: return FAIL; + } + } + else + { /* handle partial final byte */ + size_t bCnt = (databitlen >> 3) + 1; /* number of bytes to handle (nonzero here!) */ + u08b_t b,mask; + + mask = (u08b_t) (1u << (7 - (databitlen & 7))); /* partial byte bit mask */ + b = (u08b_t) ((data[bCnt-1] & (0-mask)) | mask); /* apply bit padding on final byte */ + + switch ((state->statebits >> 8) & 3) + { + case 2: Skein_512_Update(&state->u.ctx_512,data,bCnt-1); /* process all but the final byte */ + Skein_512_Update(&state->u.ctx_512,&b , 1 ); /* process the (masked) partial byte */ + break; + case 1: Skein_256_Update(&state->u.ctx_256,data,bCnt-1); /* process all but the final byte */ + Skein_256_Update(&state->u.ctx_256,&b , 1 ); /* process the (masked) partial byte */ + break; + case 0: Skein1024_Update(&state->u.ctx1024,data,bCnt-1); /* process all but the final byte */ + Skein1024_Update(&state->u.ctx1024,&b , 1 ); /* process the (masked) partial byte */ + break; + default: return FAIL; + } + Skein_Set_Bit_Pad_Flag(state->u.h); /* set tweak flag for the final call */ + + return SUCCESS; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize hash computation and output the result (hashbitlen bits) */ +HashReturn Final(hashState *state, BitSequence *hashval) + { + Skein_Assert(state->statebits % 256 == 0 && (state->statebits-256) < 1024,FAIL); + switch ((state->statebits >> 8) & 3) + { + case 2: return Skein_512_Final(&state->u.ctx_512,hashval); + case 1: return Skein_256_Final(&state->u.ctx_256,hashval); + case 0: return Skein1024_Final(&state->u.ctx1024,hashval); + default: return FAIL; + } + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ 
+/* all-in-one hash function */ +HashReturn Hash(int hashbitlen, const BitSequence *data, /* all-in-one call */ + DataLength databitlen,BitSequence *hashval) + { + hashState state; + HashReturn r = Init(&state,hashbitlen); + if (r == SUCCESS) + { /* these calls do not fail when called properly */ + r = Update(&state,data,databitlen); + Final(&state,hashval); + } + return r; + } diff --git a/Reference_Implementation/SHA3api_ref.h b/Reference_Implementation/SHA3api_ref.h new file mode 100644 index 000000000000..6d62304e59b7 --- /dev/null +++ b/Reference_Implementation/SHA3api_ref.h @@ -0,0 +1,66 @@ +#ifndef _AHS_API_H_ +#define _AHS_API_H_ + +/*********************************************************************** +** +** Interface declarations of the AHS API using the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#include "skein.h" + +typedef enum + { + SUCCESS = SKEIN_SUCCESS, + FAIL = SKEIN_FAIL, + BAD_HASHLEN = SKEIN_BAD_HASHLEN + } + HashReturn; + +typedef size_t DataLength; /* bit count type */ +typedef u08b_t BitSequence; /* bit stream type */ + +typedef struct + { + uint_t statebits; /* 256, 512, or 1024 */ + union + { + Skein_Ctxt_Hdr_t h; /* common header "overlay" */ + Skein_256_Ctxt_t ctx_256; + Skein_512_Ctxt_t ctx_512; + Skein1024_Ctxt_t ctx1024; + } u; + } + hashState; + +/* "incremental" hashing API */ +HashReturn Init (hashState *state, int hashbitlen); +HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen); +HashReturn Final (hashState *state, BitSequence *hashval); + +/* "all-in-one" call */ +HashReturn Hash (int hashbitlen, const BitSequence *data, + DataLength databitlen, BitSequence *hashval); + + +/* +** Re-define the compile-time constants below to change the selection +** of the Skein state size in the Init() function in SHA3api_ref.c. 
+** +** That is, the NIST API does not allow for explicit selection of the +** Skein block size, so it must be done implicitly in the Init() function. +** The selection is controlled by these constants. +*/ +#ifndef SKEIN_256_NIST_MAX_HASHBITS +#define SKEIN_256_NIST_MAX_HASHBITS (0) +#endif + +#ifndef SKEIN_512_NIST_MAX_HASHBITS +#define SKEIN_512_NIST_MAX_HASHBITS (512) +#endif + +#endif /* ifdef _AHS_API_H_ */ diff --git a/Reference_Implementation/brg_endian.h b/Reference_Implementation/brg_endian.h new file mode 100644 index 000000000000..978eb33f08cf --- /dev/null +++ b/Reference_Implementation/brg_endian.h @@ -0,0 +1,148 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue 20/10/2006 +*/ + +#ifndef BRG_ENDIAN_H +#define BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include <sys/endian.h> +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include <machine/endian.h> +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined(AVR) +# include <endian.h> +# if !defined( __BEOS__ ) +# include <byteswap.h> +# endif +# endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN ) && defined( 
__LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && 
__BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) || defined( AVR ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* 
**** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 126 or 128 in brg_endian.h to set the platform byte order +#endif +#endif + +/* special handler for IA64, which may be either endianness (?) */ +/* here we assume little-endian, but this may need to be changed */ +#if defined(__ia64) || defined(__ia64__) || defined(_M_IA64) +# define PLATFORM_MUST_ALIGN (1) +#ifndef PLATFORM_BYTE_ORDER +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif +#endif + +#ifndef PLATFORM_MUST_ALIGN +# define PLATFORM_MUST_ALIGN (0) +#endif + +#endif /* ifndef BRG_ENDIAN_H */ diff --git a/Reference_Implementation/brg_types.h b/Reference_Implementation/brg_types.h new file mode 100644 index 000000000000..d6d6cdab9fbf --- /dev/null +++ b/Reference_Implementation/brg_types.h @@ -0,0 +1,188 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. 
+ + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 09/09/2006 + + The unsigned integer types defined here are of the form uint_<nn>t where + <nn> is the length of the type; for example, the unsigned 32-bit type is + 'uint_32t'. These are NOT the same as the 'C99 integer types' that are + defined in the inttypes.h and stdint.h headers since attempts to use these + types have shown that support for them is still highly variable. However, + since the latter are of the form uint<nn>_t, a regular expression search + and replace (in VC++ search on 'uint_{:z}t' and replace with 'uint\1_t') + can be used to convert the types used here to the C99 standard types. +*/ + +#ifndef BRG_TYPES_H +#define BRG_TYPES_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <limits.h> + +#ifndef BRG_UI8 +# define BRG_UI8 +# if UCHAR_MAX == 255u + typedef unsigned char uint_8t; +# else +# error Please define uint_8t as an 8-bit unsigned integer type in brg_types.h +# endif +#endif + +#ifndef BRG_UI16 +# define BRG_UI16 +# if USHRT_MAX == 65535u + typedef unsigned short uint_16t; +# else +# error Please define uint_16t as a 16-bit unsigned short type in brg_types.h +# endif +#endif + +#ifndef BRG_UI32 +# define BRG_UI32 +# if UINT_MAX == 4294967295u +# define li_32(h) 0x##h##u + typedef unsigned int uint_32t; +# elif ULONG_MAX == 4294967295u +# define li_32(h) 0x##h##ul + typedef unsigned long uint_32t; +# elif defined( _CRAY ) +# error This code needs 32-bit data types, which Cray machines do not provide +# else +# error Please define uint_32t as a 32-bit unsigned integer type in brg_types.h +# endif +#endif + +#ifndef BRG_UI64 +# if defined( __BORLANDC__ ) && !defined( __MSDOS__ ) +# define BRG_UI64 +# define li_64(h) 0x##h##ui64 + typedef unsigned 
__int64 uint_64t; +# elif 
defined( _MSC_VER ) && ( _MSC_VER < 1300 ) /* 1300 == VC++ 7.0 */ +# define BRG_UI64 +# define li_64(h) 0x##h##ui64 + typedef unsigned __int64 uint_64t; +# elif defined( __sun ) && defined(ULONG_MAX) && ULONG_MAX == 0xfffffffful +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# elif defined( UINT_MAX ) && UINT_MAX > 4294967295u +# if UINT_MAX == 18446744073709551615u +# define BRG_UI64 +# define li_64(h) 0x##h##u + typedef unsigned int uint_64t; +# endif +# elif defined( ULONG_MAX ) && ULONG_MAX > 4294967295u +# if ULONG_MAX == 18446744073709551615ul +# define BRG_UI64 +# define li_64(h) 0x##h##ul + typedef unsigned long uint_64t; +# endif +# elif defined( ULLONG_MAX ) && ULLONG_MAX > 4294967295u +# if ULLONG_MAX == 18446744073709551615ull +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# endif +# elif defined( ULONG_LONG_MAX ) && ULONG_LONG_MAX > 4294967295u +# if ULONG_LONG_MAX == 18446744073709551615ull +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# endif +# elif defined(__GNUC__) /* DLW: avoid mingw problem with -ansi */ +# define BRG_UI64 +# define li_64(h) 0x##h##ull + typedef unsigned long long uint_64t; +# endif +#endif + +#if defined( NEED_UINT_64T ) && !defined( BRG_UI64 ) +# error Please define uint_64t as an unsigned 64 bit type in brg_types.h +#endif + +#ifndef RETURN_VALUES +# define RETURN_VALUES +# if defined( DLL_EXPORT ) +# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) +# define VOID_RETURN __declspec( dllexport ) void __stdcall +# define INT_RETURN __declspec( dllexport ) int __stdcall +# elif defined( __GNUC__ ) +# define VOID_RETURN __declspec( __dllexport__ ) void +# define INT_RETURN __declspec( __dllexport__ ) int +# else +# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers +# endif +# elif defined( DLL_IMPORT ) +# if defined( _MSC_VER ) || defined ( __INTEL_COMPILER ) +# define 
VOID_RETURN __declspec( dllimport ) void __stdcall +# define INT_RETURN __declspec( dllimport ) int __stdcall +# elif defined( __GNUC__ ) +# define VOID_RETURN __declspec( __dllimport__ ) void +# define INT_RETURN __declspec( __dllimport__ ) int +# else +# error Use of the DLL is only available on the Microsoft, Intel and GCC compilers +# endif +# elif defined( __WATCOMC__ ) +# define VOID_RETURN void __cdecl +# define INT_RETURN int __cdecl +# else +# define VOID_RETURN void +# define INT_RETURN int +# endif +#endif + +/* These defines are used to declare buffers in a way that allows + faster operations on longer variables to be used. In all these + defines 'size' must be a power of 2 and >= 8 + + dec_unit_type(size,x) declares a variable 'x' of length + 'size' bits + + dec_bufr_type(size,bsize,x) declares a buffer 'x' of length 'bsize' + bytes defined as an array of variables + each of 'size' bits (bsize must be a + multiple of size / 8) + + ptr_cast(x,size) casts a pointer to a pointer to a + varaiable of length 'size' bits +*/ + +#define ui_type(size) uint_##size##t +#define dec_unit_type(size,x) typedef ui_type(size) x +#define dec_bufr_type(size,bsize,x) typedef ui_type(size) x[bsize / (size >> 3)] +#define ptr_cast(x,size) ((ui_type(size)*)(x)) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/Reference_Implementation/skein.c b/Reference_Implementation/skein.c new file mode 100644 index 000000000000..945baa7b9f78 --- /dev/null +++ b/Reference_Implementation/skein.c @@ -0,0 +1,747 @@ +/*********************************************************************** +** +** Implementation of the Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +************************************************************************/ + +#include <string.h> /* get the memcpy/memset functions */ +#include "skein.h" /* get the Skein API definitions */ + +/*****************************************************************/ +/* External function to process blkCnt (nonzero) full block(s) of data. */ +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd); + +/*****************************************************************/ +/* Portable (i.e., slow) endianness conversion functions */ +u64b_t Skein_Swap64(u64b_t w64) + { /* instantiate the function body here */ + static const u64b_t ONE = 1; /* use this to check endianness */ + + /* figure out endianness "on-the-fly" */ + if (1 == ((u08b_t *) & ONE)[0]) + return w64; /* little-endian is fast */ + else + return (( w64 & 0xFF) << 56) | /* big-endian is slow */ + (((w64 >> 8) & 0xFF) << 48) | + (((w64 >>16) & 0xFF) << 40) | + (((w64 >>24) & 0xFF) << 32) | + (((w64 >>32) & 0xFF) << 24) | + (((w64 >>40) & 0xFF) << 16) | + (((w64 >>48) & 0xFF) << 8) | + (((w64 >>56) & 0xFF) ) ; + } + +void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt) + { /* this version is fully portable (big-endian or little-endian), but slow */ + size_t n; + + for (n=0;n<bCnt;n++) /* output byte n is byte (n&7) of word (n>>3), least-significant byte first */ + dst[n] = (u08b_t) (src[n>>3] >> (8*(n&7))); + } + +void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt) + { /* this version is fully portable (big-endian or little-endian), but slow */ + size_t n; + + for (n=0;n<8*wCnt;n+=8) + dst[n/8] = (((u64b_t) src[n ]) ) + + (((u64b_t) src[n+1]) << 8) + + (((u64b_t) src[n+2]) << 16) + + (((u64b_t) src[n+3]) << 24) + + 
(((u64b_t) src[n+4]) << 32) + + (((u64b_t) src[n+5]) << 40) + + (((u64b_t) src[n+6]) << 48) + + (((u64b_t) src[n+7]) << 56) ; + } + 
+/*****************************************************************/ +/* 256-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + + /* build/process config block for hashing */ + ctx->h.hashBitLen = hashBitLen; /* output hash byte count */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. 
*/ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type, h.bCnt=0 */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + uint_t i; + union + { + u08b_t b[SKEIN_256_STATE_BYTES]; + u64b_t w[SKEIN_256_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? */ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_256_Update(ctx,key,keyBytes); /* hash the key */ + Skein_256_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ + for (i=0;i<SKEIN_256_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } + + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash 
config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(256,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_256_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type, h.bCnt=0 */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any buffered message data */ + { + n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES); + Skein_256_Process_Block(ctx,ctx->b,1,SKEIN_256_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_256_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */ + Skein_256_Process_Block(ctx,msg,n,SKEIN_256_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_256_BLOCK_BYTES; + msg += n * SKEIN_256_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + 
+/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein_256_Final(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_256_API_CodeSize(void) + { + return ((u08b_t *) Skein_256_API_CodeSize) - + ((u08b_t *) Skein_256_Init); + } +#endif + +/*****************************************************************/ +/* 512-bit Skein */ +/*****************************************************************/ 
+ +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + + /* build/process config block for hashing */ + ctx->h.hashBitLen = hashBitLen; /* output hash byte count */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + + /* compute the initial chaining values from config block */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type, h.bCnt=0 */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + uint_t i; + union + { + u08b_t b[SKEIN_512_STATE_BYTES]; + u64b_t w[SKEIN_512_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein_512_Update(ctx,key,keyBytes); /* hash the key */ + Skein_512_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ + for (i=0;i<SKEIN_512_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } + + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(512,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein_512_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type, h.bCnt=0 */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any 
buffered message data */ + { + n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); + Skein_512_Process_Block(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN_512_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ + Skein_512_Process_Block(ctx,msg,n,SKEIN_512_BLOCK_BYTES); + msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; + msg += n * SKEIN_512_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein_512_Final(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate more output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + 
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_512_API_CodeSize(void) + { + return ((u08b_t *) Skein_512_API_CodeSize) - + ((u08b_t *) Skein_512_Init); + } +#endif + +/*****************************************************************/ +/* 1024-bit Skein */ +/*****************************************************************/ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a straight hashing operation */ +int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) + { + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + + /* build/process config block for hashing */ + ctx->h.hashBitLen = hashBitLen; /* output hash byte count */ + Skein_Start_New_Type(ctx,CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); + + /* compute the initial chaining values from config block */ + 
memset(ctx->X,0,sizeof(ctx->X)); /* zero the chaining variables */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized for the given hashBitLen. */ + /* Set up to process the data message portion of the hash (default) */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type, h.bCnt=0 */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* init the context for a MAC and/or tree hash operation */ +/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx,size_t hashBitLen,u64b_t treeInfo, const u08b_t *key, size_t keyBytes) + { + uint_t i; + union + { + u08b_t b[SKEIN1024_STATE_BYTES]; + u64b_t w[SKEIN1024_STATE_WORDS]; + } cfg; /* config block */ + + Skein_Assert(hashBitLen > 0,SKEIN_BAD_HASHLEN); + Skein_Assert(keyBytes == 0 || key != NULL,SKEIN_FAIL); + + /* compute the initial chaining values ctx->X[], based on key */ + if (keyBytes == 0) /* is there a key? 
*/ + { + memset(ctx->X,0,sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ + } + else /* here to pre-process a key */ + { + Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); + /* do a mini-Init right here */ + ctx->h.hashBitLen=8*sizeof(ctx->X); /* set output hash bit count = state size */ + Skein_Start_New_Type(ctx,KEY); /* set tweaks: T0 = 0; T1 = KEY type */ + memset(ctx->X,0,sizeof(ctx->X)); /* zero the initial chaining variables */ + Skein1024_Update(ctx,key,keyBytes); /* hash the key */ + Skein1024_Final_Pad(ctx,cfg.b); /* put result into cfg.b[] */ + memcpy(ctx->X,cfg.b,sizeof(cfg.b)); /* copy over into ctx->X[] */ + for (i=0;i<SKEIN1024_STATE_WORDS;i++) /* convert key bytes to context words */ + ctx->X[i] = Skein_Swap64(ctx->X[i]); + } + + /* build/process the config block, type == CONFIG (could be precomputed for each key) */ + ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ + Skein_Start_New_Type(ctx,CFG_FINAL); + + memset(&cfg.w,0,sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ + cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); + cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ + cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ + + Skein_Show_Key(1024,&ctx->h,key,keyBytes); + + /* compute the initial chaining values from config block */ + Skein1024_Process_Block(ctx,cfg.b,1,SKEIN_CFG_STR_LEN); + + /* The chaining vars ctx->X are now initialized */ + /* Set up to process the data message portion of the hash */ + Skein_Start_New_Type(ctx,MSG); /* T0=0, T1= MSG type, h.bCnt=0 */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* process the input bytes */ +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt) + { + size_t n; + + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* process full blocks, if any */ + if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) + { + if (ctx->h.bCnt) /* finish up any 
buffered message data */ + { + n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ + if (n) + { + Skein_assert(n < msgByteCnt); /* check on our logic here */ + memcpy(&ctx->b[ctx->h.bCnt],msg,n); + msgByteCnt -= n; + msg += n; + ctx->h.bCnt += n; + } + Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES); + Skein1024_Process_Block(ctx,ctx->b,1,SKEIN1024_BLOCK_BYTES); + ctx->h.bCnt = 0; + } + /* now process any remaining full blocks, directly from input message data */ + if (msgByteCnt > SKEIN1024_BLOCK_BYTES) + { + n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */ + Skein1024_Process_Block(ctx,msg,n,SKEIN1024_BLOCK_BYTES); + msgByteCnt -= n * SKEIN1024_BLOCK_BYTES; + msg += n * SKEIN1024_BLOCK_BYTES; + } + Skein_assert(ctx->h.bCnt == 0); + } + + /* copy any remaining source message data bytes into b[] */ + if (msgByteCnt) + { + Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES); + memcpy(&ctx->b[ctx->h.bCnt],msg,msgByteCnt); + ctx->h.bCnt += msgByteCnt; + } + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the result */ +int Skein1024_Final(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + 
memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein1024_API_CodeSize(void) + { + return ((u08b_t *) Skein1024_API_CodeSize) - + ((u08b_t *) Skein1024_Init); + } +#endif + +/**************** Functions to support MAC/tree hashing ***************/ +/* (this code is identical for Optimized and Reference versions) */ + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); + Skein_256_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_256_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int 
Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); + Skein_512_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN_512_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* finalize the hash computation and output the block, no OUTPUT stage */ +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ + if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ + memset(&ctx->b[ctx->h.bCnt],0,SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); + Skein1024_Process_Block(ctx,ctx->b,1,ctx->h.bCnt); /* process the final block */ + + Skein_Put64_LSB_First(hashVal,ctx->X,SKEIN1024_BLOCK_BYTES); /* "output" the state bytes */ + + return SKEIN_SUCCESS; + } + +#if SKEIN_TREE_HASH +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein_256_Output(Skein_256_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_256_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ 
+ for (i=0;i*SKEIN_256_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_256_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_256_BLOCK_BYTES) + n = SKEIN_256_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(256,&ctx->h,n,hashVal+i*SKEIN_256_BLOCK_BYTES); + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein_512_Output(Skein_512_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN_512_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein_512_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN_512_BLOCK_BYTES) + n = SKEIN_512_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(512,&ctx->h,n,hashVal+i*SKEIN_512_BLOCK_BYTES); /* debug tag matches the 512-bit variant, as in Skein_512_Final */ + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return 
SKEIN_SUCCESS; + } + +/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ +/* just do the OUTPUT stage */ +int Skein1024_Output(Skein1024_Ctxt_t *ctx, u08b_t *hashVal) + { + size_t i,n,byteCnt; + u64b_t X[SKEIN1024_STATE_WORDS]; + Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES,SKEIN_FAIL); /* catch uninitialized context */ + + /* now output the result */ + byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ + + /* run Threefish in "counter mode" to generate output */ + memset(ctx->b,0,sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ + memcpy(X,ctx->X,sizeof(X)); /* keep a local copy of counter mode "key" */ + for (i=0;i*SKEIN1024_BLOCK_BYTES < byteCnt;i++) + { + ((u64b_t *)ctx->b)[0]= Skein_Swap64((u64b_t) i); /* build the counter block */ + Skein_Start_New_Type(ctx,OUT_FINAL); + Skein1024_Process_Block(ctx,ctx->b,1,sizeof(u64b_t)); /* run "counter mode" */ + n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ + if (n >= SKEIN1024_BLOCK_BYTES) + n = SKEIN1024_BLOCK_BYTES; + Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES,ctx->X,n); /* "output" the ctr mode bytes */ + Skein_Show_Final(1024,&ctx->h,n,hashVal+i*SKEIN1024_BLOCK_BYTES); /* debug tag matches the 1024-bit variant, as in Skein1024_Final */ + memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */ + } + return SKEIN_SUCCESS; + } +#endif diff --git a/Reference_Implementation/skein.h b/Reference_Implementation/skein.h new file mode 100644 index 000000000000..721c9bc9ce0d --- /dev/null +++ b/Reference_Implementation/skein.h @@ -0,0 +1,327 @@ +#ifndef _SKEIN_H_ +#define _SKEIN_H_ 1 +/************************************************************************** +** +** Interface declarations and internal definitions for Skein hashing. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +*************************************************************************** +** +** The following compile-time switches may be defined to control some +** tradeoffs between speed, code size, error checking, and security. +** +** The "default" note explains what happens when the switch is not defined. +** +** SKEIN_DEBUG -- make callouts from inside Skein code +** to examine/display intermediate values. +** [default: no callouts (no overhead)] +** +** SKEIN_ERR_CHECK -- how error checking is handled inside Skein +** code. If not defined, most error checking +** is disabled (for performance). Otherwise, +** the switch value is interpreted as: +** 0: use assert() to flag errors +** 1: return SKEIN_FAIL to flag errors +** +***************************************************************************/ +#ifdef __cplusplus +extern "C" +{ +#endif + +#include /* get size_t definition */ +#include "skein_port.h" /* get platform-specific definitions */ + +enum + { + SKEIN_SUCCESS = 0, /* return codes from Skein calls */ + SKEIN_FAIL = 1, + SKEIN_BAD_HASHLEN = 2 + }; + +#define SKEIN_MODIFIER_WORDS ( 2) /* number of modifier (tweak) words */ + +#define SKEIN_256_STATE_WORDS ( 4) +#define SKEIN_512_STATE_WORDS ( 8) +#define SKEIN1024_STATE_WORDS (16) +#define SKEIN_MAX_STATE_WORDS (16) + +#define SKEIN_256_STATE_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BYTES ( 8*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_STATE_BITS (64*SKEIN_256_STATE_WORDS) +#define SKEIN_512_STATE_BITS (64*SKEIN_512_STATE_WORDS) +#define SKEIN1024_STATE_BITS (64*SKEIN1024_STATE_WORDS) + +#define SKEIN_256_BLOCK_BYTES ( 8*SKEIN_256_STATE_WORDS) +#define SKEIN_512_BLOCK_BYTES ( 8*SKEIN_512_STATE_WORDS) +#define SKEIN1024_BLOCK_BYTES ( 8*SKEIN1024_STATE_WORDS) + +typedef struct + { + size_t hashBitLen; /* size of hash result, in bits */ + size_t bCnt; /* current byte count in buffer b[] */ + u64b_t T[SKEIN_MODIFIER_WORDS]; /* tweak 
words: T[0]=byte cnt, T[1]=flags */ + } Skein_Ctxt_Hdr_t; + +typedef struct /* 256-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein_256_Ctxt_t; + +typedef struct /* 512-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein_512_Ctxt_t; + +typedef struct /* 1024-bit Skein hash context structure */ + { + Skein_Ctxt_Hdr_t h; /* common header context variables */ + u64b_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */ + u08b_t b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ + } Skein1024_Ctxt_t; + +/* Skein APIs for (incremental) "straight hashing" */ +int Skein_256_Init (Skein_256_Ctxt_t *ctx, size_t hashBitLen); +int Skein_512_Init (Skein_512_Ctxt_t *ctx, size_t hashBitLen); +int Skein1024_Init (Skein1024_Ctxt_t *ctx, size_t hashBitLen); + +int Skein_256_Update(Skein_256_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +int Skein_512_Update(Skein_512_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); +int Skein1024_Update(Skein1024_Ctxt_t *ctx, const u08b_t *msg, size_t msgByteCnt); + +int Skein_256_Final (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Final (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Final (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +/* +** Skein APIs for "extended" initialization: MAC keys, tree hashing. +** After an InitExt() call, just use Update/Final calls as with Init(). +** +** Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes. +** When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL, +** the results of InitExt() are identical to calling Init(). 
+** The function Init() may be called once to "precompute" the IV for +** a given hashBitLen value, then by saving a copy of the context +** the IV computation may be avoided in later calls. +** Similarly, the function InitExt() may be called once per MAC key +** to precompute the MAC IV, then a copy of the context saved and +** reused for each new MAC computation. +**/ +int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); +int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, u64b_t treeInfo, const u08b_t *key, size_t keyBytes); + +/* +** Skein APIs for MAC and tree hash: +** Final_Pad: pad, do final block, but no OUTPUT type +** Output: do just the output stage +*/ +int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, u08b_t * hashVal); + +#ifndef SKEIN_TREE_HASH +#define SKEIN_TREE_HASH (1) +#endif +#if SKEIN_TREE_HASH +int Skein_256_Output (Skein_256_Ctxt_t *ctx, u08b_t * hashVal); +int Skein_512_Output (Skein_512_Ctxt_t *ctx, u08b_t * hashVal); +int Skein1024_Output (Skein1024_Ctxt_t *ctx, u08b_t * hashVal); +#endif + +/***************************************************************** +** "Internal" Skein definitions +** -- not needed for sequential hashing API, but will be +** helpful for other uses of Skein (e.g., tree hash mode). +** -- included here so that they can be shared between +** reference and optimized code. 
+******************************************************************/ + +/* tweak word T[1]: bit field starting positions */ +#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ + +#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ +#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ +#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ +#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ +#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ + +/* tweak word T[1]: flag bit definition(s) */ +#define SKEIN_T1_FLAG_FIRST (((u64b_t) 1 ) << SKEIN_T1_POS_FIRST) +#define SKEIN_T1_FLAG_FINAL (((u64b_t) 1 ) << SKEIN_T1_POS_FINAL) +#define SKEIN_T1_FLAG_BIT_PAD (((u64b_t) 1 ) << SKEIN_T1_POS_BIT_PAD) + +/* tweak word T[1]: tree level bit field mask */ +#define SKEIN_T1_TREE_LVL_MASK (((u64b_t)0x7F) << SKEIN_T1_POS_TREE_LVL) +#define SKEIN_T1_TREE_LEVEL(n) (((u64b_t) (n)) << SKEIN_T1_POS_TREE_LVL) + +/* tweak word T[1]: block type field */ +#define SKEIN_BLK_TYPE_KEY ( 0) /* key, for MAC and KDF */ +#define SKEIN_BLK_TYPE_CFG ( 4) /* configuration block */ +#define SKEIN_BLK_TYPE_PERS ( 8) /* personalization string */ +#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ +#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ +#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ +#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ +#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ +#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ + +#define SKEIN_T1_BLK_TYPE(T) (((u64b_t) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) +#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ +#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */ +#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization 
string */ +#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ +#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ +#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ +#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ +#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ +#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ + +#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) +#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) + +#define SKEIN_VERSION (1) + +#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */ +#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian)*/ +#endif + +#define SKEIN_MK_64(hi32,lo32) ((lo32) + (((u64b_t) (hi32)) << 32)) +#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION,SKEIN_ID_STRING_LE) +#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22) + +#define SKEIN_CFG_STR_LEN (4*8) + +/* bit field definitions in config block treeInfo word */ +#define SKEIN_CFG_TREE_LEAF_SIZE_POS ( 0) +#define SKEIN_CFG_TREE_NODE_SIZE_POS ( 8) +#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) + +#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) +#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) +#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64b_t) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) + +#define SKEIN_CFG_TREE_INFO(leaf,node,maxLvl) \ + ( (((u64b_t)(leaf )) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ + (((u64b_t)(node )) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ + (((u64b_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS) ) + +#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0,0,0) /* use as treeInfo in InitExt() call for sequential processing */ + +/* +** Skein macros for getting/setting tweak words, etc. 
+** These are useful for partial input bytes, hash tree init/update, etc. +**/ +#define Skein_Get_Tweak(ctxPtr,TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) +#define Skein_Set_Tweak(ctxPtr,TWK_NUM,tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal);} + +#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr,0) +#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr,1) +#define Skein_Set_T0(ctxPtr,T0) Skein_Set_Tweak(ctxPtr,0,T0) +#define Skein_Set_T1(ctxPtr,T1) Skein_Set_Tweak(ctxPtr,1,T1) + +/* set both tweak words at once */ +#define Skein_Set_T0_T1(ctxPtr,T0,T1) \ + { \ + Skein_Set_T0(ctxPtr,(T0)); \ + Skein_Set_T1(ctxPtr,(T1)); \ + } + +#define Skein_Set_Type(ctxPtr,BLK_TYPE) \ + Skein_Set_T1(ctxPtr,SKEIN_T1_BLK_TYPE_##BLK_TYPE) + +/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ +#define Skein_Start_New_Type(ctxPtr,BLK_TYPE) \ + { Skein_Set_T0_T1(ctxPtr,0,SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt=0; } + +#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } +#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } + +#define Skein_Set_Tree_Level(hdr,height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);} + +/***************************************************************** +** "Internal" Skein definitions for debugging and error checking +******************************************************************/ +#ifdef SKEIN_DEBUG /* examine/display intermediate values? */ +#include "skein_debug.h" +#else /* default is no callouts */ +#define Skein_Show_Block(bits,ctx,X,blkPtr,wPtr,ksEvenPtr,ksOddPtr) +#define Skein_Show_Round(bits,ctx,r,X) +#define Skein_Show_R_Ptr(bits,ctx,r,X_ptr) +#define Skein_Show_Final(bits,ctx,cnt,outPtr) +#define Skein_Show_Key(bits,ctx,key,keyBytes) +#endif + +#ifndef SKEIN_ERR_CHECK /* run-time checks (e.g., bad params, uninitialized context)? 
*/ +#define Skein_Assert(x,retCode)/* default: ignore all Asserts, for performance */ +#define Skein_assert(x) +#elif defined(SKEIN_ASSERT) +#include <assert.h> +#define Skein_Assert(x,retCode) assert(x) +#define Skein_assert(x) assert(x) +#else +#include <assert.h> +#define Skein_Assert(x,retCode) { if (!(x)) return retCode; } /* caller error */ +#define Skein_assert(x) assert(x) /* internal error */ +#endif + +/***************************************************************** +** Skein block function constants (shared across Ref and Opt code) +******************************************************************/ +enum + { + /* Skein_256 round rotation constants */ + R_256_0_0=14, R_256_0_1=16, + R_256_1_0=52, R_256_1_1=57, + R_256_2_0=23, R_256_2_1=40, + R_256_3_0= 5, R_256_3_1=37, + R_256_4_0=25, R_256_4_1=33, + R_256_5_0=46, R_256_5_1=12, + R_256_6_0=58, R_256_6_1=22, + R_256_7_0=32, R_256_7_1=32, + + /* Skein_512 round rotation constants */ + R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37, + R_512_1_0=33, R_512_1_1=27, R_512_1_2=14, R_512_1_3=42, + R_512_2_0=17, R_512_2_1=49, R_512_2_2=36, R_512_2_3=39, + R_512_3_0=44, R_512_3_1= 9, R_512_3_2=54, R_512_3_3=56, + R_512_4_0=39, R_512_4_1=30, R_512_4_2=34, R_512_4_3=24, + R_512_5_0=13, R_512_5_1=50, R_512_5_2=10, R_512_5_3=17, + R_512_6_0=25, R_512_6_1=29, R_512_6_2=39, R_512_6_3=43, + R_512_7_0= 8, R_512_7_1=35, R_512_7_2=56, R_512_7_3=22, + + /* Skein1024 round rotation constants */ + R1024_0_0=24, R1024_0_1=13, R1024_0_2= 8, R1024_0_3=47, R1024_0_4= 8, R1024_0_5=17, R1024_0_6=22, R1024_0_7=37, + R1024_1_0=38, R1024_1_1=19, R1024_1_2=10, R1024_1_3=55, R1024_1_4=49, R1024_1_5=18, R1024_1_6=23, R1024_1_7=52, + R1024_2_0=33, R1024_2_1= 4, R1024_2_2=51, R1024_2_3=13, R1024_2_4=34, R1024_2_5=41, R1024_2_6=59, R1024_2_7=17, + R1024_3_0= 5, R1024_3_1=20, R1024_3_2=48, R1024_3_3=41, R1024_3_4=47, R1024_3_5=28, R1024_3_6=16, R1024_3_7=25, + R1024_4_0=41, R1024_4_1= 9, R1024_4_2=37, R1024_4_3=31, R1024_4_4=12, R1024_4_5=47, 
R1024_4_6=44, R1024_4_7=30, + R1024_5_0=16, R1024_5_1=34, R1024_5_2=56, R1024_5_3=51, R1024_5_4= 4, R1024_5_5=53, R1024_5_6=42, R1024_5_7=41, + R1024_6_0=31, R1024_6_1=44, R1024_6_2=47, R1024_6_3=46, R1024_6_4=19, R1024_6_5=42, R1024_6_6=44, R1024_6_7=25, + R1024_7_0= 9, R1024_7_1=48, R1024_7_2=35, R1024_7_3=52, R1024_7_4=23, R1024_7_5=31, R1024_7_6=37, R1024_7_7=20 + }; + +#ifndef SKEIN_ROUNDS +#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */ +#define SKEIN_512_ROUNDS_TOTAL (72) +#define SKEIN1024_ROUNDS_TOTAL (80) +#else /* allow command-line define in range 8*(5..14) */ +#define SKEIN_256_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/100) + 5) % 10) + 5)) +#define SKEIN_512_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS/ 10) + 5) % 10) + 5)) +#define SKEIN1024_ROUNDS_TOTAL (8*((((SKEIN_ROUNDS ) + 5) % 10) + 5)) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef _SKEIN_H_ */ diff --git a/Reference_Implementation/skein_block.c b/Reference_Implementation/skein_block.c new file mode 100644 index 000000000000..48d499813642 --- /dev/null +++ b/Reference_Implementation/skein_block.c @@ -0,0 +1,369 @@ +/*********************************************************************** +** +** Implementation of the Skein block functions. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. 
+** +** Compile-time switches: +** +** SKEIN_USE_ASM -- set bits (256/512/1024) to select which +** versions use ASM code for block processing +** [default: use C for all block sizes] +** +************************************************************************/ + +#include <string.h> +#include "skein.h" + +/* 64-bit rotate left */ +u64b_t RotL_64(u64b_t x,uint_t N) + { + return (x << (N & 63)) | (x >> ((64-N) & 63)); + } + +#define BLK_BITS (WCNT*64) + +/* macro to perform a key injection (same for all block sizes) */ +#define InjectKey(r) \ + for (i=0;i < WCNT;i++) \ + X[i] += ks[((r)+i) % (WCNT+1)]; \ + X[WCNT-3] += ts[((r)+0) % 3]; \ + X[WCNT-2] += ts[((r)+1) % 3]; \ + X[WCNT-1] += (r); /* avoid slide attacks */ \ + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INJECT,X); + +void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_256_STATE_WORDS + }; + size_t i,r; + u64b_t ts[3]; /* key schedule: tweak */ + u64b_t ks[WCNT+1]; /* key schedule: chaining vars */ + u64b_t X [WCNT]; /* local copy of context vars */ + u64b_t w [WCNT]; /* local copy of input block */ + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ctx->h.T[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[WCNT] = SKEIN_KS_PARITY; + for (i=0;i < WCNT; i++) + { + ks[i] = ctx->X[i]; + ks[WCNT] ^= ctx->X[i]; /* compute overall parity */ + } + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + for (i=0;i < WCNT; i++) /* do the first full key injection */ + { + X[i] = w[i] + ks[i]; + } + X[WCNT-3] += ts[0]; + X[WCNT-2] += ts[1]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,X); /* show starting state values */ + + for (r=1;r <= SKEIN_256_ROUNDS_TOTAL/8; r++) + { /* unroll 8 rounds */ + X[0] += X[1]; X[1] = RotL_64(X[1],R_256_0_0); X[1] ^= X[0]; + X[2] += X[3]; X[3] = RotL_64(X[3],R_256_0_1); X[3] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-7,X); + + X[0] += X[3]; X[3] = RotL_64(X[3],R_256_1_0); X[3] ^= X[0]; + X[2] += X[1]; X[1] = RotL_64(X[1],R_256_1_1); X[1] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-6,X); + + X[0] += X[1]; X[1] = RotL_64(X[1],R_256_2_0); X[1] ^= X[0]; + X[2] += X[3]; X[3] = RotL_64(X[3],R_256_2_1); X[3] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-5,X); + + X[0] += X[3]; X[3] = RotL_64(X[3],R_256_3_0); X[3] ^= X[0]; + X[2] += X[1]; X[1] = RotL_64(X[1],R_256_3_1); X[1] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-4,X); + InjectKey(2*r-1); + + X[0] += X[1]; X[1] = RotL_64(X[1],R_256_4_0); X[1] ^= X[0]; + X[2] += X[3]; X[3] = RotL_64(X[3],R_256_4_1); X[3] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-3,X); + + X[0] += X[3]; X[3] = RotL_64(X[3],R_256_5_0); X[3] ^= X[0]; + X[2] += X[1]; X[1] = RotL_64(X[1],R_256_5_1); X[1] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-2,X); + + X[0] += X[1]; X[1] = RotL_64(X[1],R_256_6_0); X[1] ^= X[0]; + X[2] += X[3]; X[3] = 
RotL_64(X[3],R_256_6_1); X[3] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-1,X); + + X[0] += X[3]; X[3] = RotL_64(X[3],R_256_7_0); X[3] ^= X[0]; + X[2] += X[1]; X[1] = RotL_64(X[1],R_256_7_1); X[1] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r ,X); + InjectKey(2*r); + } + /* do the final "feedforward" xor, update context chaining vars */ + for (i=0;i < WCNT;i++) + ctx->X[i] = X[i] ^ w[i]; + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + Skein_Clear_First_Flag(ctx->h); /* clear the start bit */ + blkPtr += SKEIN_256_BLOCK_BYTES; + } + while (--blkCnt); + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_256_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein_256_Process_Block_CodeSize) - + ((u08b_t *) Skein_256_Process_Block); + } +uint_t Skein_256_Unroll_Cnt(void) + { + return 1; + } +#endif + +void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = SKEIN_512_STATE_WORDS + }; + + size_t i,r; + u64b_t ts[3]; /* key schedule: tweak */ + u64b_t ks[WCNT+1]; /* key schedule: chaining vars */ + u64b_t X [WCNT]; /* local copy of vars */ + u64b_t w [WCNT]; /* local copy of input block */ + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! 
*/ + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ctx->h.T[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[WCNT] = SKEIN_KS_PARITY; + for (i=0;i < WCNT; i++) + { + ks[i] = ctx->X[i]; + ks[WCNT] ^= ctx->X[i]; /* compute overall parity */ + } + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + for (i=0;i < WCNT; i++) /* do the first full key injection */ + { + X[i] = w[i] + ks[i]; + } + X[WCNT-3] += ts[0]; + X[WCNT-2] += ts[1]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,X); + for (r=1;r <= SKEIN_512_ROUNDS_TOTAL/8; r++) + { /* unroll 8 rounds */ + X[0] += X[1]; X[1] = RotL_64(X[1],R_512_0_0); X[1] ^= X[0]; + X[2] += X[3]; X[3] = RotL_64(X[3],R_512_0_1); X[3] ^= X[2]; + X[4] += X[5]; X[5] = RotL_64(X[5],R_512_0_2); X[5] ^= X[4]; + X[6] += X[7]; X[7] = RotL_64(X[7],R_512_0_3); X[7] ^= X[6]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-7,X); + + X[2] += X[1]; X[1] = RotL_64(X[1],R_512_1_0); X[1] ^= X[2]; + X[4] += X[7]; X[7] = RotL_64(X[7],R_512_1_1); X[7] ^= X[4]; + X[6] += X[5]; X[5] = RotL_64(X[5],R_512_1_2); X[5] ^= X[6]; + X[0] += X[3]; X[3] = RotL_64(X[3],R_512_1_3); X[3] ^= X[0]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-6,X); + + X[4] += X[1]; X[1] = RotL_64(X[1],R_512_2_0); X[1] ^= X[4]; + X[6] += X[3]; X[3] = RotL_64(X[3],R_512_2_1); X[3] ^= X[6]; + X[0] += X[5]; X[5] = RotL_64(X[5],R_512_2_2); X[5] ^= X[0]; + X[2] += X[7]; X[7] = RotL_64(X[7],R_512_2_3); X[7] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-5,X); + + X[6] += X[1]; X[1] = RotL_64(X[1],R_512_3_0); X[1] ^= X[6]; + X[0] += X[7]; X[7] = RotL_64(X[7],R_512_3_1); X[7] ^= X[0]; + X[2] += X[5]; X[5] = RotL_64(X[5],R_512_3_2); X[5] ^= X[2]; + X[4] += X[3]; X[3] = RotL_64(X[3],R_512_3_3); X[3] ^= X[4]; 
Skein_Show_Round(BLK_BITS,&ctx->h,8*r-4,X); + InjectKey(2*r-1); + + X[0] += X[1]; X[1] = RotL_64(X[1],R_512_4_0); X[1] ^= X[0]; + X[2] += X[3]; X[3] = RotL_64(X[3],R_512_4_1); X[3] ^= X[2]; + X[4] += X[5]; X[5] = RotL_64(X[5],R_512_4_2); X[5] ^= X[4]; + X[6] += X[7]; X[7] = RotL_64(X[7],R_512_4_3); X[7] ^= X[6]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-3,X); + + X[2] += X[1]; X[1] = RotL_64(X[1],R_512_5_0); X[1] ^= X[2]; + X[4] += X[7]; X[7] = RotL_64(X[7],R_512_5_1); X[7] ^= X[4]; + X[6] += X[5]; X[5] = RotL_64(X[5],R_512_5_2); X[5] ^= X[6]; + X[0] += X[3]; X[3] = RotL_64(X[3],R_512_5_3); X[3] ^= X[0]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-2,X); + + X[4] += X[1]; X[1] = RotL_64(X[1],R_512_6_0); X[1] ^= X[4]; + X[6] += X[3]; X[3] = RotL_64(X[3],R_512_6_1); X[3] ^= X[6]; + X[0] += X[5]; X[5] = RotL_64(X[5],R_512_6_2); X[5] ^= X[0]; + X[2] += X[7]; X[7] = RotL_64(X[7],R_512_6_3); X[7] ^= X[2]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-1,X); + + X[6] += X[1]; X[1] = RotL_64(X[1],R_512_7_0); X[1] ^= X[6]; + X[0] += X[7]; X[7] = RotL_64(X[7],R_512_7_1); X[7] ^= X[0]; + X[2] += X[5]; X[5] = RotL_64(X[5],R_512_7_2); X[5] ^= X[2]; + X[4] += X[3]; X[3] = RotL_64(X[3],R_512_7_3); X[3] ^= X[4]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r ,X); + InjectKey(2*r); + } + /* do the final "feedforward" xor, update context chaining vars */ + for (i=0;i < WCNT;i++) + ctx->X[i] = X[i] ^ w[i]; + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + Skein_Clear_First_Flag(ctx->h); /* clear the start bit */ + blkPtr += SKEIN_512_BLOCK_BYTES; + } + while (--blkCnt); + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein_512_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein_512_Process_Block_CodeSize) - + ((u08b_t *) Skein_512_Process_Block); + } +uint_t Skein_512_Unroll_Cnt(void) + { + return 1; + } +#endif + +void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx,const u08b_t *blkPtr,size_t blkCnt,size_t byteCntAdd) + { /* do it in C */ + enum + { + WCNT = 
SKEIN1024_STATE_WORDS + }; + + size_t i,r; + u64b_t ts[3]; /* key schedule: tweak */ + u64b_t ks[WCNT+1]; /* key schedule: chaining vars */ + u64b_t X [WCNT]; /* local copy of vars */ + u64b_t w [WCNT]; /* local copy of input block */ + + Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ + do { + /* this implementation only supports 2**64 input bytes (no carry out here) */ + ctx->h.T[0] += byteCntAdd; /* update processed length */ + + /* precompute the key schedule for this block */ + ks[WCNT] = SKEIN_KS_PARITY; + for (i=0;i < WCNT; i++) + { + ks[i] = ctx->X[i]; + ks[WCNT] ^= ctx->X[i]; /* compute overall parity */ + } + ts[0] = ctx->h.T[0]; + ts[1] = ctx->h.T[1]; + ts[2] = ts[0] ^ ts[1]; + + Skein_Get64_LSB_First(w,blkPtr,WCNT); /* get input block in little-endian format */ + Skein_Show_Block(BLK_BITS,&ctx->h,ctx->X,blkPtr,w,ks,ts); + for (i=0;i < WCNT; i++) /* do the first full key injection */ + { + X[i] = w[i] + ks[i]; + } + X[WCNT-3] += ts[0]; + X[WCNT-2] += ts[1]; + + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_KEY_INITIAL,X); /* show starting state values */ + for (r=1;r <= SKEIN1024_ROUNDS_TOTAL/8; r++) + { /* unroll 8 rounds */ + X[ 0] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_0_0); X[ 1] ^= X[ 0]; + X[ 2] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_0_1); X[ 3] ^= X[ 2]; + X[ 4] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_0_2); X[ 5] ^= X[ 4]; + X[ 6] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_0_3); X[ 7] ^= X[ 6]; + X[ 8] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_0_4); X[ 9] ^= X[ 8]; + X[10] += X[11]; X[11] = RotL_64(X[11],R1024_0_5); X[11] ^= X[10]; + X[12] += X[13]; X[13] = RotL_64(X[13],R1024_0_6); X[13] ^= X[12]; + X[14] += X[15]; X[15] = RotL_64(X[15],R1024_0_7); X[15] ^= X[14]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-7,X); + + X[ 0] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_1_0); X[ 9] ^= X[ 0]; + X[ 2] += X[13]; X[13] = RotL_64(X[13],R1024_1_1); X[13] ^= X[ 2]; + X[ 6] += X[11]; X[11] = RotL_64(X[11],R1024_1_2); X[11] ^= X[ 6]; + X[ 4] += X[15]; X[15] = 
RotL_64(X[15],R1024_1_3); X[15] ^= X[ 4]; + X[10] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_1_4); X[ 7] ^= X[10]; + X[12] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_1_5); X[ 3] ^= X[12]; + X[14] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_1_6); X[ 5] ^= X[14]; + X[ 8] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_1_7); X[ 1] ^= X[ 8]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-6,X); + + X[ 0] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_2_0); X[ 7] ^= X[ 0]; + X[ 2] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_2_1); X[ 5] ^= X[ 2]; + X[ 4] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_2_2); X[ 3] ^= X[ 4]; + X[ 6] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_2_3); X[ 1] ^= X[ 6]; + X[12] += X[15]; X[15] = RotL_64(X[15],R1024_2_4); X[15] ^= X[12]; + X[14] += X[13]; X[13] = RotL_64(X[13],R1024_2_5); X[13] ^= X[14]; + X[ 8] += X[11]; X[11] = RotL_64(X[11],R1024_2_6); X[11] ^= X[ 8]; + X[10] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_2_7); X[ 9] ^= X[10]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-5,X); + + X[ 0] += X[15]; X[15] = RotL_64(X[15],R1024_3_0); X[15] ^= X[ 0]; + X[ 2] += X[11]; X[11] = RotL_64(X[11],R1024_3_1); X[11] ^= X[ 2]; + X[ 6] += X[13]; X[13] = RotL_64(X[13],R1024_3_2); X[13] ^= X[ 6]; + X[ 4] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_3_3); X[ 9] ^= X[ 4]; + X[14] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_3_4); X[ 1] ^= X[14]; + X[ 8] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_3_5); X[ 5] ^= X[ 8]; + X[10] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_3_6); X[ 3] ^= X[10]; + X[12] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_3_7); X[ 7] ^= X[12]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-4,X); + InjectKey(2*r-1); + + X[ 0] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_4_0); X[ 1] ^= X[ 0]; + X[ 2] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_4_1); X[ 3] ^= X[ 2]; + X[ 4] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_4_2); X[ 5] ^= X[ 4]; + X[ 6] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_4_3); X[ 7] ^= X[ 6]; + X[ 8] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_4_4); X[ 9] ^= X[ 8]; + X[10] += X[11]; X[11] = RotL_64(X[11],R1024_4_5); X[11] ^= X[10]; + X[12] += X[13]; X[13] = 
RotL_64(X[13],R1024_4_6); X[13] ^= X[12]; + X[14] += X[15]; X[15] = RotL_64(X[15],R1024_4_7); X[15] ^= X[14]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-3,X); + + X[ 0] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_5_0); X[ 9] ^= X[ 0]; + X[ 2] += X[13]; X[13] = RotL_64(X[13],R1024_5_1); X[13] ^= X[ 2]; + X[ 6] += X[11]; X[11] = RotL_64(X[11],R1024_5_2); X[11] ^= X[ 6]; + X[ 4] += X[15]; X[15] = RotL_64(X[15],R1024_5_3); X[15] ^= X[ 4]; + X[10] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_5_4); X[ 7] ^= X[10]; + X[12] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_5_5); X[ 3] ^= X[12]; + X[14] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_5_6); X[ 5] ^= X[14]; + X[ 8] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_5_7); X[ 1] ^= X[ 8]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-2,X); + + X[ 0] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_6_0); X[ 7] ^= X[ 0]; + X[ 2] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_6_1); X[ 5] ^= X[ 2]; + X[ 4] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_6_2); X[ 3] ^= X[ 4]; + X[ 6] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_6_3); X[ 1] ^= X[ 6]; + X[12] += X[15]; X[15] = RotL_64(X[15],R1024_6_4); X[15] ^= X[12]; + X[14] += X[13]; X[13] = RotL_64(X[13],R1024_6_5); X[13] ^= X[14]; + X[ 8] += X[11]; X[11] = RotL_64(X[11],R1024_6_6); X[11] ^= X[ 8]; + X[10] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_6_7); X[ 9] ^= X[10]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r-1,X); + + X[ 0] += X[15]; X[15] = RotL_64(X[15],R1024_7_0); X[15] ^= X[ 0]; + X[ 2] += X[11]; X[11] = RotL_64(X[11],R1024_7_1); X[11] ^= X[ 2]; + X[ 6] += X[13]; X[13] = RotL_64(X[13],R1024_7_2); X[13] ^= X[ 6]; + X[ 4] += X[ 9]; X[ 9] = RotL_64(X[ 9],R1024_7_3); X[ 9] ^= X[ 4]; + X[14] += X[ 1]; X[ 1] = RotL_64(X[ 1],R1024_7_4); X[ 1] ^= X[14]; + X[ 8] += X[ 5]; X[ 5] = RotL_64(X[ 5],R1024_7_5); X[ 5] ^= X[ 8]; + X[10] += X[ 3]; X[ 3] = RotL_64(X[ 3],R1024_7_6); X[ 3] ^= X[10]; + X[12] += X[ 7]; X[ 7] = RotL_64(X[ 7],R1024_7_7); X[ 7] ^= X[12]; Skein_Show_Round(BLK_BITS,&ctx->h,8*r ,X); + InjectKey(2*r); + } + /* do the final "feedforward" xor, update context 
chaining vars */ + for (i=0;i<WCNT;i++) + ctx->X[i] = X[i] ^ w[i]; + Skein_Show_Round(BLK_BITS,&ctx->h,SKEIN_RND_FEED_FWD,ctx->X); + + Skein_Clear_First_Flag(ctx->h); /* clear the start bit */ + blkPtr += SKEIN1024_BLOCK_BYTES; + } + while (--blkCnt); + } + +#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF) +size_t Skein1024_Process_Block_CodeSize(void) + { + return ((u08b_t *) Skein1024_Process_Block_CodeSize) - + ((u08b_t *) Skein1024_Process_Block); + } +uint_t Skein1024_Unroll_Cnt(void) + { + return 1; + } +#endif diff --git a/Reference_Implementation/skein_debug.c b/Reference_Implementation/skein_debug.c new file mode 100644 index 000000000000..fac5038598ea --- /dev/null +++ b/Reference_Implementation/skein_debug.c @@ -0,0 +1,247 @@ +/*********************************************************************** +** +** Debug output functions for Skein hashing. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ +#include <stdio.h> + +#ifdef SKEIN_DEBUG /* only instantiate this code if SKEIN_DEBUG is on */ +#include "skein.h" + +static const char INDENT[] = " "; /* how much to indent on new line */ + +uint_t skein_DebugFlag = 0; /* off by default. Must be set externally */ + +static void Show64_step(size_t cnt,const u64b_t *X,size_t step) + { + size_t i,j; + for (i=j=0;i < cnt;i++,j+=step) + { + if (i % 4 == 0) printf(INDENT); + printf(" %08X.%08X ",(uint_32t)(X[j] >> 32),(uint_32t)X[j]); + if (i % 4 == 3 || i==cnt-1) printf("\n"); + fflush(stdout); + } + } + +#define Show64(cnt,X) Show64_step(cnt,X,1) + +static void Show64_flag(size_t cnt,const u64b_t *X) + { + size_t xptr = (size_t) X; + size_t step = (xptr & 1) ? 
2 : 1; + if (step != 1) + { + X = (const u64b_t *) (xptr & ~1); + } + Show64_step(cnt,X,step); + } + +static void Show08(size_t cnt,const u08b_t *b) + { + size_t i; + for (i=0;i < cnt;i++) + { + if (i %16 == 0) printf(INDENT); + else if (i % 4 == 0) printf(" "); + printf(" %02X",b[i]); + if (i %16 == 15 || i==cnt-1) printf("\n"); + fflush(stdout); + } + } + +static const char *AlgoHeader(uint_t bits) + { + if (skein_DebugFlag & SKEIN_DEBUG_THREEFISH) + switch (bits) + { + case 256: return ":Threefish-256: "; + case 512: return ":Threefish-512: "; + case 1024: return ":Threefish-1024:"; + } + else + switch (bits) + { + case 256: return ":Skein-256: "; + case 512: return ":Skein-512: "; + case 1024: return ":Skein-1024:"; + } + return NULL; + } + +void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr) + { + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag & SKEIN_DEBUG_FINAL) + { + printf("\n%s Final output=\n",AlgoHeader(bits)); + Show08(cnt,outPtr); + printf(" ++++++++++\n"); + fflush(stdout); + } + } + +/* show state after a round (or "pseudo-round") */ +void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X) + { + static uint_t injectNum=0; /* not multi-thread safe! */ + + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag) + { + if (r >= SKEIN_RND_SPECIAL) + { /* a key injection (or feedforward) point */ + injectNum = (r == SKEIN_RND_KEY_INITIAL) ? 
0 : injectNum+1; + if ( skein_DebugFlag & SKEIN_DEBUG_INJECT || + ((skein_DebugFlag & SKEIN_DEBUG_FINAL) && r == SKEIN_RND_FEED_FWD)) + { + printf("\n%s",AlgoHeader(bits)); + switch (r) + { + case SKEIN_RND_KEY_INITIAL: + printf(" [state after initial key injection]"); + break; + case SKEIN_RND_KEY_INJECT: + printf(" [state after key injection #%02d]",injectNum); + break; + case SKEIN_RND_FEED_FWD: + printf(" [state after plaintext feedforward]"); + injectNum = 0; + break; + } + printf("=\n"); + Show64(bits/64,X); + if (r== SKEIN_RND_FEED_FWD) + printf(" ----------\n"); + } + } + else if (skein_DebugFlag & SKEIN_DEBUG_ROUNDS) + { + uint_t j; + u64b_t p[SKEIN_MAX_STATE_WORDS]; + const u08b_t *perm; + const static u08b_t PERM_256 [4][ 4] = { { 0,1,2,3 }, { 0,3,2,1 }, { 0,1,2,3 }, { 0,3,2,1 } }; + const static u08b_t PERM_512 [4][ 8] = { { 0,1,2,3,4,5,6,7 }, + { 2,1,4,7,6,5,0,3 }, + { 4,1,6,3,0,5,2,7 }, + { 6,1,0,7,2,5,4,3 } + }; + const static u08b_t PERM_1024[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }, + { 0, 9, 2,13, 6,11, 4,15,10, 7,12, 3,14, 5, 8, 1 }, + { 0, 7, 2, 5, 4, 3, 6, 1,12,15,14,13, 8,11,10, 9 }, + { 0,15, 2,11, 6,13, 4, 9,14, 1, 8, 5,10, 3,12, 7 } + }; + + if ((skein_DebugFlag & SKEIN_DEBUG_PERMUTE) && (r & 3)) + { + printf("\n%s [state after round %2d (permuted)]=\n",AlgoHeader(bits),(int)r); + switch (bits) + { + case 256: perm = PERM_256 [r&3]; break; + case 512: perm = PERM_512 [r&3]; break; + default: perm = PERM_1024[r&3]; break; + } + for (j=0;jT[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag) + { + if (skein_DebugFlag & SKEIN_DEBUG_HDR) + { + printf("\n%s Block: outBits=%4d. T0=%06X.",AlgoHeader(bits),(uint_t) h->hashBitLen,(uint_t)h->T[0]); + printf(" Type="); + n = (uint_t) ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) >> SKEIN_T1_POS_BLK_TYPE); + switch (n) + { + case SKEIN_BLK_TYPE_KEY: printf("KEY. "); break; + case SKEIN_BLK_TYPE_CFG: printf("CFG. 
"); break; + case SKEIN_BLK_TYPE_PERS: printf("PERS."); break; + case SKEIN_BLK_TYPE_PK : printf("PK. "); break; + case SKEIN_BLK_TYPE_KDF: printf("KDF. "); break; + case SKEIN_BLK_TYPE_MSG: printf("MSG. "); break; + case SKEIN_BLK_TYPE_OUT: printf("OUT. "); break; + default: printf("0x%02X.",n); break; + } + printf(" Flags="); + printf((h->T[1] & SKEIN_T1_FLAG_FIRST) ? " First":" "); + printf((h->T[1] & SKEIN_T1_FLAG_FINAL) ? " Final":" "); + printf((h->T[1] & SKEIN_T1_FLAG_BIT_PAD) ? " Pad" :" "); + n = (uint_t) ((h->T[1] & SKEIN_T1_TREE_LVL_MASK) >> SKEIN_T1_POS_TREE_LVL); + if (n) + printf(" TreeLevel = %02X",n); + printf("\n"); + fflush(stdout); + } + if (skein_DebugFlag & SKEIN_DEBUG_TWEAK) + { + printf(" Tweak:\n"); + Show64(2,h->T); + } + if (skein_DebugFlag & SKEIN_DEBUG_STATE) + { + printf(" %s words:\n",(skein_DebugFlag & SKEIN_DEBUG_THREEFISH)?"Key":"State"); + Show64(bits/64,X); + } + if (skein_DebugFlag & SKEIN_DEBUG_KEYSCHED) + { + printf(" Tweak schedule:\n"); + Show64_flag(3,tsPtr); + printf(" Key schedule:\n"); + Show64_flag((bits/64)+1,ksPtr); + } + if (skein_DebugFlag & SKEIN_DEBUG_INPUT_64) + { + printf(" Input block (words):\n"); + Show64(bits/64,wPtr); + } + if (skein_DebugFlag & SKEIN_DEBUG_INPUT_08) + { + printf(" Input block (bytes):\n"); + Show08(bits/8,blkPtr); + } + } + } + +void Skein_Show_Key(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes) + { + if (keyBytes) + if (skein_DebugFlag & SKEIN_DEBUG_CONFIG || ((h->T[1] & SKEIN_T1_BLK_TYPE_MASK) != SKEIN_T1_BLK_TYPE_CFG)) + if (skein_DebugFlag & SKEIN_DEBUG_KEY) + { + printf("\n%s MAC key = %4u bytes\n",AlgoHeader(bits),(unsigned) keyBytes); + Show08(keyBytes,key); + } + } +#endif diff --git a/Reference_Implementation/skein_debug.h b/Reference_Implementation/skein_debug.h new file mode 100644 index 000000000000..7775c0165c0a --- /dev/null +++ b/Reference_Implementation/skein_debug.h @@ -0,0 +1,48 @@ +#ifndef _SKEIN_DEBUG_H_ +#define _SKEIN_DEBUG_H_ 
+/*********************************************************************** +** +** Interface definitions for Skein hashing debug output. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +************************************************************************/ + +#ifdef SKEIN_DEBUG +/* callout functions used inside Skein code */ +void Skein_Show_Block(uint_t bits,const Skein_Ctxt_Hdr_t *h,const u64b_t *X,const u08b_t *blkPtr, + const u64b_t *wPtr,const u64b_t *ksPtr,const u64b_t *tsPtr); +void Skein_Show_Round(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X); +void Skein_Show_R_Ptr(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t r,const u64b_t *X_ptr[]); +void Skein_Show_Final(uint_t bits,const Skein_Ctxt_Hdr_t *h,size_t cnt,const u08b_t *outPtr); +void Skein_Show_Key (uint_t bits,const Skein_Ctxt_Hdr_t *h,const u08b_t *key,size_t keyBytes); + +extern uint_t skein_DebugFlag; /* flags to control debug output (0 --> none) */ + +#define SKEIN_RND_SPECIAL (1000u) +#define SKEIN_RND_KEY_INITIAL (SKEIN_RND_SPECIAL+0u) +#define SKEIN_RND_KEY_INJECT (SKEIN_RND_SPECIAL+1u) +#define SKEIN_RND_FEED_FWD (SKEIN_RND_SPECIAL+2u) + +/* flag bits: skein_DebugFlag */ +#define SKEIN_DEBUG_KEY (1u << 1) /* show MAC key */ +#define SKEIN_DEBUG_CONFIG (1u << 2) /* show config block processing */ +#define SKEIN_DEBUG_STATE (1u << 3) /* show input state during Show_Block() */ +#define SKEIN_DEBUG_TWEAK (1u << 4) /* show input state during Show_Block() */ +#define SKEIN_DEBUG_KEYSCHED (1u << 5) /* show expanded key schedule */ +#define SKEIN_DEBUG_INPUT_64 (1u << 6) /* show input block as 64-bit words */ +#define SKEIN_DEBUG_INPUT_08 (1u << 7) /* show input block as 8-bit bytes */ +#define SKEIN_DEBUG_INJECT (1u << 8) /* show state after key injection & feedforward points */ +#define SKEIN_DEBUG_ROUNDS (1u << 9) /* show state after all rounds */ +#define SKEIN_DEBUG_FINAL (1u <<10) /* show final output 
of Skein */ +#define SKEIN_DEBUG_HDR (1u <<11) /* show block header */ +#define SKEIN_DEBUG_THREEFISH (1u <<12) /* use Threefish name instead of Skein */ +#define SKEIN_DEBUG_PERMUTE (1u <<13) /* use word permutations */ +#define SKEIN_DEBUG_ALL ((~0u) & ~(SKEIN_DEBUG_THREEFISH | SKEIN_DEBUG_PERMUTE)) +#define THREEFISH_DEBUG_ALL (SKEIN_DEBUG_ALL | SKEIN_DEBUG_THREEFISH) + +#endif /* SKEIN_DEBUG */ + +#endif /* _SKEIN_DEBUG_H_ */ diff --git a/Reference_Implementation/skein_port.h b/Reference_Implementation/skein_port.h new file mode 100644 index 000000000000..e0dcc85bdc62 --- /dev/null +++ b/Reference_Implementation/skein_port.h @@ -0,0 +1,44 @@ +#ifndef _SKEIN_PORT_H_ +#define _SKEIN_PORT_H_ +/******************************************************************* +** +** Platform-specific definitions for Skein hash function. +** +** Source code author: Doug Whiting, 2008. +** +** This algorithm and source code is released to the public domain. +** +** Many thanks to Brian Gladman for his portable header files, which +** have been modified slightly here, to handle a few more platforms. +** +** To port Skein to an "unsupported" platform, change the definitions +** in this file appropriately. +** +********************************************************************/ + +#include "brg_types.h" /* get integer type definitions */ + +typedef unsigned int uint_t; /* native unsigned integer */ +typedef uint_8t u08b_t; /* 8-bit unsigned integer */ +typedef uint_64t u64b_t; /* 64-bit unsigned integer */ + +/* + * Skein is "natively" little-endian (unlike SHA-xxx), for optimal + * performance on x86 CPUs. The Skein code requires the following + * definitions for dealing with endianness: + * + * Skein_Put64_LSB_First + * Skein_Get64_LSB_First + * Skein_Swap64 + * + * In the reference code, these functions are implemented in a + * very portable (and thus slow) fashion, for clarity. 
See the file + * "skein_port.h" in the Optimized_Code directory for ways to make + * these functions fast(er) on x86 platforms. + */ + +u64b_t Skein_Swap64(u64b_t w64); +void Skein_Put64_LSB_First(u08b_t *dst,const u64b_t *src,size_t bCnt); +void Skein_Get64_LSB_First(u64b_t *dst,const u08b_t *src,size_t wCnt); + +#endif /* ifndef _SKEIN_PORT_H_ */ diff --git a/Supporting_Documentation/Skein Cover Sheet.pdf b/Supporting_Documentation/Skein Cover Sheet.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c08b0e11322df7ab7568759245d39d261dca869c GIT binary patch literal 44070 zcmbrl1yo$ix-E=Da0@PtYjC=8cXtWy5ZoJg3+@mC1Pdg1(BQ5Kng9vzPJrP07TNpU zd*43iANP&%_ZmHVb=9h>ujV)B{8+0KIyEVo7o2Q77<6mvfOahq4>vyt54#-500iL#gE%34 zMj|2@R!){rvw;4?3j-DEW@Y`f5*H^1D#zb1VCbJSkkfy)@$vKgTN^)w^Pg>;oIHI0 z*2cxp|8H$zzJD9b#mNcz=XG41yj=fmgK$9pxdtaEHwX7WuH)o^aQ$;ECqF00KV#wI z0`vcKU7(4V;~(>JbAUPji2*kc59FU?xxrxWf4h#Ko9o}Mh_sXp zSe#c9%p=LkA;H5b&BHAXmKNuh=H=t!l7jGw2ysYoNJ2P)OE{$Xcp*Hz{Ni9qacN!| zP8kRUxQIvOiA<>O9%gPHf3b!G1C@?WT2%%E^`F$jK;`Q5&JUIRddQE^>&p;EEpC>>mK_U0~Juk-z)vCk0;drR-3Ywhnc0BhZzV+B!Hxv znT?e@2p|}6uI1xm#V!f-IXK(==}-4S{5_|nvy%t#&E0_W&l;d7o`Ez#oPUxBddmLOw?A10aX!rqd;@3< zd~iKYs$m4A-2*@r#KEp?Woc*jZ>wooxjTEhSpb{(GwiQTJnibg1AglGd-r;(=C74A=pCW7o2RdI0kRYfBihOG75}M$2jfYyH`=2=MxsY5ke+@3=HQ z%{`vb*K+f;`jdvIb)~G_E!^x}Je=J?Tz_Rq$;{nD($>rk2!Nkm*Ur+z*4==M_X*Pf zd~ida_WrK}{3nKgef#GppabAa$e+yo)yEIyjECnbMSmT9-2Xirn1>$(=KKF%nU^07 z0{P!~z)v0o z;sbLVu}he_TRow|E^FoNW@BgeLc-a>5>TDn-;i)~u}eDvva@rt0ibabcXGG;ukZf_ z>Hk3gDTV*9=>Mga|3;sS^S{vNd79@x9b6D#rw|^%#GZPe43>u**cdNA@a6*Xf_Xu_ zTztUi)25%$dOG9&qmqBk#?J*{3=|?TfESntP}dXYya1X{MF%*5f(QV`2Y?6kbMS!p z0V9Thfj59E_z$~)elRfpkD@q0{E+|7<-gVdt^h{zf;m7uz$$<>f_V4<=mDni0_6m- zMi3YO6Zd$5B%z#~9Qu1kDOVCpx1Z?8JMD-Nx-)U9)%bR)lczOSWJO8&E%f%e%8n~0l z5(IARp3x}8xGuB%yk?i5S&E-!Qcp+LrO=C`Ge>wPr+)YA^6fPD#54!Z*I-NMS1buT zAxF*~?uWmGZcij?M!&A~LysMVGL&jg21C@OkK=lST0kjhQG!c-K}5C)o#a*e)kP@_xw6G z(1{WJ_EMyO1)4Xl{*&{8-o)i+70#pfb^00NT*Sa&dcA3cHT_jj>WLgmGu+wn+O$JQ 
zvF7Jd`~wa)e5w{S2m?FoH2^S73%VFB3}py+5l`qhlwT^D!S7GAtNiH$;)Yw#{rPhr zMfAm_Km-s--FZ`7yWX-&cl>nurh|wJzo^Bdc;UP53wh7IhNFcZZy!Vro6sd_(U`%B+Q2min$+ZP zjcy*ilIXaIIHg}@YH|$}ZF^JWoqAr{I=uZGYJGb9igUTEcfKwe&MA(#NZORHA-Z9` zk&wlRQ-|}19c9Pv^`Xh{e*aGoZSDt1LbqNb1`cyrb?2Lh)(t{7gpj(#tkVcews8d9 ztJ%XdJZ#?xla|#z2d%l)L%Q3|SApCn#?*)9*KgXhUJ#_7SD<5zGo|d)es&VN=D2SA zY54VrsIKnlcYZ(ac?au*%;7b>$P0{bOQX+%9Y1=MXZD5I2D{Er`L^G=H@4~X&No(| zf873-jTGBG@;oaeOYF#B)PbvUF=I~s6#==&SUVE^#r>_*58P&}Y!~r9|9N^iv-IqV z6&TIzPF}AZcbHjY%-n^PD83Vqql9nTx5}Km4!=h?H@3Vmtl~QZux5KlVto2YczjnYXD!leFW_*8mF`uFhT_nhy()*7wBCORg#tYFNB*;H2} z@;M!IFLIxyvenyce&|W*+#z>yJoGKWKl6L0^{bVp8;wP2%d_=dfOphPI5>S%gZqjB z&M4w6bLqw0TZ{)&>W;i&HKH1m6=nYR_w32tWfLhcR_F$L=IQ*!7_=hP&x*UPjyHG@ z!n|RKeS>q*$GgDLV51d280=>7PdK-NE=_pv}bz0VX{6$#T@2&03nU@4*qV!3VXciX7*Guj#i#qwxIq3{=ohR&I`4wmqxxn_kcP%Fl( zn&TBy$~Ht++%jxtxHA}=Xj$pV=%YZ+U2wv@!&&g45xsBM>ZO3M7?XbzZa^hZ-WF~y z?2p9T_mgJnoi6fyK6Xke942feQ`!by4b5p9=g%Ud;%Ht-7j!+qETO3qpqiZ^JCW;P z;f4@-pi8wg7r$t;99czcbajdN0A0mx53$!Y<)U$tR7->(jlK@`jZ}We`je`LkUHbs zLy1twNira721m_PgM=m9PG7Gzj3k+b`Xo+39Z}LO zQ+m$p8mu?7$_o-^A7*d+c!-+I)Q9Kxa%K1VB-Hxjc#09K$kWSb;bJq2(2=PZ z-!-lSKo@&xxC-ho=`NtT^%cpn5IIc@ayXX`9MkIC>(jL*eQf9LvzaQb-1qjAwk__t z8XodUjjcANKZNb$%~J4@@^s36Df!5z>GHhDkw(N0-{-Z+LQy8yDz+@5r{S%*(?QmH zt5IP0SyJOQYgi72Jn;-YPXWS<&p)iD-luWPBwO$>V8FJc2@*ln@90^Tdf>tbbV7q| z-hF$X>;5KOqF#|Bg>bs^i;Fr60^u>N73vN{TYO2fL;3TG*9wee9}E1xOo|T{#z-?WP`5?OI zU#q_TtoHhnsLYbpCofzP0RETt^EP%bv6ve1~q zK>%1y(8=8dp0qWBbW zhDymGZ>rKiRHb9I z%#3~6D(&)Wx&#jB{k#AM}gItFL4C*t={i8mbMt9 z+O;!m`;ZoU$zYY8Q3*+}(!wEYDi%r~uljhu<%1QQwK#{HgqS>0Np?I#AAvxo=49(5 zS3>s;ktrjb?u>v_+gO3Emb9@2yZnM%*VO8xKWy_l7kEU}6J9*E` z-iJy)ci0tnD-AVBGemApi)~41BO`vzm_T}NsJ- zFx`K@LgHDAw$nd2)z2U^o}#?&Jh-#=1e;Wvi|o#t2)5s17e#rjpV>i#WS$h^CGtpU zFU~fj(E>8Hy)YwAg4#gt_%F|nwZI5{XA`DAZ928l|DS;_M#_hYa|?B^X!OigKO<-)Ed3ngnyV#tV`Pxz12;T};b`t6E@ z?vK;BhDBa`m7fLk$2De5#kZJ$>`KuUwJdzx02xSM!mqq46zt)BeFS54lEi#bIN%-QPd*E5Va)gzJ7`0yM zk+yOKpA5-UsGFs8hnB55!9i=b24Zk5~+C$alJtxII&)NHwgsRdp-KnE< 
z1HrL&{lQ8aW06DM6v-$3U6a{u{^QVLMg(WFQR~NJ%(2JJdx1H(#qe&JwqKfG(0Gt7 z!d87+4sWYjR^{KPzvYzckvH^$Y!5s#qM&uOXtzFb(Pc`v=j{D)b}ORP)3-Ek*hr(`z+PlMZ(UJId$ zFVy>|T|+ObuMmuVR%6PMXH}dy%weIr0b7~#7dYer7x)sWH zgh{xMzNoAj+Wz=*(X*^T-#gV{wAPVU$dT*?TeqS`k&re4(Y>#O2f3b9tS?{wtnQp6tQ=ERfC2g4Fj$v z#yZOAO6@!%^}BbyUiAB;y|6GCk`wa_PsYN<02krD-uaP$5x$p(%uU*)S08%h%tk{M znM+Ofe|y{_wf&g?QB7Sv9OyC|)zq4EAGdz(36_5T@DtBAXq5i@u4+oR4Sd3@+y^PD zHagDi9o0N3gN@^IfqQwbtDHtyjgOrSu;({n>WyC$ATyLRL*c%k9#*bGmW4cAo|7LK zvi}LQP?U&l;tl)*VpKfoUHRJbq`5R}1>{j;VD_m0ZeLAG@G+y|?UK%&=D_3{u9E=G z!&8trRVT*Xoip3J7Z_JG2UohuvwfXAPVReyqU+F>UAVj!l7|L{iHEhxis^h%s45cK zH-Wnt2|kVqbEKgrAj_QpVLfI<;}$yL{uy%Tp5N^EEZa zAUT*ceGnr#FnsYRPzfN=V-~EPrk436laf2knLXIf1)MZ?f6Gjtv)d4~u+Pp+z@X@9@Zg!)Nuacpc;dmjGy% zYOMgKBT*4%)L^G%&CZe7b_( zA!quD+2O~$U=+VuR%U)vIzygWH>gv)1( z&&a6#A+w|zao@!8i@?92Z^=F|&uJ-m-v+tkbsNEn&-S|u;}vd|QUkV?S>7dk)pv{M zP?`xJAi8b11UvGpr;Kazi02E%G?tMWKLx+z=T%yS9XJm6aWg8+A&2ajVyDJ>>}Dn| zk89ToGl!|WRsE&HVSnbVARkKx{N?qq*|_MpfK|gwayEjGADV83&NweDmNj}iFnM{( z$6Fi77Gw^l0Vq$uz7F`vs*!<(dde%T1abWQ?2S(~ef6*(2X(>lVwVk>D#Rd|ow@5t zJO+U=BYuVGA~-HWqRz}cXDUkcgljqfx?YR5N7jz(@|XUyv7WE5w&G^rIc=b6 zZveb(z4m90YD&LObFFWVD;huv_{gC#hkJaV)LVV-(-J_lbZ~e8@Y1rg4pE01WzUZ!XdME0G>o^%6m8%j2GO1ZGy^8TG)5jO@q$YJm^hrxj?;dwNjXD6qE zdgaJ>8nRb(QPs+js~6hGPny3oXSQj2e)k94O!g-!?L35#s%mrr*q(0BqD=RV1q`|> zA1LBsrZ^pNCv1EFc&l~rho;R-e@W^1B|u83WJ=6UpLo4+Nu9tsATWc*SKpc*{e^E# z!}}o+$~2-ZMO856TY=-*sApB?p}7Ws#rWkZ#)mF{pPp@1ag#<#6DY<_0>OSk$AW4= zoqN{rm*?k8PtE-L?B1R-kF9h2fNwZa-%Xq7H;n!Zj$uHf+(>i+c;u?+)Re+@pBwud-&_X zBHys1dn^({CIcP~>TJppX8ly=VQO}97@v?LmA$ju!)527_WMhoX&dQBl1&D8uHX3g zyR)KqgF#CN#b>*VDhX_f)BbN5ws(uxZXX|XOM_8N!Ktqq$mtufT{Mpw1WV**bC`Zk zx8@nP5lhBKevy3pRz#ndE+F}xV1;OVj=Vu2-jrOb$*^>+oPLu2bgLnPNND1lDoo89 zf6~YF2dE&+p~5FA-#6>SM0b8ZmBb%Pf?P>OcXPP{Tct2asL?AYb!e0CPSbG_Qnp%` zdN-UIHX>o%+$6r+XI9A8{dffrRJ#Er!CX@*AnDUUniJ2Uyoip zwaD=^M`(L|-{2iFZ{V*zuoSA=PW(NhC_pBSQ`Ca6*|z=;|FW37bo@bSG>eT&|K^+q zecKJ1tmQ5=>z76P7MJ{NmMBA>>*4f={zDwnknB2n=_Ywew@nz_2eP1*NY^`JJ0U+b 
zwiT0R-?PrG<x3GO~WzoPwqq-vjOC?T_pU)svQ840;J`vb3~EL61JSe)`bf>)PUq z>NjYjzYLcRh1U55nRG2Nx5Kwv?po4lbEoq>*k<1*b(dMIH1OVi-D1ck-xF5-R_*kh zkV%Sp%Wh1ilrLZ16%%8M;NAwt=62j-Gz%8^jW)}Bt8m{e2~O(h-zR6>kSsKJOgIw{1(x4k z7Rvu|4|6l5P7Z3-UU*#$jEv%~a{l&RsWuj$$*e6)e0a&D;2f-dIcbtAr;ke>HDS`4 z!iodYro3;k9ZJ1`O{zTemNXT;*AKc#Kz=XUr-!|lyPJ6M`sIN%^6ZY-V~B?u-)MP< zs9;g-*TxYhAp$wvxtM-f@VL@R5@qQKc$Eq`7* z09i_1H9+KKg+e+22)Oy3sX$m+wW1O@3E8|})8z1;i#=^QWTNSXZohwWMK8smf7s@z zG`Np#t6VqfJMUF3*3C;ERGS^?4&R7RM1d$rylBHy$1#V!?F8|s;h!lMS_2;gf70Cg zddwAJE90!^nkCX>@_)yQfPJZd&EP(^JYCS8e!r%DSXv1V17)Lr9mT*_y1Q$wF}+#W zcVW>tWg6&)l3~8&wa_Gs4Qt?mN@1V#5KZ%7Bi(+JvXw7zMy@r@%sQY*By_Ou-g{!t z9h40HXrjD8ynKh10x2x)`zSBu2OB{J5bmaDVIradkAg|C^5v>2K!y|!q-XQ#mN8ce zBJ>WEjN|$PBQlI_bTv7sW|lwrDG1T{2b|wayb<0iHSfU-?N0x`roFVy#E!{s7#2SB zd3=nd{MM+M9dlbmH1zhs>1bXlL)LT!aFa_r-#gQTobu1k{@CB=Y_!*gg`;nMH>F&D z3q{`uq*<;*1%?j$bbVUC$aXmpBl-KCeGFFp-OLwvg)};vij6WP5M6C7rZWq}+Y($Y zU*v&s5w zPyxC&54AB1UE}E7yVW|k+M-#PGZ?aAR6G$G%A~-%{$d9?b)-Y<>Kh`&k`nP1y64Nd z{+RqL7_^t22)0H4rT%4>i6OLTL5&o%r-P02hiKmHj`dl2yU7-vDLeUKpy1iZIV|?~ zi58vRr>9Z~=(QAA03*&DP}=^Xx95NBjWg%4xDy~nu~o1`Nhc}X?QUWPtKT5}G^IH! 
z%K6xt4ZhNT)D$c)@uTI~wV!|vW5PxGPgO%lddbluPk0RS(p@%NB1$}r!$6(|1psHU zUJnq7?;dFY8Ocv0Tc{ZJxvedp>V>-P91yqr+Z=J*IXe2=!Sz8eo{Fca%?~M~B7vwU zChWgd_fELfvU;}}JV*W1;Wd?OIR{b~fRAgzLeG&J2{nbmg6Z|5(+5e8zN91s z%MEE>h-XylK>?p2xN*6Nhp(ORGN-C&6q^;aDA44nphQJgjYjI%5Bz@lVQ+Ltc1uR4 zAtW%w{4>^diR;ETQVJkJ`p$U6fKU@5>^o}6nci^@;l{RC@-nRYw^S1~p*GoMlhU{7 z3BD`|)SUp{Vz3?aDuE4`qis?z_Xk6k8x5adN}sa$kzE3gk*QDE+6*{`&g@#-MTiMd z&ZR=tFM$&o{Upx0SytLn3+y1u3&7qqz32yZ4xU7~M}szvESbL_E`sXH)^#>EAq$_p zuooo(E66*u=>DLC(|s2b?P7KBqkU^v{#8uwy##wRY`A=pR1gWyjZ*m0m{fCmg&(Lu zlUw8Ts~l7c|6DE$pZ=DS`am@L2DrsIK!08h4bv1)*(U;2#1_M*+cE1&5e|B57iH@srw z9ky)!NlZB^j8gcPZ?CSk%-I9w`_Q&^zosAd@AHx2l@%3>tZGF?FlM_k@`?5mn>%jp z7#4hou&;JkDaPF+#~LZC$x+RRsH;6hOV1Yr4u6DJ1ru}x*8xuc1QJh#>A@hVrVc_mjI2$8onk8e=>AiP>qAReRQoe%8u}^~uLyc;7R#6XR$c zs3_A72X8?pWC{L(2&dW8P8;)o*bKPFseWMRdfE74VF!GnUZ}}jZ-C6nF-1S%%UsRG zvlwtsv=D`*m`nJJ7DT6f7eF2951d)yp&SSd+uRf&olF)!)~yEZ+Y^hl>{Lq^ogwLY zQr`2%q=ni5G(X=DsE*5g`U@!?P}Vi>yi%7Ss@+Y+lPF9#!sH+8%G zeEFL8TduXwNaJlMR~BZPuT4wIl5!lqyvB-SoB~!RtXAYG_$ZDNaJ0BxY^VCDM-Znz zr=510y{_xts9F0tBu}O_WY^mlgUj0ae#BRxqI;Y#G1hVE8ZDH+GiOKOyOb?Xbp(=u zTcT`}p;=8criZ8bzL@O1T5!goCTVchP2lG(>VR&G20d$^WH@ot$!a{Kq7}ztMUJx- z(o1M=OeoDPA z{Icymsrb-!p@B<$?-_r1(CdO8uKp9VT%hZ zc8l96P{b)FL0zQL%??+417$s(__0xD19`6gMiSLba-)+*5*tBANS9d~I=)CCFb#*a?3Vm*u;yp4(_rTm0$sFl@! zI-3;RxJfP3thi#gmW?fv`31F8m3!5t=!j1R4_AZnlM>o^ZIF_r=i`y*`w-#^XnkZ;Dx}FxSNhGiGx}?;@MSg^d)Q}j{fxdDF zd4BM)U+SwKv|yz@p=irc*pj1OeKmg3?&M3rS{*!Wy!vs8cH&t=vDR-S5)1<;qvlOVi`^1#oVnXEmN2c=_DyLc7*+oUL>fMx~p-+ z6o#xo7>31^8y(d^P}D03x^EvSFRZID3Fw+JlA^3p>sR@00hZm71b>u+;_Na>P;yTd zTYg6Vz7~Y>z1!qrNlFh(nsb#Uyf*zzVvWNoUln*Lic%L4ZgndTx#!q*r zF7vSoXmsvkoNt1B_Qd6g2(!-Na*;eOL~sp`&Ox+XuQ0s7{p1LC`g(o_g+3E{%{XkL zcGuC`Ky0kjs>%-OIqs1o6RkgHic=uA9#>(1+`Tv0-Q2SJ5q$y`fqf-ik^V@t@&V`A zr~ZOCNdyA_irgG|<79u|1}5C6ZTLSufOz^>a{u=O2ySj3zW?yjc4d^EAS$o8l^!Z-*(-9EbLwm zZm-Q8o_!$PpPk-rH{pY)yD-lh=?~* z;FvYe*fi0%IHXb0_FtTBe#a>gh4=jQp8k?#{DYd*y7U!KY{|A5BF7eASiq*wI? 
z#UxdH9>HJ_BJ3Dw9p>-J;*`w(J&(8LZ1(~Z6SJ9Ffgd^Vm(%3l4UCp>L@eEGD(t7e zMSSS54*9)8>`9595d>+Op3?Uf|tM3-x1 zUO`0nNEFAjh87(}hS63`m_c{dyWUI15R0niDY6AosGX!-zwdeFMr9^so$cn0PG)uI z{bn1uL}?PaVIEJ8A+DzX9ThZ|o%Ds!OmUIs$gb6Vr@I}Kf5%xAf3B!`C}NTJhfwj= z=B9_V1ToIX8@bvba?==!&Q*9ehWW_+DSVPft=DVilQpd!`=9hzAS@ilre~Obr}U;& zU2G@gQc6pY55G??J0RlXIXM)PK_2XGA6JmzVb)?lu$C{^<-PvBuaVijEwZP@ zje^s}*`U$!wegqrS*BOfjgsVr>NKjZPoICqDC*4XbhNCDcZ|t7^82^CFt=oKzgJER zU5cK~NxNYBs~`o8eH17?n=$&F^P+SpITtl1+5`14PUzWKjD^q{gO!6Kjs#T_Ak3(A|QhyK7=D3JU#$j^k*J46; zWgN6vZ7h1lxizPagT_3Ge@G+?$BZpGUO60!7v5Y8{Q}pi84`ojE%=THG0FBhfwP1; zG40aVj9w2a+JTTHTN>jI8_Bh3#yzUZE1qFL@xmwB)%^AqFI0+;K=P~ljw@F|u;S=o z_xl%0jPz^mq@8b{B3zT^^z$J3(wBtdPoZ)p*i+Fo+HQ!5kG+uW1E-UTgziR0bMYFp zA~q??L}TDy=gt$?J;zSOAA!YTZWA0vgQq5OQK`j3lFE{|G)tw#6`gQ8P+CJ4GUvIG zJ?V`vu^_n0MO6w$S%NKPg^{Yx7>+9OhqCKqbxTsZ2Kj?25~2$!uyR%1+XyKX&Y@78 zfU3Wc?S99PdR+4|xT)4iLRWwlry{iX-2`4_x0#L4k}tun1VR)BZrcFM5*`&fIA%kH zs-P3L>zMRbAL;(p^SnnkWQ@oXJh*vPYtT42#XY+fThf>2Q3ao(o$2^FxhsFr>D`s*{F*fN931KnHb=G9n`PyU+e) z1HQT$0B-HT3vsy!fCq|6&Z9Lo01v+GwGOUS#Ugcj+H^Ve^-4yVi%5CT zw{LjDCWrciDI>+`!Mm%!LXb%!y#W9&(QSZBY#y^ z><1=GKq-8K(Qs-P?HwvvOM%7Cwz#53KL9ta0~hJJy!BOa-b|L6RkNZ{g5%4|fY}qC z9t0wc-91Q%#ufsxaj5fBZb6_<0(G+@1>+3m%R-3)5_ZBbXP}}UdW^vRG1DIk5wfV8 zT4jfsnQa`;C6btBX9vv-bf~iDHX%0y7e-V=$iui3U_cN;F z=*b<*2y6&JqW+3EGY2O%6xs3}46bW^*0Y$1Bq+oHb2=i42w9>q9{Scb$u<_`3fw&> zp^&Sjwh_|)iQD_mIEhiyq9A1CJzdft6PVmu#=!+l0EBa6im?u=t@H2LM0yd{hxE+Y z%0-gte=4hub<9IjDw~9zK*^1Y3M{wb?PT*sM@&eA5n1ZEaZ%ow2zxTv{dE8bs7Hw! 
zE$1%jsvAd?Rxp^H!npCO=xTLfiWBWl1(9B?k6a@YrVx2`I4a^vST;iF@FyIMZ9?nE z6ZosEfE}DnMDPsytrk4lfh+NTL=+eAl5`C$;xh^zv>KSU-jYrl@(dK6ZZo~wrYJyP z*Z#fn;z)nwMZ(Qi4Crcsp*{;UN9y!bxRV1T?s^GzP-?ANtW4l`TQmk1!pJ1H|JQZ_ zVu?8L;4E?~RUFM}^)pOJSNENmOLMLBU$%wbEeN_qv?S8~mSw9K8R=c;Y2C}0fB`8{ z+a7k{qkZiQMh`*qVdudgsa+}TDd`|222zTpUm+(*JcD+;!1wY>u+S{o9D~#ewX~0s z=b&Qj7vfn&a`Ka|INx-p0ndNjGC|831!g%4Dio8RVUpP|joC^fJmw0Mms`j=NKcN{ z3bL>!7w^j9sS&RoM9`C!9Hx4Cg|DQrE|8}rtvTvCoV88bI&8@}x{sz;8zpFcr_6}| zC<{DkMkE)dHoo1v8VvgQ!$p${<3z=UamHe`^3`Pfleh#?z=6+5W&-oK`zL#r6VXTU z%ZJpevv$mE0e4+SL$=Thjl@808`v3_;Bvql77i6Mkcy0F_R-f_j75)l)n-VND@SX&DTMTr9xG%fl(CUHuZR2S1!=wBwa;buVM6{2d6r9M!g zjWg?!v^E+z(khaax?y_UrM4+Dmyj|-&o+1 za?l(eHCKX2rF}Rt7e|c<2mIsLLN=!X?1@|f8Fu$KF9shoF^`QqRU|3A)ClE3C|+Q5 zEzZ}cI{OT}Td<236=rf@J}ScFD&Qm87V0Dird01cmlkiC-i9|Avf=9Ek!1 z1MaZD1bQR}u8+JSgb_dHgv&1OB~ru-lIwsvYDOV!MjMIa`b=4gXxGs&e-7LpM(SU0 z`i|-QTr6g@5Cco$@Nh%DyW&{IdgM5jm|2T@T7S5W&~u=#cTJQ%zw8?`)f>rI1*$b&3eQ&ib%Zd)Sf9^z9$?<<}40INdna|3xKtg zU`5&TMZbSIntgEw+tPZJx+~&bh1Jm-wKR?Kc<6MC0xddT9iIuikfw2a8DihqNa~dAd%&W=@_W{Lu6sjlv|D zy-McM0*Pp)_nq9{J$#VXhqU)l1woO7Y^g{ZDO7>d3KC4~ovR#r-Vf=kTpQyn5~9R9c-BsG`D=yHYG0>Iv}-I| zrPP7+LBp929>W_ya|cNguUa#3eVrb1?dUJZABSXWwZ0!o4L{(;I!c+&KD16cc$Q{U zlKkq^h|i!7)0nDFE2r-CLfJ5VmkF}qTfAH9kFUk|<%^1AT;Bw6njg{cm)axyE9-+i z-AboED^d=~GRZJQM8`-5XE%lx9F7*yj!iH@56)LjFmA{koxiPz+Su))t0wB2iknot zR~Q1O{jxfQJ&YOHH1FP>{0Q;y*(jkxkcEJ2FU1qa*tSV?ma{cpSC!rcJNY9N_R5sg zhT@DTR^ zSd6GYUw)&l^U?3to+2PNuP5+`Ji?`N@L1M8i7u32o8xCDEvtZKwW5p2`{v?@zuWMZ(;MT9)p z5byIlTSPpV(%w?TQ3q?WBwOdP1)269p>ifdCbK)GvFRB1khp!)4Vlc@%_3ug+?JNEF&1-wXY&^e7->&S1)ISvN_(`B(70OAV{hj%S1Db zW;Wp1R`61pM@SJ{fu>1`YZWYiFg1HOahm6jaB(o_4MaP>vTAmyE=b0)=gIKQnF{9)Xj>@$>A*(H_v&QEu9zW5Zu=bSK>_V z{JyQHpP&1R9Q+ z`w2xF?9Cnjx4ZmM~HrAEbym}pzIw-?;NWtf3l6psZ^ zLOT6R6-!FDz&rOJDO932iH2>hE*Q|PiO+ccEx5~vPq#yYC$9sCV3N=Ox^fWR@Jw1j` z2RI8gIW;vJR>T>mV?8}G4O5Cddh<0k3KsgF*{t@pE{bNN1m^Ce?ZvQYRu*kZXde*S 
zSI_;rldN&QYa#q?DEt+yJ+P*lXapqc(jZ|&l95#}Q+?C)$5Lt}vmfs&$>09FSCQlFz`!9xIlVYDdF=VLxF($5A@dTSDlKC?7s;;)E=LvNB8Lff1z!ju~x-un36M3CdTnqNhT|KMLg~w@rl=E-~!^)qz*60 z;ZdVDM33pJJcy#g*~V?Eg-CuOroI!B?2=Eq!aR;e%%Z~aLw2ecWSmsjg=(&;DERNa zpk{sFH)nMrAQxkXOgAOPVWYW*j%!9pYi!oBR#3nz=<}7UalgEvFPCCvyX0QGhi`}XA#btfm8#&09s10NR@zZGn}#Q9me1DJ)y`nwg= zpHq*9ACqLnr*{U+dUC_#e@#3thv$V%p~*`O40&duJJ8zxB&Im0Wyh@XN1lnJf2Gts%fiPEaATzT>HZt6Nc@UP(hh3?YifSPML#PckMtAot-a0o&5=tM zhfrw8=6kW;b0}+ZM*{{KTJ%B3nPcrlo@TfEcGq+f(?TCse_>hNyB&A%1jPE?7Si;N zh8MjASiu%|3DqIzr5#Ba;RU8B-fs_r7hT=0H0>xpX&WVO-#-!5e#%*?IP97@e%q)a zf+@_Zi#7WjuuU84%9#w1&DLi2kF%rP$5S7RS$7rZoA)woPE$_xXT|dMnO{!6xXf}{ zFHn++Pj4$DwTV?w<+W|cExRmzKBaT-TatODJQ>tYAu7i)wsE=u3SGyw^8ew1SH>rU zDBBoDyDygE)H*n0%jyR&QrTQSM)u;^d?+G0AjdL(!Qt7UbcT3$bkt`EysY3GpUqQ~ zlSC}jN}aMZ^mhpjO3v#ATY?qapU=yXwd7d$(eoSM>esnP+;8U7!L;@=kH)*?}RX&N085y?(GpGG0S4}rf|Mo{gTnp!0Hbzy@q7&CkK7H z7AL&c@nhS4$3b+p8B=$cRTQ>(n>9+5P{~0U#gXLwSUgw7O}Jw|?qpG={n;{aq>`Hu zT*RNr!=+pDli59_l%}3z7AL}U3(gq+BeorUiyDrpH0Kz;MU#x{{uirSknTIVZFFa= zs=AHYY_uHXaG5QE;@UAoodEqH;>^?93+`FYWbG|D(0XRQW$Uwyo-mRM9DTZ~HC7th zfDh~KecDWo3Wi{{J>k_7Wb%3$%iI<<+HO?OVb~*af`h1U4M{`a8t7|PjK1Z4k=!Ez zD-Mcye_HiLsl)I_YZW?N>^^;YjdgqeW%u-Kw#pl)W+I$O#3=(}j77o6Z`MB0jLW_;ADTFjUCtOoPZ z@ap%fUMkk0!0i+x_@3cM8+}RGiGFCw1GC=8nMrFp(ogAE1n`vcCQhDA*>_8AnJE~% z&a;Xij0Zkwyu>uUb$~S0i7`D;F~+yVt*V^D=i&#=em7_MVFh8TR|$3fs-jTIc;>EQ zLE@1@C*l~NO8ab19^Ct`G}R$6|CZm*)mwBzzJ0AWRbxE%B|{8XO)5ceyV@?TK8i=m z`RA{@Z)wp(RG9Omw~Gfjc8clTWd9Fg?;X}u^KK2RAkENgXi`E6ozO$kgqBbOgd!ll zgY+g{N+{9{9i>A6X-ZY9N^gn?NJptk5h)@dh<=;rIp2BT_gug0I{$>7&Cbl)Yu3GH zPwvT#Aso#TU66@1DbJvy1?pK0_!@{cSo54qafU-V)WQUcyNL+vf=i-Ri*q;A5M{KJl1LGLPrat3s;37q$pfD|z|S)Jjh?R& zOBxi)dG)K+=soYbEoBhw$f7I0eQIks)v%>c7H9~vmQjA-qlBH=b^fE_iKTaOd&{^h zD)YL~?t!vp#EQOgvsg!4sS#u9J!8hYs7G}f)#j7z_X}FDzJ@qnbs%sl^s%uS+w^}bi)z#BJ(bZw*v?>OSzs}(?)!PC2HPznZ zOEbokZEjP1;f?_pS@vch8uLWW&AMBi%nX(t+j44ICyQ$nTKU`@_|R_}OD)3m%+fYI zI*MwMXE!+oYpf1TO}O`+{Tt~VZOpFsZW@eeFTaY+bB@w7i6?8Qbz%E!I9mPm0B%;k7AQ4TZ9Yie*qHWytE;wCugs;mc6FE6 
z^yBCzaZMxDH=+CcOX7Zv0sbF9Mz98et#4WGLc9VqkUJ2TnQxGrzIt2hgaPSINYSf0 zzjVk|#bLkC+uMZ9)6Ff&R}!HrJ$c#m8^1*h($v7SsIkSac~m2f3!#}Vs3i@@J&poc zOrByPNm)<0f4>|!Gm8>C+^K_2a3kFzp1MtJfCAvW`$#UKCtr>{uJ>ihdP@zU4Y5juv>TG(QQ%p7jUw5pQ2OiFjh#}5fuN2!pJm0nU`Wtrs zUcL9;^}lBhOl)F^4nC`h3(JVJlf3p$ad>z9XJ*Fb7OA4=s==-;9i2P+Ra~2U@U3u#9I~|`-#Rd;$`A!Lz(KDx*uZLo7KdHNCzNO!ycr$ZTd_~ICBbcM~ z^`<)CBWn}rn>@0jnAdho*K2+W7I#MNWx_rCGm*C*tnJ;Z*tfUkc+jKwD`1@V!S_Y@ z)#)BK`(^Q-n|)rwF_UHQuHQPjSMaRAW8rOGY|W01(N@3pU-@S6^pc$Y#o`&9FO2@# zWUW?CwAtsaL&evx8aD16pCO7y?sC9nxrH{b|8_k8FjgP81MAQ{7AxFFo36R|#8#?@~Km$z->>UPV8!KYwf|Fx8Zio$Nm}v?nlE(9T!Qu3cN_{=+r}G}CKVY4`g3syNxqU4832K|KaS zwQft~7Qq#^HwE9<-^)>ye>S;RyiC$;4zH~K#Z(^jsZ5hfUq9i}YB`bp~~ws?E& zf6ds|o##gBK3|$G#9v^pYDW6BRxQ4NA$;#m4>R`h=j6~2%;!Y696qx&u_cM7h>;E~ z`B_AST-Y|LUfBKwwWTxG=M}{WQ;+>ryRktg{Nq!82~dt-j?bKW{%D5*-^l^Ha9HA6 z&U+>>t)Z0NYgPUxPrdt^-9IC|A9$JS&GetczJAInr?Kd}PZjv>ebcWy0kZK|F6wE= z0)R*J&f^g)70j!J;M0#?{Hr37*Js`X-BJtnKYY%H5o_FAk`%$gr{7FK^)- z74(}l4X57rZu>?jo0F7f_Eq)kz9YwU(CM0xEweHvWzrZd&vq)U zcKG?h>x-7E1_Eg;o1ns-R;Ra(&v;orEWzDgzGbVeSv2bE&HiN?;4WwUXk>gRuHDlk zV#%sNYXXiE`1LJ&+)rlamxzAi1FFm~0rtXtUenZ85ea##U3shTx21Ra%Vh=Pr*s`G zUgp`{8(CYNdaTio4#vgx>E;v9>sHt1rFmi_giDG6cbrxraN!G{THjJ=cQ03R5bO)g0?y_b;p?TyQOPxb;zWk z@vn8C)@#NMbSUFrgaaWZedAmfjcTFQCKWy7vQ<~(I$daoN4S`Z=HYJWEbw)m>54avWGkAbzmeJ*`AAj@-aE5xKOmbSk^h# zNDE*)(?&dTjR3H%1r2hqQq#&OHrKj3J!lF=l-`8$d1Xr&XaP8(-Gw>jSUbmY>>?k!sgr260rItnHNVxNv^vOMWO|3fJ2I{>@ZN{fI^Bcxp92&j@FAzYvnn=CC9y(nt zZ){X-rBp1fPkmX(w?9nY38ep~W1F7`pG3*E!H6H`$&)wdS(qL3F0GFwvVG;hp?9h# zmy%eyM676&-u^HP(J`DEiW}GMq?yot#vtxbFNO0rlwo?D2k%A6%|&YKZSp&j?edci zBjt8^W<7m3WW7D*jAc@NGi*D^D-;X6b>G3MyfY|TJvlpu39>fRgBlfwR9OD0Tx$;7h)+{&i=Cb2ma#({0pQiZ zwts6SN+EveRGelJ_tm~IEv`R9qeoesECmR>ly&St+E6kOcqs&t2($~tiOi7@BErM_ zGtNN6cg{Z4QDZd@ui>k(9K|a+p%U!@0Wf?Mq&Ejx2 zI4aGQoRDVG!>BNU%6+QIN-hvlnx@e|sS&MMlfw#VfDP)9?^lu*#GRS8uSG-n2UGO` zj}+FK#TAmf1CgbZ!pUMmt+5Ho%}L9wRqlEMa#+i?AIZd%yJI#4Cjj1X)caTVgew^; zlc*9;Ga@t)S*S!9WS62OYyG}ysDkUCfc6!F*}EY0)*P0?&Cdi?t~L;$4VCZwotl2B 
zfbwOZqzu%7*LafNDAg%QB4e7bug!Jl$q0A|;hR|?G zXV;CaK2S-P1Cf|lT6!R@JKxH*e;}W+jEWvwVkrflYJ3^>nS^UrOgDvxXZi9G0E?-{ z>7QJ8%L%{kY{jJm@X86!!sN0Q()wt?n$nYF6mC`kdc>kD7Nz*K06l6&PeOptKy}$G zA(WpBHVFm?&6&-;hC2U}mnDf7oTf)f;Ie7yC3HoA{nbI;sdIbgMzWGSQ z1bkFc0(0#9fB1ZW(Am8Vo$o;CR4R~HoD*xg8T&-0A>WL?Rpm7^E%^RRqMD*!krhGHX#bK}y9(A~D$d>EkRF%)h-_65WbBM2FtRy}G0K-3fvhgxOB zfdE$=cAULWz^umNF%*u>0KWjMRjf&G(FL}^)z-0m!ma0s2W1`loh! z0@E1mheZ_t!L3OCqEuoG4a81HelIsFzYkqpu+AWMo`#85lbb*@6-@+VSQ+&chqJ*M z3jctF{4O3x13eLX%;o?o0Q8&!K+mZF^qjz0zx4=*0o)xEjTi9;BKyCa`>Vq6>T$?5 z0ht~7H|UYI_TX<2BDpt@F%$u#tolH&8H3G4xZWWXwPwz zyG~$n=1{9A6zsk!0R^Oqg;6=*<>A9z6XlcFeS6u%b3pkhg@Z~n0k_0PD~oHD_l*>n z6X?Fj{%b*C!xsvV|3?@te)!Mb(4_?_<|DNJ(qrzAzp8X_u%+-kqWK2BTJ|LF0^)bQ zv>n~bjwr{_wF}oDrzML>p1GBUL+0fO4JT)Ed%;ChEuZ_FZPcE1# zxzLY!5Q&dbx-VpfhSe{SR647M0s&_~4)z`a^N2$y_N}^W9yv*TameDZfQMhGKKS#M z7X&Xj!qbOwQ*nQgTcUZpMgPZbIi~d1#l`bN*Q5z7JC8$KciUdA-YsIyJ306FS>;Bn z^i0Pvtc@|mvApsEeZjzJNAGO3h+3K8eb%h%yCCI{kxoVHH1$bqzs<{VIH=*@=^l$C z8^vSxtBI)m638{H>V>3CSsK}tx*lD9@X;e@K_S8*3>Bk`_w?ZyJCK-{a8%2uQ$DRX zv(+B(&8iB@pi5n6UupR5W780nAFlI&<8Nyla@Teg?3M1*#mcgxjm0+a5bAgZuwT3 zQB6@VOQY*Z0S=>o}Z>;K@>W#eQlGt*~5|@k8htFH;cg9E~#`?kzG81Bo+cl=X&n7;;^)u`*4<;6$2E zDd-(#%9L=S95$F?1o&?^8+m&4EGmFIKdqjR49oj5u3p1DS=|Zv>T?>k|D;c&8nlwa zuti1-c4aiU2Z0wBE}}+R#aWkv^mx1dJDepZoH*`Fq!Sf!NMa-J9%O4!jPfnqo(SZi zPmcznYmh;FzZj=KVc=7kN;df|lca;}iHq#)ob>n1?XIq`1*OT6Pi9=;78n6^p zkK`X_y3qwegF`BgGO_xz$|o7=@rnm=*lou5+QWA{HTcV`K~`E~+;%u@^$pu{hNstQ zwW?satO+|27*r;%0eeVQX)IViLSv=%X|3rtq}*7oC38P0BoHd<7M7oB)4nT(a&1ag zL=6Y!SMFeaXQ7|;wGRBrv=$uwqbC}vKA{6QT3%*&e%A5&2W{X)S%Jf3e+F_JBKvF( zojTo~lfgP^Dxe_e@;C!PDlnIPAFT&GZ+zc^glhBsB>$S?AR3|VM5qRRLh(~#+zIV8 z)2LJ>UTLf&**!wPse^_a$Uen^|oU(9>NBDeNpVT~c*f;GPBnyJyT}_E$F}axdKU*=$_ei<(x-k3l@HF$FzN zcU7|0_6DcNvq{_g2bbG`g)#aH)3x%f;3iMU-rnNynBT3|jX$->PJ&4zU=>)=_-C#? 
z-iH-mlwHE%C+G7v9pHi8_U*ToPQOc~?!-NtR3|RqnqCV&49{;pv=QR_hTSS>a@4~j zCaYO=NA1wqKj)zQ=>!!W47}dvG@_Grx?SmDvLALhsu;|WqVn!GJ?3-iL8$W5ha+wAgM-eT=KfG;kAlW5>gXyl9CJprATOh#C7Ce4 zDs_zT>>PAGXoyi#bbyE6(4Iw@Usrh4hBPN=0yD2r@dO(r<@)Gl@4U`NX#CseM3#4b z>ibR;&AFjYLXY$}b6iMEBbPso#p;CU=}aZwjjuD-e^H>5ME5oPdFda;%@01jEI}~c zfGm@2XnY_sqBzJ#FgX~gb9`F%>ms*nLtp}CunAcaCV8j zYD!J2bhmV@J z*i%n$#l@j4wM)%}ZnX$^yxa19h+B2_X3}F0f(VZGd{-(}yQSQc+*(@x6*Og;vb)*l zRi=7xTYjrSYG+|=PjRd}ISan>hWaTMT>E>ab`iRk`fO@$yz>=_ZNx)iYhd}UXSKfN zW8^n>H%7k5f6DR}a-4jZmEu|}oYns`y|s})DxiD}oV5E@3&FLcRlxY^`^)H!hEpfA zZTEx1tl>r=_R2BjkBAXp(P_C_{PYR}v$*DR`<*zz;8TDWV40^$}% z5{%kXpBnQO@~bGd2<7mp=w!8SKlXb4xwba%dC~Wg<&m;8R*`_x5l+g{F4%W+ZI`Qc z!Hm61;v(U9OkYiv{=Q}^`%H{lTOxMv>&Sp~c2*F7|9DbXoh!SrjI{L!l&M+PndUBSPsrX zn=T0*mUXXDHufuou+Fu%E>!v?BPemvD8rz7c$DI1JUVx0=;fl9&h)z(y+J}*+RZl( zk#=v_B-;!JlqI*SQX*(yDv9%3-qHC7rw)Mg5n8XnLQeO^z(GFFu&RiEp~qB*WKoAl zXBx7fdjzH`PR({7TZ3*f7977UT`xHe)Cy_WM-8{r6sC)G{2G5T6ETdX``w2OX08nQ>0_3q!4*AEYxV1i|HX&v{mDH@lLPakVPREVounT_^ zR@PrvP2eeddGSrYmC2!v6Ho4}jCx6K?9>jiWd;;CKm5LARqUR!hS&lXZg#27S@)G(0gi*s2=PRnw@Bp8iis=%S+JA(T!rb>wNwsA5e&3 z!F38jX8W|LM#<#s^w9u5((-_ygaoy=#Hajx`2s@9a?neJnGy@BUY(x_mo1Lb zQs=#OTPtSwuvfj!vR@fdCG({-!6qUJYVBt5skYX>ykfGNP5ACwkO?ciIkcfsGR3jH zLB56V4nSc&u5GMJKxBa^*wiiP+tOH;=EQ*%Gri~>Dl$~NLBGS_Dn$-g!j4uzID$=q65XSKp3Vdm^Z4*53smIN|@VdhpxbemR z!rO$O{p6~VkVV#D4ZxscBP=KU7i*-efn%L%VM?xe%WW@P6LKlddA)+jxhDhX*ly`u z&sXj`_Tg0`VwHV4GUT=5em(rpZi+AbFtw~mE_pezQF7#D>6DnU(kK?w!spJwhHI`J zwQ(fZjn3#fL=C(WW+tKqZZyfzaiGc4Wn=_ok_l$jaR5G369oe@(m2Y8mDxuqB zbnn$}#Jxth_J~$~BFf)zfsBe`(tEy@iHde~FOub++hu)D7|t;-mgBFH0H?`ziOjJK zVClvx*hA1jGsQp_>E=-EX7q1hbXKaHLqo?(*=bSUKEfKDT3$|&Z%iJzWB?Z!ylR6> ztW8{|A%B^i#&f7Ypu!_Q$PIJ2%bbfZIs2SLm*pR$aSF-rnI22#VI`6WoY!; zO^qSc9U<0|*PYo^FEV!t^({5u~JF3W+LyH^kOL!`J?@<$P5L+v0Y zyx3}u4n6?P&P`Nc3PW;^%nGdAaq+0iI{73VG2JhuNo(O2;pw6$pLotsh?LA0vn^ITYe5F-YRaeldHbt@I>W*T+9%_DqsTO^o}TT7qEh~sF#i?BJe!YC7II=@Q 
z!gV^e!@s(mx=C_G_w1f~5pR7pj(#P7SB2Wy@+dD#D)H6K(~*i~Rp;ETOmm?NM>`F^2?{U}Mfqy2^2lGjp}EKBz48WaOQAk2+Y^J5(Y{Kp0h1LR6S- z8~NwuVRNjMX!1uh#+v-hGV}@-BQa=w(V0Ku*lCD4TYmZ16mv_2&~dyP$UVmvyZ)!H zfRT?l3s}!$E((CjqSJ&@1G~J)9ojjVE z=^qkJaNJ*+Bzoy4CDemvS8D;D`k^;LVqH8lJk&} zMRFNqewYk{roulzoaWvRe`{b(t^Lq)IQ7GQ z(R=mq9Dz<{`|vR%%`G!brShJDmgi?)b6DCG`(aDhIHXNBi8#hdppADtGD zF27mQuaBGQUVA5Wy)~h|6SXgG`mQe0_v%R|{8bV(_RpU6?~57to}>+_Pqa&iW-IEZ z1AHq>5#Z#62lqX}EcaG=SS&=RZL4AO?(;gQUDl%#t4ZM;A2y<rdH}zWyk-?53cj^^d&T({DLabMP5gaXGfeu?q-=RP zxJij>PE>fg%w^GC5;SmQ&+oK4?6s-CZF+5h+1TnGRfA*N9ly5+B#QS)F0fKN+r{|AGIYWF|!T)7!-+{JpO+0);+uI znKOdLAt-%pMrqSZdid{cI)g&e%#~)H~{#anY{M=Q9ZjWj>bqM|smU4dAXu zFz%^$y%#y>z1J#}jV0Z#cR%;vg#{|c=#|??9&bkwa+tb%C^w$AIxL26UmB?=)~3L- zsDDgbAjhQUwd~}NQIG+))}X2sKd)ouvdPxCn+8A zTHjxUF^YXUbumaImo%mi?65NrB%C|89g&@M_1^E;8Vx!x-=!4$)YA3q*+Qpc(aQDc z7Iu#<8Q$Y3Bbb+l-_O6*$jRz+8@(3w`FL%xod0$6s%zD-vS*ac_M14YNe|O(|Dfpp zM`4MnRI%R28}Xy#EXIkjX{Do)MBT#f3O8`f+&WLlIy5gjZ<2i0NPJCS_E*6t6>m6O zzQ(%`@tD0LpL7i3ITZtU!BDdX{Q4tHpPO(Qr!;?xt1pq{1@h#O0%TPeZ$WhTi+1J$ zrP@VX|7}Lh#Bk{E!~{$&Svtn51n%I!>WKG`e%76|1tfa5nuD3Z1+2X&_^-%bAp(%9 zBxF2l{i(g#tZg#^716wSywwan@bHf=c_C12*pe(-U}p{Qe&4~oxz5h6xL0Y|mFxxY z=iPL;AAR2KA5v#4Tb3a0CZ?hW_jE#2D73)2a11?6f);JYN{uRvH@#hyw~ z^ad;DO>qeLGl)V{0<|Z7Y&(AH^w??qY1`=%OXhblL&WpPd|=`ZgY7{sSX~-|#Qdcd z$n7l;nD|k(0|-D0g0bGsN(Du~nR$j30xCjrGNP7Fhvp*|m{*i22tjk3&i-S16hT0;UG zvSH|JLS0Uz`Z+Owl958b-n&xWhc%pXoP#_p+Z2h-H0s|27(OPl=#86OaaN+2`Ax2E@E1wTmR~I5qDqSLc%JPWcmnjdL$gEG zJpuMT6U$4o+ko6w8~IFfcKbs%WVhad&xQ5JUrX6?ZrZ!uQ`C*uPt;>Us6mU0EFwEl;R-4{0S zu4#z{12t9|=9@ema!rufXmbu4Bt@EQ3PZG|;Bl$GJrlZQ*$2k=kFqKD?flDWh=FO) zhfY+zf>Xlz1R8fgGZeQ8F`E0iO&Oq)90ZnZI+@P!|B=g~t*jjdfS2M|f}wE9xL6dL zdoc}zDFG;dA^XB`$QMdXUns-?XymE+fDJ|W0N%P7@8{>(NL8-im8=SNxX*L0?-JIl zW9TwvQ43vvci6v0RCe-asOqB+H;(r4G#T$~22z{_a106G@-Xsbz%Y_6fUR2>)UnS-dI{tfYR0EYDaflhVvd&7G?#S?A@kD{eg6kS@KV)wI+(77tf zq~?qR`7t_0A3EUI%_(-9E&^oD7vBggK`c&{Rx5-8oAV^3VgnCKu_`&Q^ev_nG=Uk& z-o-cxIFY$l0v7TOu#kB)^8(o}a4);WV6;=E<+VvnD{sLfCXeophqwGVMq;YOPC&Fd 
z!+y5Pd|(<=Ra0O=QQj2nB(T9b0%gBeX-=!zNM2!I^YZ4~ijzZ^S&OisBPSAQ3!EJX z0;~>9So{_u2{@et{!m_+js$>80<{dBj{rsAo=icA^;Wa=7XyJ9-x{Yz1%whqU<^9kvn-1@7<$l(S7Wyw*|oSk=;z{* zdaO>0iO8}qtEweyPT2$+&e#hhLA$vrn1 zdIEGF^tv#zxE=~H5MN+of%*&>U?4T^0i6jBsfMhPx z>je(cLChcHEeBVKk>z`pf{>WX0_f6jN$v(=uU__Yus|vSn@bk}6YjH~OKhT>+%+Tt z8wg|ZUwoSmidKgHw5NGIJ;b$XX(UbpH~TuGf!nHy7hPH^#PwUrI;fo4vj)m90FZk2v`$4^rc`(g&|}@VgbRxD$9J60x(pD zSexFv95gVB5C1yq$v=+DeLVzkaM{c#=-Vw6H(BzMpo5gyDuQ8~31cwiw3!1uNT};G zG(y*;0I+Ylpv3nE5&(!Lg3aMfw6W&Bf)sP#4qP1=R^x!H6l`Ad6_2a@A5+~cr2z$8 zN1BsypTKc$>L4e5ve^bDd%SL7AT@aX2Q5{I{$L4JqGD<|`w`);Y#ICIgALW?gAHka zC!7OE#WOk2j_c6hUN4>&89~Ti06Sc?U!}oCU1?9`b;0qRi}UkEbkiDYGt!anW3eLP zsS5;eyx>&@ms=)Quych3qN<`8Wczm~s%=Ip4hHrIHNJSkYw!fS2u)+EbVn}IgIi6m z91zMVDxtAbzwus)&En-Bjg@|Q31YY)A*#YpKa0yxzYt}w_*CRTinKgVlb1X=3irg3 zPsC$#wmHp*k=FtgCyZEEra|9P|41pFcA*z-#D) zKv<0)KWI3=_I-?8rS;$O*M*afb2{fcvLcjt#&0x}GQuFZt!ZgJelygQ?8}kzA7blV z$j%E9=YulYR_^wD({d>!w=Au?i+t*G<8TXfwY`V#`Z3y*u7yUa)_*D<{JKyVCT}WS zG*_Rwy_fq&np`L2O{a-9=QXA4tbaPG-h=oHmi1D0E>dn>cfB%ZqP)wf6+clyvRfUxjIYgeKE7Q2b)&^wGUhUa~yV z%u`1wMA+xf*GFew}vx%ehTt3Y#^7c;}^m7 z8(|2;-Foroa;bYKzgk4urb3^1?QKbtiLEL+>nIp+3cr4}`SaA{R6nw%{zK2-kW-~F zvpL^zo6YBkL>rU$D#eU$i5uv;O*BlY-N_W!!%2B6!Jn&l=XJMYvu2n^DU3}1od$fw z`+JSuN3eDIp*Ybi^er7jdj>{F5#5xc?5kd48TK~R;4sq9cQ}O`Ictg^ugLXifW;;c zZ2T%J@Y?(32GfUjxM|Ar54>*s$VUhKJ_q zsQgp7YsBvMnDaES?~@+iydlLr$*HUEd%EQKvjo4Z_OH#h<(XAut^N+`T~3ZKNxs$g zBjqfGc{r%Qm)JDSzFr~na&Ek;X1c=4*RjgD`(H2f;Gv|$-S=#l{6$xP(FwksJPGde zFIiPI{N#Fe>z1eLnYd!axIjn^kw+HUAkkZ9PSZ8FxKW}IvCx?jetQ?ktp@$($Paxd z=5y+UF(FYM19EVyk&fZ*=qKl`~b3+K8@$vc;P9fy`{loT&p7keL;AP*w><-Y|}jFCxAdP87VvTxw^)yP_LD|TG@do}f0HO9JLIwaKl%QN_i=+S$GDIgs$^eAv zmk8xA5vCv|KCWCBY&IWgV1Nx9Iyb3vo34IUb*W3xgbl674yb_Ougsjl{(qJuS^h`osJb$iRISEs{f!q@?*+M4cIXjE-;Xmun=(eBK}!y1Q&P4Z-sC zTvD*7EQBQ{B(@2Z=--yaV#L@D6NKcugJZT~@+7vTGz3sJr7c*REh!lRWP@T#%VMkt zibrU&5ou$TKi#gV&YL>@pBnUwpPC z`SqKIv~K1=pY+%DK^40B&D?hebeE0K6tXzd$aMJYOB{`Pg6_R${rcH2#|?l0?v*U> 
zyJVO?<&SE?SLPAQVxxOh+Hzu*zP;`h0iHWr@Gnnk|J7(4E*Wq36vz?jzezH8D;c4g zMm9AJiA=E!m=7uDo0P-~shZvhex&)j51EA1x^c-b)03=I=^Ncf3o#<=LLz_)x|v{{ z&MuQks*Y~v`tcAB>K42u{+oSCO6nY^V@cWcs(r~|oya|I)6a|Vk?!VYvFvqzH*d%| zH8A;(8x4yXWxO#bW6hoVh@7SU*@RcdAj3<|NX?S0=@gy{MF-F*J;Q7lw}Yk;sc(7w z0gdhy0~)0QG|EBDCXm2k1nz+kg@iToQJF)G;ADDb%=gD@`zjU4DniOZm9bQZj8x{L zYc{0N(&oUT6#J}N%PrYf^d!jbfqxvlbh~$TPVXWt8R@Q zx%Qusy?;r>>^kzQZ}a`EboJ^pkdmpenuC8<8$XUlAOQts)8j?QKF| zxO`{rc1aEHv6AxG`;44L0>hx|wElOrNJ^g&GpTu(_leRRM64)J6%GcYgiqh3i^dgx zP7^(oHqe&0oLk-V_fl-~LifctK_-}9j!sBz{91A@1yWq<6c|fhH3LhMBp&;xm2G>E z17eJD?!_*$OyES;UQ-5F^E(4lt#6#i2_MUrZgB$U-T>I7@K>r zDYwi=jZjpU6Em@Y2>R{gxahC{kd-rjcY-GJZctd3{apOP!KutKNro6S^H?Z5OSIp*$;L_616zpkRLO45n z;y<$-F3_cDp<4meUNub{bm+dxSrJZQZ)(BO`CVAC+Uv0Ils?MZe&DAmtmk9eF&dR0 z+gb=6P7}qJR`>Ix4N~DN#V=Q$f|xGc6+Tt1ruhT295P>#8b*+Ce7>iv;$K@*Hb%bl zO>OXtbZV+MkK^RS)D$xjj;#KybiiVx%F4#T_jWZj5uB=;1&p4D_TO5?I~gCr?=DvG zD495U2Teg&{xGjJz+(D&zp?VRtQ97o`{zGWQyZL?NKJ)zbfY&+*?PJSoM-hkt_?aO zIoJ<~4_2~PGtj ztZanh+3&~7jna;_x#N6+jO_+XIzEN*YChQKwV;67+ABPz42G14*It`$r+;%Fl@;9} z8_URxTj!!%<0-)=2l;kNvv}PId%|p6$otviL8qyV<%33Mj+Bsvq^sK!dMB#|LK@<6 zn%*A_D}VD#|Khd?tC9KT-H@16nBAJge0Ne_q9kRK{7eE z?0yhId&M-br-v4qK9uq!QcJdn*3N1nrHP`5L{snNEorLaN~?33WY34MnnNF+oIpn2ORRT18zaTDMdE&btYYH=Al`<@#E0BO$T~IPaIf=Gc*d(D zTqRqR&&s$Ke#j${_U=6fxJAZKI;eXjRa`YZrngz2c!0TfV5YZ`cWF`>@Eu6mD++8$ zAUx1raMhLH)gc?2XJEqDr^V2d`=;rmQ3(cH<>U%A`>r_LxM~FjlHMH@4mD0cGq27cG)C+CmI|O zVM-lXC1|aiX1tl8tx%{;^>=^2w)u3WLQyLJWT4oGCD&aXCWZ2?S97$|P5UiY_v7){W=r};7ApKp9B4e!xec*K4( z;uG-VkBvM(`wv+*#%F_*OF@;1q&<>3q=(AM6RQK}nVl>=pWVV$;%X%wQjes6ay#5( z5kOrxw1JZYQxq5pjjeb(#5BY^`Yrj_b~dhsHI`fattX3!h{dd>?Li$^9+hcz?+~3d zb0u=l9UfCfq~0>Ye=)K{yAL>EY2B|r0Wk9GPhAH2r(u&-h5 zns2-~{n*1U6nv}FrZ!3T_~+^X*Pk}idA80W?VE2AN{6vlN>hI!2&({8xkV~{EpjY`gK zdvpt@cJ~@I?IhrXruCL=EfXmebE)7D_JIuxBk2H5!rdpnAwJ#3vF#s4em zDybm*-wUUys-lgIG;D*^9PPav{_8qjTfci;{=NZ@;z(D2KYd4ERWA>3FHc8Le=bS! 
zJFX7?E?kn5Fd6ZGKcjE!>*1%QB(BXRDk-k&G#k7RaC@Lj;_uw{#??MGU7&; zFiiFA+#K!w|4(|Czc_%>`tRRg*X!@^aJ^u?*(ry=OGJ&oOA@~ag2m6q%yAI)0sA*F z&UUYK#jLC|b)(rx>kn_()Q98W@lupe zY-47XqH>*#uVjPqULfU|?oKd zsvt^YA1F4C6Qm3j0#ag|Rg(C(B%tm|j@UP}wV1O}8D^v`J$P7o(>d|7_y<;y?+WC3oLj)oDT1=dW1q_H_oB%$VVPNkzfAd@(Xis|8+0CCcwiKx-QWn^5rN)S2{ zP8uBB1W*C!DbekB069QqTrt!L5^8c9DUyCx;@&BsY8ojb?;!L)m2|HVvjlO)ke1a( z#TTQvZ1FQU$ssH-*4`C;L>wH!)xE_P6(3Db%o0KlR6@>WyTyfw=>g7fLZaevGzf_h z;6p3d5naS6f70B=;spu%TuAvO6$+@1ggb+vHjv>WG z0gwslhL?ie)=}=q=;J9nHKsA^AMU!ySkppF-A@tOUYnL2D0HH*aLFfTEK+}|SGjq^f%2yhnw=TUq9TrWr|r17^)h-uWQL0*aM z5+Mk#XmZHyeS@aB{d97!Xfhts7>yy2GK7W;O`O2Vy1*reqa))&LCB((hnoS~AqkPd zZ6$DCXB%fxhLG`^x)IZH<4rsOd8G5n-ArnPR#>3_UTymwVp*tQ>CV+{JNYI}a6ltbS0>o81SzHT}Mh)grZ1WRA z9F2EjFWuw;b7Ol$$cbrLSl#2^a7A;cgTQf|I4)F-ej1GqL=^(!yA&kZ?I@aIlCsvO28Chjip+0-3nB>LG${$9dQbNy-OHy1*+sCzuKn?b_$g2zIs3tiom;G za#|U>Fl878+DlZ`>=-Hekf?IK4TjRC(53FYgxd?HAd66?e@1g*Kn7r^D2a17tWBtc z8OmoUSjp9D{tVRaH_OnC(=~7UKHY7N;H@^sm0d|DZu)b>SA7T`&=4jN`-a`>j~=1E3DuW zD-{}LPHRx48m;`WepYbt#8O6uma$y|4@9SEhzsnOO4h*?0Ka`QawTfE^|xULR#=g!R_UMjOyk5Poy4@u_T<0{;gm^uUYz3?Zf@SY*z#WOAq^DR?q~a9t z(%F8ExkhyhwJ90^n`Edi3PvSZTq&+md7&;$!7t@|Ne@de-3;xGYQ5$iQt7M(xp6Rn zra;)Ap9r(mN!q4lo}z4i%1>yXp}hcT>O2W~mU*vCpAV$%L*!@$iUc5#>17b^<|ZKF z8Wj0>^U`>jnu0yUC^febWQb`+7My)Qf*5b-{(n)G-rl+T; z#LT5+zm{xZ=aP*N=>--~`r0cmP)-02MPY+XK8FS}q2wW)MQ5(JwU|o=f*u?>5aI6L zu9p0t>WHYko9Nr>QJF||>=+VQD&@K)KxWBxv3fa2c1n@Cm7=1!bw+Nicz&K;Ns9k+ zgCf7hb_y>3=(M6DhE3br=*Y<7zP>kCs~>g+4h!CNZasPyDW>bZ42hCHAOU2rc(fqO z_>O0yZOiF*Vlu*5@#A7>?}6eP%8OcaT@>-VsPl~rr*=M&6SteHVLlyjE=-tqzFp#8 zBPQ0kxuKu!L|)hMR@>>iNhTdQn#?N$=UK^X}aeY8J7qZb5ag9 zES3iqNBLsBf(n#z{~|SBnN*KuWkEr}tI92-66cZY_HcqjZ{}|0V>68<;_}vHW;OPn z89*9dD|XmK5Zf8}{s;9fp##UXsfT^2J{pJBv$wexO`o42>b-cbT`fEDJ1(W|P4e?o zQ|q6J`8A~lpBY?CZQ3SAJ|_W0OfSyqUoNI=9TReK^m1fSPh{$Mu=Sz4cH?lk-4&V# z_EzLx9NbQxg2#-S>Wqe8Jed6AdMZ;kQwrR2-^GJ#Qhm-vYi=}&VzB^7nZT#k_^t;KEywr)k&ATTxpC~PH2weD9eP>8d$ikLb zF+YBG}5rm3clzihrn)rF~;hT1d8-2B{p5-J!jzT4ogd1YZ+>;n!nQ3*7bRjRx6^_ 
zAXw)0LPwibdGHoO5Nd2Cs7Y4#jYZlLbMLhrY>8-j{FJyvE-C3&tryJ%(|t&)iR^&s z7dx&h(89&4*Eji}_KGYKZ<8NPX8KEaNDOTo!VHmyfN;%8HQR>bo+jRl3Yg~NgUPF| z;=7xBWVR?*L{_P6TEtCwQ-5Z(b3!TFv}VzE$u;RmSJ)|ou<#vz77{dEUVMv1g_Y1w zXsxSsH?^%nO#5I(su#t?smelAuSVxu#+W^_z;0hgc_z36{0ZDMmgb-zlDYJnB11fK zMdjY;y;Fy#JmR%!y4`xU2RaN~^==_C2BQ{AsA9bV-AWIvZP~_`1-?lUW8mNw91~Ss=qU2xtc%RH}I14GWF$!zSX_U40Bb+R3%ifs`4v0 zu0P~au(E4qnaUznXM+cx@mXV85?O7AOFf>s9p2#KI_~1M;lZhB*GQkl<2^=@-d-t^d6Urpa$|F!;N2d0DPq5kaf?DxUP zxmU0!uops|2oc1GK@V8(`PsGD=CXtvFMM2nh(?_A2C32ICpb6!r~EFWv?1}Yp^ zB)8oL^EeIP)zul5|C%)zK{!B=$WbO-?9k}a?;>e*i+Qq7|!M$;oTOj<&pcx>N*>;hkx%kdE)=L>_K1Es-9=vK2N)b z^&hMCcnpkp_}({duI`MvQeQ{^@oJ=VdEswQIRBxcn5YAu`Rz@zCsYSL$Ap+99Z4ty8L)rwCn? zJ#pV-*w<8xrRc3dDz^!AY&(1+M72%-S5sv@^~<}+>bukBcXHC+UmBygVFKw@{=d)z zypSa^B~RG4;p$t{=vNK<92&YVAGqRlrStOh)SR@46x5!h{pWeBDpATN%E3zgO7E2R z=puC!_0H-)-j|>0JjxKJR{f-swZCM4oO?oReCuALobqZGE}FZ|lDxl0zuL`Q^_|1( zg<*-*#C-K}^$qG=oF&dVz&>!a?2oe7XCfHRy%kTQtL4w8pFO$S6A>YJ@ieON_1W)p z;h68w6NA}J4~=WC(eK>4x#MPP&h|#!wN*E_R_n3XdDigXAD=q?s}?n9k*@57bp1W{ zZ+F?Yp-(u&4{0^@>2&ABp}G)jed|8!Woh1*+iS4ze(5=p_~Yr-lk0w1C;y^usOPyx z*YncJpkk2aG|-{-(DXTTtgNoAeQW;sCd6isO=tF$I#+ND>I#nuF806oom}I7#q&Wi z|77s59=|+rHI0ak@f*o~Tzf#H@9?q1Z;~sLU$jNFo@-~dXDz#;*=#&jZdP9NCge){ zZn!Mm8oq4LNct{KxJlQYpt~I%1+DJ4<%o-N3e&B%F1+16Nc<6zggEx^@@hj(vmuIzkO{G<@o;Jain{n<9&mb;hlnTngfJ6MJCtsL6+*8h~- zaOJ9SPp8diE2ExiJodR7c4P2)j9N_S8n3CAHg4y`ZS~J=o;VKX_)6Yb&@4}u+f>mP zHTW|0)V=oMk}4cp1q((P^M$?0$rruV1zSSonJ($Bg=pJr()v)!Tvi zqd#vyxh-$9=fY;q?E^X9c-;Ga)BNuC9?zGj(yKO&pWoWHeS9>hm-tGM$nATm6?0YC zK4~`et|7XA!BBGAo~UIP%p$i(Rb*6^j^scMuQxkhO@1{A#myz5ZXh$%1HcfV+d#M+ z(1{kxMA@|^8d5z%Or4VxAy0B}Z7iYlOs znqCMjhBSj@VZjwr5f~C$1R??$EC~;gFj!v{WfqYndLaSC)rJwV`i<`ph(D62X6JYTZphRyUmXSljR9aI5o z9g88LiXO5tpVWNAd}z3OIXZl7x8ewB(ro+FkqOcmG>|q|@|dg<@@VuBCJ6X516dqX z?coa-wE-5xRNE8ph<4;zGJ{xlQGBLbl#@F>DuhmAXsu z7shALaAD9d}g^&o%%UcL|-H?n#%nvmFH4Q?p z{dEmO1CR!BpKA~Tl3n;*Mi`yRH={uA>Q4g!E0zFcvDuK58Y3Z{0|uTf=)X;bz|ce# 
zICJ>_w&;*4938(dJTx?8{?O3R<)4{Cw&K#mMDcHLWfm63VvrdO1_5hiL`2{Nuq1>L z1KN9NI4lOiq|^QVX#qqM22Y!@rlaHhxu2U88nfpJKpKb6{6v>YBM=A}CW473U=T(` z0ukX)GA1E#befS76NAI~V+bGV&YSy}dYUynw3R|zIO@|L{xN<&?LhyPleq=|SIEy2 z{l?@UDfgSYzNzaUY2Y6rfAg+y>iS0-_(#a!yz5_6m+V{>2$lM#+7VEV_mxX+^UC9y zL>Nj|MfscoZ3kZWhPks$iAJasG zKT8uge#{d$8XBUY4EL`EaTirt27j+aGMuZ#g?wfJTow%wps!B{K)~a1ctA8DI&dUo zJXDU04uCTUBSF>u7ci(f^S@zGgX42NBOCz_bpSrj1^F}@YmjinR4&2&8ba zn+wR$wN%)evEQ(VB)=})y0GTD7k1~f7c_%C{$(cQDqvPim^0UAaewo>ZQzUVB0~lm zMt*K9m%VvGu35)d=R1cNizkJ>b!QIGU!-a^C{R-VuoJPJH9j^njQ!#8gICm{49>%6 zel{YF#VD1y>oABsiJW+YAa&#H+uGs%k#TD6yVjy>-oktf_4Y7#Z}v_)nmaHzbM`Tv zaf+EQtJC+-_^)gkR(t*X4vr0Uw#Hx|r?7^{%C+k!@|$Fo#wr{xSG;3de8Zhj3X5}_ zkQtKBwDsN?tNMiYV6`)+ZR3rHxS`jX#Ng zZ=$h)hsRnF9r?U(s?{R=LGIwxqwmJ7Ur$oLx$#)RSR1g~`o?12v1Ozl9gvbu-@swW z33Cu6$>e(63D5BLc_OT3NcWNNL09F8z8PYB+*AIc?{WA;j>x3Y=LxM^m&Akc6sL+1d72(ahM)p|4h&rS z!h!H$CAeZO*U4A6HZFFSi#P)M_n(^!$?TyLwv?$u@2X>S zZz;X0J*XLnI#9Npuxqj}7%jThE!4D`idM|SNRt0x%=;!%c9X8tIxDvJ!>*9JQb`nL z5-A3(F&WJ+cv|y8aLa)!anZPK!Gk85Zi>gu0<>Lh(vR0yzH0|-+?DQULO+UZB(G$m zD#@Z1-_bMnV+b#I7-eGTD8DbZtbN;wJoc$!JHK2>s4(J(0pUH`!ta7e=kD%rE1(WC zkhJNeZ+-at>HIq7y*91i-^hQ>ep5Z~OfX4E{YWP7iA4SL2feiW8wo~kP&onmiwK<=gCFVt&NacZ|TOAwz2O39hv&PNMsmCQYP zAHVQ50wV0${VuHVts!Rv5ZvEkz2cOjJ@z$EDOpa>{oaEE?`XbT!k(h6*nzhac?Yw_!WSRz-GNB^zqc*J| z(^9lzSnHG~)5B$mQnf8nB7dZjorJ3$#eQ3IXt8qh-qXmqmN53W0$FKU1;@A(#K#`p zO!aI`$q6!EhaGs*o3|4LB$&I2R|cljQq@$~Z+(*Amtw*|H<4f!(zqkCfaY{AJWC2? 
zazEqipe5RWaftfK<(56&Mhr`#qut%);+!-^jgE!FNwT-M7yH#_85Y|gR2i79Bm0Ve z%PmYGAf^*o60tWk@;rF|GViI2py9(pdueLEJ$>wM%<{?e!| z8R1LtcH+y3Rcb=glO*v+C0%=Ia4eXAGEqg``AkFxk5PLE@#~C9J)@5&m+tyE+M8dJ z+s8Fu?!V+}aFL{a-hIJJAK+-EdbH-g=e+lt`i`&v?Cul~Ai*sZ8>3{IooaN6$PaNa z|5z2Wkd-W+-(IVhWb{qi!F$`n5f4QyZhlnS^L=j4i zhtC=}*&LN#LZe}9F)nDPcUhnBEG!1ldcl1W_GZ7R6Zm#b(XQ#(^(RO_xG;XDas5H< z)V&-LFl7@91WW;8W@U2U!IMgl9n9u|b5QZ}a~NR2R2^JF+DF9>mU2eeyHasp6Gku@ z;o{)tY>IFJNv5*1gQ*(ARSyOdRVtXA9oU=%6-*uB=?d1D@^qC^``HBiS59l&Pc*CbdK1Ej7n3Gh4kOD-xV@X|Q#$<5WL;?m`8Vw=#2~ z(!=rp5mTusctDpVZTv-7i`zgp(~Ys-I(7XzlGtUv`95&69fe{>(F%8PMbPxVh7{1Ygu za1IP`{{NbP{r$JW!_?d!xnAS;bEdE7%hthJ&C$dZ0h2_yTbUwMW$t6Zw5-fPHinmv z4+AEPu(GfOk>ljRfZexpbx}q*OE}m$I@n+13FiOC<-jB*)PG}fe&>PU+}!M(A|k&Q z)Ya(KCV(vtt||mz27rH%t8qXAz{a?VbrT&M3lj?$2OE!&_6{LFJ|PPgH3=;j8!rzh z8wZDgxSF(pu%ZYDhwLLcMNMr3Ljyi3b325tt(u;J&UGUY99&$&+k{Ma?l9>HatP}D z$A4Fi03I6TArv19xeFlSL7;e$t5$##>>~=~=ihaIum2!OP-K)FsA%XIH^BlGxBwCa z3PnPOqM#rngSmcSK7fpea*G-+e&e=^3F=)Ze2$>F*Jw2N%bEyO`?hI0A2|o3V-VgU zA||1`N6)~>#Kq0S%f~MuAt@y-BP;hnOHbaR{7SRC|V%m<(U=@U3<=~d{PA0xHTGP#1+6mxxwqQ zq15jy?2*?sqMO=9!$;#n3Ex>;W`18$8P-othWE}=kfCE~O^ozE2WM`^RdTo_Q?IRz)h~el?Ha)Bq zQA9sTU0EKyVW%-aweIq~Ds%39M?1ygafKbn1&q~=PbXTa4R6k~TXsJadtG%vtWDrw zLiMj;{6EcO#G=kb;U;L^(QO`3qKf94FXL-d9jtif$Ca0eh)XS(Pji~8-f{7pEs`UI zSN6$$zQjL0&9DYCv_`hvm_sZ2j|$kE#8Q*n{g2KX=5$Xf!;C-R^*9$CL6ND5WRJ;F zi!4_Jgeza5$p*z6&Bk5sTmcYWR{$G@6<--5^$#9dE6Yq|{BjjB6#PFPG%K73Y9I!v zShMwF-#v>vvm@)A#^VY2EzD^#aqUZSE_J_+L_6kdN1t_1m=GK5o?D*GQ@O7D-t-N_fs8ALl=Xwfv~{22-UK^6 zPi&1D*d@1lKk&|O{|r%8DVEoed_-Tbv+{reV)-TlVE7Ln31x$cHVfN5#WNbiw;6&0 z%^M_Pu}B(n?Mfzw8VHyCHGzcQDW$ueoF*prPeZzeox}*a3d3Q(Hkjk%Uy>HGAx|DM z(wtx%p!E1xmyo1PZe;J-HM(FX<6MpkQMQg0tyIbTRUO|yY~3MhtK9aSS(Rp`|h>pP7EQxP-g=!D$Nhqf$nnOZ6XE^vCv z*y4F*q`v=XE&Q==R7seM4i4%=Yf@IFqGTxH?MgX-oQn%q_C1|>+1P;FX&BQLcJ6lG z8@4RQv65fK$R}Lso@_^33!bfA0k^Hqr@F~kC?};UnQ$~kDu$OyFi9l&`H~hc&#T-0 zo@8cXOp$KSd+X9337XY2R+NfpEb$rjT8!1gtc{wJ0&!LQ{*8m|&xc)DQfx1Z!h;nS 
z*OOv1TC@=%H~LKIL^|`$2YuB2n_F!2$#30aNJm|15iz^xaG9v$;hTa)qop{mk)xY= z&MuluF9Vw4ZZ5FNcphI~RI6^zlU`IhlN_!T`|3B35VA?yC#b@Dxib+{+sK=)1Va)p&@*x5 z@XLQ#tBBqFN9V-9o2G0?rt{TV8ZU<2TfIGhHM=;`y-9+j^(%flI*y zI(D;@MC=;17W_98qsmTZ19`2a%)TFoJgoDJo5=IEuraep)6Am^3dy)W9>nGUa;TwD z=e!9deZXk>r$Y%mG2%mXQo2j*vd5Cm`D0_^;YVq`$frrKZ=_m|++n#74Foz(mr;wn z_P6qmOVrGwWFz^u&Z@OWCi6|7sqEMnm@%11QEl;$6YWzR{8;h!B|#K4*$aOUU+Brt zys@HdQ2D}PVLTJl|19oHhMHCz9YZF{|AZCD<|=7x6-=SjR%j68Gdtx^OhNvrx_JhjY{be^F2P`^SC8QBS5` z$(}>50209H3B2Ig@5dLWElzt{=TYbtP@?mRuQH`auWGQUojqu8d9~$Sv$77z_B6cJ zLYnP<{*6N5@uw$C*d&ZMyxP^b16~2Haqnwys3p=wc)km`sekj;jlsqhPAcBR)x`Aq`9uD^^&gsQ#Ee;A|JNXEu5FTGBls0 z`7&`-^a?$kp9pP$_t_t?zqoafW4_DiY86jyl3{(wu&)cD?j;EU=Dmt$pAX~yPKU%LD&z;Xo- z{U-6o(4&N^9bADJ)3vFpP>>{FQ+-tuK$o%n&Cikig<{``O~YUo)E=7dFM1e{i=dg0 zbTbvh8g@&yqZs_cKQTXZ7j@Betoo*n;1xh%UBV0>)qawi7aCBTJ3^H6p=wY_qkuP! zNLr6W{u~~&S>!Dh8ujUw6?w(GM|`oykRKDhklfMnmBV}o8=`1#a$%+XLcTVJod3#@ z{FyoVm+#!Q0ZO`V)R{PZABm6S%R(8jTFnyqemq9auxFN;6)`I-3pdVyXLpoRUU+xc zU-YzEraa8!q%lrMGnP`=!FIWLZ9}AbMr}?*QKyWmHY4v|zCHAq4Gj@~3&%FzdiaCY z;pDx=AIB-2Ro}~+&X@b%u&?lhYggVibQ-x^+>3f=`*4}dqcTGJbC&?-J~D2=i#3D1 z6+@3K`a`W~M3uoA@fbVm>yw~Mx8>l!o9(}2USjuivPC{6K)!iq`K`xVsqm>W3H5X+ zXDy(vD#*Vxo zeE}1Fwt&0kkQHElApgy)zaFIEn!yr=>j$(FcZ?PlNl$bw)LIY`jHc|xkjb+iJ+*sR zl-+W-0D943dei`k0M_alQNP#mBZ$RGiHd*u7C$_zr-6^OMoju)4Zmz zEn7Qzo-|uXI;18O%~z^OChI%Gz5Z*O6!EfOPiZlB``8uGdYUxn+>g#-?)-yAKJG}y zb}mPty?w8>wi1zC`hCwF>Zpt}i8_+9|B+PI)=qV55=k-yz`ooECPmtRC{@V&sDCGk zPR|`YI~^($`!m!1cZ=%Z;h1Cl`zaD2i=TLI>3Y7u8GggeGH$@}^8|63>hUL7xo0d| zXHQaTAU4NR8jdZVZUCO$!xX!^<*cvK3awcR`!^1Za7VeDZZam{m>FU25GjYO z_T?$#V*>Eu#9HM#)f8aw2Z0nt#_XEUc4UVuM;UpktV1uxJ~BUWC=@X#a#!Lq3gMXP zActY*)-+3T1kIjv?~zSC;QJuY6un`W415@^c#ZJX$O-f@;+(F z1tg#qWB>K(NUEn_>!nb<9N8vEH~Snv)f{EXieY?%elOI_IPdEO12O$rH4GV+JS$iu z$B#DJ<68>KPX_Mw9*3;AcAcWuh|zHFcLa)ZFsB}SSQ1XHE~lMe-oGSbjcJYP3KTTC zV6;Mg@K+|95sQ@$pynf3^fs~6Yo8VK+#6H7T-|1_8?pbE6yxTYyhnicFu-$LR82po zWt_$O&WQN?*vv|zOE!sHepkThKI8|hK&ZREoQ^2qb-}5y&AL*zy~{X1{<(LvN8jZP0SOvTivHW 
zzG^bF`!cSpDILtTCq}LjSCLdt=|k!#+IH0w37m zRH!WPPa2-Y$fD;L$+`lH487jSo(4QC+K5-ZCHRze(!)d%om`Y;KkBZNOOD5@lZ(m0 z(jhDzrI~2l3ip2~l-r~xjYWtNt;(%4ci-NaVi|rlXv(~Ybp^06ri%MzGV5sCMb?V~=j$DY$K>rwp(v*j=L5z#oEh_rg-o2(;)MXDRtE5OMVSEaslmH@>uir` z1m{dnNoM`YAAa%juvNMt%1N||b|X+2aQzALpZosj_b`y&a3w$%E(xWSh}$3g4bXFt zE!2{c9AHl+E|T2#q7dA5lz&z;kluo6GvJg+oQW;pF%q5RIM$JoeBOz6i7Vh=Q8TlK zmuq>rZMxKcbY$9j>Z0#!;wwH^6Bo8jEg06%uEMvrIeHG1NKighMvBn=>)C|NmGCxY z0yF-GU}^1RI{r75FTghfyL3K*;l z!V5xXv3AxIIZxR??o+v+*PPc#kpS$-|D9N=N}?+vh8gD}-6ib|kC+j{UZV`JxT*YF zhfv|mek{h8O^<765{y!x#nQ1~elgyu-6dmC8TGY_*oth2b&@df4KQ96p_34~b6S&!)ngu*q6}r)Nc_&CKs(4j8V2u8B zm_8;*L3d1H^u`UMDh^G&j3t4$x04<|ph|*8 zO@-}=4}2##je6BRq!RB3iY_n0D^jEUFId+#(uZh~7#gntZI`Hy@quyfoH-co&8RWI z)ZmEGSVthzpI>A!P_#_+qDswd++wBpK8SmCtMbGQN-PpG z>RuG5lR{WR_`%#FiiG2xMqF8<$U**pVHEx)xAvzir@xe=&IET=<4#>WCC1c-F@;!J zw7^0jl|ON@M^mktz>6mbmfCh!=A$@>=BR9c+TkayI(@EdmN!k(4W=Q_8`n;Ut^lO* za9>XAMgTFf6nXo(7+FK;0pmGu(YLHsdR#e1($9@rbI3k;7tH)xv8cgWJ-`-#>uK>L&wy|q%8U4%1YC?;YHcE6$P zyLU@@4D(iw46kvQ7v5zj3rX|y!*Ue4Ym(#HFPNzG-20(Yz7PqpAEfxUgj@imejux@ z49&Yfa#f_LGl5=4M*MUaF`Qpw;ZO1fUT=9tW#akD+fC1HIOD}=qMqB-?hqUgxa#G9 z*h^pv`6Sy|OsN{FcP_Pu#vn2P^}!ug(>yozV9PHWU(qz^k-1QsjJiQTy* zU)dsvJlcJ6Z$4C^<5(u3b;O*hBJ{nEu9O|+#MApBYg?L9zhr@5w}8|+pmdh_?Rasg zX#YmUJtV716|F%x(XLvF#uBa_tohTeaID02Ba&XWt@g(;72u; zq)2zo{PVOb(L|5nyya70Z_Q)}3*reHLp@5*bh&dmZr}fU@AGqmqR9|6-eo$MaT}9A zIcc;tCOWeCk^5jPrS5>%{5YQ0)`^o1-)x7XFQYk77>y+CWSE(F-ndPZhZd#^AIl{p z)9`zEW57OW>-H5uBSv9h0WB`d)tMSkOwJDd?h-Ffm3DcfUZ?pz2M@b=w;w392<#c* ze7VSFq3v0n7_wv`^WYDm>yD>uC-;a@G!@BaoOSc`|5=Z(Nmh zLXlR?wENlh3JCANL1+BwFOdQ4s5vsD7b5F zaGJ6xNXn<9aVVqgNdSrdngC*(4uuNBq_3K*Ca=J4S!g&PP_d#rwpel_#`N%MkU2eC z$Z?d{RGf*+te@TdOV0a5q)%~7((wb*2O;^>epqZS1(R$hnEu_59sRT~MTaZ=sr2!( z+pGiaUma1tG2YpF)evTrA3+<&B|FGOHC7-1+0fUDNRe;A*VbvTX%5s9of5hX>_y2y zdX9e@S~H4^_KGK(lxqI+%||>#FRYQnfEbaUTcM%zyBLxCRHi2*ud&O4)qg6_|7&%~ zpYPFe`$^PfEulRk)U};?dQzXq(I0gjI|rN&O^!;IYLB;upv4sgDhXj~rQ1lk8IgqF zr4&siap?`^U#w>pg964liV1+Gd*PeW)x0;mz-EJ62WG>4X4!saREW#;xBKlg3;k>% 
z*s_=lqYv69VxJMgIewKKI)*N|A z+*v4cr}w$~s|r7fqj`S3V0aShmX=Ze52?4Koo=v+huGDf^w5)Dl)Qujrsvf`!Ab$% z@ipIIZJJu~Uc8jc&ogS@Q;?>&*W&dD;`cHNGn4A?pcF@#>V?LQiRoFND&LqSK*{cO zaUf`wOh|+l%b0%7U;bVH1i_-tRnrR+qi~y>$F-?^fiZ3<8`#K4j~X*VAR4@m{W`Dn zoj&;)(TT-S&lLS$!4-fDLz&9K&LDZsv*c3cs7)ctq9r#iBfVY!o(hs>Aw#qYlBWm2 z@{7|)uN3GBT0L_O>z`>5rnnYS#y7xKUVZiP$A?VxDUZWFY24cvl@*1pt$*D#$Mz!i zt14SUMfDpD1Rti@#&Fu>QmjikPeBLBmr~U;oG#Jcdx}n7NNMh{Wskl6fby#NNZsYu zTMptL&2#4J0aldg43tvpx@?TEqE8#bFX`#`GH=Rnjo`2TmA&|Rs}bAZCx%5s6|t9k z!%N6>U3y6`AAf`I^FfJ>F!R^_;P;*<$v9mWiH*vFOCxJy6iG};WrE_l4@}3*ofcZN zFP4NLnmZi?Pl^q*?b~FM=?b>g$79XxZ=d#}-kz?miy_qOXd%#1-VlbzDh))7RIK_! z4OMI@C3eU&#?+DxKnFqG9CNrs z9h2^Jz2#^x02-1VLdIZ>H}l=JFefJ8}% zEl7wrH}E6x{gSEy8tz<{XZQ-Vk$CuKWy>g12>VL+*l}vQyUKyjd!R_wCFE-nP33vn z#`xwg(e=%}H>5x0E6pl6Gq9>uZtiaM#UOjQoL>PL`&g*5PoxIo$5=kN#Y+s0510Ag zeqB_QIcTqT!Q_-U!7sI63=%Rkw+J2&K@DlSeTs@^Pkp5#&y+c>*!L~B;ZZ)p(!Ds9 zQ~ni&bF2!Ff?SX1$GLvMiC44o&OHk+i3b`!lzXc5sp^P&Q5^|i-xMqe>GrPpQ^l`% zcooe*e7vs@pNwfDtjFLWmVn*o)Q7ct#@1&gT+A_3nHcy-iH^CZ%58JfKJXNB&Ai#1 z6sy|%4^>?M?$xz|{VOL4*J9=llJcE8UAK2p#7?!+(kJ0UfG7#DeruMP6z>};#Y;>a zZfY_zw-VkLji-_B6xIr4&y7VBII<~C*W_@XIc%`lzvCugbu3&6GbR|IsU}lK3BU*& ztb0WNNL(}C8NBA^>sUM0%8!`#C(#S&IOyz_Q|}fyDn6_KNr2J3wPBH=tZi zac@U;T+if<1p_VFc>%wu(oW%N&r^$GHpS=J_swqkzH5mG$t!}WARW7skb(xU>)B%E z@eOSE_mOc|Ky(G*%_Vy`ibiF(Ae3ky{}g{Orahq-Ni|}wVp7%eKz7oegjrA%rIHL$ z>oQJEQ$jGLi;nW{u1j(y+ul~RUmj8$elV!)8TPNrdz+lo<2*W%g(j?5S_JK{+v<4n zerh$=%Ds!U%Wd9mPcM`*4s|;d^Ma%40E#5?7y~t1G-|c}K+dy}dye|M%e&41VO5*NGwMB3Eh^DAy}rknkyzp5!<`%Q8Cs>ot9fa@K<=pRTefQ%z<} z`#|s>m+%~9i|ssVl;oB&)R-_r7FyA^&J~VRHVVI*J=5I|o$bogTPK=)N@N{zZ>&%`5Gge1)t=y;PX+O z_UbLH4xW-(;w^KW`TOIa>BC->o&pL8>)KaaPu---LmfX3N|2=ElbtUuGxwI;aj?=W zMfa1%)_q7Xp6txT7Mx{Ee|YH87xWYo^rM=O&go#W)&bp!`wDonfrv}^9N6ASxfy@M zKNEU96oYG3Sg>+?9PdT@^2JNn;yX8Or+=*ZC<)4g~@9Us~g8w}$Ridx)(ks4(S+6NJ{amFBkOap@OXvCn9)O_GX?|)lM2Q5qn!>8_fCvN*NpNg z6%X`U?|dn?&e^~ZwUr@(1S@*a){@%Mw~mp&mwGEt)@vpx>Au;y$`9S=f=uGP<$G7; zp5m4~*5VuF};Wr;_mhAgQe~6j$#&=V0z54br}4^&sV-Xx|$7t;{W6cYP-@ 
z(n-`}=FsDW(sw+}^RQPekq-l1LdivWC=)$=wNiZVf849cD~b*5lZ5m*cbs#I5!4A5 zDa~}^S)jZtcA^$v4p?||x&+-VQTR|Bd^pL*YNqqZDG~ARm3&L7HHi@6W<(XP^c#9; ztXQ$AtA zIpnk-n!*4T)atXXcu6Q&-O1P`n_WnK8ZldR&T1u6?_~c%i$W6n&QkmAghgDmD8?4I z;PO7W{}9WsfYpvtg05q@y&Lmdz2xcEp^GDf-ZF>OZu{tYdUoE1e^Td=4dNxm@SnDP}NL9r}Lp_EFhU&C5cf|!zBczzR zd_u%1OyAj{=XaPm_9s?XJ+mnm*%56Osjz7xahb4guzgt;Hb5)51M(KO!$wbCmDZ#5 z)`xcqBcwE=Ub&PK3J26j)H}Iw9ah#-gp)PDH%@!ifBL~Rv)d9{J|rY#kvo>Goc#1e zL+iDu?oxLB&b^9T&t$*ieGV8d>B{u8QF^c)TF#mC?L5X!AR0GglJSrnoSKzB0(_=C z4JZH!WPrvzYb^cv6jr>D^zcBD^ML-j0ZRIlx8LO##Vvyss;8vois15Mm^9I%IM#c1 z?z0QNYOYnlo2NAJSb=sN$|7$y*J77ee@edxl6B>7^5q6kJSN@NB7m4S-Bq1d+ss(> z1+|(Fm+wqZ(gqDN`r>bj=jqdLF^)?9^NP+Fi&Pah)T{TJgr0u z3UbVhIa>}m{ggP-zIK;63+WvM*;@-%;wjR1^dZx+$CC3`hBp^S1NsBCBN zz=0nxD4U9TFUHDcG{$-iN@hwKiovrD0N(Do7=XmUxlQa)9U(XQBfavvUDJdxH@x7+ zqo$3qk0cCGPT4-&@|V6JwM&1r(HMX9Q?SC&Wq^QJW^CINYi<`gZ}m>v!Z}(frzXF@ z*jfBICnrWBlwY{^!itbSNu`n}Ua}*prd?@KX-02~{^K*5ev|EkxX)1p#n_5YX>-v_ zAVr%~{}iJ*u=p`y=8^sdXf1`v9#fh%!Y!*lnS(9qlg%xxH$1_I)J)qUsc>3*?92gv zX}7y^1Y?#_=L{CO@l2)EiImwzg3}owpv@w9txAzl{v-!*P8U6Kwei?wQ>N@J7zvta z2;Gn{W$I@VOH8pST;@7E(_W8%0j!}xk_m^^<4TY*|dGHK$Huq@kN-9cuQh?QC zf`!BF;)=Ou?UUZ*F}3K_+qt{Yo#I{QjZ3mSL3JPI?U4_u9#wBWDNBudG`pg18_fdm z*nxLmsza8uC^{xI>7Uk#Qhczg<)Enxzm5NLtwUl%FY6SP)4%L1*q^m{6j}Ml`y~T7 zkdqWqX(Lf>yc3)CYlp#bz)J6ORgH>ODBORc=zVWE(=!uP$stcha8kyE{yPTP?dE~#EukU}a+@X#$+vwBT zGw#k+<%G_2CL&+z*DaCSVNKH?KbY=uLB8m{&6y(?<0_f|-a1i>1Q0D}ZYvquG`z0} z?gI74maR_G)ykrva9^AgMjOl>O{*?NgpmcP2M6AYaKncvo+*_NQtdjh&ZV$~jkO3x z$g}cvwTPNlMqqvKZfoK%1m$R@2YXnC!%A4k&4Qn`aJj+|jnnyOYsXr7Bn#(W##P=# zvC~Af(i48@9S51%(Nh{JXbVao?}nkSX>iv|aeZU+c`KCHjH4-xVtPz?w&9J@a!+VY z3tmbeFA#D<=1&khms;b7rARI?xTW3QTj0N+8|hD+iF$mu>402yPP0~3Qk6H!>FCu{ z+Dqb&Im2MS^D+&tLb`$SaUaQf$sqXv&m+;NorS@H5b+hzGz-eC-MfD9a#k z?vu+md1#g_t#CXgP+X7}G*d#@gGZq_WyC=d6XJh9T~CyQA6F>SS6sV*L+WsR8cIMpURedKEB(%bPLp?$R}-P>PN z?+D&EiEN@Fo4?KMB!=^%?Ucn{*&vzN`*Bn%Wi&JQxl%yE<`cSX8lweucZy{=n& z%APhd!A2Z{L`U;!Wvrd2DFeZ1MZL73v9kaQ5kBObr9Y9G+o$1qn=gl*C#lYF{X|cP 
z27{vcpruF&s=Y|y+4hIQS;{1%g4&S7?11Y^@gfzAwQP^esJ|T3$?+jY?4q#f}XYlWNqazh*7wQSJnIzDhznxzHj`_epD1*$6vQX!t z886PzCHqn7cql?bjKZdH)lf0UC5I4Y*p&rGIS?9j9C^WMfZDPq`oYulp*W5kja=7s zC@O|<`wf}l4?+^tXCF75Hdzk6Zf8sp+KX=)6uqh)(``ZW8;zwpXFzKFsoe~FlTNf0 z*}=pT?p=r(qNXB7gfl~qx4>enY(dF14-*scC-~stK=6(k6EE8-uepE?$j|%D*a*#v zaZvd$SRr#OX;#S>F|88caW1#(_{709^S1PJhti4|h06?c_5Q)2J^G_|tgjeL*ywiVoKDCgQA;*P$q&z-aGR({Z{P;@*w$)iZich++S$k`2ooX6NxR)DEcmGox z@PDj+|Fa7FpS_2@sZOK{T8N#&wWsgTDRyWdF(<$MIp)G{Abe!hS!o#!l;WedYhsjl z3Obn>Fn1qjO}Lh5=FzB@1;{06{x)!a> zZM^PDlaki`@i;DRuAlHrN&G_`C`r@2qKGZ{< zEEL&duedB+gEkHFqra|7JK$JWNM*ixCf*8Jw$aSaXx9>2vdxxo#-c}Fhqj?mv%+};9&T})bLM7bTlD~t5tNo|BzwMM=rU_(bs#KmT<)GUCYtz2 zC4=YTL5CxM$E@{H#JMDW=)3(BQIERp{`{3lGgjSg|Hq~pgC#WI8Hpk|&u@-iSVFa0 zbZNa6Qa&*7s=~HN2hyf}0tCkKLk@EPW6|J$rStd~BFDeouFfZFCC-+yc<-?;(VZ4u z4T4rVp<5DEU9vgaJ{0AUMGE42dA(F>+9sins5>4S$ngQ35eIaKD`R!mvx6OZ7fB1} zAy#~(pSy=psV=okj4Bf8f_bBrxF5`m9@Q@+wNbw^Cs&tp@xMwuV?yf1L({*LUp-#qt*6(5jLi{tL$qwAIiD*|C>3*^tZeQajEUZuGkZa?Inwp&~uv@de- zIRLSi;u?C3wE8p6Ey?+kl|iin74UM0AST=mSYy!*$)_bqfAs8xVkqBqI@wJ9P6-)a zF?#gv=lkD0wA=6@YKjAPT%*1p`EJo|ip^grc~}2mnA80;8vcKs{?lwvX?Cq$KsDlj zdD#53Ln`3v`?Y8HUtQgQx{&{Jkyhoy;JbF91}#_EG2lOaw*T<8zRvko?!Ji&;@bZl zrX{B+DJ#z`gK%)Rurgt}?_g{8(=lBV;bQ7+<>>0*OvV4R!Ji)U*Hy0lrk!0}B`i&x zso=1OCcj=e;lH_FyXaBzaPUxZa_~{{adLt0+*BOg;3pS96(>J06*oT*6*n&j6`UQ; zL&d`ndfan?2|U+c_q<>MP7H3))0~?VPQ?Wl;sf7#xcR6!!3Ma&hIn{5skp%MoLpdo zoM1iBwVs2ApNj8iIxm>Vd+m8|55}vpvbUgu|27rByN#N!7puR0m$WiBM}R>Az$gHE7;u=YhXc$J;cVq#29t!jS~?>TFqfZR z>1r@nmKg|sxWhyGX~5Q=3;B&VhK}%Il`=9-Y_$m9LyHx0CRykxjBGg zcdnh*VIDBY>$n592y@rp-hL(<{HgEP;Ql?D%E}4~5)Xez^Y4L#Uq=H_{TT@0XVyPK zl8f_iNOEw&!H@|2RNQ!kJYFs;P7o|k5O5A&b}C+QHSqKDP;r4vl$#d>8w8Y#A6x|7 zJU{cfdEgkl;BRq(i-Hea-#lO$POuGbZVqt4fPLWv>v8jgeY^e*a3R6j{{sAPOZHy? 
ze_h~z4g7B?tHUfojLks+Z9&wmK&&l5#N9wRLG&>|;7vh#|nbO$^mXkettd>1#W}u&`%aFR6H0kaTn9;cnM%4 z7;?eH@mEj1K-gQk3m>d{c!pc!!7veBpR)T`gj5yDOeW&$zUd72VX7l-(GD zAyR|Xgq#{E$Bym$yHG$KPh*nfz(z1eho z<#*zqj005H_^34??6tn&As^FOH++2Kuu$7{yhR2WTMBI%X%f^su(k{w;mm{T8B z%@KRQiAc7ERlg8r0=&NC z7YkIN^*RcJM`Ifkt2=}Q281jfCTRa2tz3B z+tsE~r?FlJUO3)4Bx4VZ!NY7Ec@3EOsaHPUCX|kaVBCO(B?$|ft%x>Csw}bVl4zbxuOl3r-0CY50Y)F4UyE)slhs4L%P1WRM+ zTkNGgv{gba7+vdor5v+r-Ba^a-a~&JrMJ8@g~=#{_&r<8Qo4=KtEX<}jTB#AZuNgA zUt;JTnn~Or^$~bM-Q{+dtOMALEU`9SA+m7kV3H3L<6Xj_3D%x1WTA0SBHPg*&nq9v z(lJ7pF*Y$cQ(U7nm3~^h6WN?9_Xe-CiCNm0*06l6nMX$2U6(^`9{qvNSz;qo;_m}rX1r0KKzX@7VZh(+ZAFm~ zKRp_q6MiLJaq@8l@yKohrJU7ji7;GND}yNz1FDSEbl^Ie`_AhTZ?{8{89s4FmU^2uflbTe6jB9+|n7m03DV z_t@6q=C?euIn+Ct_^|rY69Ze%4@t5#;!c5U*+ASaa4Ha{Gc$RBqb?mtf6l*b*m?D zbAGwiNHd8H&JXtWoN0oR& zS-L+1>ly;6yoGsRlH%v^M;Q3{J`XjvGe+Ky3z&sq<`8Lz8;fSV6f3(aG*0OA@xTtV zOw`<0PgZLADsZ>zbgFsTsG0K9-Ap6NlN>F4uGOo>FZZ417kq^eOG-ZHg`cDOJ?4ME z^*Kbw^Zzk+54^%PVYrz0ez$Ggwr$(CZQHhO+qP}nw%xBdZPT2z&6=#AFf+MxJ#)Y9 z*5%)I>GALVsYCp%JHYLH@{a-ez!~G><6@x9hl$xd7kEitc**-9yC@!|I#_?(^2qG* zIKTCF;dN)_-C?mwrT$F48I|F-F4Yx! 
z|2x6MQ(1t+VQHFj|194{H%N23FV{ve@P#Aug%dzqS=kJM>w)9iz41cxq6f*!&lFJ$Jbpg#4@IMPhW`2^L;i9?Qce|!2 zc!N%v?-G+dM{wR@q0V=M^*XPEvpTPXiMrTwTQ~oJ^#p4*T9^F;cHw1*4hwsJt}&;M zT2tC;`5bq3?z0g>{X|XX3jGYNU&j)%)93BmK1xYRp473;7@}y~FhQ;APEW;G(RHE zAet0UU^R}4m^Ovs5z{RqySz`sz8Z)(!T`GLk6MolKYH;`lUIiSNC6P064==a+%!2R zzoCY;DDb>9pB3qmH5bYMBO||!fyi2ClNawdRql%Y^ws-*i=TJ9+DRXUF&1)?$K=M?$A(CBFNH|G5QH^t~0Ch$co zCxh+!1#4{AJIKi}gYD+UND#Fl_oJnrzHRRcCp|M%AL|Q@^lBUFldNmKtYCR=mLCKY za(!VpCW#3Ogbi5*aI}&XFP_|A6zzpgA{xNjRM-1Rq?Q~`9Yr++2?ajp)0qp-dsG*& zvOcqy!41~##0OS6pJ8;nI9PT+LD}CT5s;popoC65QiTp+DjxK*LXlFkrWBk4VA%=A zIC5BAbvdL_GV!5wOh}qM2bATA<>=y(AF~tN7EKjFRaF!_1rt$8UJ_@wlk4p)rK;6N zs|E4V&F|`K?@#qM+i@#~ZLq_x59VyQ-9Wr!I9DsT3!~)3HCTgZaC*`}6_099*cwq^ zc9EzGzia|@TmWOjFQ8dmFCK+xyO}VJoKTa1Pwauv{j!wdYD@*U$AEkKwT?e#d3aWxQhpL+VG+6ogjo8GL+$>nQ)lANMTrmjtLiJ z+FeKM3uC{4Tb&a{W(}WRYE@C40ZG--Ok~}pE#cd+oC1CT0TQu%HcjAlSv_Km?@RWv zVdTOF7ObZutCl)7OSPG~H2)$DkN<0XwDaiVHb|lS_WC+&a`Mb`d#X_AM=+w#$g{DA zioyMJJ>xk>*G;~x&M|5uCCuq7T%q}KV?Sa&U%tPJ6$L8o-wNPd6R8zjo z*%fS|;LhG-v?P139oD{_MAWneZY>A2H(e7ad~4T_z7_bBD-> zWy|VyZJ*`0KUWF?J!0L;zQBR4p8|X6frG{=U$IvVJ*k{BB-)L8GLe#}c3DJW!j!Z| z7}zL

w8wF;X1;WH_D#&Y*?kk#R7w3_Ub3f%|9MWFCr2{;A5mV#PrwHgzOEUb*e` zcBbd+XS+)(OVjg)&?YdWwRd^ST)K32w18xbwOZ>USCzWexq-M;ukWmDfwFGtpv&o_ zdZ@m|xc%ucueX6_qlI3hcZ3?TtNWLRX90ya&26fHNUIH!q*Z`U>NP7*GB^-x%KM6^ zfWtD9TwqcK3}7S5^??)~I6|EtT%2(t??&D9v;!6uXYD8;=Go8dF%>mV@{qB82kK=#pLM`xsiWUZv&CNfK<;9tb7$vxsL^m9 z+Uc&xgY<126dJ90C}A)6alfBgl`=W8`qT7WuC0&J-|pMloL=D2oUi|Mis;(tBwD23 z{fBuDb9z{^uy6LD#Wnk8!dP*nq`fj9aXx+(AL(+i*oo?9qy{kHfagHGKRkFfDizo4 zlp}A>?>JU*V>y5{t9p{i)}cgAN2e@I8FA+Kr}O*w+*nN2ifqw)6fgkBDB5WoI(wxjG({VN z$H=;=Jo@WLK@dW859+Vg-6${07}^zXCsk|RT33u_+$lpI+d#u|kHd=3CY|S(lFnl8 zuJ?f;8X2&-ALu|arzdQI5dMIsLHzu9`|nW%dC3&vC!#nUPAp45zbLPuWV*ojxZCXn z9v3>Qt%is^q`UjkerS8vR?Y$q5k%s7!axqTPd27cgso3uWFV0D;=1J=y79w zuT+S&VxuV%cKoaG6HB1N_2gID@C$|G{)G6CxzJ{`2nGp75fd#5i}@soQPknjz+i#j z&Xu4pa*1hXP8}bv+`|cUcC;FUsRS6tG!#};R6>+oV1lMXNoPaFAH$+{UR@l$@}1+| z#-GtcJzck(o>p7C8DFo{v1`4}CQDLFX4I(El(}bgd%fm2w^^+-9c`brLs6KTbX|tM zz@S*MBbS?@UTD_jEU73WdOHpK~aG!?cnD-ylMJUQUJngL?@yb*6_+3}7p&7C>$73%;OaoO*bkyr@0JPx{Bq=d% zN@!<9IWL4nUL?sdpc&m?4~_-!S8?6hLWj#66hhAq9XHu86JoR;X1X3Lr?QDQm0pV| z1|FTE`n#?bE!B8LquyCpt*=(%S}{Qy+uMGF1~j$?souOY_$exBU!_zINs0wgvUXa@ zkt^Ba&bV1DO#_zfUFM#!%@S1Nr=WH zLHS&&cgUi_6i1dv`S$a2mZB|bF8t+VI$_hv;>hSDEe_2QBh*~|O$MCP!CHe~gTd#; z>US8ZR&szFycVp&q zDr&40-NSunx?PLB$5d6tuQF#37D1H(p?yh7FmJOx-(TAZIScE&R-#oQ&6JUZCseEh zd)+x-8M+mYlD11M+zx2JsA9JtYLn!A+EfOeZFGl3A)NUPFW_pgdcTmwVtdS_3m?}8#hg5DQ@{FUX+1xwwymt zw zun#N$xb+3987ZsiY^Zb;IOQjqqCL5by_6*sQhA{wuV=!rwPtR7t6 z!Q%|VjtkR1*|CJD=+Y0mQAAOR>~J3=|3H>Bp=ahZ&n{IQNLM$ytS(*4ZhdJI!r4Zz z^mcv6GA5k8wTTZ!+6MB&<&f~NtNd9GZ+~OHTLpxdF~Bq(*3@e zL`ARLRUHJr^^1OyzYcu`Qj9cAmqi|G&y=huI2JG@UnP(@Fo4T1UiHFu%rQ`wn>uHN z(l?gYz(a3*uwldp5-B6E2*PZ=kOQDOHfd&z zd7=Am?IC3D@cagntbhp9N)46_l+XFf8PZ;sHd_jHOc8kt!kf-kd0NcPa(n1}S;6c% z2)&8q^4n@;-8y`Y1a|B$03`ZA>_h6RfBp2cvbb%Bb(oJ_cgK{d$7L7LnYQukw^kh& ztC)h6jEfPRT7v<#7i5vn2_)NQNGL`G&*$HFb$H#18cd*~o?CzMW{zEU`lceBq97Ql z+sVwGy=4SWK@BPq-;js)qoQN2015^A__^%{9vK4z;EVDD?d%t7;GKt5Ql?6AZ0S@q-ZB3< 
zVNc0cBj8w;?SN%bBIsFI24V0uZvL08&I+xu1%`(T3=NM?4rJ=?o@?12I)XP41GmMW z4rz1{hV*;zsYWHM%tz~|UOK+Wv$vkVvL%~0j*;yPf!7ds zgj)yYU`MKsA+?VZ2`L7m%X!&xUGgnDQ9xgk2f;ANY&oo+s}BbiYL;8x3BUDU?C|Oc z9p?~ODkfRvw_b)x>$BoK=-O+_Xm}=AaE)RY05kcA3ilhhnDl(&Vj@>b$QaYuP(W&`s*wwHobbpf}TTA;_-CQRZ=*ovL?@M<^DblhMbb^EdT=Mc1aA9d?RdarBR2R}I)O z?`-!+W6ZS8pB1C4W`ofOsgbXJx4gEc|Lr(%GfY$Wm)=d-im}wNDRZV+rXa4P(=PDt zhuv+%X0pc&PHI@vdKOz+I+DuzJ(#6DkrJM&0e@rkSPue4SH1BQ=6 zow^YH4|v3chXG8tl6FM%{^Z@$8=TBu+gR+3+Yn!R(#{`FlXCn9{|duIns4`HEoh?dp7 zt5wrZwy~T>#Ji8p1&q7J(kMsMjV*{O&h+oUiXL^?I>{y?P5G}b%`{u#tC}o#BtIMp z0d(v*o6`&tf*K9H$V;FA;3RNrLsSq|Myh{6zXmJ5r?>qMPeap41<@ZH@u!6s&hSMG zsm^$96eW^kGQ5=H0GXYgBs;Q+*QL(L<~L*urO(AnZ!AbJE%{5rpm{OQ!uSq|HX?F~ zs4dGwfPbKUex{DH66z(a z6)PpYZ!j`mavUv7PIA?p*io!2XlXp{FT)tKXR_TvTkeL&K$?btgrv!8kt4~;esSWp zOBHYjnjP59fw1(&V=o>_OOx5OlSJos3*Q$RWs`aluRFDZ-!K>f~0RnEgO2U&JgYZvfsXk zNRLL}H1Gg;qquKT+KKfovzg5_t>gy^N7|Uz~G7lz4)l}&gP%Y?V>=-$(Tw&(LYPTWzx+vB#D+&==u$bl?pA`4Z3J1 z?Q5`QU&Hh{fzq7AJ%6%H;@FUNIJ6BTR9qk$xLPRDcgWJZ@ewj-ZT;64XhL1!vOUXG z-JbFx%Gs$MWP1P!cxfzHNH#LbdY?CQeD1w%eM%ke&{_svv0|8YRUS`X8x|~pys$W) zJVFEi)meNjZP|Mh6p08 z`oBAXY-}Ti<1G;|f!241+%${^<6%a{{3`l=&}I++lyF5w#f+4kuum{?7A^rA;IvG{ z_+XYVAmb#WpNxodM0kCl{KHBg807@WI7-F1GTV3Uicvu$7N?GYQ0_vKx>` z)kf6*C!P@9V8ae4O)99R zJk{LKY`w`0E3=#C0Q~CTGshcoNnA^vyZfvsmhRJ&Y`7s|_)XV~V(@~)un^*t(UGLb z_h^3@>{np1z;tEy8uo7uBo$7aMLDO;Qlz9s$!)0v>%Rzo)krGg-xdv&DJ?C^!7HIp zM)r|C@uzg{8(*L6veu!xk`*L+v$NB7C($p3_TLph$RP`}1qh|d;auGIhhf|9v=XKW zVeDKs8UcJ>%zo^;wl-zb&9t8qVTq;%#cATIKH!CRwQA<*snmuXe^#-gCa?%0g|9jx znCRrX@)kHl`J!XS zGLh`)1A4{5o%rDK^xT{gcc7Xn16*=J3hijs!B@9j0HnI1I0hpvxpb|Z2_4Ei;6$d`!~!# zxFa(0^8^Yo(VEnAWvAWIxun+RYXi3iweQ>i<@dYtEp@~Tj3?_22tXD#P!V0g;YW1@ zHXxdO)tB8+q!0JYqn|y^jlIn;?SKEroFo_6NoV#=EB~aDu??1})PXeYaf1IEXMD(& zA99v~PG*QdVFfC2h%bI9%P(tst)ibZIY;!V;UC8q-G~}f2V}^KRMIbo4qs~$ZW#j^&NY2Qu)t(|(jz z{vQG{P>uj#TvWAcqm_YG*Ms1nsijt@-uO)x*BT0N9xrdUV!1rfO7#cr z|442Lrl#k2wEF$i8(Z7vvJ+||<#>}S2cVDJN`KJ;1+U?y%c&t*G~$?;!4g(tuL~ 
zpu;Qxtbx4qA)|Z1wO~N{pzes*|FZbefaT*n^MLpOZt_vE0o{Ts0GN7HG~@e*p32Q= z)j?MSZ}uh5gLvm7di(g@Yy|e~g5Ks?qUk|vf%N{h1!B^}JfDj30sF%J0|#Cd)?svA-}ceEr~kE~@CTv15b2Y6vrAzN`>D^*fSL75Qt z$WYfrj1wUmIgnS72Y>hJUORH-z;Vr5F=WMn!LC}fV6jY5BuHam1`)uHUB~j0<3eUg z+q=7?nrO>eYZW^uye0Yrr`Kf~f9Xit&a?93U3-;nHj4px5|5f+52o|RZ0_dyCrvcZ z{qJ5U--5JL>9_dw#46;Jg(!CP1PJInnl#`krrDeP6fZ>kt6B8F*`!+ewBZ4Z7El$I z^3YM(+)$dAc;)ic)66Ag`Af+|qS1A5mF4KR7NW&pKxKr?`ILEtr4^#HQMzs34|x7E z6w6X=nAmYFG34XkUKXOoM1=Z1H%?!Jy_+-kdT0Dn9a?I=MtB8V2+z?ae_Bi3otTUD z=(d2BUIb8`Y-HNyBP-pTEi!#a<~m&dlKJ4-&q3ei8q4dQo4$QahC-jGa01rFNYUn9 z?+uYVtJb=QgVSE0E*~N>vo~4Z%P|u@y*wPA`(9H(9eAB;4;vs)lrD9|xgMolfGM!m zL$6w$z`Q1p8oRw;lw%D$vN`@x);iH>jmJ3Ao2|8OZFAjTV-KO*%NQa>wvB+>kDg-? zgp172N1YGAb8w+pXy%s>r(xT>OPgUhQfdpwlXcz&ruaJnoSBdy-f!Tgc@RB=92`hT{Oej|C6HO=x);mDw2` zKK=b~9YOV@EHD&8Nqwo!SXk<6F#=ZmoZLlj-@_{!(p(jMIxtTP9b|BXGIiMuqY6A7 ztnmy7NeqIcV#Yn`+Alx7vjGVJPIs{jd|tnG^`3emD8^ro_mHOm5nT)>E`2;y1_k*9 zSzO;_an<_~gAriLK-$e-Eg)_)nEKwh!>rG9Z+FQk#93gOpL1Xfje5>}0K|*a27B6( z*u2{THyiYMB`7A`*E}Hn2^^CNaEg1bv4M-_G|q$cH#jKQ#!x=Vm&cAji5J88NQMFa z<)2TO0?7Bo`_O;YThbETAI}kP3%LZ<0fIErGCI>hijT7bUiLgV9Ix?DEW-E#e6$U> zkzb^kOQ5t*>%8ieCOnHrOyw3W_xrTos7`gxogboA<~)9 zx@a5vJ`2V6y(cF9F>Kyj3`&uTtrUq3eglbRf2%G0j`JA&&VCWR#orZl;THP9;rN6P z9%;iBIF!Lse0;%%eGG+RfA^SkIp@Xo-NO!#&5RwDsEpkyJpKjG@`bl{Erh}$?-2!8 z7N049RtA`X<>7dyzVbL-U`Tq3&Zs(^9rS+;JPDogvHu--VBlV}mo2JF$hqa{$%*`I9{dwj0R_FmSE@p)%^eiP62xh@Xmv1jaeblrAF zdA^hf{b6o~#tPo2zpsam#qy5&muxFu9^?u9&J3_sob_EC53mt@>HKyrhuCp^+~wbF znR7kS!2Q+W(E9%IChSGE{+E~f&ebRG@obOsJ>YxWuTSi;V(<4gR%NQ0xVl^yUb@1N zedOie794biR_z#XShImB)hE_b*~_v$c&QD z%_HO*6>(2tG;8MQRw-IFJBBDqF)`BkC#Swe>PDssDdZ-&ja#ImY(Q@fFk;x0rC#P2 zfHUR;b;w3nsZKPpNaoh6KH}FLb0ygiufuM3VTnq^-<&Z=6C2%}CFffaBUs@nxX_;S zz#QQCl)D9Q1}PVM1#bj3=VNWOd_Z~534rl(OHX50<^%~1fx))RGJR<3=}j{=cKgWQ zjpvuy5eHDYd3^Fi`>>wyHuODk{BQ=<>?J0wg_JJYb<0v8jAM<9|AKO0F8(5Pdgvds zoySA+oj%rrwqJ;PZ^kspG+?of8Ah?v2-eJ~)rG;@btY`vDcP=*ZucUjF(?Oamt#G3 z%^=N*v9tRiYd{}Ud0CY#7>Gg?6J~H+V*pUl@2p93{|LuUsvfxp4vTA_O-8xs}z?{*rKnud~b 
z0?DHsd}RRn6U<-USV);sKWHZ%GYT3hHD$Wqqu|ll&{NS`CT5OBNt0!3@x1C0QX84N zX+B2kyw`DWa;ad+<9X?C9Kd%>fe=6lU?Lxw6CcWD0>6i_2e$+JN%gZ_Wb`NKGL(il zEa#*D&GSz0X$b=^C96wPNxmY*9KWm$$uZ+7D?1^gBN~S1$qYKY;QKhNhWB)^aq(>3 zZ6oRCyie)6uNKocvOPHs%8Kv5lE`&Q`MB0j`D0V5DphBhD{{eUxbP8Ij{VvLe5*`r z2RV-|Q?RO)s8>~DY0+6OzId*<+ml0mtUhk(Kv%V5E>AIP_=6>86k3da6&DjYZ4SzZ z+D!PTSsQ;81{HSyR8o0Qw1|=npHFRS#Ask_PQg@FaWa^yLXlq;4FKo5Uy~Yg=r%xlMkl?;OpIAL9jplwNkGH;YL}2*t3t36j3RX;zq*d zOID&#CRDO`gSdmic9N>BO}T1EuUgb5T$dpoONvPNnU6~7N0@|+fcEn{N%`%W0$RYB zQ7%bx;utkx-J(&$%E{O|-nBIQ^_QsmHYeg_&T;2p`_Qb&8PZuC{ORgucCkE`HI@sn z$QgqGBBaj~$!&A)Guba%p;QXsP?a4OHQ92_x-weWb6rp<7Rii|r<$jjYynNt0gxj* zgdiXm@g~${QwIj{qq`s!0`4bR5S;FBNSJgkK99|~Zs_O7XPVT|>B&qHS+sJFQpr>v z`5v-cdymSX72lt z-5S!Ax&bC=6$Q4*I*pmlxlX^{VH0a6>a|}L7!R01sjh;;lekb(&%A61TBV#yv_27U z57f*CbVRKyUe{>Udb7qr+ne zhwR13gTupIwL_K0Y%NJUDq7`)EVhipvRR4H;ozgpOt@G?>l_@9H??N&;^LvfM{O>Z zyQoSnqFxFvG*1u^uAa;BNVgwXlkW~0#j>Wq&lMG0yreWqdBup@Q8<2Lm2k-_e00K^ z+gEF~*6qV)r=hlB?(m)r>0~Nt;vwVm5^>f0+_ISHzeQ%9nYNozZ9Vx*?j=u6tD8f4 zR}S-$s;R6?V?`aC=4;`xvT4uWdU5}=<;L0k>A$#!M^l-TP_9lE$+AS57&H=r!r)Mv zlY|mR)JENP4>(wKdyNwvcOf{g4Xv9@_)mIJRJ_YPA#y>bQqu~ou@Gk(`bvAn`|7)& za^+=}B|wI9Hmbv|CsSsFM7iDJ;Hz@9dTlC4EFOiAaH1glUWwM+n*32!9x`O0bcCnX z2f+S()^nPRzY`!lQh{8QX(5*UW`hhfKLFf6U?`gLV1TP*L^@qP09|e5eYH+A!RII1;dhP~u`{-g z`xnf->z!1&$Mx?u>$bz~t6FP`S%e|ZjBeL`$mjJ#8_D^%(8Th!i%!7p)oX+5PH<&} zX-v0#mfp3h7rkd`Y22_|yf1({6sEYCF!q(;*jHlY5p_XOL-)VupRA_A?89TTKl<+& zHuHe4Bv^;(H*&kfZ6wTGT+9Y)=be^va@u`QZmLjCzs&%~VaIVn4d6k2R3N&tLB^(6%h*;vJw8i9<;%xVnNX*5=8 z&Rs5XaE$Ia%{U^DQm^n^3Xy&TVL+^xaE$1_uUAwv)+!w<4Sc39-K!_%vCTBKD3H!7 zYy2@U0mr>lVbeCANx{d;=hyp)&NEnkOQlJ?ppygZB=k}*G@wR+v2C63ktMt+e@qe@ z6EAX2`;ZePQM=5(xPYw6PcE}6W-hUcOrweq6tSYYVv-dG8LgNx&Bp|2n9ze7)E9R8 zk-1RMVlfUuml%#Z7#pA;w@y^KhD-K=YA}hBc?OLW!MLa zjmH{9#141*6hajuDy!*F2}o0zAqWCR1O94L<~cFdC9#t`h66N#QR(`m}~S}>x8T)r^x+@>MTv? 
z-fraa)}OKQIDsWDyz2|yuaw1C(+N7dYPRCF2Y~0MRb%4m7`ljlV0;_;Hr#RcDYBb$ z*z&ObRxxSvs6>6-XvR97DA~XPc(zxQtf3+P0D|_fIHc(eqJXm~|O*$sW z==4TML|H(Cf5w@LM=a#(hwj%+19k`Cd$%vfDQuUQlgqE`&myV0dd|IEHQntoxsEQ) zdFr$R?41^`!uT}Wf8qFLGt_*H(21yWtk&**+-!bYUTVpi{4u-Px>bo5NP%L&mmjM0 ztZXihf3+)KqV=#Sr#iVEsD&1Z~v>%e7x+5R;t$E%iFliPh~aYWw1{3=OC zJ(cF=C2flR8=HIz3fsmbGcduxXileHwhUh+ziStlxW?gJ4OPaA`JBnyW$D>i$CZ@IL?5 z%8UV+E6;Kg%I*3Hm@W04Jt65{kSc7{oX9_#*yl(EZzInwh6afTq*Kc=TZo8vFNsTN zHqK|Mw}0OyuBILY`U(3Zb?GegqSUn0EDMPBvMNc-SedFs*>Y;=o=&4k-c%XGz^XS4 zuvb#mtW;BrplrVMXM^?qy@f|j{}%c>#T8%Z&1e`4H#>P+^p#!gY1zR%*?GM6FmQ+f zY%>Ws5=|w#E>I1Jt6lA&%4Cw@L}#){vUGayK<~ON1?gFp^66mAM9_{z75%^80Q_>V zV<<3jdKRa&N6F&d*8bi_8KeNj@AI2pFSqN5W3(`dINB&;QcxltDe22mgO)ZGjc*;x zq0WAR>myHZ{1TBm>KB8DGsy@vQF!)hU_^nERn>Q1%tuo7A}|6h9lye)0;8EK>}^sc ziD%_J?gcdU6C5xoP~@VQ|6m!q@;oGnn&Z4zcG*klkv4eHHONE=GhRAGZ6I;haT4-Z zi{c3Euhq7~7O;l3FtiN_L$eHjRZRTItFqxg%`wC@r+RwHU2o@RdPnq`uBeh7;mS)mqRvaSMam10jR?P_$c`S236g?GDj#0@?mh;NBT zHt}~If82WiqA7+#(t0*2V(^;{r=veY9N`|yjKXovqguFt@;d0{ugT)u%iGVFS%LZ> z3}{%?op{XZon(fN51f{kjGU7C=H_756{r`L7L^y87Md5HmdH)d-I;N!I|@_K<{(YU zs+IKHgsj6cNy%ZOl9v|e%mH8-BsaF6r5Ub&C4eeQzEEZb4`>cqN0uATJAMqyRm(9cG!Ho**uaQODs@aM;RAuLHs zz-#`_umuZozY-GEU$=2+eq9`^6?r%2(kj<_>)n%QM_{OOKcC4pLoiB~xR)BADin(q z*)}pGMtZq@@2$DD7GRi{tnyz-Y%bhy4mQr1-d?{u8Jns3dwXh3Z~EgI4r)9O`mZ+? 
zk(-@|rPju;d@C!=d)r<1TI%AS*)ZXD)QQh~_!4pcM~GeF8pPFR|J4|#;{;*{&&Bjn zWUzE9vsXLdQJ2P~9+@T$J5n&EJ^@tr8|~9BRc+$b&NtlaoT>#xIl*+?Ix^70LUlli zBnfxxsCTfW!_gv2ch(D8_fOtBjues$N7KaKL^G(N^!`1L6>Xzak}l&=@FPxn$69kl z{C01j0f{~%LHnMv%}_2J*}ObOMOtnD*UxEZUE!6qGPiaQRrIS`*QUHO*gaYvG<8@q z!x;K;ZR;1QvffZ`rPBEb-jtHcP(wmO^?i>@y|*;B7av2wsWOCEL8y)$t(3BG(fQHY z(wXJLC`So{e&&FN(_(mpOOlaF&LCcXeB83o<)Xg!AfPxPW-r2cSj4#`b-}izthtCW z9|G@?w}_>Puk+tFUR1`AG8yKNT@@>tCHNH=@iBxlA({;jOO=(qPS+9l-hDReN7M0; z;pjo5MYU>MUa}(=#HB4ZC7oEYB%DQh=$(WDlmMZJq6ei?D-P<-gEWDZzC0|z!S*|e z;mxrGj7!8kJNEOQgdIg9Y}}R56u5)F9b#j|cd8DUL{>7{i(a3U_k>_#bn6n}?_^;% zhLj_;XJ={M2ob9RTRFsohQTYQ@ZLIebr&P@k5_m7v?$Y9hP|@G>Sf(W3s+7pB4%X; zjO#*GfcXkWt!5`|Wvfb&hGs=Y>#{r*2CN#EZ3S}wJ>}sI<&I!Cb>OCqf?I(&b5JDMw+ zGprx1Gpsu@)MJ;-g7PG4>){|$3GvJWq{G3<=+gA8-QN7OpM;Z;I!K6anrPOj`Wg5T zSsCh?Jj?Qh5g1%veProtw zeSLLwWK@tGB2e&<$SF(D+1c>v%$fR#su)lCnip!_XXf@$47D#ES7@eq{Dl@6mWj+3n(NZOsqwdPtl^;TbMO3aCfQ3IMe-e%Xm*AAr@O=tUrmvBPf85{m3 z!3~@%zAkpJo!6YBPaAQ(0a?9^d)9Etlv)m{9ZugkpFW>*`MAwOKOXc*3X|a}VQQK% z3MS5R8T-LlIHoQc4T3Hc&o)rGQAa96oG2~Hfg=rJccJ5%Vc zVpUG%+?q0od+g`~pLuu07AojelNGJ_eVOG#2jZ2Eg#Ao@4v2JkIsJl}|AYX3de=1> ztMth7Jnu4L+&;ZyYxNuHpmoj!8v?jX|~ zDIs+olRG2xnit*?)yfwQENd*q0??i306`1B5a7=G){qJZ(oX^$a&w`uSo8%br?`O00 zqZDDl8*nfN}6-2A+OE zVUhes#(bH}6OP>Dd!wXiLV2wq3QZoNwLp^(f6Jp#L+2#;VY9;3_Af^8?=d zFXoym;+*v7SxV|$Du5J~r_I=CtW_QcW}|I4@0Z_2MoSX=D_7^SJ>;!k#1_jh%NPE_ z2;Ez4QkeLAl=9qJ>ph)od`&?@W;AEtOg^d{W?5Ep42&ezkVKjlwPUl~_$Ew6oBo^+ zxLPD#(NcQ4kg(~2Ow-W1BN;Vog8p`O^x>ps6C4YHC_`M#*g^PoN5qO@!-6V9V9+Qc zb_v@QP^LN3w>ABXot2_h%rn^4<1?V5qg!0zVqz=xT2cyz&rR?RtST>am(%WVb$Gh7FmXh67g2%)ck6Nb z4vi+#b|cKb+UA>)DY%#mx|ILMBVIu8a>#fB9}Najx{y>M!B**|a<$yH**d52Is379 z5KTDqm^Dy6-MwEO<g&SJ;$*5ph?ZwD|v^F$Peo9Uo zPMV+(ZV9HP$&9|@8%eg{FC#krVjSA|d{SpvA`*UF@o33qcxC|s@$4!|09q$H_^#9J z3hKa%_KA()JVH2dUhmue853|ULPVpT@muZ5qg5g>_lZ3US}$4OAhc1>ECuCl?$)=y zwAJHtnW7Ah6)vp<=XQj`J>G>w#%A~0XEJ|^sjARcq@DZ}3SzCzt(HOa1*#(R;znj> zZ_9fpq;*tw0OU+0D~1$E$#bCL8aUTX8BapY0ZV7@(MZ3|W`0Z<6nG8q=n 
zlmpMy2BR^kg*WdE-sXkppZ_0~Dqe4CN_(SfA@J(ip|y6j+wevC4Hf0I3~rN{NP88b ztBA;Y5n;3f57v~DA9bGK1elVsCAd}s_Y0ILv2#sA{X1EP`i;4Bp!o&Mm51+Q>9ka%DPA03@DZj3h*W1=ss5O{uF~|kwEZ*(4`s0Gm z%u$Z3{pn}<>gLl3B5<*5fO&!r*oAu?iN5q*{<~re&B0pETpHe@Ih9>}Uarb+(PET0 zaY`IT;7nBxB4Gy}@89CzH3-Wv-gl<^bQzJyq!F6$pnI$sqYG!{f?C{FkGE>AL(HkUGb3 z)2#D7B99uNn|wTP@(&99w@O3SI-XvkR;6B#Tt??SfKrF0a*MDW!Q|JJ_!S9sQAtU~ z>S3v5=k>7?;xe)n$p)@2{k6a$%C)QowlevX6P;=lxl(nrZP9vmad~!eurg>ZoDxCx z(+EjeJ4(rUPRo(vC{c*)ikFJ>|00HV_NMe)udGz?y46ge?^FXg-YKEdt&U`|X(faq z4`?MMj&^pPsA2VJ6(Z}s@P-mTiQ+F%>RtDK7$Z=#9qbqudTxU zsy4eOl0x0NY4LAgJ&Qj(w72jW)Fy+8>D*r^l$6^swi>N{8pPIVp7-;~d48*_SH%Xy zwb&FbW6zW+^NLv2*N^o?pr5uphD)&HhLs4dIr5@`!g|38{_;>|29(WiKJ4eWG`n`6 zzw~t5B?il>MvcH@$4@pxD;hBW4hW4LPj;b23B=L{Veg7br=zr7)rN}y+JLAU0&x!_ zk;godfk;5?BQ_+FWJ}1FNo+>eWUC}1ZaPDYegCOj zEv;u#4_3?Z5ml&5B$r$2ZsPM$FJM?SIF~WgG86(g6h|n6y7=Kw;E)}b9*jf0Nvwnd zRHFiA^dI(11w8mi!`KIK0l|T#5A@6L2La&VbMyd2JZUI?DA8UZ;3J~+%ZcZb@LQu; z_Q`$M_Y2GSJcNq;L^)#gY5!Eh3_ZB=o>^0`Q(q^^$OfK1;rUVFgRR_+@egE6p~ z?6fi~o|x32_z2>cyLQwRV!e<++p^u!)7lSRy+rp>;%bukoWO>SsUFWh-)2>V(E6s5j&z$5-EN(>Bl@GU(8YDe>hr>J>FmR)rlI|my>C+h{ zg{TchBl!LEa&)&*y4L*EyCRuSD$S^{SH%y%7_bGIobKs7570q_W|#82!bS}o`S`+ z(#=kp(}!o8lVT~Z1jED%Qd3gWQlA{zWafzt3Wo#MeZP!n*%+o1O0b0#z0q=J1HI2@ zvwYwQv_c^!z(?IVUm%HnTC*tz!^)H+l#b_2j<#Mo)4Cc@kEg7Op447;uZmCRsFF+_ zvr|nIRmK0xVIpjr7eq0#N*SdFcy=|Evc2rC%fv^i6of1-x~SnXH^&z9H77BLV0F}wf_qUIGu%}y}Tu#;lulk=>kso?n-fM7NOF!L?9CzxVy4`{Uzt?(4p;bFOn;*EwVPobL^@7IR1& zju64#Y&jHFR{lt)PRqLw_F3<=heEJd1}iGa-g2>gJKx^fq4bA667^RObh--TM?ETr z!mkwwq&XK(wYb?6KZu!N3qfYplBVwX_3r+$o3eCGHHUORBnnIKzMa%{J~IO)D@m#^ zh+iaD&=zCrloZodsC<*JkHv?{K!+7oN?qDB1e9WTym-Sqd)9J3`;a2@%7Z7x^91nD z24%SQ=#gy9-Fm>zb!A#^ZMT$Wd)^SbdBD|h(4lQkb73+ytADkf&|Y_Q?>p&J`lo)LF*JfoIkdrA*_1EiWt6hGFEL*fw1hkE1ug)f?sYDA0T%;&A%vcb%*#JmyK42 z&h@-9axtxt7RAcV=9}UrT#5PXu}xeHsm9UE3>^ua_1xaLhZ5KNPK-I-C8a&0?W6>B zcxDV8AABX&*T=igC{2gz@HG-qrV8JvhcdSlp`NB*rs zX$F5c_N7^f;JF#`Y#ysGYE88L3KyG?#%F~6iQeD-$yoD^Ew3E7wSCt~f#t-McGT7y 
zD*WT~>=Yvez4N+gZpOKsY)Bq-U|)_hD%Wl(Xt%}0=bTf?Zg1?yF%j573HzlMf< zM)}9pfO`t5xU72Yl?2^|s)RYOP@~Q-Svrx`#UY{P8cVY-nZ}f;q@=S>m-emSb^`6w z^VjJXZ?r`~&dt*0{M8o0)O}eFh`?voZhjvtG5wdlCr;@L=t3kcrBz%L)}v0Iibd{{ ztkri2Fp;SO&fr*mOjG=mjHjfyp+F&s)W#j65?q(#n#aV)_*`2BI!%M3=ROGVKlQnX z^V?G9C)qyLx^W#W;nID_VE}+XG)1?q3wyYZ#vjfqG8t6& z@Y^%;+K(k3Gc15B*30f-^=wR)L&cyxvU?Gq@#4MCJPJn+t=mNn3Q~AX1b&OrDqstF zU7RY(>VTFmo^7`!Hergiton}cy=qacZlw-SvPl_VC{#kFojId z*D7U}b(Y%g^mrlTpmf^aEVldTe00Nl4}#9NC6IAjT%v5E5oX$#t4>V`Iu@1waFd58 z4xX&9Ib(v9j@*%cJE@VdE+;DcVvWRb=FJ^lM_;~~R~G3lv(q=zvuztR_@o5FWQXbp{o=0UV@$>*hIp31QT6I}Rs{dpe+N0}$I;f7jo(e5xwqz)~% zK|ssv@ltS|zk0T$vs}_(SC(9k>rDx%Pp>0Ll-a5qVGxPoH-npk9-ZyCe-QI1wYB^z zeR;k=4a@Q2WOQ?!T9K;V2Hww7@O+67bctT3tXZ7lz!}9@8Bs7JRWaAIgVJu| z)p{Da09n1ZrLaHGlJRbJCIQ@?d%X1BPIVrzH6}g7t@QGHw8b7i7ett!%R9+W{=uhg z^Yt03CmZgTTzbwY@HV}`b~89X#-(bcq_?n^Dje#VZLUU;2vNSDx!>NooGFm~HbCy6 zqf($IC1p^R4bv5cwnC-B1U-X;aPe!qnXthr9A zNN!w3z_GGF1iEe8#rNzTLkQ?Qe0(i0Lb>28|lV{onUwTPO$$Ky=) ztlz?{3#LiDCW7{7z$CX$qDO(D9#e;Md+PpnZVF!#H@7YUyXxM$m%TGC#w@k!^z^-{ ztgIx0$cSob*=DYQHn)qqu46)?1J`xRyg-UQ4}5R1H5LuV7GB0rd~qcAybs#B66TR6 z)l~cB#b<2bWWVaT#iVS6gR*5-LSEF1eV5FRy^TxmE4IFM7m9v0nAtWhq-@`Ga<(}$ zcu#hw-Jxd}%+9>9cVPv+l9Q%As`-=;e;xRN_g3{8L1)8(nYDlnle^V}m;I zNwY;M?+)7j_sJ^$@ty-?dHC^qv4!xdBbAy=6Cvjd2V_=s9Gyg}dqvbcCmEgf5w}=- zx~5)~ZckT{5Qg+n3*<)bzr!EJZ^pe6d(U@Hx~b+EGIIImbVEBO?hlGDF+w>?>Z}8? 
z@WB!N-C3drh68Qq?QxaR1im?kk)ekAz!!fxaPi9*&Sn}RO|DxO^Ek+DAKj?It>yem zYgJ=WK?&nYX>4*J)=;CVS+OY{{nAE_n*LrT(WlWW)P7T+d}XD;BALpr^i6){p~-=z zl=9mSbq)=t5jR7##;Ged5m}6zk=~8h2Gs0%E7sHHj~9j^y|zzfIn96Jso_x|w{a`9 zc${zO3U4Rg3Gz;17xZCRevfQ~?@k~I4GA_G0u@G9e6>RQ<}?!v0n>b9<}{Y;y#H+U zX@@uS_GU(-XA~Zn-HB&kiF03Z8#G^fHa`j7Pz4yYPMj9=5pPp&l&0~ zOnX*5s`ngXYdL;c&z(K=D8fAD(3xN6Y^GZI>C1g~{>Oh#dk{nU~~2a+~wzRiBcHs_ucEZd>yl^R2|}w;AP@%6IIN zPT%`XOlcJfpDK&4EiQHGyUPM;*5b@C^b=x_s1*`JcXQ$5=XQ0ZR$s0|YTPCk} zbQLS9jT!Ihg=&Y2vOC0jjSt8{pKH?hA{=ZcAsW+y&gaSsuRL%%cyK%*gm;i`<4v`P zIL1gLt}DyNkD0nxnLjZV={brdo^ecExDjpdBPE}YeB>gWv=y|)+BxOH!i&6K>2kh` zqV|4PKP=dK^K>#1bFZtgdZXFtd#|1!bZI$Ik&p*{BlJ+>ONWeXdt|hVWJabVd-s6^ z?&iMYxEuZLeEsn&%VYe15NFLs=WCy;hrBf@A)Dqj-vMzChMjxNz#dMwS4ppu?Eb4W zVrNQu){x-QNoKd|$jnM@%uOb`MzbdJ;~(sRmVS?*^!6*E_CD61>u6m8)p~YfpUm5A z~AigEN^9A_PF!R=w8vq`t++k)t1CXUaRU=>|bX6 zNuf_wg!?__=17o@BiUh%b;Dh^pNfakmr)%jZ#2utY7!?>Ca(-yl>2)iqb0ZoJ1$*ba z9Hsrv$rM^_PpaGY&Q!eHTRqtWSzA?_`XOBgd9CKswmVGmT?v=O2;KK9MxH0lM$Xr8 zjR@_F+S0Ru5o=pCUm7uSbZ);&WTx-~zRbOWTO(gT5x<_0c&6cg+QKz2ixsPtm_^>M z?$yO>D=8mH;Slb@H+QbbLRzzkIx~3J)wnSoLp1J3gf@?aVD^ zudGZ>MMkpO3t}agXV{;bg*;~V!?QikjQrr2bfPBXi*NuqzYHHe>Jki~UK2r|NWS)7%%B4k3AOoLN7WD>r0+bYKr(L9|xS zy78k@E;<>Tum;e#>t^-0c<@0kN(Af6mVt!48l7PLb4QPZgoQ2&8cBzP_+p}XY&P$@ zRn4`fBGZ%^p!7vgiK{)DJ8D2ehHh6GyF80n=2s3{HypX}JR+Pva$@xEv^Um;v@LsR zbZJzt^1wY8fizzK-c_z=8PB7w=RiaEXuOk|mHLa#KIpdvv5gNSz0W|T*_h$=Ftc>t zM3t&Um-HjsH1q2&zGV13xX~CB@3YgcWtjo|Iv zkJ4vTmcxgiytMsj9I3lX2Sf(6@!TwibYmhQcuz3>1cdA`lQj1L7Y-XOV&-bblpayfsHW?S7ci z{k7J7(gH}hP)|E3Tnh&MKG64rR-Ozxh%+$Bl@Y+ggOOxcSEMe60wKY5Nf0=kOo0Fh zSr~+(>xO_+;S`KElJb2d-@keI(@?+zU@UDQ1V9UG!DNsF+$c;ucvk?87V7B>vH&J& z!Stj9VFf`!oXNRZLbEY~>UC@nIL2C@N)k`o-17D5a0b=)=MZ@M@rC#`=A z&uApR` zbg`zw>Eegsf5`2c2-rS=G(&%EP6R>FZ@PY8NB`cD0lGdO9K_jg|H0Wu{CmUyzJI9x z#_dB5z-x1o0Hm3-C{+!yoD~hjz~OKm6nbqjgJCc*pav}Z|Ds_K!17=)RlnyBu(9IQ zi|Y3=fM@^rx>X|(fVp$%=)p`1RS+Py!v#4T?$-+d@}sm-AS&pa3^*|g1cq~g=-*^W zgf?*ZvkV5uptb)YL+b+c@}F#h8_xR-ewM+}0ObD{83K;@bzC?a0|(Bz{$vZ+)y4d3 
z3)j^~1Fr`7Ne_Vry#7T7Lm&VK{m*(Z0CoOPJvhL@|5*=?)&=qo2* zXdUFQGPo`r;N<_T2iMiX{OSu5hW`6JSWFVYP-hC_a3H9qXD9{88wh%k0eniWasNS3 zQ#zFa;#BcL0M>2@a>OBYb|H2fBeVf@KV~;t8-vn8>l*49=^7&8yOAi25#hgU91;Xz Z?fxthleJbXNZ?%$2tj3Kqy5H${{k~{=?(w@ literal 0 HcmV?d00001 diff --git a/Supporting_Documentation/Skein_Submitter_Statement.pdf b/Supporting_Documentation/Skein_Submitter_Statement.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cc96accf836845f2a117308119682ef508f8987a GIT binary patch literal 22471 zcmeHv2|SeD`?oA5JE4?itRc+42^nM0zGkTm#!?u@SR?zIrG+-iT8LCA6e=VMQBsr= zk}OeaSH0(+sGfR$zyI^R|KIz5-sPF;ZtnXm*SXGhu5;Yib*}T3x6svBMXF(VkeKG4Y-N7AC*G)4yU@qkZ=4iSO<*@u@aluq@4T0&xYxe$vVBxo4=!2G_a;W4P+(g;Z6 zZ)rr#Z*`FfBL3HMNDTJ3x=14Cw{lnn4*hFgEP_b*H4TSGA%9IHB9NHh(omS+(=fQ- z=pi8aeU|})PVx4k(s{YSKv;t@fT0j@3pzCzk~azV8Ga{W4-DvJqxDkv<09phxtzl@QmR2BxPNnz)Wf*jIaT)e-ahasDu*xHs+gxF9 zRpAj|u|{Ywdws#-7}AlLq&M0rhEICgR;_7h*dNOC7NyFv8G%BSsw7U*y^eiZz)Ib;*aBy!E=1+x_ zL7orIlbK;cai#hM!oV=YtbS-U7`Y{4WID}XhZgFD07*C`3Wg`5wt|{;svl#KCYk2P z08fAjP-f7E6^#L80z<$}sqWsC-|AXX18G5YH^34k*F}~f9)C3dlfbu_yY8U6F@TB~ z`69t|f~sg~!O1iq8r|BT;zk8IF(AE#OnDiE9Y#rpvfMjVy527-K z3bm_C4RoV>`!i^Cuu3r7HlYME$X*oij6%Zgz129o5z z`eKPV7!F!kz!!^$#tw`y0RclZ`2%wwJR=dHBRJv`2`mb_LxYtEhhJg`ivtRwLkXlp z5`Y^}X>?C-imDFH#~sWO`eOIdD7c;rK@6X@G_miZd;rgqR{@F*;dfCO!P7a0+ShvC84;lUD#V`>nfhy!)OvVzBclm0iV z-}qYM4wOP*KuH7)16siXJ~0F|3=Qaml^TX6pkOE>r1M~1gP{mG7=eHUoPrAtashA5 znxH)l7)LY))B?T6fPn*l;Q$5b9tFcd%l6;;qEPtX2m8M<&XAlJ$JylD(uziazq`LL zu0K{&uSdBKTMrcV3r5~MGhtw8RG|iodE50UeI_7VqNG@3=6y`e2HtJ#sQi7|dHc5K zez_QpIbf7~^wd`NL!WvhKh8fNZ+Yj$mgM(##_uhqmiy`zzfI_yE(e;0zK$ucly>lo zHRZfC>a#M*;jHsf&fpI|G4_^Rmu^n)sr|g;=118p7jA{`stMvZx1U>G4tx92ll#Hz zJLih?%I>|pz%ds!^f|d&)x@Rn&D!9+mn+Qbiw893LN51axDM>&*+V9`j6LbfsmY}3 zoLRBC`eA#bJbc zf05A_e|XV3Ma1dSdfC&lyf5Tik4!orJZmB8o!Gk3*6vKt6e9*V7Z9->_eLCbk(|_W zp3BQjvk-Q*b#LSTWH@bsd@INzO5?qOW$G1ry2RQG%N#t?Mjrx|wIvoigRXXLD<2q`A@g{wp)iZ5ZPfL+a$!`Zz3a8eZMCE?k^d z*s$Sav+5>fqPjhf(kD9P@FX@sMv$IM-k`reM|R%#*aN{l%q0?gTbbCcgM4?C!`ZKN 
z<5c60KhZKN7T{-G_Ui~~pKF_;B~4B^q}a{-5sR3DUVn~3Z(&#Uk5rYCZql2UpZ+e`M4 zzJ5SejkRU%Ti?K}Yn=+ovNo&K=$$NUiz@upH&KR99ClJ#=n!YS{Gy}zf{s zm57e(mWZA^UmnV+M|5PJ=2qG|B`JHRaNN8#f!uPULb#kx)!Fe^!S3dFzWsM3A z>gBJkswpSZ^s|(SSqfJ$Rw3b5>8==w(c`OI7D({ZjVzRNbm5Ph)Pg~(%Z=WnhEL^u zaM#oE8m|=4nU#{fxeqSD-`S6k6O$_`nT4|b-t&_DTheALrB-p29a~Gy;9NHdpNwUj z5q1dCs(7EtZg4^3)NPBy3Vtxw> zcra9dwRpQ>ELInb?6038$Y!o9>u9{Dq8M^*!!38^{c9gcU5FM?mD`YOP|4GENh~oK zb}DCVB81<)BK_S>(Y^W>wrP@%`Ea53-?+b~&t~yygyRV9br;JW0PxKI)C;0ygQ*! zM}-^Bmml<&iX?3tMqh6~SbM8$;Ms7?6ZIKE+L}mjT`{SNsFK26&%0ttuzS@nYEzF( z8|b3;Hl&3}1QlsiDW)J~oorW43}Oa~SLL5h^wr(*Ncz^kl2aW)43XbE8};#>Yy;qxt)zJCNFxEv_!Vgjjc-WJSmtE7d|vRzVKPaPSq)^!+bxfOf}~CKmlbGz5Z9$k>UM7#?Z5B`diaL*OdaP5eIXwC{F+$g@u}n-JCo8+rW!XKbx#|cRK;uz z;a}eKYR`r@fxC97KOkbG`%Da-!niN^vcHut?`GJ@Y}j9^dyibGaA}3%sju5#kERQK zs2HsC528N{ym=$2Y^(r2bfJLl)$2IrSqU1=YI)u2skW|MC#}+~n$+Cp`Ys<6hC$A% zxEnr+%hcaa*EzkN2+C|qw(qX>k)GNucsEx`#hO3c{9WfpMaN?qPkF(-udP3#>kDw;=^NM`7g zuSxKrkgDN_z4GkV6}_Ll!iOK~YYZ~Ut8Bg-DX)Fndz)_1nw{(9ZF)5y`Ul!x*oVWK zC%0DPW@T<;ClJUjCaUBd*_)Tuu`zWlg-Vm4fXKqd%LF&c}C``=qu6ekjW0F%(M$$`v``wAm|y4}auACz=R zxN`x1!ru_@%hmYF;dUhEw#!1=3!U|fsAlZhCZqR(Hq#PYe2s(?=DTxH?J?u&t&XCe7f~y{mPlVr?0!aj&$bSzH#;09Z`0X2j-h@@hl{3f8<3> zK8Zb^m^Xa$%x1+o(#q<`S2yk5$X~6wrHZsk7_@Z%vL;yo{(YPR!jcjc6DWOZw z8^{e{N(tNvV*ryLuH(%Jw4l<-G+%$3ALQ#;G~aK zzJ9<70`h?y!jS6i=>JDt|CF~LuigL4!2nR`rE1Sbu zD8GQiIEG>s%XLshnJY^kN>%0(iWgBdj*Yb`v#NmEo>*xGJ&KBkoOF{N?@epg5U9^F zU}N{!877TzZ{U2dLvR-pw<4^DR~)`Y9T1NZxWN%ZcvhhK@tEZd_n7FF)9nHp?OjKE z``V}5B38@p6Xy@)eC8#iewd!5)F+@)D^t&gz>62OliFqXITy6+S=DHWj-kldn}I}$ z94;J!WPxgUzvY>Dw|R$ndMEK=j0(>>(%x~in^gp%&Wv%rIN4Y!U4Qvy8fP5UP{YO? 
z-dmLf+bDupvewAne#kBRY~K}Jj7Cn_8h3wOqJz`{N^t~AJ3oWZfDKXQdbT#(sre%2aig&vQlW!JoBybvVZC%5&w@>Y~7*{Kz$6Xdj<#p1(b{A@zf za%A8W+k0dARn&@@uq_tJD)l<8qRkoB2cOz#&v^~D8wp^@`{`#1MgrSfNrfvC4;-q> zieYbGo7nTz$D2JXQCo>*E74BsB$AW;_nX8FcM^r^7UmvYvBT%O#~vmc;`kyo7_ipWWkMYOX@!$|Qd7o82*uI@yAVm#QrBR4ouM43+#ba( z`1t*Xat?NE!UBe{y?Xh9#rt7;@d9KLEQUSSSjb3DV6%4YN=}jv_eypVIUy2<5zL6g zNRF&wx7Toe{;B=K0ydli`5=j1Se{cZ)==4qW2Kdmz@T^xpYDAur=fVK8zpiHid$)A!Ay={wUyQ`g`JJv zOybbqUz`PtJPB5+oPXG6 zl}%!7eyOsJ2>8cAval&tBti3xWOX8e6H0|9A()Y{G98emU7&0u3?`>V>-QT+goVvMefsWt4$sS)NH4Bx%fa!h!!Ne4T-fu7XPMzb6LsRbc}d(?^V=0RO)MuTPdNfNwrHihW;l(V$cq#muP)SGlSW@LKa1$y*1#^-QxAOBb1Ygow{EO@0 zgmG?B|0?zoa(!{J$Lsolm#i1nHfQ?n@a@XqMMoVVu*MW@I*{EyR{dN}=|-8!HXO?y z&o3%s0lv=*Wt@&WIytUADt2VIW$D+8@A2|4W@H+<-akYq_rFV;IkR9j$>P$Px}$oE z->Gvqi~0ptSjboYoN;LuIlai8mtS5B^BM_DSx3JNGHR$z#EGa2gKfvyMH}iY^pbCKP7z@Xg~- z<8k~PBRQ*2s-Gjg%j5X`snS2?{l4z>PvXyn)|hy%{2;bx=g7{LJ<%+3%S77QMDmw0 zVx$plc2`6RtoXC6?_*p9`1)BzlULk`=0}m#?(=YI3*P5=8*6OAHc688TDD$$1%X#B z`sT2D-O3lyD_q#q;-0xIZ&>zPfSn?Z(v2CDwl(BNMR&rMyYttRv@Br4e0t;n0e&H! 
z`X13zPUQsU0>@IpkD}@vTcay=*m^X*gpbDVPI%P9X%bs@7FEH!u7&6R%D0JePq6;t zxyzqj`7mcz$66Ved39!vte)0pVBg+F; z)rB<(Z{w!Ly=$p{Iq+U6LbHiGifvOy-g?7T49?(q7adj#x*)bkr%pH2FjP-Y=x$1X zD~rsbx|A&>4YLLvarq|M8++%C)QS!F@2xq&h-VyUWX`9WDC|5SaYN93S=jpeeG&T> z4lmfn%24E<$X`8pU(r&&R}HE7Qb!10CjVTn-VS9@<@~C6jQ57OQbX2-tUbckJK}7z z`;^KkWe(n{HDxtCLaR*pj8172YA= zK@cI|EeN+uA;xZo>?<-h>3*xMBV_&!u&3ouJg}lGu040%9 z{iIZ|tNps-tgmWhY;$QlWZW}dM5LGZzc%#5?LN&2JJbonj$d?@_X>F)-)Fodt zTnPTL*CX#Q^dnbM-BocnUpQK*M5tbHU^qq8)KrZ*UD#sg;O(gAWb4F>F6D{RM~etPQ0smZRUkd?wdyP z$FDF-14BnjpR@=2_~-iH4m9>xAD?d?XgxCfa9rb=>uA-ZrwtoMCZ9M?49+TyijLSl ze|vxHkY;z|KxA7>v)i%jVdga@wa@%+QQ%bgLHnY6om}N2Q}*+r2O8oUHZ_@#ciGVj)p^mJ=*J>}-HMXe&T z;+c;@u-L9tx9dtzO4;x9YNboR?liuXtdTA`E2rLWl(}s zLXp^8FQ z{0^Va$AqREZg+aS;=59EqT10nHgxT3l=sOlflanqi(W-MmvzLGP)aRI0+b#g#eFundGOphB^i2>mp-IV;g6{%s-8bx(_c6`TsQZ=npSjK zLj6f(;U~FK_nRS(k8Nu$2e}>;yq$h~B;$R?zV5m180!sYK_LUd=ObzJpZDF_QBvOB zeq4#TnUE6|{KUDte&BW4vl4jg)-_WZle_%`?p}MKxlHr(p$7QY`sv*tT+dj~)^7;5 zH`{)`K74ZRsN?mZn^QB9qLIFvwk-7Y(*_>zZkg17W;&I#mF?z=Za#IMZFNt>r(XG< zd2nyGq5+K%MlnJN^w2T2PcOzAi+90iJs;*poqaC-k@7^g%;0Hew&cCfrSIIPlApF8 zvAoyYw4P)xnRc%2_}8p2camN%l#G=;U3F?z`T>svB_$6_{Ir}t`z(Y_zW(q$Ht$4K zQZ!hlZk9zwuXAJ*3FoE{;v1H`+(0iY> zra!kvjIWqZO3e(Hx}+5r6<)Wu;p|)v2q$sAYe%fgMDQv=;*-PT@on*HEulomrPyXxv;n9ozLxR2F z|Be0oDU!q7!W4Kh%q?IZ6rVsUoaVoH!}L#lkA8q#lNQshO<@Q%1nMVY{m=-rAm2a` z5&~`CTKyCa_0x8)HC*>cO=zQfQJ2WHAf`8k*(x(M1-{waJ_ zwwRhMIdafov;EsRF9o~&FnKgKg@VA4om2+QmFns3rztag>9P#W+g($}9&3s)^(Rxk zybZ(YRO@gv8@KSCZbWyPOpVUY`~+CDlqFHKbW~21BO6?xF!S$P{FGs zG3scdj20APq(OK0P`A?6Uu+9JY07vp82;*Tct}WyS_oPVgi*keM9@nF3XVdlf)c8M zVSWrssH$I}>^CAybe6(SylH+gh%UvI7R1n$k@-U`(_*J6{@)w&3shrvP|c0z3lF9E z!;xwT_;<<>ageA^1~E(^pv8tp^U<=fW`?(5)!co2V0Kh`Ak-pO4W)+CfC&7d?2pEo zWK2Qqhewz$uu`>vI0&lX9|~`SM5?0^>R9lIRYxGc5&5a)FEp5}Q2z>zpGq#$(11gd z`AJgDVfderpu5|50fH?9ALcl@yTPeGR9`3v3J4MTM3Ij+D z?(goQ0soQpQ%^x6!<*qlU1AXMxWp!~^#7Q;l;PtIDU!Mm#m`eyCREj(>Ol$eVaRBi zdb`nSfiw>W%$`PfhpED>4N0nK7)ni74XNf%bNeP0AMZa>{g(R2#+c0h%9jS<)$N;p 
z{6YZ|wly{2kEuVn)pGTqdsE$gDSrQsgFg~ra_~!Q|FKa8!|{Ds|1d8=1{hNGABPkq zs)Il>_aHYaU5f-n>`Gz4biD%^-aa6p5k>&QMZgq6oRd2(Bv45M8tfnC|I-QyCZ4J3 zpEJKSseOI_$Un1+x-QKv2+~<2U9F%XZ+CTff*XN=Mj%z)s2GAO28*VuQZV4r9qUHG zxVfQFC^XR7-|GIPy5I7Q{6Ju(pBr_FE`f@2b;Y8D$1A1r#jY@b4@4Z|Vlkod5XeM~DC8q<;|nr;#7B_zQRci0dy1 z=|_=&#PvfKf8p*Qas34${V4K}xPHjuFWmhjuD>9pA4UG1xVV1Ue&ASyrc4O1?EWN_ z;HUjHrlkO0$RVpjQCA(ZO%MbW9*IOFm}bd(1Y*4soD{_HqR|za^M$qXa92Sel z!ocYy=0O|;bk6C=nVh9GJUBQ2eSVx(BH|FhivKkYIx6soG$h#70wLu~X}_KE!2$Mu zK6nGdCB7fM`7ca<5OM#H$-n42_{os{zr^Gxsv?N0NTdx4t&Rp(KlXpU$q$x{Z?1>M z<4{YEgMZ7(&t(@^n*W%Q`H$=1$J+gm>);>P!B6HoG*L}W)qgVonJx)51&?;~aL1_P z-EbJlC4mF310q3{fc5aeAqW^eo=E-2b@1ceh1h(w~m1`E^1`sTm*+pdG3!Up~Z00i(WK!6K> z6#xPpo%k8v0iX>)9)REB9jrmlZ}1KfHUYCW8ykStz=U^TfaVf=Td~NQ8fd`ha4x3IC2ofWl&zun3UAB?4IfKlVQqz)Ap01H%9m z4;%)~#3~?wfE+v!GYUd9SVT_10Hg>g|Ig49Fu?V*gr9)L0n7mu0pJke0NwzKiJ}1A zLpTc1#cTs=2V5B7ZX%)p0>dwu3OFnp)cYf<0v3!GjtH<606N02*a{HF0v>AXCwv7Q z6mJ7yXP|!IKtFIGAL10)9sM*{i< z2n#SQz=ps8ehCOXkPX0T02jm`5(NTDpw3}HM~DEghglKo5U99R86p5xhAw74Xn=r) zKpzk=B4`8xgn;T3K)ec+4}m~Hf>}}kEEWg_C_Mxs0i{EXeiP2O`pkO(Ho;^Zz*0ao znS%(JhJKgIGJlza4?%4JPzt7iAt8ec)JGz)(1cj}57GhV_(RS9auK0ts2QLSKVJ~a z#?Kdo!?A?5!PEsv9~Li&HRe6Q+<&liNi z@$-cRqWzA+0pW1`bTLK4#OC;h#R1`Qe8c1bbpYlFz~lJ0$Qp>h1)#!Qh?W+u|8QIz z{^z*8e-qdCEs*Zt#O)E$-{bZ$ghk)(Uklq~{-Igy-r~_d6prv`CuDwvg&C%#+27dz zKwqfw^@dlRW&&P54;j_P=lU|vwykQ-?bC|Z-p9+$op6GfdnV8M6c_B8-++W@jB?-p zBk=7_5qX`GS~C%Z;OZo~m(tN=h}W8r`DF}I{?Czv+VsRL?=GyHdue{WU%KM+=~`at zGLP-YhVZt1vn;`pgXP9|HK*Z-kV9r)D_ro?<*qL(bDQi{uPodet36`==~GQLzuz!N z@7n#D=oRBPwllJ|wN^Hhg}*yMXv+&B=6Yk*`JNa0zF_JvLp5 zCG$boM}-Dzk__li6W@gkUV0z5>@jkD&8%~SM}!smRLGhcPJ@kh{d~)_Do)?tqsf3L zxp&?4qN<@+!zv_>R{3=M=tstX4c(aOc}G3!@THw4-Y@sqiCkZ6mfUx7_R-5nK`l7t zR~O!K=|;s$Z2Zpxg$P%XcziL*BVaTHgQ@SkyZ1h{!Iim zc`y=#X?##uAD;6v@y^81_$^Gw83S5rAaccxqUsczXKrRTtoBn}yJbU65Svy$KHBnZ z*HqWw{hLYjH9b)-bGJBsLxmCi>b9rSG}5CJ3%ffC-0r_#jm_}4S?^hS9nF@KC$66U z*o+!ys`B2abLuc_%Fxbn%-WqFZjiZ#&vaDAkdj*~zV>b^`=B`@O&V4_F{IC@fA2(R 
zLSxXJyB*06cg(#@qGS5K&wOY}WAw_(!9wXT=kon|`~9p%0=l0xU#sg;Z+Oo8JXi3B zc)Q*H>O}Gb8#g+;n>Jhbb(g7Y`*yN zc%rCVlI)CA#Ox&cIgPQ$uw7PsBRP%4n_?`>7i>ogO43_JDgUHy8HIplji9QEK0Y| ztalU$yO}L7nvl}>CefoT*m_7oXsnq&R+6AKL5w_ige03(pkj#-HmPLoKA~o)q^irG z_rBnsNNt==b5YT$SyX!7$uJL|LK?mBr2pA4hm#8&tJp<}7zgW&r_Q4}50v+aUFJId zhQ7HUe)v+z-Z(U;1ihkC3Rfp-s9W#h9jSey9JTk7$|KIQ@>%404XHuabmUQFb)EQN z#?F1jMio9T?&!2$5iHxZ)-21ptUX^s<94vo5+?6js7ef0CtJ=<$vz;1hB^@@GzM~e<0eDNy1JoDI5pBB9mtprN)LQhJvdRYbG@CdJY z#;Y}ZB-s|^2Kr?`@5GsMPfa^L$*ANrxN>ieqs>00&(`H1I*5l>Ylt-J2T7%9wX8@} z>8>GEdWmmKtU7LO-0pJmdQxGoEUP!#@hj%pgRb+&4>a*?uO(6?2J>&{eP$ObFk9a7 zW(&(zD;{%A;SR0~vgJB7`g~totH=Dllaa3&M|P|{hH8Vm z2?a+w-QVe-b@Ze0F#ph&Qor5+sUwxnqqfOsDLNg+L-I3pR{sK;&w=GN)wd7TDBg)I zn+VSB@%L*}jK7%qYV^ca^lAw1dn(w+XTX~V>Ii4xmm3Xnfke|*O zHeIwjV6|}s@xtED7T&F64J?Vgmur$6^6U+Y+)O@PLAdFgl$%#@8y`$o=LLr|>rvYxRF5e<&Urz|{mIrtFy;F1#*d0rw{E-P(85@~C+^-_5ei4{{&TXl>&RfscFn3|h7&wpGsG zl)YKks+~WkcjL+Oy*pywcLeKm z)*y4s<_bZ;wXxEtFhc@v+bIdKzMn!sZeXlS#hfh9eTfn_tD5ZUy^lb zDJR~gy01-ewp1B@1=%akw1W9ul{+;Eefvt4ps0Mdf_+1%V<@eJ*~TxQK?9zCL6^ z9p;GmF3p_X6DtmFt4Xg^FFZGJb>hgjo*oLTV{lOa#fmbsQ&F<{=W0_brrIgkiw~l! 
z&;i#Cl+)ug^)kA%w!kHWmb1rfm(uqDqUeMc2OF!7jS3Mh?ek-afv1xE5s$7-- z_Os0qdP6rJTpO!BF`4As?%`aLxnX+4bfW z@z4OCCy5M<*p{ufty1&2<(o1NV{w+S4Aaymn^~6OmcRl&=LO5h6x!gPFXrq5hC2oV zhqAg}vVI-8b=JvxhV{|;>TP=Cm8J5#zFz6y!}@X~3bXqiUT4N?_MAiPrB{Wf#UHME z_0IR@`)s)~D|`U%wjTE!<4Rk1MxN*DeJ>Zjh&&ED%2i|)2mhOw>>b^zMOnjND4xSj zpXNADJHnR@u|z%jdiiRag0e}h{Fj5(609G;vhuJvw|`XrlQ@)b0U7{-a4{Hzi9h=z zc!C*4fy847KLuzIV92Gycs#J+m}Q_S4Pdc-Cxa&fL*;wUH!?&d0k}D!4^$ewK!2wL z@KJ&LWGUyjhz&IEzXipLCXx{P`uYR}hCoCjFgiL&lD;ku9F`!F$V7xbMhl>cGC^NK QXMTwdL4gB7KY&^P2ar)AF#rGn literal 0 HcmV?d00001 diff --git a/Supporting_Documentation/skein1.3.pdf b/Supporting_Documentation/skein1.3.pdf new file mode 100644 index 0000000000000000000000000000000000000000..844ba9e925e9d4b63408568f81c38b64f59f62cf GIT binary patch literal 479368 zcmb@u1#l!uvMnlRZZT7fnVFfn#mwAdW@cuVTFlJM%*@QJ7E8ZpcJ}V<+xc(je*0x* zR$*!B8JTY39_JkQtgmuHBGmLWOi*8^=LeRd81d=wZS>9YxwxQcrHrgi98B?9m>KZ@ z@qwZhF|%|qvcsnpvD9-g5;8KdF*JhW;eoPuurt!Lf^u2?kuYYxMu#B$_zXq;6<=R0 zHeon(r_i;*3|Q3)RTwQmN~7Fpb~?Yr7wG*Vu5f?V7YY1$5Emn>#iY{MI4xUCGna}F zL^qCvzY_vs&&M9uFa}smE@sN#Mo)W`Y!Cz1qzocoj^YB(Mo97Hm|~as++l_yAKnYm zLfI<+>Um=T1=@0lNjwwc0N;Qt+YkoKNMzb)C>AX+z@5Wwqp(<>ptt!#GqA~>Yb zbn+k*AA;?0EF~_H0D!b9f(bI;pvSYUgi54zJz-LW`VXTe)sn0We7v6dK{g+r8*P*Z zv+=xdo+hWTY*&tReOy5RLmpytGB&liar}CVd({zuB1l-ee2|YLbt*IgKkM<@2z$kw z&1LdsgujJVhIW}gFiI>>PW+S=B7hN&{%p2as}Sa11tvtsPl5qW#{n;H)nAa}sMsYV zF?*^X1Gi^il@s`K#=&iEs_qaQp38*^yMh#Ct=q2Z*wQ^k~*pjhB5)KkQ)RuKmdyoA$Nz2Z zD?rgI7}?u6+8G$xJ;j{gI77q{p zAM^ZX_0JFz`E~7IvGglo=vncZemn8kkPV;tj}ap?KFdFcEcmRy4}XOdt(={WfufNE zzQ(V66cWLwRWfpMz}NZ}Y5z0FpC|bvPTBs5QhfaX9;d(nzdnHf=_C5zp75VH|Cdgp z|HCQ&@)G-Rw+%)6zx2{?R{xyn|9ISgd(hvU_RsVFcu)pL2A02@kOEc5;;`CaH(n`? 
zlT{YPE+2{1CLGVt3hWdsUD_pTbLJEJn>8v3H5w0Yhrqk}AH@t-v|~Z|V*q@CW4gFI zU7krwF5^5B4~*O%n>)&8+bFD6zGtm-H#&btX?xeC&mHqn*Amx;8>yhoyee*Cd27Et z-dxdc%{z;?o9~kB4BTum95lc#c78UCoej74`3*bn12rid(s3&a90v)iQYX14343EHyIABn|i6uI2IuKs0`gH@`C zapSp6Yoc|&jET0ZE4Q{AR=$l{&#Y-m+vIpbR3baf%kI)ZygT)_^tKn1Dc44S7Rv;O9`Y zxV+U4Zw@L9!$ij6U+IiET^xG7t|iXUBzrp$$aZ_*rLA)MuF*i&l$-J4iq4Ks#2Fmj z6QK&(-htC-fA*FApaKOjH4Jl_74cWdi?PrUN}8TOO&~cj5yi2kUy20+w(f zbf2%RyfP4}+R8A&Xxf()!Bc#7^jNwOnkYVE^bibD0f0S?Flso$aikncGV5jTc(P;M zyS(6~s~%JMxMD|wT=6C>vdl|&sjCTTe8ydxm4}O(QVCkeX5sXH)9mt1Wb+41$oMqM zvN@N=m%yBz=@ZM^(*CR3<6J8o-SETVH6agI99E2>>jmrGY*HmOMv60)rkJ%eX0!x| zrOZMP5P}zbs0J?w7E?WfKq2QWe0{;bZ8*-zm!0m9wOO3yF-$)O-rRm|%7AAPFHm^b z)R503qlnw!om>)^Pao=3IADe%V`vgP^6ji`KNAxzfbjz9%>r9I?cBCn3--$W>JY8f zFJxhi^lLu@FlCD(;}*7?&c_cC?Y}Aub?SSjY-5CiF_JA6?~(i7N5EPTladjQAXanDOH zJ{jSB%SGRJDrG*PxvPId)S(n}siqD>(pXQm0Mh^7AGbnkn)HCKTBXKoz=q){Y^!q+ z*2hYL!k0%8nMv?9WeI3Mtgrz>)FW-HRp;;>tg_V*eJ2kAcr-;+Ce2K1WHQf|alvzZ z!x1SU8#skVnqQDjg55nC0oIfn^vF*C{^xS)gr=JhL^xuZ3%H`gswZal?nTT|zFP#2jW)?*e zjN?3kWGZ6i)deGb-Xu(XO*m-6*9e0PQ(sq5@WZ(gxECg?z6U~UL|YrMFsBi|m;fpXC`l+U^$ zPYH#GK!Qc{ZN#DgM>R91$@=kHt$Bo}?Jdb8=zGHQ zoO72jkYVXyz<5$N&o?ne7AbN^<;>GEf6P4}A;sd)njGYd8E32V&=R_WeG4}_EM%Ru zej^P!YK}|(g2f^3+aQoZsd*}Uvf7|x0-JUr9wFJGE9$ThmTG(H&gu419>ZPvNsIqL zC|kO2YnR_?rZ0?mIh=nPL~-XoB0CjGlH&meOW4=e zQj3&Dt?t(oE3+=?Rz>lU%}>L4^oV!RLOh#bqD;fd;@n%EDRGxpw8D9M_)5f5?MV{KJrO3yljnJBqk3oK3^1$UIGMw`js(LTh;Tqj}5TXKcooQ z=y^)TzM#*pJRgYnp8yL$8BOLq1oaKe)A>iS^h`7 zJVhbQ1~n)F7|-{m4*)JnYDGD46!iuAvoxW4OH|{uelIS3M90Pr^7_eztv7H4$(lt2 zbxpcM=mv!@I~{s&D-I&o5AOAGcK=LOZ+&}9`lslZ1AiP)UYbu=j(1cM`k$fo&R%R% z4XH2AX+O%^X)cq@*2_&>y!h_c!4}AFl3lRPdm;dv+f9;Xq;rhRM_cN7)2m7% zK>KaX2cLJ&x(>24nl}R;@=gxBM3tb8+SnbT~!>%C8SGirzOny>ZRF2_xEQxmM#S`@^A=J?GQtd*MkMZCu_37 zU+l*-Fw+r#2P$Mf^cs~!Nivi&??Khmsf;=(ss2Qo^P6VI8)5))&hE@urcto z+fz;GGZX8$q#wF~Hvq?^NuTh}=dKTm69BH{Ztcn zhuRx#9*Qn#D!W(fPkX*YtNGDM*Z9(^%7STR2%WMl_TW+ZifA@yiU)U2h2^6O7U@m# zfzVyy4oO>r)ZM*{escJZP&G?9G3{Ct9=!N{>Cj{6NHO$I1v9o>O2Y19VHBcZaZe?S 
zG{j95;3J<*YHFsKEj}Noe&qcsiFHN`GQ zz)0+O#gcSK3a!1bQxacVq>C0k^ax}(9#H9&Imbt{wb(rR0Q|O20@yN_4eN@&D4h1M z=xM#`E@UI+Y@6ga^989HK6Dn@71;}wc-z1K7_nBn_s4ga5$4@ks9Tp4Ek+q_7(O-L z&iA4ssyI$RbW5Y%PFTNb=#b8k?IZkJPRT8fwG zG7TpwYHVRvWSdGBi~g94b-2kyCcufTfd+{=X^l(cC^etH{@u`e;--I+l5v?_tR&^W z?!+Yxg<36g{M{}0QN%1^_=}@OxujkB{y}ds4Zzy{WdE9dAeh)Dic5PX;KcJJui0}y zpAC!kB#~=X{so2|xFSv%;ppeYGi;T`LRcsvzGt$Zpi45wj z0aw&)W<7{`o6Q{=wm417V^21<8I_k4-frp^TO3m@j#+_YFB+3~s*YQTxo^ThhHzUn zeq2g_Gd8Y0tvMVXPZCcmY8)uIJ=XM=m*NnR;EF8B!qz^S=K`6&VqWd8y-Br<_6S#d zyD^8rde!05>_}4&A0uEQoUBE-1i&yj53O_FtNR2@;p36|i=E-mJpC{8;O}Ya-({13 zC?fw`MS%V{vw!BnzuX!AB$xbCL-|{q1jC=Vw|`~MKlX|Lu9jS+{!%sBkb@pwp{_*K z#SXOg8DvO00Oxei^TJQ4l+r^3SK|mq$Y+c2v{8l%PnqI@9*hh|TIJ@4>_0wZFr%il zwQc}GN&tZ123dh86~~A1feAr{a8ltuLd8b+K!5`BW|q)z#88bgbfxmzXEd-6oa}M( zrEn}dPr^&x(qQgHWG-4BZ#OsX$hd;XFw7T|Kz^pwfMO9