diff --git a/x86/sha1-compress.nlms b/x86/sha1-compress.nlms new file mode 100644 index 0000000000000000000000000000000000000000..0f0ec49f578e16522c4455de177a7708a1be8ade --- /dev/null +++ b/x86/sha1-compress.nlms @@ -0,0 +1,1684 @@ +/* sha1 compression function */ +/* !!! arch x86_32 */ +/* !!! signature ppn */ +/* !!! count 0 mod 80 */ + +/* Run with loopmix -f -s -m -i -b sha1-f1-noexpand.nlms */ + +changequote(<,>)dnl + +define(<SWAP>, < + mov OFFSET($1)(T2), $2 + bswap $2 + mov $2, OFFSET($1) (DATA) +>)dnl + +dnl ROUND_F1(a, b, c, d, e, i) +define(<ROUND_F1>, < + mov OFFSET(eval($6 % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 2) % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 8) % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 13) % 16)) (DATA), T1 + rol <$>1, T1 + mov T1, OFFSET(eval($6 % 16)) (DATA) + mov $4, T2 + xor $3, T2 + and $2, T2 + xor $4, T2 + lea K1VALUE (T1, T2), T2 + rol <$>30, $2 + mov $1, T1 + rol <$>5, T1 + add T1, $5 + add T2, $5 +>) + +dnl ROUND_F1_NOEXP(a, b, c, d, e, i) +define(<ROUND_F1_NOEXP>, < + mov $4, T2 + xor $3, T2 + mov $1, T1 + and $2, T2 + add OFFSET($6) (DATA), $5 + xor $4, T2 + add T2, $5 + rol <$>30, $2 + rol <$>5, T1 + lea K1VALUE (T1, $5), $5 +>) + +dnl ROUND_F2(a, b, c, d, e, i, k) +define(<ROUND_F2>, < + mov OFFSET(eval($6 % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 2) % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 8) % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 13) % 16)) (DATA), T1 + rol <$>1, T1 + mov T1, OFFSET(eval($6 % 16)) (DATA) + mov $4, T2 + xor $3, T2 + xor $2, T2 + lea $7 (T1, T2), T2 + rol <$>30, $2 + mov $1, T1 + rol <$>5, T1 + add T1, $5 + add T2, $5 +>) + +dnl ROUND_F3(a, b, c, d, e, i) +define(<ROUND_F3>, < + mov OFFSET(eval($6 % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 2) % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 8) % 16)) (DATA), T1 + xor OFFSET(eval(($6 + 13) % 16)) (DATA), T1 + rol <$>1, T1 + mov T1, OFFSET(eval($6 % 16)) (DATA) + mov $4, T2 + and $3, T2 + lea K3VALUE (T1, T2), T1 + mov $4, T2 + xor $3, T2 + and $2, T2 + add T1, $5 + rol <$>30, $2 + mov $1, T1 + rol <$>5, T1 + add T1, $5 + add T2, $5 +>) + +dnl Loop body, expanded for the loop +dnl mov 88(%esp), T2 +dnl +dnl SWAP( 0, %eax) SWAP( 1, %ebx) SWAP( 2, %ecx) SWAP( 3, %edx) +dnl SWAP( 4, %eax) SWAP( 5, %ebx) SWAP( 6, %ecx) SWAP( 7, %edx) +dnl SWAP( 8, %eax) SWAP( 9, %ebx) SWAP(10, %ecx) SWAP(11, %edx) +dnl SWAP(12, %eax) SWAP(13, %ebx) SWAP(14, %ecx) SWAP(15, %edx) +dnl +dnl mov 84(%esp),T1 +dnl mov (T1), SA +dnl mov 4(T1), SB +dnl mov 8(T1), SC +dnl mov 12(T1), SD +dnl mov 16(T1), SE +dnl +dnl ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 0) +dnl ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 1) +dnl ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 2) +dnl ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 3) +dnl ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 4) +dnl +dnl ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 5) +dnl ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 6) +dnl ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 7) +dnl ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 8) +dnl ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 9) +dnl +dnl ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 10) +dnl ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 11) +dnl ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 12) +dnl ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 13) +dnl ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 14) +dnl +dnl ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 15) +dnl ROUND_F1(SE, SA, SB, SC, SD, 16) +dnl ROUND_F1(SD, SE, SA, SB, SC, 17) +dnl ROUND_F1(SC, SD, SE, SA, SB, 18) +dnl ROUND_F1(SB, SC, SD, SE, SA, 19) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 20, K2VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 21, K2VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 22, K2VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 23, K2VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 24, K2VALUE) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 25, K2VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 26, K2VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 27, K2VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 28, K2VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 29, K2VALUE) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 30, K2VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 31, K2VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 32, K2VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 33, K2VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 34, K2VALUE) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 35, K2VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 36, K2VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 37, K2VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 38, K2VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 39, K2VALUE) +dnl +dnl ROUND_F3(SA, SB, SC, SD, SE, 40) +dnl ROUND_F3(SE, SA, SB, SC, SD, 41) +dnl ROUND_F3(SD, SE, SA, SB, SC, 42) +dnl ROUND_F3(SC, SD, SE, SA, SB, 43) +dnl ROUND_F3(SB, SC, SD, SE, SA, 44) +dnl +dnl ROUND_F3(SA, SB, SC, SD, SE, 45) +dnl ROUND_F3(SE, SA, SB, SC, SD, 46) +dnl ROUND_F3(SD, SE, SA, SB, SC, 47) +dnl ROUND_F3(SC, SD, SE, SA, SB, 48) +dnl ROUND_F3(SB, SC, SD, SE, SA, 49) +dnl +dnl ROUND_F3(SA, SB, SC, SD, SE, 50) +dnl ROUND_F3(SE, SA, SB, SC, SD, 51) +dnl ROUND_F3(SD, SE, SA, SB, SC, 52) +dnl ROUND_F3(SC, SD, SE, SA, SB, 53) +dnl ROUND_F3(SB, SC, SD, SE, SA, 54) +dnl +dnl ROUND_F3(SA, SB, SC, SD, SE, 55) +dnl ROUND_F3(SE, SA, SB, SC, SD, 56) +dnl ROUND_F3(SD, SE, SA, SB, SC, 57) +dnl ROUND_F3(SC, SD, SE, SA, SB, 58) +dnl ROUND_F3(SB, SC, SD, SE, SA, 59) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 60, K4VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 61, K4VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 62, K4VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 63, K4VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 64, K4VALUE) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 65, K4VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 66, K4VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 67, K4VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 68, K4VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 69, K4VALUE) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 70, K4VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 71, K4VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 72, K4VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 73, K4VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 74, K4VALUE) +dnl +dnl ROUND_F2(SA, SB, SC, SD, SE, 75, K4VALUE) +dnl ROUND_F2(SE, SA, SB, SC, SD, 76, K4VALUE) +dnl ROUND_F2(SD, SE, SA, SB, SC, 77, K4VALUE) +dnl ROUND_F2(SC, SD, SE, SA, SB, 78, K4VALUE) +dnl ROUND_F2(SB, SC, SD, SE, SA, 79, K4VALUE) + +define(<SA>,<%eax>) +define(<SB>,<%ebx>) +define(<SC>,<%ecx>) +define(<SD>,<%edx>) +define(<SE>,<%ebp>) +define(<DATA>,<%esp>) +define(<T1>,<%edi>) +define(<T2>,<%esi>) +define(<COUNT>, <92(%esp)>) + +define(<K1VALUE>, <0x5A827999>) +define(<K2VALUE>, <0x6ED9EBA1>) +define(<K3VALUE>, <0x8F1BBCDC>) +define(<K4VALUE>, <0xCA62C1D6>) + +dnl Expands to 4*i, or to the empty string if i is zero +define(<OFFSET>, <ifelse($1,0,,eval(4*$1))>) + +.text +.p2align 4,15 +.globl loop_entry +loop_entry: + push %ebx + push %ebp + push %esi + push %edi + + sub $64, %esp + + cmp $0,COUNT + jz loop_end +.align 32 +loop_begin: + + mov 88(%esp), T2 + + + mov OFFSET(0)(T2), %eax + bswap %eax + mov %eax, OFFSET(0) (DATA) + + mov OFFSET(1)(T2), %ebx + bswap %ebx + mov %ebx, OFFSET(1) (DATA) + + mov OFFSET(2)(T2), %ecx + bswap %ecx + mov %ecx, OFFSET(2) (DATA) + + mov OFFSET(3)(T2), %edx + bswap %edx + mov %edx, OFFSET(3) (DATA) + + + mov OFFSET(4)(T2), %eax + bswap %eax + mov %eax, OFFSET(4) (DATA) + + mov OFFSET(5)(T2), %ebx + bswap %ebx + mov %ebx, OFFSET(5) (DATA) + + mov OFFSET(6)(T2), %ecx + bswap %ecx + mov %ecx, OFFSET(6) (DATA) + + mov OFFSET(7)(T2), %edx + bswap %edx + mov %edx, OFFSET(7) (DATA) + + + mov OFFSET(8)(T2), %eax + bswap %eax + mov %eax, OFFSET(8) (DATA) + + mov OFFSET(9)(T2), %ebx + bswap %ebx + mov %ebx, OFFSET(9) (DATA) + + mov OFFSET(10)(T2), %ecx + bswap %ecx + mov %ecx, OFFSET(10) (DATA) + + mov OFFSET(11)(T2), %edx + bswap %edx + mov %edx, OFFSET(11) (DATA) + + + mov OFFSET(12)(T2), %eax + bswap %eax + mov %eax, OFFSET(12) (DATA) + + mov OFFSET(13)(T2), %ebx + bswap %ebx + mov %ebx, OFFSET(13) (DATA) + + mov OFFSET(14)(T2), %ecx + bswap %ecx + mov %ecx, OFFSET(14) (DATA) + + mov OFFSET(15)(T2), %edx + bswap %edx + mov %edx, OFFSET(15) (DATA) + + + mov 84(%esp),T1 + mov (T1), SA + mov 4(T1), SB + mov 8(T1), SC + mov 12(T1), SD + mov 16(T1), SE + + + mov SD, T2 + xor SC, T2 + mov SA, T1 + and SB, T2 + add OFFSET(0) (DATA), SE + xor SD, T2 + add T2, SE + rol $30, SB + rol $5, T1 + lea K1VALUE (T1, SE), SE + + + mov SC, T2 + xor SB, T2 + mov SE, T1 + and SA, T2 + add OFFSET(1) (DATA), SD + xor SC, T2 + add T2, SD + rol $30, SA + rol $5, T1 + lea K1VALUE (T1, SD), SD + + + mov SB, T2 + xor SA, T2 + mov SD, T1 + and SE, T2 + add OFFSET(2) (DATA), SC + xor SB, T2 + add T2, SC + rol $30, SE + rol $5, T1 + lea K1VALUE (T1, SC), SC + + + mov SA, T2 + xor SE, T2 + mov SC, T1 + and SD, T2 + add OFFSET(3) (DATA), SB + xor SA, T2 + add T2, SB + rol $30, SD + rol $5, T1 + lea K1VALUE (T1, SB), SB + + + mov SE, T2 + xor SD, T2 + mov SB, T1 + and SC, T2 + add OFFSET(4) (DATA), SA + xor SE, T2 + add T2, SA + rol $30, SC + rol $5, T1 + lea K1VALUE (T1, SA), SA + + + + mov SD, T2 + xor SC, T2 + mov SA, T1 + and SB, T2 + add OFFSET(5) (DATA), SE + xor SD, T2 + add T2, SE + rol $30, SB + rol $5, T1 + lea K1VALUE (T1, SE), SE + + + mov SC, T2 + xor SB, T2 + mov SE, T1 + and SA, T2 + add OFFSET(6) (DATA), SD + xor SC, T2 + add T2, SD + rol $30, SA + rol $5, T1 + lea K1VALUE (T1, SD), SD + + + mov SB, T2 + xor SA, T2 + mov SD, T1 + and SE, T2 + add OFFSET(7) (DATA), SC + xor SB, T2 + add T2, SC + rol $30, SE + rol $5, T1 + lea K1VALUE (T1, SC), SC + + + mov SA, T2 + xor SE, T2 + mov SC, T1 + and SD, T2 + add OFFSET(8) (DATA), SB + xor SA, T2 + add T2, SB + rol $30, SD + rol $5, T1 + lea K1VALUE (T1, SB), SB + + + mov SE, T2 + xor SD, T2 + mov SB, T1 + and SC, T2 + add OFFSET(9) (DATA), SA + xor SE, T2 + add T2, SA + rol $30, SC + rol $5, T1 + lea K1VALUE (T1, SA), SA + + + + mov SD, T2 + xor SC, T2 + mov SA, T1 + and SB, T2 + add OFFSET(10) (DATA), SE + xor SD, T2 + add T2, SE + rol $30, SB + rol $5, T1 + lea K1VALUE (T1, SE), SE + + + mov SC, T2 + xor SB, T2 + mov SE, T1 + and SA, T2 + add OFFSET(11) (DATA), SD + xor SC, T2 + add T2, SD + rol $30, SA + rol $5, T1 + lea K1VALUE (T1, SD), SD + + + mov SB, T2 + xor SA, T2 + mov SD, T1 + and SE, T2 + add OFFSET(12) (DATA), SC + xor SB, T2 + add T2, SC + rol $30, SE + rol $5, T1 + lea K1VALUE (T1, SC), SC + + + mov SA, T2 + xor SE, T2 + mov SC, T1 + and SD, T2 + add OFFSET(13) (DATA), SB + xor SA, T2 + add T2, SB + rol $30, SD + rol $5, T1 + lea K1VALUE (T1, SB), SB + + + mov SE, T2 + xor SD, T2 + mov SB, T1 + and SC, T2 + add OFFSET(14) (DATA), SA + xor SE, T2 + add T2, SA + rol $30, SC + rol $5, T1 + lea K1VALUE (T1, SA), SA + + + + mov SD, T2 + xor SC, T2 + mov SA, T1 + and SB, T2 + add OFFSET(15) (DATA), SE + xor SD, T2 + add T2, SE + rol $30, SB + rol $5, T1 + lea K1VALUE (T1, SE), SE + + + mov OFFSET(0) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(13) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(0) (DATA) + mov SC, T2 + xor SB, T2 + and SA, T2 + xor SC, T2 + lea K1VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(1) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(14) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(1) (DATA) + mov SB, T2 + xor SA, T2 + and SE, T2 + xor SB, T2 + lea K1VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(2) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(15) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(2) (DATA) + mov SA, T2 + xor SE, T2 + and SD, T2 + xor SA, T2 + lea K1VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(3) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(0) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(3) (DATA) + mov SE, T2 + xor SD, T2 + and SC, T2 + xor SE, T2 + lea K1VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(4) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(1) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(4) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(5) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(2) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(5) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(6) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(3) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(6) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(7) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(4) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(7) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(8) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(5) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(8) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(9) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(6) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(9) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(10) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(7) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(10) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(11) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(8) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(11) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(12) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(9) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(12) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(13) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(10) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(13) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(14) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(11) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(14) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(15) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(12) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(15) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(0) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(13) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(0) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(1) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(14) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(1) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(2) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(15) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(2) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(3) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(0) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(3) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(4) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(1) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(4) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(5) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(2) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(5) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(6) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(3) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(6) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(7) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(4) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(7) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K2VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(8) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(5) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(8) (DATA) + mov SD, T2 + and SC, T2 + lea K3VALUE (T1, T2), T1 + mov SD, T2 + xor SC, T2 + and SB, T2 + add T1, SE + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(9) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(6) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(9) (DATA) + mov SC, T2 + and SB, T2 + lea K3VALUE (T1, T2), T1 + mov SC, T2 + xor SB, T2 + and SA, T2 + add T1, SD + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(10) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(7) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(10) (DATA) + mov SB, T2 + and SA, T2 + lea K3VALUE (T1, T2), T1 + mov SB, T2 + xor SA, T2 + and SE, T2 + add T1, SC + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(11) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(8) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(11) (DATA) + mov SA, T2 + and SE, T2 + lea K3VALUE (T1, T2), T1 + mov SA, T2 + xor SE, T2 + and SD, T2 + add T1, SB + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(12) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(9) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(12) (DATA) + mov SE, T2 + and SD, T2 + lea K3VALUE (T1, T2), T1 + mov SE, T2 + xor SD, T2 + and SC, T2 + add T1, SA + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(13) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(10) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(13) (DATA) + mov SD, T2 + and SC, T2 + lea K3VALUE (T1, T2), T1 + mov SD, T2 + xor SC, T2 + and SB, T2 + add T1, SE + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(14) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(11) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(14) (DATA) + mov SC, T2 + and SB, T2 + lea K3VALUE (T1, T2), T1 + mov SC, T2 + xor SB, T2 + and SA, T2 + add T1, SD + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(15) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(12) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(15) (DATA) + mov SB, T2 + and SA, T2 + lea K3VALUE (T1, T2), T1 + mov SB, T2 + xor SA, T2 + and SE, T2 + add T1, SC + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(0) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(13) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(0) (DATA) + mov SA, T2 + and SE, T2 + lea K3VALUE (T1, T2), T1 + mov SA, T2 + xor SE, T2 + and SD, T2 + add T1, SB + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(1) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(14) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(1) (DATA) + mov SE, T2 + and SD, T2 + lea K3VALUE (T1, T2), T1 + mov SE, T2 + xor SD, T2 + and SC, T2 + add T1, SA + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(2) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(15) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(2) (DATA) + mov SD, T2 + and SC, T2 + lea K3VALUE (T1, T2), T1 + mov SD, T2 + xor SC, T2 + and SB, T2 + add T1, SE + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(3) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(0) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(3) (DATA) + mov SC, T2 + and SB, T2 + lea K3VALUE (T1, T2), T1 + mov SC, T2 + xor SB, T2 + and SA, T2 + add T1, SD + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(4) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(1) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(4) (DATA) + mov SB, T2 + and SA, T2 + lea K3VALUE (T1, T2), T1 + mov SB, T2 + xor SA, T2 + and SE, T2 + add T1, SC + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(5) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(2) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(5) (DATA) + mov SA, T2 + and SE, T2 + lea K3VALUE (T1, T2), T1 + mov SA, T2 + xor SE, T2 + and SD, T2 + add T1, SB + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(6) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(3) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(6) (DATA) + mov SE, T2 + and SD, T2 + lea K3VALUE (T1, T2), T1 + mov SE, T2 + xor SD, T2 + and SC, T2 + add T1, SA + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(7) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(4) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(7) (DATA) + mov SD, T2 + and SC, T2 + lea K3VALUE (T1, T2), T1 + mov SD, T2 + xor SC, T2 + and SB, T2 + add T1, SE + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(8) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(5) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(8) (DATA) + mov SC, T2 + and SB, T2 + lea K3VALUE (T1, T2), T1 + mov SC, T2 + xor SB, T2 + and SA, T2 + add T1, SD + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(9) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(6) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(9) (DATA) + mov SB, T2 + and SA, T2 + lea K3VALUE (T1, T2), T1 + mov SB, T2 + xor SA, T2 + and SE, T2 + add T1, SC + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(10) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(7) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(10) (DATA) + mov SA, T2 + and SE, T2 + lea K3VALUE (T1, T2), T1 + mov SA, T2 + xor SE, T2 + and SD, T2 + add T1, SB + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(11) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(8) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(11) (DATA) + mov SE, T2 + and SD, T2 + lea K3VALUE (T1, T2), T1 + mov SE, T2 + xor SD, T2 + and SC, T2 + add T1, SA + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(12) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(9) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(12) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(13) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(10) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(13) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(14) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(11) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(14) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(15) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(12) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(15) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(0) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(13) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(0) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(1) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(14) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(1) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(2) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(15) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(2) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(3) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(0) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(3) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(4) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(1) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(4) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(5) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(2) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(5) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(6) (DATA), T1 + xor OFFSET(8) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(3) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(6) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(7) (DATA), T1 + xor OFFSET(9) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(4) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(7) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(8) (DATA), T1 + xor OFFSET(10) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(5) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(8) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(9) (DATA), T1 + xor OFFSET(11) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(6) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(9) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(10) (DATA), T1 + xor OFFSET(12) (DATA), T1 + xor OFFSET(2) (DATA), T1 + xor OFFSET(7) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(10) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + + mov OFFSET(11) (DATA), T1 + xor OFFSET(13) (DATA), T1 + xor OFFSET(3) (DATA), T1 + xor OFFSET(8) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(11) (DATA) + mov SD, T2 + xor SC, T2 + xor SB, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SB + mov SA, T1 + rol $5, T1 + add T1, SE + add T2, SE + + + mov OFFSET(12) (DATA), T1 + xor OFFSET(14) (DATA), T1 + xor OFFSET(4) (DATA), T1 + xor OFFSET(9) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(12) (DATA) + mov SC, T2 + xor SB, T2 + xor SA, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SA + mov SE, T1 + rol $5, T1 + add T1, SD + add T2, SD + + + mov OFFSET(13) (DATA), T1 + xor OFFSET(15) (DATA), T1 + xor OFFSET(5) (DATA), T1 + xor OFFSET(10) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(13) (DATA) + mov SB, T2 + xor SA, T2 + xor SE, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SE + mov SD, T1 + rol $5, T1 + add T1, SC + add T2, SC + + + mov OFFSET(14) (DATA), T1 + xor OFFSET(0) (DATA), T1 + xor OFFSET(6) (DATA), T1 + xor OFFSET(11) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(14) (DATA) + mov SA, T2 + xor SE, T2 + xor SD, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SD + mov SC, T1 + rol $5, T1 + add T1, SB + add T2, SB + + + mov OFFSET(15) (DATA), T1 + xor OFFSET(1) (DATA), T1 + xor OFFSET(7) (DATA), T1 + xor OFFSET(12) (DATA), T1 + rol $1, T1 + mov T1, OFFSET(15) (DATA) + mov SE, T2 + xor SD, T2 + xor SC, T2 + lea K4VALUE (T1, T2), T2 + rol $30, SC + mov SB, T1 + rol $5, T1 + add T1, SA + add T2, SA + + + sub $80, COUNT + jnz loop_begin + +loop_end: + add $64, %esp + pop %edi + pop %esi + pop %ebp + pop %ebx + ret +