diff --git a/x86/sha1-compress.nlms b/x86/sha1-compress.nlms
new file mode 100644
index 0000000000000000000000000000000000000000..0f0ec49f578e16522c4455de177a7708a1be8ade
--- /dev/null
+++ b/x86/sha1-compress.nlms
@@ -0,0 +1,1684 @@
+/* sha1 compression function */
+/* !!! arch x86_32 */
+/* !!! signature ppn */
+/* !!! count 0 mod 80 */
+
+/* Run with loopmix -f -s -m -i -b sha1-f1-noexpand.nlms */
+
+changequote(<,>)dnl
+/* Template: fetch input word number arg1 into the register arg2, reverse its byte order, store it in the matching stack slot */
+define(<SWAP>, <
+	mov	OFFSET($1)(T2), $2	/* load the word through the input pointer */
+	bswap	$2
+	mov	$2, OFFSET($1) (DATA)	/* keep the byte-reversed word in the block buffer */
+>)dnl
+/* Round template with message expansion; round function is ((c ^ d) & b) ^ d */
+dnl ROUND_F1(a, b, c, d, e, i)
+define(<ROUND_F1>, <
+	mov	OFFSET(eval($6 % 16)) (DATA), T1	/* tmp = w[i] ^ w[i+2] ^ w[i+8] ^ w[i+13], slot numbers taken mod 16 */
+	xor	OFFSET(eval(($6 +  2) % 16)) (DATA), T1
+	xor	OFFSET(eval(($6 +  8) % 16)) (DATA), T1
+	xor	OFFSET(eval(($6 + 13) % 16)) (DATA), T1
+	rol	<$>1, T1
+	mov	T1, OFFSET(eval($6 % 16)) (DATA)	/* the rotated value replaces w[i] in place */
+	mov	$4, T2	/* ((c ^ d) & b) ^ d: takes c where b has a one bit, d elsewhere */
+	xor	$3, T2
+	and	$2, T2
+	xor	$4, T2
+	lea	K1VALUE (T1, T2), T2	/* constant + new w + round function in one address computation */
+	rol	<$>30, $2
+	mov	$1, T1
+	rol	<$>5, T1
+	add	T1, $5	/* e += rol(a, 5) */
+	add	T2, $5
+>)
+/* Round template that reads w[i] as stored, without the expansion step; round function is ((c ^ d) & b) ^ d */
+dnl ROUND_F1_NOEXP(a, b, c, d, e, i)
+define(<ROUND_F1_NOEXP>, <
+	mov	$4, T2	/* start of ((c ^ d) & b) ^ d, interleaved with the other steps */
+	xor	$3, T2
+	mov	$1, T1
+	and	$2, T2
+	add	OFFSET($6) (DATA), $5	/* e += w[i] */
+	xor	$4, T2
+	add	T2, $5
+	rol	<$>30, $2
+	rol	<$>5, T1
+	lea	K1VALUE (T1, $5), $5	/* e += rol(a, 5) + constant */
+>)
+/* Round template with message expansion; round function is b ^ c ^ d and the additive constant is the 7th argument */
+dnl ROUND_F2(a, b, c, d, e, i, k)
+define(<ROUND_F2>, <
+	mov	OFFSET(eval($6 % 16)) (DATA), T1	/* tmp = w[i] ^ w[i+2] ^ w[i+8] ^ w[i+13], slot numbers taken mod 16 */
+	xor	OFFSET(eval(($6 +  2) % 16)) (DATA), T1
+	xor	OFFSET(eval(($6 +  8) % 16)) (DATA), T1
+	xor	OFFSET(eval(($6 + 13) % 16)) (DATA), T1
+	rol	<$>1, T1
+	mov	T1, OFFSET(eval($6 % 16)) (DATA)	/* the rotated value replaces w[i] in place */
+	mov	$4, T2	/* d ^ c ^ b */
+	xor	$3, T2
+	xor	$2, T2
+	lea	$7 (T1, T2), T2	/* constant + new w + round function */
+	rol	<$>30, $2
+	mov	$1, T1
+	rol	<$>5, T1
+	add	T1, $5	/* e += rol(a, 5) */
+	add	T2, $5
+>)
+/* Round template with message expansion; adds (c & d) and ((c ^ d) & b), two terms that never share a one bit */
+dnl ROUND_F3(a, b, c, d, e, i)
+define(<ROUND_F3>, <
+	mov	OFFSET(eval($6 % 16)) (DATA), T1	/* tmp = w[i] ^ w[i+2] ^ w[i+8] ^ w[i+13], slot numbers taken mod 16 */
+	xor	OFFSET(eval(($6 +  2) % 16)) (DATA), T1
+	xor	OFFSET(eval(($6 +  8) % 16)) (DATA), T1
+	xor	OFFSET(eval(($6 + 13) % 16)) (DATA), T1
+	rol	<$>1, T1
+	mov	T1, OFFSET(eval($6 % 16)) (DATA)	/* the rotated value replaces w[i] in place */
+	mov	$4, T2	/* first term: d & c */
+	and	$3, T2
+	lea	K3VALUE (T1, T2), T1	/* constant + new w + first term */
+	mov	$4, T2	/* second term: (d ^ c) & b */
+	xor	$3, T2
+	and	$2, T2
+	add	T1, $5
+	rol	<$>30, $2
+	mov	$1, T1
+	rol	<$>5, T1
+	add	T1, $5	/* e += rol(a, 5) */
+	add	T2, $5
+>)
+
+dnl Loop body in macro form, kept for reference; the instructions after loop_begin are this listing with the macros expanded.
+dnl 	mov	88(%esp), T2
+dnl 
+dnl 	SWAP( 0, %eax) SWAP( 1, %ebx) SWAP( 2, %ecx) SWAP( 3, %edx)
+dnl 	SWAP( 4, %eax) SWAP( 5, %ebx) SWAP( 6, %ecx) SWAP( 7, %edx)
+dnl 	SWAP( 8, %eax) SWAP( 9, %ebx) SWAP(10, %ecx) SWAP(11, %edx)
+dnl 	SWAP(12, %eax) SWAP(13, %ebx) SWAP(14, %ecx) SWAP(15, %edx)
+dnl 
+dnl 	mov	84(%esp),T1
+dnl 	mov	(T1),   SA
+dnl 	mov	4(T1),  SB
+dnl 	mov	8(T1),  SC
+dnl 	mov	12(T1), SD
+dnl 	mov	16(T1), SE
+dnl 
+dnl 	ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 0)
+dnl 	ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 1)
+dnl 	ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 2)
+dnl 	ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 3)
+dnl 	ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 4)
+dnl 
+dnl 	ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 5)
+dnl 	ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 6)
+dnl 	ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 7)
+dnl 	ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 8)
+dnl 	ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 9)
+dnl 
+dnl 	ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 10)
+dnl 	ROUND_F1_NOEXP(SE, SA, SB, SC, SD, 11)
+dnl 	ROUND_F1_NOEXP(SD, SE, SA, SB, SC, 12)
+dnl 	ROUND_F1_NOEXP(SC, SD, SE, SA, SB, 13)
+dnl 	ROUND_F1_NOEXP(SB, SC, SD, SE, SA, 14)
+dnl 
+dnl 	ROUND_F1_NOEXP(SA, SB, SC, SD, SE, 15)
+dnl 	ROUND_F1(SE, SA, SB, SC, SD, 16)
+dnl 	ROUND_F1(SD, SE, SA, SB, SC, 17)
+dnl 	ROUND_F1(SC, SD, SE, SA, SB, 18)
+dnl 	ROUND_F1(SB, SC, SD, SE, SA, 19)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 20, K2VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 21, K2VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 22, K2VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 23, K2VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 24, K2VALUE)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 25, K2VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 26, K2VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 27, K2VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 28, K2VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 29, K2VALUE)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 30, K2VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 31, K2VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 32, K2VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 33, K2VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 34, K2VALUE)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 35, K2VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 36, K2VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 37, K2VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 38, K2VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 39, K2VALUE)
+dnl 
+dnl 	ROUND_F3(SA, SB, SC, SD, SE, 40)
+dnl 	ROUND_F3(SE, SA, SB, SC, SD, 41)
+dnl 	ROUND_F3(SD, SE, SA, SB, SC, 42)
+dnl 	ROUND_F3(SC, SD, SE, SA, SB, 43)
+dnl 	ROUND_F3(SB, SC, SD, SE, SA, 44)
+dnl 
+dnl 	ROUND_F3(SA, SB, SC, SD, SE, 45)
+dnl 	ROUND_F3(SE, SA, SB, SC, SD, 46)
+dnl 	ROUND_F3(SD, SE, SA, SB, SC, 47)
+dnl 	ROUND_F3(SC, SD, SE, SA, SB, 48)
+dnl 	ROUND_F3(SB, SC, SD, SE, SA, 49)
+dnl 
+dnl 	ROUND_F3(SA, SB, SC, SD, SE, 50)
+dnl 	ROUND_F3(SE, SA, SB, SC, SD, 51)
+dnl 	ROUND_F3(SD, SE, SA, SB, SC, 52)
+dnl 	ROUND_F3(SC, SD, SE, SA, SB, 53)
+dnl 	ROUND_F3(SB, SC, SD, SE, SA, 54)
+dnl 
+dnl 	ROUND_F3(SA, SB, SC, SD, SE, 55)
+dnl 	ROUND_F3(SE, SA, SB, SC, SD, 56)
+dnl 	ROUND_F3(SD, SE, SA, SB, SC, 57)
+dnl 	ROUND_F3(SC, SD, SE, SA, SB, 58)
+dnl 	ROUND_F3(SB, SC, SD, SE, SA, 59)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 60, K4VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 61, K4VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 62, K4VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 63, K4VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 64, K4VALUE)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 65, K4VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 66, K4VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 67, K4VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 68, K4VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 69, K4VALUE)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 70, K4VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 71, K4VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 72, K4VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 73, K4VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 74, K4VALUE)
+dnl 
+dnl 	ROUND_F2(SA, SB, SC, SD, SE, 75, K4VALUE)
+dnl 	ROUND_F2(SE, SA, SB, SC, SD, 76, K4VALUE)
+dnl 	ROUND_F2(SD, SE, SA, SB, SC, 77, K4VALUE)
+dnl 	ROUND_F2(SC, SD, SE, SA, SB, 78, K4VALUE)
+dnl 	ROUND_F2(SB, SC, SD, SE, SA, 79, K4VALUE)
+/* Register and stack-slot assignments used by the round code */
+define(<SA>,<%eax>)	/* the five working words a..e */
+define(<SB>,<%ebx>)
+define(<SC>,<%ecx>)
+define(<SD>,<%edx>)
+define(<SE>,<%ebp>)
+define(<DATA>,<%esp>)	/* base of the 16-word block buffer on the stack */
+define(<T1>,<%edi>)	/* scratch */
+define(<T2>,<%esi>)	/* scratch; holds the input pointer while the block is copied */
+define(<COUNT>, <92(%esp)>)	/* third stack argument, addressed past 16 bytes of saved registers and 64 bytes of buffer */
+/* Additive constants, one per group of 20 rounds */
+define(<K1VALUE>, <0x5A827999>)
+define(<K2VALUE>, <0x6ED9EBA1>)
+define(<K3VALUE>, <0x8F1BBCDC>)
+define(<K4VALUE>, <0xCA62C1D6>)
+
+dnl Expands to 4*i, or to the empty string if i is zero
+define(<OFFSET>, <ifelse($1,0,,eval(4*$1))>)	/* byte displacement of 32-bit word i; word 0 is addressed with no displacement */
+
+.text
+.p2align 4,15	/* NOTE(review): the second operand is the fill byte; the max-skip form ",,15" may have been intended */
+.globl loop_entry
+loop_entry:	/* stack arguments, in order: state pointer, input pointer, count */
+	push	%ebx	/* save the registers used below; presumably the callee-saved set of the x86-32 C convention */
+	push	%ebp
+	push	%esi
+	push	%edi
+	/* 16 bytes of saved registers plus 64 bytes of buffer: the arguments are now at 84, 88 and 92 from %esp */
+	sub	$64, %esp	/* room for the 16-word block buffer w[0..15] */
+	/* Explicit l suffix: an immediate-to-memory compare has no register operand to fix the operand size */
+	cmpl	$0, COUNT
+	jz	loop_end	/* zero count: skip the loop entirely */
+.align 32
+loop_begin:
+	/* Copy the 16 input words into the stack buffer, reversing the byte order of each */
+	mov	88(%esp), T2	/* second stack argument: input pointer */
+
+	
+	mov	OFFSET(0)(T2), %eax
+	bswap	%eax
+	mov	%eax, OFFSET(0) (DATA)
+ 
+	mov	OFFSET(1)(T2), %ebx
+	bswap	%ebx
+	mov	%ebx, OFFSET(1) (DATA)
+ 
+	mov	OFFSET(2)(T2), %ecx
+	bswap	%ecx
+	mov	%ecx, OFFSET(2) (DATA)
+ 
+	mov	OFFSET(3)(T2), %edx
+	bswap	%edx
+	mov	%edx, OFFSET(3) (DATA)
+
+	
+	mov	OFFSET(4)(T2), %eax
+	bswap	%eax
+	mov	%eax, OFFSET(4) (DATA)
+ 
+	mov	OFFSET(5)(T2), %ebx
+	bswap	%ebx
+	mov	%ebx, OFFSET(5) (DATA)
+ 
+	mov	OFFSET(6)(T2), %ecx
+	bswap	%ecx
+	mov	%ecx, OFFSET(6) (DATA)
+ 
+	mov	OFFSET(7)(T2), %edx
+	bswap	%edx
+	mov	%edx, OFFSET(7) (DATA)
+
+	
+	mov	OFFSET(8)(T2), %eax
+	bswap	%eax
+	mov	%eax, OFFSET(8) (DATA)
+ 
+	mov	OFFSET(9)(T2), %ebx
+	bswap	%ebx
+	mov	%ebx, OFFSET(9) (DATA)
+ 
+	mov	OFFSET(10)(T2), %ecx
+	bswap	%ecx
+	mov	%ecx, OFFSET(10) (DATA)
+ 
+	mov	OFFSET(11)(T2), %edx
+	bswap	%edx
+	mov	%edx, OFFSET(11) (DATA)
+
+	
+	mov	OFFSET(12)(T2), %eax
+	bswap	%eax
+	mov	%eax, OFFSET(12) (DATA)
+ 
+	mov	OFFSET(13)(T2), %ebx
+	bswap	%ebx
+	mov	%ebx, OFFSET(13) (DATA)
+ 
+	mov	OFFSET(14)(T2), %ecx
+	bswap	%ecx
+	mov	%ecx, OFFSET(14) (DATA)
+ 
+	mov	OFFSET(15)(T2), %edx
+	bswap	%edx
+	mov	%edx, OFFSET(15) (DATA)
+
+	/* Load the five working words a..e from the array given by the first stack argument */
+	mov	84(%esp),T1
+	mov	(T1),   SA
+	mov	4(T1),  SB
+	mov	8(T1),  SC
+	mov	12(T1), SD
+	mov	16(T1), SE
+
+	/* Rounds 0-15 read w[i] as loaded. Per round: e += w[i] + (((c ^ d) & b) ^ d) + rol(a,5) + first constant, b = rol(b,30); the five registers swap roles every round. Round 0, w[0] */
+	mov	SD, T2
+	xor	SC, T2
+	mov	SA, T1
+	and	SB, T2
+	add	OFFSET(0) (DATA), SE
+	xor	SD, T2
+	add	T2, SE
+	rol	$30, SB
+	rol	$5, T1
+	lea	K1VALUE (T1, SE), SE
+
+	/* Round 1, w[1] */
+	mov	SC, T2
+	xor	SB, T2
+	mov	SE, T1
+	and	SA, T2
+	add	OFFSET(1) (DATA), SD
+	xor	SC, T2
+	add	T2, SD
+	rol	$30, SA
+	rol	$5, T1
+	lea	K1VALUE (T1, SD), SD
+
+	/* Round 2, w[2] */
+	mov	SB, T2
+	xor	SA, T2
+	mov	SD, T1
+	and	SE, T2
+	add	OFFSET(2) (DATA), SC
+	xor	SB, T2
+	add	T2, SC
+	rol	$30, SE
+	rol	$5, T1
+	lea	K1VALUE (T1, SC), SC
+
+	/* Round 3, w[3] */
+	mov	SA, T2
+	xor	SE, T2
+	mov	SC, T1
+	and	SD, T2
+	add	OFFSET(3) (DATA), SB
+	xor	SA, T2
+	add	T2, SB
+	rol	$30, SD
+	rol	$5, T1
+	lea	K1VALUE (T1, SB), SB
+
+	/* Round 4, w[4] */
+	mov	SE, T2
+	xor	SD, T2
+	mov	SB, T1
+	and	SC, T2
+	add	OFFSET(4) (DATA), SA
+	xor	SE, T2
+	add	T2, SA
+	rol	$30, SC
+	rol	$5, T1
+	lea	K1VALUE (T1, SA), SA
+
+
+	/* Round 5, w[5] */
+	mov	SD, T2
+	xor	SC, T2
+	mov	SA, T1
+	and	SB, T2
+	add	OFFSET(5) (DATA), SE
+	xor	SD, T2
+	add	T2, SE
+	rol	$30, SB
+	rol	$5, T1
+	lea	K1VALUE (T1, SE), SE
+
+	/* Round 6, w[6] */
+	mov	SC, T2
+	xor	SB, T2
+	mov	SE, T1
+	and	SA, T2
+	add	OFFSET(6) (DATA), SD
+	xor	SC, T2
+	add	T2, SD
+	rol	$30, SA
+	rol	$5, T1
+	lea	K1VALUE (T1, SD), SD
+
+	/* Round 7, w[7] */
+	mov	SB, T2
+	xor	SA, T2
+	mov	SD, T1
+	and	SE, T2
+	add	OFFSET(7) (DATA), SC
+	xor	SB, T2
+	add	T2, SC
+	rol	$30, SE
+	rol	$5, T1
+	lea	K1VALUE (T1, SC), SC
+
+	/* Round 8, w[8] */
+	mov	SA, T2
+	xor	SE, T2
+	mov	SC, T1
+	and	SD, T2
+	add	OFFSET(8) (DATA), SB
+	xor	SA, T2
+	add	T2, SB
+	rol	$30, SD
+	rol	$5, T1
+	lea	K1VALUE (T1, SB), SB
+
+	/* Round 9, w[9] */
+	mov	SE, T2
+	xor	SD, T2
+	mov	SB, T1
+	and	SC, T2
+	add	OFFSET(9) (DATA), SA
+	xor	SE, T2
+	add	T2, SA
+	rol	$30, SC
+	rol	$5, T1
+	lea	K1VALUE (T1, SA), SA
+
+
+	/* Round 10, w[10] */
+	mov	SD, T2
+	xor	SC, T2
+	mov	SA, T1
+	and	SB, T2
+	add	OFFSET(10) (DATA), SE
+	xor	SD, T2
+	add	T2, SE
+	rol	$30, SB
+	rol	$5, T1
+	lea	K1VALUE (T1, SE), SE
+
+	/* Round 11, w[11] */
+	mov	SC, T2
+	xor	SB, T2
+	mov	SE, T1
+	and	SA, T2
+	add	OFFSET(11) (DATA), SD
+	xor	SC, T2
+	add	T2, SD
+	rol	$30, SA
+	rol	$5, T1
+	lea	K1VALUE (T1, SD), SD
+
+	/* Round 12, w[12] */
+	mov	SB, T2
+	xor	SA, T2
+	mov	SD, T1
+	and	SE, T2
+	add	OFFSET(12) (DATA), SC
+	xor	SB, T2
+	add	T2, SC
+	rol	$30, SE
+	rol	$5, T1
+	lea	K1VALUE (T1, SC), SC
+
+	/* Round 13, w[13] */
+	mov	SA, T2
+	xor	SE, T2
+	mov	SC, T1
+	and	SD, T2
+	add	OFFSET(13) (DATA), SB
+	xor	SA, T2
+	add	T2, SB
+	rol	$30, SD
+	rol	$5, T1
+	lea	K1VALUE (T1, SB), SB
+
+	/* Round 14, w[14] */
+	mov	SE, T2
+	xor	SD, T2
+	mov	SB, T1
+	and	SC, T2
+	add	OFFSET(14) (DATA), SA
+	xor	SE, T2
+	add	T2, SA
+	rol	$30, SC
+	rol	$5, T1
+	lea	K1VALUE (T1, SA), SA
+
+
+	/* Round 15, w[15] */
+	mov	SD, T2
+	xor	SC, T2
+	mov	SA, T1
+	and	SB, T2
+	add	OFFSET(15) (DATA), SE
+	xor	SD, T2
+	add	T2, SE
+	rol	$30, SB
+	rol	$5, T1
+	lea	K1VALUE (T1, SE), SE
+
+	/* Rounds 16-19: round function ((c ^ d) & b) ^ d with the first constant; each round first replaces its slot with rol(w[i] ^ w[i+2] ^ w[i+8] ^ w[i+13], 1), slot numbers mod 16. Round 16, slot 0 */
+	mov	OFFSET(0) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(0) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	and	SA, T2
+	xor	SC, T2
+	lea	K1VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 17, slot 1 */
+	mov	OFFSET(1) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(1) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	and	SE, T2
+	xor	SB, T2
+	lea	K1VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 18, slot 2 */
+	mov	OFFSET(2) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(2) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	and	SD, T2
+	xor	SA, T2
+	lea	K1VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 19, slot 3 */
+	mov	OFFSET(3) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(3) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	and	SC, T2
+	xor	SE, T2
+	lea	K1VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Rounds 20-39: round function b ^ c ^ d with the second constant; each round first expands its w slot in place. Round 20, slot 4 */
+	mov	OFFSET(4) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(4) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 21, slot 5 */
+	mov	OFFSET(5) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(5) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 22, slot 6 */
+	mov	OFFSET(6) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(6) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 23, slot 7 */
+	mov	OFFSET(7) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(7) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 24, slot 8 */
+	mov	OFFSET(8) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(8) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 25, slot 9 */
+	mov	OFFSET(9) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(9) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 26, slot 10 */
+	mov	OFFSET(10) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(10) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 27, slot 11 */
+	mov	OFFSET(11) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(11) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 28, slot 12 */
+	mov	OFFSET(12) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(12) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 29, slot 13 */
+	mov	OFFSET(13) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(13) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 30, slot 14 */
+	mov	OFFSET(14) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(14) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 31, slot 15 */
+	mov	OFFSET(15) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(15) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 32, slot 0 */
+	mov	OFFSET(0) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(0) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 33, slot 1 */
+	mov	OFFSET(1) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(1) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 34, slot 2 */
+	mov	OFFSET(2) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(2) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 35, slot 3 */
+	mov	OFFSET(3) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(3) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 36, slot 4 */
+	mov	OFFSET(4) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(4) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 37, slot 5 */
+	mov	OFFSET(5) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(5) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 38, slot 6 */
+	mov	OFFSET(6) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(6) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 39, slot 7 */
+	mov	OFFSET(7) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(7) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K2VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Rounds 40-59: adds (c & d) and ((c ^ d) & b), two terms that never share a one bit, with the third constant; each round first expands its w slot in place. Round 40, slot 8 */
+	mov	OFFSET(8) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(8) (DATA)
+	mov	SD, T2
+	and	SC, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SD, T2
+	xor	SC, T2
+	and	SB, T2
+	add	T1, SE
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 41, slot 9 */
+	mov	OFFSET(9) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(9) (DATA)
+	mov	SC, T2
+	and	SB, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SC, T2
+	xor	SB, T2
+	and	SA, T2
+	add	T1, SD
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 42, slot 10 */
+	mov	OFFSET(10) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(10) (DATA)
+	mov	SB, T2
+	and	SA, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SB, T2
+	xor	SA, T2
+	and	SE, T2
+	add	T1, SC
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 43, slot 11 */
+	mov	OFFSET(11) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(11) (DATA)
+	mov	SA, T2
+	and	SE, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SA, T2
+	xor	SE, T2
+	and	SD, T2
+	add	T1, SB
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 44, slot 12 */
+	mov	OFFSET(12) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(12) (DATA)
+	mov	SE, T2
+	and	SD, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SE, T2
+	xor	SD, T2
+	and	SC, T2
+	add	T1, SA
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 45, slot 13 */
+	mov	OFFSET(13) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(13) (DATA)
+	mov	SD, T2
+	and	SC, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SD, T2
+	xor	SC, T2
+	and	SB, T2
+	add	T1, SE
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 46, slot 14 */
+	mov	OFFSET(14) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(14) (DATA)
+	mov	SC, T2
+	and	SB, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SC, T2
+	xor	SB, T2
+	and	SA, T2
+	add	T1, SD
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 47, slot 15 */
+	mov	OFFSET(15) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(15) (DATA)
+	mov	SB, T2
+	and	SA, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SB, T2
+	xor	SA, T2
+	and	SE, T2
+	add	T1, SC
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 48, slot 0 */
+	mov	OFFSET(0) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(0) (DATA)
+	mov	SA, T2
+	and	SE, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SA, T2
+	xor	SE, T2
+	and	SD, T2
+	add	T1, SB
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 49, slot 1 */
+	mov	OFFSET(1) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(1) (DATA)
+	mov	SE, T2
+	and	SD, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SE, T2
+	xor	SD, T2
+	and	SC, T2
+	add	T1, SA
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 50, slot 2 */
+	mov	OFFSET(2) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(2) (DATA)
+	mov	SD, T2
+	and	SC, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SD, T2
+	xor	SC, T2
+	and	SB, T2
+	add	T1, SE
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 51, slot 3 */
+	mov	OFFSET(3) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(3) (DATA)
+	mov	SC, T2
+	and	SB, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SC, T2
+	xor	SB, T2
+	and	SA, T2
+	add	T1, SD
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 52, slot 4 */
+	mov	OFFSET(4) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(4) (DATA)
+	mov	SB, T2
+	and	SA, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SB, T2
+	xor	SA, T2
+	and	SE, T2
+	add	T1, SC
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 53, slot 5 */
+	mov	OFFSET(5) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(5) (DATA)
+	mov	SA, T2
+	and	SE, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SA, T2
+	xor	SE, T2
+	and	SD, T2
+	add	T1, SB
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 54, slot 6 */
+	mov	OFFSET(6) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(6) (DATA)
+	mov	SE, T2
+	and	SD, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SE, T2
+	xor	SD, T2
+	and	SC, T2
+	add	T1, SA
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 55, slot 7 */
+	mov	OFFSET(7) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(7) (DATA)
+	mov	SD, T2
+	and	SC, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SD, T2
+	xor	SC, T2
+	and	SB, T2
+	add	T1, SE
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 56, slot 8 */
+	mov	OFFSET(8) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(8) (DATA)
+	mov	SC, T2
+	and	SB, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SC, T2
+	xor	SB, T2
+	and	SA, T2
+	add	T1, SD
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 57, slot 9 */
+	mov	OFFSET(9) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(9) (DATA)
+	mov	SB, T2
+	and	SA, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SB, T2
+	xor	SA, T2
+	and	SE, T2
+	add	T1, SC
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 58, slot 10 */
+	mov	OFFSET(10) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(10) (DATA)
+	mov	SA, T2
+	and	SE, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SA, T2
+	xor	SE, T2
+	and	SD, T2
+	add	T1, SB
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 59, slot 11 */
+	mov	OFFSET(11) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(11) (DATA)
+	mov	SE, T2
+	and	SD, T2
+	lea	K3VALUE (T1, T2), T1
+	mov	SE, T2
+	xor	SD, T2
+	and	SC, T2
+	add	T1, SA
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Rounds 60-79: round function b ^ c ^ d with the fourth constant; each round first expands its w slot in place. Round 60, slot 12 */
+	mov	OFFSET(12) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(12) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 61, slot 13 */
+	mov	OFFSET(13) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(13) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 62, slot 14 */
+	mov	OFFSET(14) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(14) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 63, slot 15 */
+	mov	OFFSET(15) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(15) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 64, slot 0 */
+	mov	OFFSET(0) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(0) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 65, slot 1 */
+	mov	OFFSET(1) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(1) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 66, slot 2 */
+	mov	OFFSET(2) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(2) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 67, slot 3 */
+	mov	OFFSET(3) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(3) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 68, slot 4 */
+	mov	OFFSET(4) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(4) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 69, slot 5 */
+	mov	OFFSET(5) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(5) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 70, slot 6 */
+	mov	OFFSET(6) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(6) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 71, slot 7 */
+	mov	OFFSET(7) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(7) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 72, slot 8 */
+	mov	OFFSET(8) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(8) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 73, slot 9 */
+	mov	OFFSET(9) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(9) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 74, slot 10 */
+	mov	OFFSET(10) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	xor	OFFSET(2) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(10) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	/* Round 75, slot 11 */
+	mov	OFFSET(11) (DATA), T1
+	xor	OFFSET(13) (DATA), T1
+	xor	OFFSET(3) (DATA), T1
+	xor	OFFSET(8) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(11) (DATA)
+	mov	SD, T2
+	xor	SC, T2
+	xor	SB, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SB
+	mov	SA, T1
+	rol	$5, T1
+	add	T1, SE
+	add	T2, SE
+
+	/* Round 76, slot 12 */
+	mov	OFFSET(12) (DATA), T1
+	xor	OFFSET(14) (DATA), T1
+	xor	OFFSET(4) (DATA), T1
+	xor	OFFSET(9) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(12) (DATA)
+	mov	SC, T2
+	xor	SB, T2
+	xor	SA, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SA
+	mov	SE, T1
+	rol	$5, T1
+	add	T1, SD
+	add	T2, SD
+
+	/* Round 77, slot 13 */
+	mov	OFFSET(13) (DATA), T1
+	xor	OFFSET(15) (DATA), T1
+	xor	OFFSET(5) (DATA), T1
+	xor	OFFSET(10) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(13) (DATA)
+	mov	SB, T2
+	xor	SA, T2
+	xor	SE, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SE
+	mov	SD, T1
+	rol	$5, T1
+	add	T1, SC
+	add	T2, SC
+
+	/* Round 78, slot 14 */
+	mov	OFFSET(14) (DATA), T1
+	xor	OFFSET(0) (DATA), T1
+	xor	OFFSET(6) (DATA), T1
+	xor	OFFSET(11) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(14) (DATA)
+	mov	SA, T2
+	xor	SE, T2
+	xor	SD, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SD
+	mov	SC, T1
+	rol	$5, T1
+	add	T1, SB
+	add	T2, SB
+
+	/* Round 79, slot 15 */
+	mov	OFFSET(15) (DATA), T1
+	xor	OFFSET(1) (DATA), T1
+	xor	OFFSET(7) (DATA), T1
+	xor	OFFSET(12) (DATA), T1
+	rol	$1, T1
+	mov	T1, OFFSET(15) (DATA)
+	mov	SE, T2
+	xor	SD, T2
+	xor	SC, T2
+	lea	K4VALUE (T1, T2), T2
+	rol	$30, SC
+	mov	SB, T1
+	rol	$5, T1
+	add	T1, SA
+	add	T2, SA
+
+
+	subl	$80, COUNT	/* explicit l suffix: no register operand fixes the operand size; one pass accounts for 80 units of the count */
+	jnz	loop_begin	/* the loop ends only when the count hits exactly zero, so it must be a multiple of 80 */
+	/* NOTE(review): the working words are not stored back and the input pointer is not advanced before looping */
+loop_end:
+	add	$64, %esp	/* release the 16-word block buffer */
+	pop	%edi	/* restore the saved registers in reverse push order */
+	pop	%esi
+	pop	%ebp
+	pop	%ebx
+	ret
+