diff --git a/sparc/aes.asm b/sparc/aes.asm
index 11d224cccf8d338d3abc555a4ffc8a1b928576e5..83098520310a61e528c9ac3237e7396ba9673bd8 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -8,13 +8,15 @@
 	! aes192 (ECB encrypt): 16.85s, 0.593MB/s
 	! aes192 (ECB decrypt): 19.64s, 0.509MB/s
 	! aes192 (CBC encrypt): 18.43s, 0.543MB/s
-	! aes192 ((CBC decrypt)): 20.76s, 0.482MB/s
+	! aes192 (CBC decrypt): 20.76s, 0.482MB/s
 	! 
 	! aes256 (ECB encrypt): 19.12s, 0.523MB/s
 	! aes256 (ECB decrypt): 22.57s, 0.443MB/s
 	! aes256 (CBC encrypt): 20.92s, 0.478MB/s
-	! aes256 ((CBC decrypt)): 23.22s, 0.431MB/s
+	! aes256 (CBC decrypt): 23.22s, 0.431MB/s
 
+	! After unrolling key_addition32 and removing some of the
+	! sll x, 2, x shifts, encryption speed is 0.760 MB/s.
 include(`asm.m4')
 	
 	.file	"aes.asm"
@@ -206,16 +208,13 @@ aes_encrypt:
 	! or	%i4, %lo(idx), %l7
 	add	ctx, 16, %l2
 .Lencrypt_round:
-	! j:	%o7
 	! 4j:	%g2
-	mov	0, %o7
+	mov	0, %g2
 	! %g3 = &idx[3][0]
 	add	g_idx, 48, %g3
 .Lencrypt_inner:
 	! %o0 = idx[3][0]
 	ld	[%g3], %o0
-	! %g2 = 4j
-	sll	%o7, 2, %g2
 	! %o1 = idx[2][0]
 	ld	[%g3-16], %o1
 	! %o3 = wtxt[idx[3][0]], byte => bits 24-31
@@ -274,14 +273,14 @@ aes_encrypt:
 	or	%o0, %o3, %o0
 	! %o1 = dtbl[wtxt[j] & 0xff] ^ ROL(XX2 = XX3
 	xor	%o1, %o0, %o1
-	! j++
-	add	%o7, 1, %o7
 	! txt[j] (old j) = XX3 
 	st	%o1, [%l4+%g2]
 
 	! j <= 3?
-	cmp	%o7, 3
-
+	cmp	%g2, 8
+	! 4j += 4 (i.e. j++); plain add leaves the cmp condition codes intact for bleu
+	add	%g2, 4, %g2
+	
 	bleu	.Lencrypt_inner
 	! %g3 = &idx[3][j]
 	add	%g3, 4, %g3