diff --git a/sparc/aes.asm b/sparc/aes.asm
index bc5ad0e9f9872933cf2559ecb50b96f77035ac1c..ec1283af5e72f1839fdb3b429865775f254c1a27 100644
--- a/sparc/aes.asm
+++ b/sparc/aes.asm
@@ -63,9 +63,15 @@ define(t1, %o1)
 define(t2, %o2)
 define(t3, %o3)
 
-dnl AES_ROUND(i)
-dnl Compute one word in the round function. 
-dnl Input in wtxt, output stored in tmp + i.
+C AES_ROUND(i)
+C Compute one word in the round function. i is the byte offset of
+C the word (0, 4, 8, 12). Input in wtxt, output stored in tmp + i.
+C
+C The trailing comment on each instruction marks which j in
+C T->table[j][ Bj(wtxt[IDXi(i)]) ] that instruction is a part of.
+C
+C The code uses the register %o[j], aka tj, as the primary
+C register for that sub-expression. This holds for j==1,3.
 define(<AES_ROUND>, <
 	ld	[IDX1+$1], t1		! 1
 	
@@ -75,7 +81,7 @@ define(<AES_ROUND>, <
 	sll	t1, 2, t1		! 1
 	ld	[wtxt+$1], t0		! 0
 	! IDX2(j) = j XOR 2
-	lduh	[wtxt+eval($1 ^ 8)], t2		! 2
+	lduh	[wtxt+eval($1 ^ 8)], t2	! 2
 	and	t0, 255, t0		! 0
 	
 	ldub	[wtxt+t3], t3		! 3
@@ -172,148 +178,12 @@ _aes_crypt:
 	add	ctx, 16, key
 
 .Lround_loop:
-	! The comments mark which j in T->table[j][ Bj(wtxt[IDXi(i)]) ]
-	! the instruction is a part of. 
-	!
-	! The code uses the register %o[j], aka tj, as the primary 
-	! register for that sub-expression. True for j==1,3.
-
-	C Unrolled inner loop begins
-	
-	C i = 0
-	AES_ROUND(0)
-C 	ld	[IDX1+0], t1		! 1
-C 	
-C 	ldub	[wtxt+t1], t1		! 1
-C 	ld	[IDX3+0], t3		! 3
-C 	
-C 	sll	t1, 2, t1		! 1
-C 	ld	[wtxt], t0		! 0
-C 	! IDX2(j) = j XOR 2
-C 	lduh	[wtxt+8], t2		! 2
-C 	and	t0, 255, t0		! 0
-C 	
-C 	ldub	[wtxt+t3], t3		! 3
-C 	sll	t0, 2, t0		! 0
-C 	ld	[T0+t0], t0		! 0
-C 	and	t2, 255, t2		! 2
-C 	
-C 	ld	[T1+t1], t1		! 1
-C 	sll	t2, 2, t2		! 2
-C 	ld	[T2+t2], t2		! 2
-C 	sll	t3, 2, t3		! 3
-C 	
-C 	ld	[T3+t3], t3		! 3
-C 	xor	t0, t1, t0		! 0, 1
-C 	xor	t0, t2, t0		! 0, 1, 2
-C 	! Fetch roundkey
-C 	ld	[key], t1
-C 	
-C 	xor	t0, t3, t0		! 0, 1, 2, 3
-C 	xor	t0, t1, t0
-C 	st	t0, [tmp]
 
-	C i = 1
-	AES_ROUND(4)
-C 	ld	[IDX1+4], t1		! 1
-C 	
-C 	ldub	[wtxt+t1], t1		! 1
-C 	ld	[IDX3+4], t3		! 3
-C 	
-C 	sll	t1, 2, t1		! 1
-C 	ld	[wtxt+4], t0		! 0
-C 	! IDX2(j) = j XOR 2
-C 	lduh	[wtxt+12], t2		! 2
-C 	and	t0, 255, t0		! 0
-C 	
-C 	ldub	[wtxt+t3], t3		! 3
-C 	sll	t0, 2, t0		! 0
-C 	ld	[T0+t0], t0		! 0
-C 	and	t2, 255, t2		! 2
-C 	
-C 	ld	[T1+t1], t1		! 1
-C 	sll	t2, 2, t2		! 2
-C 	ld	[T2+t2], t2		! 2
-C 	sll	t3, 2, t3		! 3
-C 	
-C 	ld	[T3+t3], t3		! 3
-C 	xor	t0, t1, t0		! 0, 1
-C 	xor	t0, t2, t0		! 0, 1, 2
-C 	! Fetch roundkey
-C 	ld	[key+4], t1
-C 	
-C 	xor	t0, t3, t0		! 0, 1, 2, 3
-C 	xor	t0, t1, t0
-C 	st	t0, [tmp+4]
-
-	C = 2
-	AES_ROUND(8)
-C 	ld	[IDX1+8], t1		! 1
-C 	
-C 	ldub	[wtxt+t1], t1		! 1
-C 	ld	[IDX3+8], t3		! 3
-C 	
-C 	sll	t1, 2, t1		! 1
-C 	ld	[wtxt+8], t0		! 0
-C 	! IDX2(j) = j XOR 2
-C 	lduh	[wtxt], t2		! 2
-C 	and	t0, 255, t0		! 0
-C 	
-C 	ldub	[wtxt+t3], t3		! 3
-C 	sll	t0, 2, t0		! 0
-C 	ld	[T0+t0], t0		! 0
-C 	and	t2, 255, t2		! 2
-C 	
-C 	ld	[T1+t1], t1		! 1
-C 	sll	t2, 2, t2		! 2
-C 	ld	[T2+t2], t2		! 2
-C 	sll	t3, 2, t3		! 3
-C 	
-C 	ld	[T3+t3], t3		! 3
-C 	xor	t0, t1, t0		! 0, 1
-C 	xor	t0, t2, t0		! 0, 1, 2
-C 	! Fetch roundkey
-C 	ld	[key+8], t1
-C 	
-C 	xor	t0, t3, t0		! 0, 1, 2, 3
-C 	xor	t0, t1, t0
-C 	st	t0, [tmp+8]
-
-	C = 3
-	AES_ROUND(12)
-C 	ld	[IDX1+12], t1		! 1
-C 	
-C 	ldub	[wtxt+t1], t1		! 1
-C 	ld	[IDX3+12], t3		! 3
-C 	
-C 	sll	t1, 2, t1		! 1
-C 	ld	[wtxt+12], t0		! 0
-C 	! IDX2(j) = j XOR 2
-C 	lduh	[wtxt+4], t2		! 2
-C 	and	t0, 255, t0		! 0
-C 	
-C 	ldub	[wtxt+t3], t3		! 3
-C 	sll	t0, 2, t0		! 0
-C 	ld	[T0+t0], t0		! 0
-C 	and	t2, 255, t2		! 2
-C 	
-C 	ld	[T1+t1], t1		! 1
-C 	sll	t2, 2, t2		! 2
-C 	ld	[T2+t2], t2		! 2
-C 	sll	t3, 2, t3		! 3
-C 	
-C 	ld	[T3+t3], t3		! 3
-C 	xor	t0, t1, t0		! 0, 1
-C 	xor	t0, t2, t0		! 0, 1, 2
-C 	! Fetch roundkey
-C 	ld	[key+12], t1
-C 	
-C 	xor	t0, t3, t0		! 0, 1, 2, 3
-C 	xor	t0, t1, t0
-C 	st	t0, [tmp+12]
+	AES_ROUND(0)	! i = 0
+	AES_ROUND(4)	! i = 1
+	AES_ROUND(8)	! i = 2
+	AES_ROUND(12)	! i = 3
 			
-	C Unrolled inner loop ends
-	
 	! switch roles for tmp and wtxt
 	xor	wtxt, diff, wtxt
 	xor	tmp, diff, tmp